Parallel agents fixes (#95)

* Parallel-agents-related bug fixes: checkpoint restore, TensorBoard integration.
* Adds narrow network support.
* Reference code for an unlimited number of checkpoints.
Author: Itai Caspi
Date: 2018-05-24 14:24:19 +03:00
Committed by: GitHub
parent 6c0b59b4de
commit d302168c8c
10 changed files with 75 additions and 41 deletions
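
The checkpoint-restore fix and the "unlimited number of checkpoints" reference code are not visible in the hunks shown below. As a rough sketch of the idea only (not the commit's code): keeping every checkpoint in TF1 comes down to building the Saver with max_to_keep=None, so nothing is ever pruned and any earlier step stays restorable.

    import os
    import tensorflow as tf

    # Minimal sketch, not Coach's code: a TF1 Saver that never deletes old
    # checkpoints. With max_to_keep=None nothing is pruned, so every save()
    # leaves a restorable checkpoint on disk.
    w = tf.get_variable("w", shape=[2, 2])
    saver = tf.train.Saver(max_to_keep=None)

    os.makedirs("./checkpoints", exist_ok=True)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for step in range(3):
            saver.save(sess, "./checkpoints/model", global_step=step)
        # Any saved step remains restorable later, e.g. step 1:
        saver.restore(sess, "./checkpoints/model-1")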


@@ -15,18 +15,20 @@
 #
 import tensorflow as tf
-from configurations import EmbedderComplexity
+from configurations import EmbedderDepth, EmbedderWidth


 class InputEmbedder(object):
     def __init__(self, input_size, activation_function=tf.nn.relu,
-                 embedder_complexity=EmbedderComplexity.Shallow, name="embedder"):
+                 embedder_depth=EmbedderDepth.Shallow, embedder_width=EmbedderWidth.Wide,
+                 name="embedder"):
         self.name = name
         self.input_size = input_size
         self.activation_function = activation_function
         self.input = None
         self.output = None
-        self.embedder_complexity = embedder_complexity
+        self.embedder_depth = embedder_depth
+        self.embedder_width = embedder_width

     def __call__(self, prev_input_placeholder=None):
         with tf.variable_scope(self.get_name()):
@@ -47,15 +49,16 @@ class InputEmbedder(object):

 class ImageEmbedder(InputEmbedder):
     def __init__(self, input_size, input_rescaler=255.0, activation_function=tf.nn.relu,
-                 embedder_complexity=EmbedderComplexity.Shallow, name="embedder"):
-        InputEmbedder.__init__(self, input_size, activation_function, embedder_complexity, name)
+                 embedder_depth=EmbedderDepth.Shallow, embedder_width=EmbedderWidth.Wide,
+                 name="embedder"):
+        InputEmbedder.__init__(self, input_size, activation_function, embedder_depth, embedder_width, name)
         self.input_rescaler = input_rescaler

     def _build_module(self):
         # image observation
         rescaled_observation_stack = self.input / self.input_rescaler
-        if self.embedder_complexity == EmbedderComplexity.Shallow:
+        if self.embedder_depth == EmbedderDepth.Shallow:
             # same embedder as used in the original DQN paper
             self.observation_conv1 = tf.layers.conv2d(rescaled_observation_stack,
                                                       filters=32, kernel_size=(8, 8), strides=(4, 4),
@@ -73,7 +76,7 @@ class ImageEmbedder(InputEmbedder):

             self.output = tf.contrib.layers.flatten(self.observation_conv3)

-        elif self.embedder_complexity == EmbedderComplexity.Deep:
+        elif self.embedder_depth == EmbedderDepth.Deep:
             # the embedder used in the CARLA papers
             self.observation_conv1 = tf.layers.conv2d(rescaled_observation_stack,
                                                       filters=32, kernel_size=(5, 5), strides=(2, 2),
@@ -115,24 +118,27 @@ class ImageEmbedder(InputEmbedder):

 class VectorEmbedder(InputEmbedder):
     def __init__(self, input_size, activation_function=tf.nn.relu,
-                 embedder_complexity=EmbedderComplexity.Shallow, name="embedder"):
-        InputEmbedder.__init__(self, input_size, activation_function, embedder_complexity, name)
+                 embedder_depth=EmbedderDepth.Shallow, embedder_width=EmbedderWidth.Wide,
+                 name="embedder"):
+        InputEmbedder.__init__(self, input_size, activation_function, embedder_depth, embedder_width, name)

     def _build_module(self):
         # vector observation
         input_layer = tf.contrib.layers.flatten(self.input)

-        if self.embedder_complexity == EmbedderComplexity.Shallow:
-            self.output = tf.layers.dense(input_layer, 256, activation=self.activation_function,
+        width = 128 if self.embedder_width == EmbedderWidth.Wide else 32
+        if self.embedder_depth == EmbedderDepth.Shallow:
+            self.output = tf.layers.dense(input_layer, 2*width, activation=self.activation_function,
                                           name='fc1')
-        elif self.embedder_complexity == EmbedderComplexity.Deep:
+        elif self.embedder_depth == EmbedderDepth.Deep:
             # the embedder used in the CARLA papers
-            self.observation_fc1 = tf.layers.dense(input_layer, 128, activation=self.activation_function,
+            self.observation_fc1 = tf.layers.dense(input_layer, width, activation=self.activation_function,
                                                    name='fc1')
-            self.observation_fc2 = tf.layers.dense(self.observation_fc1, 128, activation=self.activation_function,
+            self.observation_fc2 = tf.layers.dense(self.observation_fc1, width, activation=self.activation_function,
                                                    name='fc2')
-            self.output = tf.layers.dense(self.observation_fc2, 128, activation=self.activation_function,
+            self.output = tf.layers.dense(self.observation_fc2, width, activation=self.activation_function,
                                           name='fc3')
         else:
             raise ValueError("The defined embedder complexity value is invalid")
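
For reference, the layer sizes implied by the new flags: VectorEmbedder above picks a base width of 128 (Wide) or 32 (otherwise), then builds either one dense layer of 2*width (Shallow) or three dense layers of width each (Deep). A small standalone sketch of that selection follows, with stand-in enums; the real EmbedderDepth/EmbedderWidth live in configurations.py, and only the Wide member is visible in this diff, so the narrow member's name is a guess.

    from enum import Enum

    # Stand-ins for the enums assumed to be defined in configurations.py.
    class EmbedderDepth(Enum):
        Shallow = 1
        Deep = 2

    class EmbedderWidth(Enum):
        Wide = 1
        Narrow = 2  # hypothetical name; only Wide appears in the diff

    def vector_embedder_layer_sizes(depth, width_flag):
        # Mirrors the width/depth selection in VectorEmbedder._build_module above.
        width = 128 if width_flag == EmbedderWidth.Wide else 32
        if depth == EmbedderDepth.Shallow:
            return [2 * width]            # single fc1: 256 wide / 64 narrow
        elif depth == EmbedderDepth.Deep:
            return [width, width, width]  # fc1-fc3: 128 each wide / 32 each narrow
        raise ValueError("The defined embedder depth value is invalid")

    print(vector_embedder_layer_sizes(EmbedderDepth.Shallow, EmbedderWidth.Wide))   # [256]
    print(vector_embedder_layer_sizes(EmbedderDepth.Deep, EmbedderWidth.Narrow))    # [32, 32, 32]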


@@ -36,6 +36,7 @@ class GeneralTensorFlowNetwork(TensorFlowArchitecture):
         self.output_heads = []
         self.activation_function = self.get_activation_function(
             tuning_parameters.agent.hidden_layers_activation_function)
+        self.embedder_width = tuning_parameters.agent.embedder_width

         TensorFlowArchitecture.__init__(self, tuning_parameters, name, global_network, network_is_local)

@@ -57,22 +58,26 @@ class GeneralTensorFlowNetwork(TensorFlowArchitecture):
         def get_observation_embedding(with_timestep=False):
             if self.input_height > 1:
                 return ImageEmbedder((self.input_height, self.input_width, self.input_depth), name="observation",
-                                     input_rescaler=self.tp.agent.input_rescaler)
+                                     input_rescaler=self.tp.agent.input_rescaler, embedder_width=self.embedder_width)
             else:
-                return VectorEmbedder((self.input_width + int(with_timestep), self.input_depth), name="observation")
+                return VectorEmbedder((self.input_width + int(with_timestep), self.input_depth), name="observation",
+                                      embedder_width=self.embedder_width)

         input_mapping = {
             InputTypes.Observation: get_observation_embedding(),
-            InputTypes.Measurements: VectorEmbedder(self.measurements_size, name="measurements"),
-            InputTypes.GoalVector: VectorEmbedder(self.measurements_size, name="goal_vector"),
-            InputTypes.Action: VectorEmbedder((self.num_actions,), name="action"),
+            InputTypes.Measurements: VectorEmbedder(self.measurements_size, name="measurements",
+                                                    embedder_width=self.embedder_width),
+            InputTypes.GoalVector: VectorEmbedder(self.measurements_size, name="goal_vector",
+                                                  embedder_width=self.embedder_width),
+            InputTypes.Action: VectorEmbedder((self.num_actions,), name="action",
+                                              embedder_width=self.embedder_width),
             InputTypes.TimedObservation: get_observation_embedding(with_timestep=True),
         }
         return input_mapping[embedder_type]

     def get_middleware_embedder(self, middleware_type):
         return {MiddlewareTypes.LSTM: LSTM_Embedder,
-                MiddlewareTypes.FC: FC_Embedder}.get(middleware_type)(self.activation_function)
+                MiddlewareTypes.FC: FC_Embedder}.get(middleware_type)(self.activation_function, self.embedder_width)
@@ -174,7 +179,8 @@ class GeneralTensorFlowNetwork(TensorFlowArchitecture):
         self.losses = tf.losses.get_losses(self.name)
         self.losses += tf.losses.get_regularization_losses(self.name)
         self.total_loss = tf.losses.compute_weighted_loss(self.losses, scope=self.name)
-        tf.summary.scalar('total_loss', self.total_loss)
+        if self.tp.visualization.tensorboard:
+            tf.summary.scalar('total_loss', self.total_loss)

         # Learning rate
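
The last hunk only creates the total_loss summary op when TensorBoard visualization is enabled. A minimal sketch of that guard pattern in isolation (the flag name here is illustrative, not Coach's tuning-parameter API):

    import tensorflow as tf

    def add_loss_summary(total_loss, tensorboard_enabled):
        # Only register the scalar summary when TensorBoard output is wanted;
        # otherwise no summary op is ever added to the graph.
        if tensorboard_enabled:
            tf.summary.scalar("total_loss", total_loss)

    loss = tf.constant(0.5)
    add_loss_summary(loss, tensorboard_enabled=False)
    merged = tf.summary.merge_all()  # None when no summaries were created
    assert merged is None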


@@ -395,7 +395,6 @@ class PPOHead(Head):

     def _build_module(self, input_layer):
         eps = 1e-15
-
         if self.discrete_controls:
             self.actions = tf.placeholder(tf.int32, [None], name="actions")
         else:
@@ -410,7 +409,7 @@ class PPOHead(Head):
             self.policy_mean = tf.nn.softmax(policy_values, name="policy")

             # define the distributions for the policy and the old policy
-            self.policy_distribution = tf.contrib.distributions.Categorical(probs=self.policy_mean)
+            self.policy_distribution = tf.contrib.distributions.Categorical(probs=(self.policy_mean + eps))
             self.old_policy_distribution = tf.contrib.distributions.Categorical(probs=self.old_policy_mean)

             self.output = self.policy_mean
@@ -445,7 +444,7 @@ class PPOHead(Head):
         # calculate surrogate loss
         self.advantages = tf.placeholder(tf.float32, [None], name="advantages")
         self.target = self.advantages
-        self.likelihood_ratio = self.action_probs_wrt_policy / self.action_probs_wrt_old_policy
+        self.likelihood_ratio = self.action_probs_wrt_policy / (self.action_probs_wrt_old_policy + eps)
         if self.clip_likelihood_ratio_using_epsilon is not None:
             max_value = 1 + self.clip_likelihood_ratio_using_epsilon
             min_value = 1 - self.clip_likelihood_ratio_using_epsilon
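
Both eps changes above guard against zero probabilities: a Categorical built from exact zeros can yield NaN entropy/KL terms, and a likelihood ratio with a zero denominator becomes infinite and poisons the surrogate loss. A quick NumPy illustration of the ratio fix (not Coach code):

    import numpy as np

    eps = 1e-15
    new_probs = np.array([0.4, 0.6, 0.2])
    old_probs = np.array([0.5, 0.0, 0.2])  # old policy gave action 1 zero probability

    naive_ratio = new_probs / old_probs          # -> [0.8, inf, 1.0]; the surrogate loss turns inf/NaN
    safe_ratio = new_probs / (old_probs + eps)   # -> [0.8, 6e+14, 1.0]; huge but finite, so clipping can bound it

    print(naive_ratio, safe_ratio)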


@@ -16,13 +16,15 @@
 import tensorflow as tf
 import numpy as np

+from configurations import EmbedderWidth


 class MiddlewareEmbedder(object):
-    def __init__(self, activation_function=tf.nn.relu, name="middleware_embedder"):
+    def __init__(self, activation_function=tf.nn.relu, embedder_width=EmbedderWidth.Wide, name="middleware_embedder"):
         self.name = name
         self.input = None
         self.output = None
+        self.embedder_width = embedder_width
         self.activation_function = activation_function

     def __call__(self, input_layer):
@@ -70,4 +72,6 @@ class LSTM_Embedder(MiddlewareEmbedder):

 class FC_Embedder(MiddlewareEmbedder):
     def _build_module(self):
-        self.output = tf.layers.dense(self.input, 512, activation=self.activation_function, name='fc1')
+        width = 512 if self.embedder_width == EmbedderWidth.Wide else 64
+        self.output = tf.layers.dense(self.input, width, activation=self.activation_function, name='fc1')