Mirror of https://github.com/gryf/coach.git
Parallel agents fixes (#95)
* Parallel-agents-related bug fixes: checkpoint restore and TensorBoard integration. Adds support for narrow networks. Reference code for an unlimited number of checkpoints.
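The hunks below replace the single embedder_complexity knob with separate embedder_depth and embedder_width settings imported from configurations. Those definitions are not part of this diff; the sketch below is only for orientation and assumes enum-style values. Shallow, Deep and Wide appear in the diff, while the name Narrow for the non-wide option is an assumption.

from enum import Enum

class EmbedderDepth(Enum):
    # Orientation sketch only -- not taken from the repository's configurations module.
    Shallow = 1
    Deep = 2

class EmbedderWidth(Enum):
    Wide = 1
    Narrow = 2  # assumed name for the non-wide option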
@@ -15,18 +15,20 @@
 #
 
 import tensorflow as tf
-from configurations import EmbedderComplexity
+from configurations import EmbedderDepth, EmbedderWidth
 
 
 class InputEmbedder(object):
     def __init__(self, input_size, activation_function=tf.nn.relu,
-                 embedder_complexity=EmbedderComplexity.Shallow, name="embedder"):
+                 embedder_depth=EmbedderDepth.Shallow, embedder_width=EmbedderWidth.Wide,
+                 name="embedder"):
         self.name = name
         self.input_size = input_size
         self.activation_function = activation_function
         self.input = None
         self.output = None
-        self.embedder_complexity = embedder_complexity
+        self.embedder_depth = embedder_depth
+        self.embedder_width = embedder_width
 
     def __call__(self, prev_input_placeholder=None):
         with tf.variable_scope(self.get_name()):
@@ -47,15 +49,16 @@ class InputEmbedder(object):
 
 class ImageEmbedder(InputEmbedder):
     def __init__(self, input_size, input_rescaler=255.0, activation_function=tf.nn.relu,
-                 embedder_complexity=EmbedderComplexity.Shallow, name="embedder"):
-        InputEmbedder.__init__(self, input_size, activation_function, embedder_complexity, name)
+                 embedder_depth=EmbedderDepth.Shallow, embedder_width=EmbedderWidth.Wide,
+                 name="embedder"):
+        InputEmbedder.__init__(self, input_size, activation_function, embedder_depth, embedder_width, name)
         self.input_rescaler = input_rescaler
 
     def _build_module(self):
         # image observation
         rescaled_observation_stack = self.input / self.input_rescaler
 
-        if self.embedder_complexity == EmbedderComplexity.Shallow:
+        if self.embedder_depth == EmbedderDepth.Shallow:
             # same embedder as used in the original DQN paper
             self.observation_conv1 = tf.layers.conv2d(rescaled_observation_stack,
                                                       filters=32, kernel_size=(8, 8), strides=(4, 4),
@@ -73,7 +76,7 @@ class ImageEmbedder(InputEmbedder):
 
             self.output = tf.contrib.layers.flatten(self.observation_conv3)
 
-        elif self.embedder_complexity == EmbedderComplexity.Deep:
+        elif self.embedder_depth == EmbedderDepth.Deep:
             # the embedder used in the CARLA papers
             self.observation_conv1 = tf.layers.conv2d(rescaled_observation_stack,
                                                       filters=32, kernel_size=(5, 5), strides=(2, 2),
@@ -115,24 +118,27 @@ class ImageEmbedder(InputEmbedder):
 
 class VectorEmbedder(InputEmbedder):
     def __init__(self, input_size, activation_function=tf.nn.relu,
-                 embedder_complexity=EmbedderComplexity.Shallow, name="embedder"):
-        InputEmbedder.__init__(self, input_size, activation_function, embedder_complexity, name)
+                 embedder_depth=EmbedderDepth.Shallow, embedder_width=EmbedderWidth.Wide,
+                 name="embedder"):
+        InputEmbedder.__init__(self, input_size, activation_function, embedder_depth, embedder_width, name)
 
     def _build_module(self):
         # vector observation
         input_layer = tf.contrib.layers.flatten(self.input)
 
-        if self.embedder_complexity == EmbedderComplexity.Shallow:
-            self.output = tf.layers.dense(input_layer, 256, activation=self.activation_function,
+        width = 128 if self.embedder_width == EmbedderWidth.Wide else 32
+
+        if self.embedder_depth == EmbedderDepth.Shallow:
+            self.output = tf.layers.dense(input_layer, 2*width, activation=self.activation_function,
                                           name='fc1')
 
-        elif self.embedder_complexity == EmbedderComplexity.Deep:
+        elif self.embedder_depth == EmbedderDepth.Deep:
             # the embedder used in the CARLA papers
-            self.observation_fc1 = tf.layers.dense(input_layer, 128, activation=self.activation_function,
+            self.observation_fc1 = tf.layers.dense(input_layer, width, activation=self.activation_function,
                                                    name='fc1')
-            self.observation_fc2 = tf.layers.dense(self.observation_fc1, 128, activation=self.activation_function,
+            self.observation_fc2 = tf.layers.dense(self.observation_fc1, width, activation=self.activation_function,
                                                    name='fc2')
-            self.output = tf.layers.dense(self.observation_fc2, 128, activation=self.activation_function,
+            self.output = tf.layers.dense(self.observation_fc2, width, activation=self.activation_function,
                                           name='fc3')
         else:
             raise ValueError("The defined embedder complexity value is invalid")
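Note on the new sizing: for the wide setting the shallow vector embedder keeps its previous 256-unit layer (2 * 128), while a non-wide setting shrinks it to 64, and the deep path goes from three 128-unit layers to three 32-unit layers. A standalone sketch of that rule, reusing the EmbedderDepth/EmbedderWidth sketch above (names as assumed there):

def vector_embedder_layer_sizes(embedder_depth, embedder_width):
    # Mirrors the sizing logic in VectorEmbedder._build_module above.
    width = 128 if embedder_width == EmbedderWidth.Wide else 32
    if embedder_depth == EmbedderDepth.Shallow:
        return [2 * width]            # one fc layer: 256 wide, 64 otherwise
    if embedder_depth == EmbedderDepth.Deep:
        return [width, width, width]  # three fc layers of 128 or 32 units
    raise ValueError("The defined embedder depth value is invalid")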
@@ -36,6 +36,7 @@ class GeneralTensorFlowNetwork(TensorFlowArchitecture):
         self.output_heads = []
         self.activation_function = self.get_activation_function(
             tuning_parameters.agent.hidden_layers_activation_function)
+        self.embedder_width = tuning_parameters.agent.embedder_width
 
         TensorFlowArchitecture.__init__(self, tuning_parameters, name, global_network, network_is_local)
 
@@ -57,22 +58,26 @@ class GeneralTensorFlowNetwork(TensorFlowArchitecture):
         def get_observation_embedding(with_timestep=False):
             if self.input_height > 1:
                 return ImageEmbedder((self.input_height, self.input_width, self.input_depth), name="observation",
-                                     input_rescaler=self.tp.agent.input_rescaler)
+                                     input_rescaler=self.tp.agent.input_rescaler, embedder_width=self.embedder_width)
             else:
-                return VectorEmbedder((self.input_width + int(with_timestep), self.input_depth), name="observation")
+                return VectorEmbedder((self.input_width + int(with_timestep), self.input_depth), name="observation",
+                                      embedder_width=self.embedder_width)
 
         input_mapping = {
             InputTypes.Observation: get_observation_embedding(),
-            InputTypes.Measurements: VectorEmbedder(self.measurements_size, name="measurements"),
-            InputTypes.GoalVector: VectorEmbedder(self.measurements_size, name="goal_vector"),
-            InputTypes.Action: VectorEmbedder((self.num_actions,), name="action"),
+            InputTypes.Measurements: VectorEmbedder(self.measurements_size, name="measurements",
+                                                    embedder_width=self.embedder_width),
+            InputTypes.GoalVector: VectorEmbedder(self.measurements_size, name="goal_vector",
+                                                  embedder_width=self.embedder_width),
+            InputTypes.Action: VectorEmbedder((self.num_actions,), name="action",
+                                              embedder_width=self.embedder_width),
             InputTypes.TimedObservation: get_observation_embedding(with_timestep=True),
         }
         return input_mapping[embedder_type]
 
     def get_middleware_embedder(self, middleware_type):
         return {MiddlewareTypes.LSTM: LSTM_Embedder,
-                MiddlewareTypes.FC: FC_Embedder}.get(middleware_type)(self.activation_function)
+                MiddlewareTypes.FC: FC_Embedder}.get(middleware_type)(self.activation_function, self.embedder_width)
 
     def get_output_head(self, head_type, head_idx, loss_weight=1.):
         output_mapping = {
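The width setting now flows from the tuning parameters into every vector embedder and into the FC middleware. How a run would actually request narrow networks is not shown in this diff; the snippet below is a hypothetical usage sketch. The attribute path agent.embedder_width matches the diff, but the stand-in tuning_parameters object and the Narrow member are placeholders.

from types import SimpleNamespace

# Stand-in for the real preset / tuning-parameters object (placeholder only).
tuning_parameters = SimpleNamespace(agent=SimpleNamespace())

# The default in the diff is EmbedderWidth.Wide; switching to the assumed Narrow
# value would make GeneralTensorFlowNetwork build the smaller fc layers everywhere.
tuning_parameters.agent.embedder_width = EmbedderWidth.Narrow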
@@ -174,7 +179,8 @@ class GeneralTensorFlowNetwork(TensorFlowArchitecture):
             self.losses = tf.losses.get_losses(self.name)
             self.losses += tf.losses.get_regularization_losses(self.name)
             self.total_loss = tf.losses.compute_weighted_loss(self.losses, scope=self.name)
-            tf.summary.scalar('total_loss', self.total_loss)
+            if self.tp.visualization.tensorboard:
+                tf.summary.scalar('total_loss', self.total_loss)
 
 
             # Learning rate
@@ -395,7 +395,6 @@ class PPOHead(Head):
 
     def _build_module(self, input_layer):
         eps = 1e-15
-
         if self.discrete_controls:
             self.actions = tf.placeholder(tf.int32, [None], name="actions")
         else:
@@ -410,7 +409,7 @@ class PPOHead(Head):
             self.policy_mean = tf.nn.softmax(policy_values, name="policy")
 
             # define the distributions for the policy and the old policy
-            self.policy_distribution = tf.contrib.distributions.Categorical(probs=self.policy_mean)
+            self.policy_distribution = tf.contrib.distributions.Categorical(probs=(self.policy_mean + eps))
             self.old_policy_distribution = tf.contrib.distributions.Categorical(probs=self.old_policy_mean)
 
             self.output = self.policy_mean
@@ -445,7 +444,7 @@ class PPOHead(Head):
         # calculate surrogate loss
         self.advantages = tf.placeholder(tf.float32, [None], name="advantages")
         self.target = self.advantages
-        self.likelihood_ratio = self.action_probs_wrt_policy / self.action_probs_wrt_old_policy
+        self.likelihood_ratio = self.action_probs_wrt_policy / (self.action_probs_wrt_old_policy + eps)
         if self.clip_likelihood_ratio_using_epsilon is not None:
             max_value = 1 + self.clip_likelihood_ratio_using_epsilon
             min_value = 1 - self.clip_likelihood_ratio_using_epsilon
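Both PPO changes add the eps defined at the top of _build_module (1e-15) to a probability term, a standard guard against zero probabilities turning the likelihood ratio into inf or NaN. A small NumPy illustration with made-up values:

import numpy as np

eps = 1e-15
# Made-up probabilities; the second action gets exactly zero mass under the old policy.
action_probs_wrt_policy = np.array([0.2, 1e-20])
action_probs_wrt_old_policy = np.array([0.25, 0.0])

raw_ratio = action_probs_wrt_policy / action_probs_wrt_old_policy            # [0.8, inf]
safe_ratio = action_probs_wrt_policy / (action_probs_wrt_old_policy + eps)   # [0.8, 1e-05]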
@@ -16,13 +16,15 @@
 
 import tensorflow as tf
 import numpy as np
+from configurations import EmbedderWidth
 
 
 class MiddlewareEmbedder(object):
-    def __init__(self, activation_function=tf.nn.relu, name="middleware_embedder"):
+    def __init__(self, activation_function=tf.nn.relu, embedder_width=EmbedderWidth.Wide, name="middleware_embedder"):
         self.name = name
         self.input = None
         self.output = None
+        self.embedder_width = embedder_width
         self.activation_function = activation_function
 
     def __call__(self, input_layer):
@@ -70,4 +72,6 @@ class LSTM_Embedder(MiddlewareEmbedder):
 
 class FC_Embedder(MiddlewareEmbedder):
     def _build_module(self):
-        self.output = tf.layers.dense(self.input, 512, activation=self.activation_function, name='fc1')
+        width = 512 if self.embedder_width == EmbedderWidth.Wide else 64
+        self.output = tf.layers.dense(self.input, width, activation=self.activation_function, name='fc1')
+
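Taken together, the wide/narrow switch only affects fully connected sizes; the convolutional layers of the image embedder are untouched. A compact summary of the sizes implied by the hunks above ("narrow" is shorthand for any non-wide setting):

FC_SIZES = {
    # Derived from the hunks above; values are units per fully connected layer.
    "vector_embedder_shallow": {"wide": 256, "narrow": 64},   # 2 * width
    "vector_embedder_deep":    {"wide": 128, "narrow": 32},   # three layers of width
    "middleware_fc":           {"wide": 512, "narrow": 64},
}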