Mirror of https://github.com/gryf/coach.git
Adding support for TensorBoard (#52)
* Bug fix in architecture.py where additional fetches would acquire more entries than they should
* Change in run_test.py to allow ignoring some test(s)
@@ -91,7 +91,7 @@ class Agent(object):
         self.total_steps_counter = 0
         self.running_reward = None
         self.training_iteration = 0
-        self.current_episode = 0
+        self.current_episode = self.tp.current_episode = 0
         self.curr_state = []
         self.current_episode_steps_counter = 0
         self.episode_running_info = {}
@@ -406,6 +406,7 @@ class Agent(object):
             self.reset_game()

         self.current_episode += 1
+        self.tp.current_episode = self.current_episode

         # return episode really ended
         return result['done']
@@ -21,6 +21,20 @@ from configurations import Preset, MiddlewareTypes
 import numpy as np
 import time

+def variable_summaries(var):
+    """Attach a lot of summaries to a Tensor (for TensorBoard visualization)."""
+    with tf.name_scope('summaries'):
+        layer_weight_name = '_'.join(var.name.split('/')[-3:])[:-2]
+
+        with tf.name_scope(layer_weight_name):
+            mean = tf.reduce_mean(var)
+            tf.summary.scalar('mean', mean)
+            with tf.name_scope('stddev'):
+                stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
+            tf.summary.scalar('stddev', stddev)
+            tf.summary.scalar('max', tf.reduce_max(var))
+            tf.summary.scalar('min', tf.reduce_min(var))
+            tf.summary.histogram('histogram', var)

 class TensorFlowArchitecture(Architecture):
     def __init__(self, tuning_parameters, name="", global_network=None, network_is_local=True):
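A note on the naming line above: '_'.join(var.name.split('/')[-3:])[:-2] keeps the last three scope components of the variable's name and strips the trailing ':0' tensor suffix, so each weight gets a readable group in TensorBoard. A minimal sketch with a hypothetical variable name (plain Python, no TensorFlow required):

    var_name = 'main/online/conv1/kernel:0'     # hypothetical TF variable name
    layer_weight_name = '_'.join(var_name.split('/')[-3:])[:-2]
    print(layer_weight_name)                    # -> online_conv1_kernel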
@@ -44,6 +58,7 @@ class TensorFlowArchitecture(Architecture):
         self.curr_rnn_c_in = None
         self.curr_rnn_h_in = None
         self.gradients_wrt_inputs = []
+        self.train_writer = None

         self.optimizer_type = self.tp.agent.optimizer_type
         if self.tp.seed is not None:
@@ -75,6 +90,8 @@ class TensorFlowArchitecture(Architecture):
             for idx, var in enumerate(self.trainable_weights):
                 placeholder = tf.placeholder(tf.float32, shape=var.get_shape(), name=str(idx) + '_holder')
                 self.weights_placeholders.append(placeholder)
+                variable_summaries(var)
+
             self.update_weights_from_list = [weights.assign(holder) for holder, weights in
                                              zip(self.weights_placeholders, self.trainable_weights)]

@@ -106,12 +123,22 @@ class TensorFlowArchitecture(Architecture):
             self.update_weights_from_batch_gradients = self.optimizer.apply_gradients(
                 zip(self.weights_placeholders, self.trainable_weights), global_step=self.global_step)

+            current_scope_summaries = tf.get_collection(tf.GraphKeys.SUMMARIES,
+                                                        scope=tf.contrib.framework.get_name_scope())
+            self.merged = tf.summary.merge(current_scope_summaries)
+
             # initialize or restore model
             if not self.tp.distributed:
+                # Merge all the summaries
+
                 self.init_op = tf.global_variables_initializer()

                 if self.sess:
-                    self.sess.run(self.init_op)
+                    if self.tp.visualization.tensorboard:
+                        # Write the merged summaries to the current experiment directory
+                        self.train_writer = tf.summary.FileWriter(self.tp.experiment_path + '/tensorboard',
+                                                                  self.sess.graph)
+                    self.sess.run(self.init_op)

         self.accumulated_gradients = None
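The pattern introduced here is: every tf.summary.* op is registered in the SUMMARIES collection, the ops belonging to the current name scope are merged into a single op, and a tf.summary.FileWriter dumps each evaluated summary (plus the graph) into the experiment directory. A minimal, self-contained sketch of that flow, assuming TensorFlow 1.x as used throughout this commit (the placeholder, loss and paths below are made up for illustration):

    import tensorflow as tf

    x = tf.placeholder(tf.float32, name='x')
    loss = tf.square(x, name='loss')
    tf.summary.scalar('total_loss', loss)

    # merge every summary registered in the graph so far
    merged = tf.summary.merge(tf.get_collection(tf.GraphKeys.SUMMARIES))

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        writer = tf.summary.FileWriter('./experiments/example/tensorboard', sess.graph)
        for step in range(3):
            summary = sess.run(merged, feed_dict={x: float(step)})
            writer.add_summary(summary, step)  # one summary record per step
        writer.close()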
@@ -169,8 +196,12 @@ class TensorFlowArchitecture(Architecture):
                 feed_dict[self.middleware_embedder.c_in] = self.middleware_embedder.c_init
                 feed_dict[self.middleware_embedder.h_in] = self.middleware_embedder.h_init

+        fetches += [self.merged]
+
         # get grads
         result = self.tp.sess.run(fetches, feed_dict=feed_dict)
+        if hasattr(self, 'train_writer') and self.train_writer is not None:
+            self.train_writer.add_summary(result[-1], self.tp.current_episode)

         # extract the fetches
         norm_unclipped_grads, grads, total_loss, losses = result[:4]
@@ -178,7 +209,8 @@ class TensorFlowArchitecture(Architecture):
             (self.curr_rnn_c_in, self.curr_rnn_h_in) = result[4]
         fetched_tensors = []
         if len(additional_fetches) > 0:
-            fetched_tensors = result[additional_fetches_start_idx:]
+            fetched_tensors = result[additional_fetches_start_idx:additional_fetches_start_idx +
+                                     len(additional_fetches)]

         # accumulate the gradients
         for idx, grad in enumerate(grads):
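This is the bug fix mentioned in the commit message: since the merged summary op is now appended to fetches, the open-ended slice result[additional_fetches_start_idx:] would also swallow the summary tensor and return more entries than the caller asked for. Bounding the slice to len(additional_fetches) keeps exactly the requested tensors. Illustrated with plain lists standing in for the real session results (the names and the start index are made up):

    additional_fetches = ['fetch_a', 'fetch_b']
    additional_fetches_start_idx = 5
    result = ['grads_norm', 'grads', 'total_loss', 'losses', 'rnn_state',
              'fetch_a', 'fetch_b', 'merged_summary']

    old_slice = result[additional_fetches_start_idx:]
    new_slice = result[additional_fetches_start_idx:additional_fetches_start_idx +
                       len(additional_fetches)]
    print(old_slice)  # ['fetch_a', 'fetch_b', 'merged_summary']  <- one entry too many
    print(new_slice)  # ['fetch_a', 'fetch_b']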
@@ -59,13 +59,17 @@ class ImageEmbedder(InputEmbedder):
             # same embedder as used in the original DQN paper
             self.observation_conv1 = tf.layers.conv2d(rescaled_observation_stack,
                                                       filters=32, kernel_size=(8, 8), strides=(4, 4),
-                                                      activation=self.activation_function, data_format='channels_last')
+                                                      activation=self.activation_function, data_format='channels_last',
+                                                      name='conv1')
             self.observation_conv2 = tf.layers.conv2d(self.observation_conv1,
                                                       filters=64, kernel_size=(4, 4), strides=(2, 2),
-                                                      activation=self.activation_function, data_format='channels_last')
+                                                      activation=self.activation_function, data_format='channels_last',
+                                                      name='conv2')
             self.observation_conv3 = tf.layers.conv2d(self.observation_conv2,
                                                       filters=64, kernel_size=(3, 3), strides=(1, 1),
-                                                      activation=self.activation_function, data_format='channels_last')
+                                                      activation=self.activation_function, data_format='channels_last',
+                                                      name='conv3'
+                                                      )

             self.output = tf.contrib.layers.flatten(self.observation_conv3)
@@ -73,28 +77,36 @@ class ImageEmbedder(InputEmbedder):
             # the embedder used in the CARLA papers
             self.observation_conv1 = tf.layers.conv2d(rescaled_observation_stack,
                                                       filters=32, kernel_size=(5, 5), strides=(2, 2),
-                                                      activation=self.activation_function, data_format='channels_last')
+                                                      activation=self.activation_function, data_format='channels_last',
+                                                      name='conv1')
             self.observation_conv2 = tf.layers.conv2d(self.observation_conv1,
                                                       filters=32, kernel_size=(3, 3), strides=(1, 1),
-                                                      activation=self.activation_function, data_format='channels_last')
+                                                      activation=self.activation_function, data_format='channels_last',
+                                                      name='conv2')
             self.observation_conv3 = tf.layers.conv2d(self.observation_conv2,
                                                       filters=64, kernel_size=(3, 3), strides=(2, 2),
-                                                      activation=self.activation_function, data_format='channels_last')
+                                                      activation=self.activation_function, data_format='channels_last',
+                                                      name='conv3')
             self.observation_conv4 = tf.layers.conv2d(self.observation_conv3,
                                                       filters=64, kernel_size=(3, 3), strides=(1, 1),
-                                                      activation=self.activation_function, data_format='channels_last')
+                                                      activation=self.activation_function, data_format='channels_last',
+                                                      name='conv4')
             self.observation_conv5 = tf.layers.conv2d(self.observation_conv4,
                                                       filters=128, kernel_size=(3, 3), strides=(2, 2),
-                                                      activation=self.activation_function, data_format='channels_last')
+                                                      activation=self.activation_function, data_format='channels_last',
+                                                      name='conv5')
             self.observation_conv6 = tf.layers.conv2d(self.observation_conv5,
                                                       filters=128, kernel_size=(3, 3), strides=(1, 1),
-                                                      activation=self.activation_function, data_format='channels_last')
+                                                      activation=self.activation_function, data_format='channels_last',
+                                                      name='conv6')
             self.observation_conv7 = tf.layers.conv2d(self.observation_conv6,
                                                       filters=256, kernel_size=(3, 3), strides=(2, 2),
-                                                      activation=self.activation_function, data_format='channels_last')
+                                                      activation=self.activation_function, data_format='channels_last',
+                                                      name='conv7')
             self.observation_conv8 = tf.layers.conv2d(self.observation_conv7,
                                                       filters=256, kernel_size=(3, 3), strides=(1, 1),
-                                                      activation=self.activation_function, data_format='channels_last')
+                                                      activation=self.activation_function, data_format='channels_last',
+                                                      name='conv8')

             self.output = tf.contrib.layers.flatten(self.observation_conv8)
         else:
@@ -111,12 +123,16 @@ class VectorEmbedder(InputEmbedder):
         input_layer = tf.contrib.layers.flatten(self.input)

         if self.embedder_complexity == EmbedderComplexity.Shallow:
-            self.output = tf.layers.dense(input_layer, 256, activation=self.activation_function)
+            self.output = tf.layers.dense(input_layer, 256, activation=self.activation_function,
+                                          name='fc1')

         elif self.embedder_complexity == EmbedderComplexity.Deep:
             # the embedder used in the CARLA papers
-            self.observation_fc1 = tf.layers.dense(input_layer, 128, activation=self.activation_function)
-            self.observation_fc2 = tf.layers.dense(self.observation_fc1, 128, activation=self.activation_function)
-            self.output = tf.layers.dense(self.observation_fc2, 128, activation=self.activation_function)
+            self.observation_fc1 = tf.layers.dense(input_layer, 128, activation=self.activation_function,
+                                                   name='fc1')
+            self.observation_fc2 = tf.layers.dense(self.observation_fc1, 128, activation=self.activation_function,
+                                                   name='fc2')
+            self.output = tf.layers.dense(self.observation_fc2, 128, activation=self.activation_function,
+                                          name='fc3')
         else:
             raise ValueError("The defined embedder complexity value is invalid")
@@ -171,6 +171,8 @@ class GeneralTensorFlowNetwork(TensorFlowArchitecture):
         self.losses = tf.losses.get_losses(self.name)
         self.losses += tf.losses.get_regularization_losses(self.name)
         self.total_loss = tf.losses.compute_weighted_loss(self.losses, scope=self.name)
+        tf.summary.scalar('total_loss', self.total_loss)
+

         # Learning rate
         if self.tp.learning_rate_decay_rate != 0:
@@ -125,14 +125,14 @@ class DuelingQHead(QHead):
     def _build_module(self, input_layer):
         # state value tower - V
         with tf.variable_scope("state_value"):
-            state_value = tf.layers.dense(input_layer, 256, activation=tf.nn.relu)
-            state_value = tf.layers.dense(state_value, 1)
+            state_value = tf.layers.dense(input_layer, 256, activation=tf.nn.relu, name='fc1')
+            state_value = tf.layers.dense(state_value, 1, name='fc2')
             # state_value = tf.expand_dims(state_value, axis=-1)

         # action advantage tower - A
         with tf.variable_scope("action_advantage"):
-            action_advantage = tf.layers.dense(input_layer, 256, activation=tf.nn.relu)
-            action_advantage = tf.layers.dense(action_advantage, self.num_actions)
+            action_advantage = tf.layers.dense(input_layer, 256, activation=tf.nn.relu, name='fc1')
+            action_advantage = tf.layers.dense(action_advantage, self.num_actions, name='fc2')
             action_advantage = action_advantage - tf.reduce_mean(action_advantage)

         # merge to state-action value function Q
@@ -177,7 +177,7 @@ class PolicyHead(Head):

         # Policy Head
         if self.discrete_controls:
-            policy_values = tf.layers.dense(input_layer, self.num_actions)
+            policy_values = tf.layers.dense(input_layer, self.num_actions, name='fc')
             self.policy_mean = tf.nn.softmax(policy_values, name="policy")

             # define the distributions for the policy and the old policy
@@ -186,7 +186,7 @@ class PolicyHead(Head):
             self.output = self.policy_mean
         else:
             # mean
-            policy_values_mean = tf.layers.dense(input_layer, self.num_actions, activation=tf.nn.tanh)
+            policy_values_mean = tf.layers.dense(input_layer, self.num_actions, activation=tf.nn.tanh, name='fc_mean')
             self.policy_mean = tf.multiply(policy_values_mean, self.output_scale, name='output_mean')

             self.output = [self.policy_mean]
@@ -194,7 +194,7 @@ class PolicyHead(Head):
             # std
             if self.exploration_policy == 'ContinuousEntropy':
                 policy_values_std = tf.layers.dense(input_layer, self.num_actions,
-                                                    kernel_initializer=normalized_columns_initializer(0.01))
+                                                    kernel_initializer=normalized_columns_initializer(0.01), name='fc_std')
                 self.policy_std = tf.nn.softplus(policy_values_std, name='output_variance') + eps

                 self.output.append(self.policy_std)
@@ -239,14 +239,15 @@ class MeasurementsPredictionHead(Head):
         # This is almost exactly the same as Dueling Network but we predict the future measurements for each action
         # actions expectation tower (expectation stream) - E
         with tf.variable_scope("expectation_stream"):
-            expectation_stream = tf.layers.dense(input_layer, 256, activation=tf.nn.elu)
-            expectation_stream = tf.layers.dense(expectation_stream, self.multi_step_measurements_size)
+            expectation_stream = tf.layers.dense(input_layer, 256, activation=tf.nn.elu, name='fc1')
+            expectation_stream = tf.layers.dense(expectation_stream, self.multi_step_measurements_size, name='output')
             expectation_stream = tf.expand_dims(expectation_stream, axis=1)

         # action fine differences tower (action stream) - A
         with tf.variable_scope("action_stream"):
-            action_stream = tf.layers.dense(input_layer, 256, activation=tf.nn.elu)
-            action_stream = tf.layers.dense(action_stream, self.num_actions * self.multi_step_measurements_size)
+            action_stream = tf.layers.dense(input_layer, 256, activation=tf.nn.elu, name='fc1')
+            action_stream = tf.layers.dense(action_stream, self.num_actions * self.multi_step_measurements_size,
+                                            name='output')
             action_stream = tf.reshape(action_stream,
                                        (tf.shape(action_stream)[0], self.num_actions, self.multi_step_measurements_size))
             action_stream = action_stream - tf.reduce_mean(action_stream, reduction_indices=1, keep_dims=True)
@@ -393,7 +394,7 @@ class PPOHead(Head):
         # Policy Head
         if self.discrete_controls:
             self.input = [self.actions, self.old_policy_mean]
-            policy_values = tf.layers.dense(input_layer, self.num_actions)
+            policy_values = tf.layers.dense(input_layer, self.num_actions, name='policy_fc')
             self.policy_mean = tf.nn.softmax(policy_values, name="policy")

             # define the distributions for the policy and the old policy
@@ -488,7 +489,7 @@ class CategoricalQHead(Head):
         self.actions = tf.placeholder(tf.int32, [None], name="actions")
         self.input = [self.actions]

-        values_distribution = tf.layers.dense(input_layer, self.num_actions * self.num_atoms)
+        values_distribution = tf.layers.dense(input_layer, self.num_actions * self.num_atoms, name='output')
         values_distribution = tf.reshape(values_distribution, (tf.shape(values_distribution)[0], self.num_actions, self.num_atoms))
         # softmax on atoms dimension
         self.output = tf.nn.softmax(values_distribution)
@@ -514,7 +515,7 @@ class QuantileRegressionQHead(Head):
         self.input = [self.actions, self.quantile_midpoints]

         # the output of the head is the N unordered quantile locations {theta_1, ..., theta_N}
-        quantiles_locations = tf.layers.dense(input_layer, self.num_actions * self.num_atoms)
+        quantiles_locations = tf.layers.dense(input_layer, self.num_actions * self.num_atoms, name='output')
         quantiles_locations = tf.reshape(quantiles_locations, (tf.shape(quantiles_locations)[0], self.num_actions, self.num_atoms))
         self.output = quantiles_locations
@@ -50,7 +50,7 @@ class LSTM_Embedder(MiddlewareEmbedder):
        which would definitely be wrong. need to double check the shape
        """

-        middleware = tf.layers.dense(self.input, 512, activation=self.activation_function)
+        middleware = tf.layers.dense(self.input, 512, activation=self.activation_function, name='fc1')
         lstm_cell = tf.contrib.rnn.BasicLSTMCell(256, state_is_tuple=True)
         self.c_init = np.zeros((1, lstm_cell.state_size.c), np.float32)
         self.h_init = np.zeros((1, lstm_cell.state_size.h), np.float32)
@@ -70,4 +70,4 @@ class LSTM_Embedder(MiddlewareEmbedder):

 class FC_Embedder(MiddlewareEmbedder):
     def _build_module(self):
-        self.output = tf.layers.dense(self.input, 512, activation=self.activation_function)
+        self.output = tf.layers.dense(self.input, 512, activation=self.activation_function, name='fc1')
coach.py
@@ -178,6 +178,7 @@ def check_input_and_fill_run_dict(parser):
     # visualization
     run_dict['visualization.dump_gifs'] = args.dump_gifs
     run_dict['visualization.render'] = args.render
+    run_dict['visualization.tensorboard'] = args.tensorboard

     return args, run_dict

@@ -276,6 +277,9 @@ if __name__ == "__main__":
     parser.add_argument('--print_parameters',
                         help="(flag) Print tuning_parameters to stdout",
                         action='store_true')
+    parser.add_argument('-tb', '--tensorboard',
+                        help="(flag) When using the TensorFlow backend, enable TensorBoard log dumps. ",
+                        action='store_true')

     args, run_dict = check_input_and_fill_run_dict(parser)

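With this flag, a run such as python coach.py ... -tb (all other arguments as usual) writes TensorFlow event files under the experiment's tensorboard/ sub-directory, which can then be inspected with tensorboard --logdir <experiment_path>/tensorboard. The exact experiment path depends on the chosen preset and experiment name, so the paths here are only indicative.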
@@ -226,6 +226,7 @@ class GeneralParameters(Parameters):
     evaluate_every_x_episodes = 1000000
     evaluate_every_x_training_iterations = 0
     rescaling_interpolation_type = 'bilinear'
+    current_episode = 0

     # setting a seed will only work for non-parallel algorithms. Parallel algorithms add uncontrollable noise in
     # the form of different workers starting at different times, and getting different assignments of CPU
@@ -253,6 +254,7 @@ class VisualizationParameters(Parameters):
     render = False
     dump_gifs = True
     max_fps_for_human_control = 10
+    tensorboard = False


 class Roboschool(EnvironmentParameters):
@@ -157,6 +157,12 @@ if __name__ == "__main__":
    for network in agent.networks:
        network.set_session(sess)

+    if tuning_parameters.visualization.tensorboard:
+        # Write the merged summaries to the current experiment directory
+        agent.main_network.online_network.train_writer = tf.summary.FileWriter(
+            tuning_parameters.experiment_path + '/tensorboard_worker{}'.format(tuning_parameters.task_id),
+            sess.graph)
+
    # Start the training or evaluation
    if tuning_parameters.evaluate_only:
        agent.evaluate(sys.maxsize, keep_networks_synced=True)  # evaluate forever
run_test.py
@@ -37,6 +37,10 @@ if __name__ == '__main__':
                        help="(string) Name of a preset to run (as configured in presets.py)",
                        default=None,
                        type=str)
+    parser.add_argument('-ip', '--ignore_presets',
+                        help="(string) Name of a preset(s) to ignore (comma separated, and as configured in presets.py)",
+                        default=None,
+                        type=str)
    parser.add_argument('-itf', '--ignore_tensorflow',
                        help="(flag) Don't test TensorFlow presets.",
                        action='store_true')
@@ -65,10 +69,13 @@ if __name__ == '__main__':
    test_path = os.path.join('./experiments', test_name)
    if path.exists(test_path):
        shutil.rmtree(test_path)

+    if args.ignore_presets is not None:
+        presets_to_ignore = args.ignore_presets.split(',')
+    else:
+        presets_to_ignore = []
    for idx, preset_name in enumerate(presets_lists):
        preset = eval('presets.{}()'.format(preset_name))
-        if preset.test:
+        if preset.test and preset_name not in presets_to_ignore:
            frameworks = []
            if preset.agent.tensorflow_support and not args.ignore_tensorflow:
                frameworks.append('tensorflow')
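Usage note: the new flag takes a comma-separated list, for example python run_test.py -ip Preset_A,Preset_B (the preset names here are placeholders); the listed presets are skipped while every other preset whose test attribute is set still runs, and -itf can still be combined with it to skip the TensorFlow presets entirely.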