# coach/architectures/tensorflow_components/architecture.py

#
# Copyright (c) 2017 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import time

import tensorflow as tf

from architectures import architecture
import configurations as conf
import utils

def variable_summaries(var):
    """
    Register TensorBoard summaries (mean, stddev, max, min and a histogram) for the given variable.

    All the summary ops are created under the 'summaries' name scope, inside a sub-scope named after the variable.
    :param var: the variable to summarize; its name attribute is used to build the sub-scope name
    """
    # the sub-scope name is made of the last three components of the variable name, joined by underscores, with
    # the final two characters dropped (presumably the ':0' output suffix of the variable name)
    name_parts = var.name.split('/')
    scope_label = '_'.join(name_parts[-3:])[:-2]

    with tf.name_scope('summaries'), tf.name_scope(scope_label):
        average = tf.reduce_mean(var)
        tf.summary.scalar('mean', average)

        # the standard deviation ops get their own scope
        with tf.name_scope('stddev'):
            centered = var - average
            variance = tf.reduce_mean(tf.square(centered))
            deviation = tf.sqrt(variance)
        tf.summary.scalar('stddev', deviation)

        for tag, reducer in (('max', tf.reduce_max), ('min', tf.reduce_min)):
            tf.summary.scalar(tag, reducer(var))
        tf.summary.histogram('histogram', var)

class TensorFlowArchitecture(architecture.Architecture):
    """
    TensorFlow implementation of a network architecture.

    The constructor builds, inside a variable scope named after the network, the model itself (through get_model),
    the placeholders and assign ops used to set the weights, the gradient ops, the optional counters used as
    barriers for synchronous distributed training, and the op which applies externally supplied gradients.
    The methods allow accumulating gradients, applying them, running predictions and reading / writing weights
    and variables through the session.
    """

    def __init__(self, tuning_parameters, name="", global_network=None, network_is_local=True):
        """
        :param tuning_parameters: The parameters used for running the algorithm
        :type tuning_parameters: Preset
        :param name: The name of the network. It is also used as the variable scope holding all the network ops
        :param global_network: Not used by this constructor
        :param network_is_local: When False, and the training is distributed and synchronous, the lock / release
                                 counters used as barriers when applying gradients are created. It also controls
                                 whether apply_gradients increments the global step
        """
        architecture.Architecture.__init__(self, tuning_parameters, name)
        self.middleware_embedder = None
        self.network_is_local = network_is_local
        assert tuning_parameters.agent.tensorflow_support, 'TensorFlow is not supported for this agent'
        self.sess = tuning_parameters.sess
        self.inputs = {}
        self.outputs = []
        self.targets = []
        self.losses = []
        self.total_loss = None
        self.trainable_weights = []
        self.weights_placeholders = []
        self.curr_rnn_c_in = None
        self.curr_rnn_h_in = None
        self.gradients_wrt_inputs = []
        self.train_writer = None

        # NOTE(review): self.tp and self.name are not assigned in this class - presumably the base class
        # constructor sets them from tuning_parameters and name
        self.optimizer_type = self.tp.agent.optimizer_type
        if self.tp.seed is not None:
            tf.set_random_seed(self.tp.seed)
        with tf.variable_scope(self.name, initializer=tf.contrib.layers.xavier_initializer()):
            self.global_step = tf.train.get_or_create_global_step()

            # build the network
            # NOTE(review): get_model is not defined in this class - presumably it fills in inputs, outputs,
            # targets, losses, total_loss, and creates the optimizer (and the middleware embedder) used below
            self.get_model(tuning_parameters)

            # model weights
            self.trainable_weights = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=self.name)

            # locks for synchronous training
            if self.tp.distributed and not self.tp.agent.async_training and not self.network_is_local:
                self.lock_counter = tf.get_variable("lock_counter", [], tf.int32,
                                                    initializer=tf.constant_initializer(0, dtype=tf.int32),
                                                    trainable=False)
                self.lock = self.lock_counter.assign_add(1, use_locking=True)
                self.lock_init = self.lock_counter.assign(0)

                self.release_counter = tf.get_variable("release_counter", [], tf.int32,
                                                       initializer=tf.constant_initializer(0, dtype=tf.int32),
                                                       trainable=False)
                self.release = self.release_counter.assign_add(1, use_locking=True)
                self.release_init = self.release_counter.assign(0)

            # one placeholder per trainable weight. the same placeholders are used both for assigning new weight
            # values (update_weights_from_list) and for feeding gradients (update_weights_from_batch_gradients)
            for idx, var in enumerate(self.trainable_weights):
                placeholder = tf.placeholder(tf.float32, shape=var.get_shape(), name=str(idx) + '_holder')
                self.weights_placeholders.append(placeholder)
                if self.tp.visualization.tensorboard:
                    variable_summaries(var)

            self.update_weights_from_list = [weights.assign(holder) for holder, weights in
                                             zip(self.weights_placeholders, self.trainable_weights)]

            # gradients ops
            self.tensor_gradients = tf.gradients(self.total_loss, self.trainable_weights)
            self.gradients_norm = tf.global_norm(self.tensor_gradients)
            if self.tp.clip_gradients is not None and self.tp.clip_gradients != 0:
                self.clipped_grads, self.grad_norms = tf.clip_by_global_norm(self.tensor_gradients,
                                                                             tuning_parameters.clip_gradients)

            # gradients of the outputs w.r.t. the inputs
            # at the moment, this is only used by ddpg
            if len(self.outputs) == 1:
                self.gradients_wrt_inputs = {name: tf.gradients(self.outputs[0], input_ph) for name, input_ph in self.inputs.items()}
                self.gradients_weights_ph = tf.placeholder('float32', self.outputs[0].shape, 'output_gradient_weights')
                self.weighted_gradients = tf.gradients(self.outputs[0], self.trainable_weights, self.gradients_weights_ph)

            # L2 regularization
            # NOTE(review): these ops are created after the gradient ops above, and the value added to the
            # collection is a one-element list - confirm that the regularization term actually reaches total_loss
            if self.tp.agent.l2_regularization != 0:
                self.l2_regularization = [tf.add_n([tf.nn.l2_loss(v) for v in self.trainable_weights])
                                          * self.tp.agent.l2_regularization]
                tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, self.l2_regularization)

            self.inc_step = self.global_step.assign_add(1)

            # defining the optimization process (for LBFGS we have less control over the optimizer)
            if self.optimizer_type != 'LBFGS':
                # the gradients are fed from outside through the weights placeholders
                self.update_weights_from_batch_gradients = self.optimizer.apply_gradients(
                    zip(self.weights_placeholders, self.trainable_weights), global_step=self.global_step)

            # merge only the summaries which were created under the current name scope
            current_scope_summaries = tf.get_collection(tf.GraphKeys.SUMMARIES,
                                                        scope=tf.contrib.framework.get_name_scope())
            self.merged = tf.summary.merge(current_scope_summaries)

            # initialize the model variables (only when not running distributed)
            if not self.tp.distributed:
                self.init_op = tf.global_variables_initializer()

                if self.sess:
                    if self.tp.visualization.tensorboard:
                        # Write the merged summaries to the current experiment directory
                        self.train_writer = tf.summary.FileWriter(self.tp.experiment_path + '/tensorboard',
                                                                  self.sess.graph)
                    self.sess.run(self.init_op)

        self.accumulated_gradients = None

    def reset_accumulated_gradients(self):
        """
        Reset the gradients accumulation buffers to zero.
        On the first call the buffers are created by fetching the current values of the trainable weights, which
        gives one array per weight, and all the buffers are then zeroed by multiplying them by 0
        """
        if self.accumulated_gradients is None:
            self.accumulated_gradients = self.tp.sess.run(self.trainable_weights)

        for ix, grad in enumerate(self.accumulated_gradients):
            self.accumulated_gradients[ix] = grad * 0

    def accumulate_gradients(self, inputs, targets, additional_fetches=None):
        """
        Runs a forward pass & backward pass, clips gradients if needed and accumulates them into the accumulation
        buffers. For the LBFGS optimizer, the optimizer's minimize is called directly and nothing is accumulated
        :param inputs: The input batch for the network, as a dictionary accepted by _feed_dict
        :param targets: The targets corresponding to the input batch. A single target or a list with one target per
                        target placeholder
        :param additional_fetches: Optional tensors to fetch during gradients calculation
        :return: A tuple of (total loss, individual losses, norm of the unclipped gradients, values of the
                 additional fetches). For the LBFGS optimizer the list [0] is returned instead
        """

        if self.accumulated_gradients is None:
            self.reset_accumulated_gradients()

        if additional_fetches is None:
            additional_fetches = []

        # feed inputs
        feed_dict = self._feed_dict(inputs)

        # feed targets
        targets = utils.force_list(targets)
        for placeholder_idx, target in enumerate(targets):
            feed_dict[self.targets[placeholder_idx]] = target

        if self.optimizer_type == 'LBFGS':
            self.optimizer.minimize(session=self.tp.sess, feed_dict=feed_dict)
            return [0]

        uses_lstm = self.tp.agent.middleware_type == conf.MiddlewareTypes.LSTM

        # set the fetches. the first four are always the gradients norm, the gradients, the total loss and the losses
        fetches = [self.gradients_norm]
        if self.tp.clip_gradients:
            fetches.append(self.clipped_grads)
        else:
            fetches.append(self.tensor_gradients)
        fetches += [self.total_loss, self.losses]
        if uses_lstm:
            fetches.append(self.middleware_embedder.state_out)
        additional_fetches_start_idx = len(fetches)
        fetches += additional_fetches

        # feed the lstm state if necessary
        if uses_lstm:
            # we can't always assume that we are starting from scratch here can we?
            feed_dict[self.middleware_embedder.c_in] = self.middleware_embedder.c_init
            feed_dict[self.middleware_embedder.h_in] = self.middleware_embedder.h_init

        # the merged summaries are always fetched last
        fetches += [self.merged]

        # get grads
        result = self.tp.sess.run(fetches, feed_dict=feed_dict)
        if hasattr(self, 'train_writer') and self.train_writer is not None:
            self.train_writer.add_summary(result[-1], self.tp.current_episode)

        # extract the fetches
        norm_unclipped_grads, grads, total_loss, losses = result[:4]
        if uses_lstm:
            (self.curr_rnn_c_in, self.curr_rnn_h_in) = result[4]
        fetched_tensors = []
        if len(additional_fetches) > 0:
            fetched_tensors = result[additional_fetches_start_idx:additional_fetches_start_idx +
                                                                  len(additional_fetches)]

        # accumulate the gradients
        for idx, grad in enumerate(grads):
            self.accumulated_gradients[idx] += grad

        return total_loss, losses, norm_unclipped_grads, fetched_tensors

    def apply_and_reset_gradients(self, gradients, scaler=1.):
        """
        Applies the given gradients to the network weights and resets the accumulation buffers of this network
        :param gradients: The gradients to use for the update
        :param scaler: A factor which the gradients are divided by before they are applied
        """
        self.apply_gradients(gradients, scaler)
        self.reset_accumulated_gradients()

    def apply_gradients(self, gradients, scaler=1.):
        """
        Applies the given gradients to the network weights.
        When the lock / release counters exist (synchronous distributed training), all the workers wait on a barrier
        before and after applying, and the gradients are additionally divided by the number of workers so that the
        updates of all the workers average out. Nothing is applied for the LBFGS optimizer.
        The given gradients are not modified, a rescaled copy is applied instead
        :param gradients: The gradients to use for the update, one per trainable weight
        :param scaler: A factor which the gradients are divided by before they are applied
        """
        if self.tp.agent.async_training or not self.tp.distributed:
            if hasattr(self, 'global_step') and not self.network_is_local:
                self.tp.sess.run(self.inc_step)

        if self.optimizer_type != 'LBFGS':

            # lock barrier
            if hasattr(self, 'lock_counter'):
                self.tp.sess.run(self.lock)
                while self.tp.sess.run(self.lock_counter) % self.tp.num_threads != 0:
                    time.sleep(0.00001)
                # the gradients are divided by the scaler below, so the scaler is multiplied by the number of
                # workers in order for the gradients to average out with the gradients from the other workers
                scaler *= float(self.tp.num_threads)

            # rescale into new arrays so that the gradients held by the caller are left untouched
            if scaler != 1.:
                gradients = [gradient / scaler for gradient in gradients]

            # apply gradients
            feed_dict = dict(zip(self.weights_placeholders, gradients))
            _ = self.tp.sess.run(self.update_weights_from_batch_gradients, feed_dict=feed_dict)

            # release barrier
            if hasattr(self, 'release_counter'):
                self.tp.sess.run(self.release)
                while self.tp.sess.run(self.release_counter) % self.tp.num_threads != 0:
                    time.sleep(0.00001)

    def _feed_dict(self, inputs):
        """
        Converts an inputs dictionary into a feed dictionary keyed by placeholders
        :param inputs: A dictionary whose keys are either names of the network inputs or placeholder tensors, and
                       whose values are the values to feed
        :return: A dictionary mapping placeholders to the values to feed
        :raises ValueError: if a string key does not match any of the network inputs, or if a key is neither a
                            string nor a placeholder
        """
        feed_dict = {}
        for input_name, input_value in inputs.items():
            if isinstance(input_name, str):
                if input_name not in self.inputs:
                    raise ValueError((
                        'input name {input_name} was provided to create a feed '
                        'dictionary, but there is no placeholder with that name. '
                        'placeholder names available include: {placeholder_names}'
                    ).format(
                        input_name=input_name,
                        placeholder_names=', '.join(self.inputs.keys())
                    ))

                feed_dict[self.inputs[input_name]] = input_value
            elif isinstance(input_name, tf.Tensor) and input_name.op.type == 'Placeholder':
                # placeholders are used as feed keys directly
                feed_dict[input_name] = input_value
            else:
                raise ValueError((
                    'input dictionary expects strings or placeholders as keys, '
                    'but found key {key} of type {type}'
                ).format(
                    key=input_name,
                    type=type(input_name),
                ))

        return feed_dict

    def predict(self, inputs, outputs=None, squeeze_output=True):
        """
        Run a forward pass of the network using the given input
        :param inputs: The input for the network, as a dictionary accepted by _feed_dict
        :param outputs: The tensors to fetch, defaults to self.outputs
        :param squeeze_output: call squeeze_list on output
        :return: The network output

        WARNING: must only call once per state since each call is assumed by LSTM to be a new time step.
        """
        feed_dict = self._feed_dict(inputs)
        if outputs is None:
            outputs = self.outputs

        if self.tp.agent.middleware_type == conf.MiddlewareTypes.LSTM:
            # feed the stored recurrent state and replace it with the state which results from this pass
            feed_dict[self.middleware_embedder.c_in] = self.curr_rnn_c_in
            feed_dict[self.middleware_embedder.h_in] = self.curr_rnn_h_in

            output, (self.curr_rnn_c_in, self.curr_rnn_h_in) = self.tp.sess.run([outputs, self.middleware_embedder.state_out], feed_dict=feed_dict)
        else:
            output = self.tp.sess.run(outputs, feed_dict)

        if squeeze_output:
            output = utils.squeeze_list(output)

        return output

    def get_weights(self):
        """
        :return: a list of the trainable weights variables of the network
        """
        return self.trainable_weights

    def set_weights(self, weights, new_rate=1.0):
        """
        Sets the network weights to a mix of the given weights and the current weights:
        new_rate * given weights + (1 - new_rate) * current weights
        :param weights: A list of weights tensors, which are evaluated through the session
        :param new_rate: The fraction of the given weights in the mix. 1.0 copies the given weights as they are
        """
        feed_dict = {}
        old_weights, new_weights = self.tp.sess.run([self.get_weights(), weights])
        for placeholder_idx, new_weight in enumerate(new_weights):
            feed_dict[self.weights_placeholders[placeholder_idx]]\
                = new_rate * new_weight + (1 - new_rate) * old_weights[placeholder_idx]
        self.tp.sess.run(self.update_weights_from_list, feed_dict)

    def write_graph_to_logdir(self, summary_dir):
        """
        Writes the tensorflow graph to the logdir for tensorboard visualization
        :param summary_dir: the path to the logdir
        """
        summary_writer = tf.summary.FileWriter(summary_dir)
        try:
            summary_writer.add_graph(self.sess.graph)
        finally:
            # closing the writer flushes the pending graph event to disk and releases the events file
            summary_writer.close()

    def get_variable_value(self, variable):
        """
        Get the value of a variable from the graph
        :param variable: the variable
        :return: the value of the variable
        """
        return self.sess.run(variable)

    def set_variable_value(self, assign_op, value, placeholder=None):
        """
        Updates the value of a variable.
        This requires having an assign operation for the variable, and a placeholder which will provide the value
        :param assign_op: an assign operation for the variable
        :param value: a value to set the variable to
        :param placeholder: a placeholder to hold the given value for injecting it into the variable
        """
        # NOTE(review): the placeholder is used as the feed key as is, so the default of None is presumably never
        # relied on by callers - confirm
        self.sess.run(assign_op, feed_dict={placeholder: value})