# # Copyright (c) 2017 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # import math import time from typing import List, Union import numpy as np import tensorflow as tf from rl_coach.architectures.architecture import Architecture from rl_coach.base_parameters import AgentParameters, DistributedTaskParameters from rl_coach.core_types import GradientClippingMethod from rl_coach.spaces import SpacesDefinition from rl_coach.utils import force_list, squeeze_list def batchnorm_activation_dropout(input_layer, batchnorm, activation_function, dropout, dropout_rate, layer_idx): layers = [input_layer] # batchnorm if batchnorm: layers.append( tf.layers.batch_normalization(layers[-1], name="batchnorm{}".format(layer_idx)) ) # activation if activation_function: layers.append( activation_function(layers[-1], name="activation{}".format(layer_idx)) ) # dropout if dropout: layers.append( tf.layers.dropout(layers[-1], dropout_rate, name="dropout{}".format(layer_idx)) ) # remove the input layer from the layers list del layers[0] return layers class Conv2d(object): def __init__(self, params: List): """ :param params: list of [num_filters, kernel_size, strides] """ self.params = params def __call__(self, input_layer, name: str): """ returns a tensorflow conv2d layer :param input_layer: previous layer :param name: layer name :return: conv2d layer """ return tf.layers.conv2d(input_layer, filters=self.params[0], kernel_size=self.params[1], strides=self.params[2], data_format='channels_last', name=name) class Dense(object): def __init__(self, params: Union[List, int]): """ :param params: list of [num_output_neurons] """ self.params = force_list(params) def __call__(self, input_layer, name: str, kernel_initializer=None, activation=None): """ returns a tensorflow dense layer :param input_layer: previous layer :param name: layer name :return: dense layer """ return tf.layers.dense(input_layer, self.params[0], name=name, kernel_initializer=kernel_initializer, activation=activation) class NoisyNetDense(object): """ A factorized Noisy Net layer https://arxiv.org/abs/1706.10295. """ def __init__(self, params: List): """ :param params: list of [num_output_neurons] """ self.params = force_list(params) self.sigma0 = 0.5 def __call__(self, input_layer, name: str, kernel_initializer=None, activation=None): """ returns a NoisyNet dense layer :param input_layer: previous layer :param name: layer name :param kernel_initializer: initializer for kernels. Default is to use Gaussian noise that preserves stddev. :param activation: the activation function :return: dense layer """ #TODO: noise sampling should be externally controlled. DQN is fine with sampling noise for every # forward (either act or train, both for online and target networks). # A3C, on the other hand, should sample noise only when policy changes (i.e. after every t_max steps) num_inputs = input_layer.get_shape()[-1].value num_outputs = self.params[0] stddev = 1 / math.sqrt(num_inputs) activation = activation if activation is not None else (lambda x: x) if kernel_initializer is None: kernel_mean_initializer = tf.random_uniform_initializer(-stddev, stddev) kernel_stddev_initializer = tf.random_uniform_initializer(-stddev * self.sigma0, stddev * self.sigma0) else: kernel_mean_initializer = kernel_stddev_initializer = kernel_initializer with tf.variable_scope(None, default_name=name): weight_mean = tf.get_variable('weight_mean', shape=(num_inputs, num_outputs), initializer=kernel_mean_initializer) bias_mean = tf.get_variable('bias_mean', shape=(num_outputs,), initializer=tf.zeros_initializer()) weight_stddev = tf.get_variable('weight_stddev', shape=(num_inputs, num_outputs), initializer=kernel_stddev_initializer) bias_stddev = tf.get_variable('bias_stddev', shape=(num_outputs,), initializer=kernel_stddev_initializer) bias_noise = self.f(tf.random_normal((num_outputs,))) weight_noise = self.factorized_noise(num_inputs, num_outputs) bias = bias_mean + bias_stddev * bias_noise weight = weight_mean + weight_stddev * weight_noise return activation(tf.matmul(input_layer, weight) + bias) def factorized_noise(self, inputs, outputs): # TODO: use factorized noise only for compute intensive algos (e.g. DQN). # lighter algos (e.g. DQN) should not use it noise1 = self.f(tf.random_normal((inputs, 1))) noise2 = self.f(tf.random_normal((1, outputs))) return tf.matmul(noise1, noise2) @staticmethod def f(values): return tf.sqrt(tf.abs(values)) * tf.sign(values) def variable_summaries(var): """Attach a lot of summaries to a Tensor (for TensorBoard visualization).""" with tf.name_scope('summaries'): layer_weight_name = '_'.join(var.name.split('/')[-3:])[:-2] with tf.name_scope(layer_weight_name): mean = tf.reduce_mean(var) tf.summary.scalar('mean', mean) with tf.name_scope('stddev'): stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean))) tf.summary.scalar('stddev', stddev) tf.summary.scalar('max', tf.reduce_max(var)) tf.summary.scalar('min', tf.reduce_min(var)) tf.summary.histogram('histogram', var) def local_getter(getter, name, *args, **kwargs): """ This is a wrapper around the tf.get_variable function which puts the variables in the local variables collection instead of the global variables collection. The local variables collection will hold variables which are not shared between workers. these variables are also assumed to be non-trainable (the optimizer does not apply gradients to these variables), but we can calculate the gradients wrt these variables, and we can update their content. """ kwargs['collections'] = [tf.GraphKeys.LOCAL_VARIABLES] return getter(name, *args, **kwargs) class TensorFlowArchitecture(Architecture): def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, name: str= "", global_network=None, network_is_local: bool=True, network_is_trainable: bool=False): """ :param agent_parameters: the agent parameters :param spaces: the spaces definition of the agent :param name: the name of the network :param global_network: the global network replica that is shared between all the workers :param network_is_local: is the network global (shared between workers) or local (dedicated to the worker) :param network_is_trainable: is the network trainable (we can apply gradients on it) """ super().__init__(agent_parameters, spaces, name) self.middleware = None self.network_is_local = network_is_local self.global_network = global_network if not self.network_parameters.tensorflow_support: raise ValueError('TensorFlow is not supported for this agent') self.sess = None self.inputs = {} self.outputs = [] self.targets = [] self.importance_weights = [] self.losses = [] self.total_loss = None self.trainable_weights = [] self.weights_placeholders = [] self.shared_accumulated_gradients = [] self.curr_rnn_c_in = None self.curr_rnn_h_in = None self.gradients_wrt_inputs = [] self.train_writer = None self.accumulated_gradients = None self.network_is_trainable = network_is_trainable self.is_chief = self.ap.task_parameters.task_index == 0 self.network_is_global = not self.network_is_local and global_network is None self.distributed_training = self.network_is_global or self.network_is_local and global_network is not None self.optimizer_type = self.network_parameters.optimizer_type if self.ap.task_parameters.seed is not None: tf.set_random_seed(self.ap.task_parameters.seed) with tf.variable_scope("/".join(self.name.split("/")[1:]), initializer=tf.contrib.layers.xavier_initializer(), custom_getter=local_getter if network_is_local and global_network else None): self.global_step = tf.train.get_or_create_global_step() # build the network self.get_model() # model weights self.weights = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=self.full_name) # create the placeholder for the assigning gradients and some tensorboard summaries for the weights for idx, var in enumerate(self.weights): placeholder = tf.placeholder(tf.float32, shape=var.get_shape(), name=str(idx) + '_holder') self.weights_placeholders.append(placeholder) if self.ap.visualization.tensorboard: variable_summaries(var) # create op for assigning a list of weights to the network weights self.update_weights_from_list = [weights.assign(holder) for holder, weights in zip(self.weights_placeholders, self.weights)] # locks for synchronous training if self.network_is_global: self._create_locks_for_synchronous_training() # gradients ops self._create_gradient_ops() # L2 regularization if self.network_parameters.l2_regularization != 0: self.l2_regularization = [tf.add_n([tf.nn.l2_loss(v) for v in self.weights]) * self.network_parameters.l2_regularization] tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, self.l2_regularization) self.inc_step = self.global_step.assign_add(1) # reset LSTM hidden cells self.reset_internal_memory() if self.ap.visualization.tensorboard: current_scope_summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, scope=tf.contrib.framework.get_name_scope()) self.merged = tf.summary.merge(current_scope_summaries) # initialize or restore model self.init_op = tf.group( tf.global_variables_initializer(), tf.local_variables_initializer() ) # set the fetches for training self._set_initial_fetch_list() def _set_initial_fetch_list(self): """ Create an initial list of tensors to fetch in each training iteration :return: None """ self.train_fetches = [self.gradients_norm] if self.network_parameters.clip_gradients: self.train_fetches.append(self.clipped_grads) else: self.train_fetches.append(self.tensor_gradients) self.train_fetches += [self.total_loss, self.losses] if self.middleware.__class__.__name__ == 'LSTMMiddleware': self.train_fetches.append(self.middleware.state_out) self.additional_fetches_start_idx = len(self.train_fetches) def _create_locks_for_synchronous_training(self): """ Create locks for synchronizing the different workers during training :return: None """ self.lock_counter = tf.get_variable("lock_counter", [], tf.int32, initializer=tf.constant_initializer(0, dtype=tf.int32), trainable=False) self.lock = self.lock_counter.assign_add(1, use_locking=True) self.lock_init = self.lock_counter.assign(0) self.release_counter = tf.get_variable("release_counter", [], tf.int32, initializer=tf.constant_initializer(0, dtype=tf.int32), trainable=False) self.release = self.release_counter.assign_add(1, use_locking=True) self.release_decrement = self.release_counter.assign_add(-1, use_locking=True) self.release_init = self.release_counter.assign(0) def _create_gradient_ops(self): """ Create all the tensorflow operations for calculating gradients, processing the gradients and applying them :return: None """ self.tensor_gradients = tf.gradients(self.total_loss, self.weights) self.gradients_norm = tf.global_norm(self.tensor_gradients) # gradient clipping if self.network_parameters.clip_gradients is not None and self.network_parameters.clip_gradients != 0: self._create_gradient_clipping_ops() # when using a shared optimizer, we create accumulators to store gradients from all the workers before # applying them if self.distributed_training: self._create_gradient_accumulators() # gradients of the outputs w.r.t. the inputs # at the moment, this is only used by ddpg self.gradients_wrt_inputs = [{name: tf.gradients(output, input_ph) for name, input_ph in self.inputs.items()} for output in self.outputs] self.gradients_weights_ph = [tf.placeholder('float32', self.outputs[i].shape, 'output_gradient_weights') for i in range(len(self.outputs))] self.weighted_gradients = [] for i in range(len(self.outputs)): unnormalized_gradients = tf.gradients(self.outputs[i], self.weights, self.gradients_weights_ph[i]) # unnormalized gradients seems to be better at the time. TODO: validate this accross more environments # self.weighted_gradients.append(list(map(lambda x: tf.div(x, self.network_parameters.batch_size), # unnormalized_gradients))) self.weighted_gradients.append(unnormalized_gradients) # defining the optimization process (for LBFGS we have less control over the optimizer) if self.optimizer_type != 'LBFGS' and self.network_is_trainable: self._create_gradient_applying_ops() def _create_gradient_accumulators(self): if self.network_is_global: self.shared_accumulated_gradients = [tf.Variable(initial_value=tf.zeros_like(var)) for var in self.weights] self.accumulate_shared_gradients = [var.assign_add(holder, use_locking=True) for holder, var in zip(self.weights_placeholders, self.shared_accumulated_gradients)] self.init_shared_accumulated_gradients = [var.assign(tf.zeros_like(var)) for var in self.shared_accumulated_gradients] elif self.network_is_local: self.accumulate_shared_gradients = self.global_network.accumulate_shared_gradients self.init_shared_accumulated_gradients = self.global_network.init_shared_accumulated_gradients def _create_gradient_clipping_ops(self): """ Create tensorflow ops for clipping the gradients according to the given GradientClippingMethod :return: None """ if self.network_parameters.gradients_clipping_method == GradientClippingMethod.ClipByGlobalNorm: self.clipped_grads, self.grad_norms = tf.clip_by_global_norm(self.tensor_gradients, self.network_parameters.clip_gradients) elif self.network_parameters.gradients_clipping_method == GradientClippingMethod.ClipByValue: self.clipped_grads = [tf.clip_by_value(grad, -self.network_parameters.clip_gradients, self.network_parameters.clip_gradients) for grad in self.tensor_gradients] elif self.network_parameters.gradients_clipping_method == GradientClippingMethod.ClipByNorm: self.clipped_grads = [tf.clip_by_norm(grad, self.network_parameters.clip_gradients) for grad in self.tensor_gradients] def _create_gradient_applying_ops(self): """ Create tensorflow ops for applying the gradients to the network weights according to the training scheme (distributed training - local or global network, shared optimizer, etc.) :return: None """ if self.network_is_global and self.network_parameters.shared_optimizer and \ not self.network_parameters.async_training: # synchronous training with shared optimizer? -> create an operation for applying the gradients # accumulated in the shared gradients accumulator self.update_weights_from_shared_gradients = self.optimizer.apply_gradients( zip(self.shared_accumulated_gradients, self.weights), global_step=self.global_step) elif self.distributed_training and self.network_is_local: # distributed training but independent optimizer? -> create an operation for applying the gradients # to the global weights self.update_weights_from_batch_gradients = self.optimizer.apply_gradients( zip(self.weights_placeholders, self.global_network.weights), global_step=self.global_step) elif self.network_is_trainable: # not any of the above but is trainable? -> create an operation for applying the gradients to # this network weights self.update_weights_from_batch_gradients = self.optimizer.apply_gradients( zip(self.weights_placeholders, self.weights), global_step=self.global_step) def set_session(self, sess): self.sess = sess task_is_distributed = isinstance(self.ap.task_parameters, DistributedTaskParameters) # initialize the session parameters in single threaded runs. Otherwise, this is done through the # MonitoredSession object in the graph manager if not task_is_distributed: self.sess.run(self.init_op) if self.ap.visualization.tensorboard: # Write the merged summaries to the current experiment directory if not task_is_distributed: self.train_writer = tf.summary.FileWriter(self.ap.task_parameters.experiment_path + '/tensorboard') self.train_writer.add_graph(self.sess.graph) elif self.network_is_local: self.train_writer = tf.summary.FileWriter(self.ap.task_parameters.experiment_path + '/tensorboard/worker{}'.format(self.ap.task_parameters.task_index)) self.train_writer.add_graph(self.sess.graph) # wait for all the workers to set their session if not self.network_is_local: self.wait_for_all_workers_barrier() def reset_accumulated_gradients(self): """ Reset the gradients accumulation placeholder """ if self.accumulated_gradients is None: self.accumulated_gradients = self.sess.run(self.weights) for ix, grad in enumerate(self.accumulated_gradients): self.accumulated_gradients[ix] = grad * 0 def accumulate_gradients(self, inputs, targets, additional_fetches=None, importance_weights=None, no_accumulation=False): """ Runs a forward pass & backward pass, clips gradients if needed and accumulates them into the accumulation placeholders :param additional_fetches: Optional tensors to fetch during gradients calculation :param inputs: The input batch for the network :param targets: The targets corresponding to the input batch :param importance_weights: A coefficient for each sample in the batch, which will be used to rescale the loss error of this sample. If it is not given, the samples losses won't be scaled :param no_accumulation: If is set to True, the gradients in the accumulated gradients placeholder will be replaced by the newely calculated gradients instead of accumulating the new gradients. This can speed up the function runtime by around 10%. :return: A list containing the total loss and the individual network heads losses """ if self.accumulated_gradients is None: self.reset_accumulated_gradients() # feed inputs if additional_fetches is None: additional_fetches = [] feed_dict = self.create_feed_dict(inputs) # feed targets targets = force_list(targets) for placeholder_idx, target in enumerate(targets): feed_dict[self.targets[placeholder_idx]] = target # feed importance weights importance_weights = force_list(importance_weights) for placeholder_idx, target_ph in enumerate(targets): if len(importance_weights) <= placeholder_idx or importance_weights[placeholder_idx] is None: importance_weight = np.ones(target_ph.shape[0]) else: importance_weight = importance_weights[placeholder_idx] importance_weight = np.reshape(importance_weight, (-1,) + (1,)*(len(target_ph.shape)-1)) feed_dict[self.importance_weights[placeholder_idx]] = importance_weight if self.optimizer_type != 'LBFGS': # feed the lstm state if necessary if self.middleware.__class__.__name__ == 'LSTMMiddleware': # we can't always assume that we are starting from scratch here can we? feed_dict[self.middleware.c_in] = self.middleware.c_init feed_dict[self.middleware.h_in] = self.middleware.h_init fetches = self.train_fetches + additional_fetches if self.ap.visualization.tensorboard: fetches += [self.merged] # get grads result = self.sess.run(fetches, feed_dict=feed_dict) if hasattr(self, 'train_writer') and self.train_writer is not None: self.train_writer.add_summary(result[-1], self.sess.run(self.global_step)) # extract the fetches norm_unclipped_grads, grads, total_loss, losses = result[:4] if self.middleware.__class__.__name__ == 'LSTMMiddleware': (self.curr_rnn_c_in, self.curr_rnn_h_in) = result[4] fetched_tensors = [] if len(additional_fetches) > 0: fetched_tensors = result[self.additional_fetches_start_idx:self.additional_fetches_start_idx + len(additional_fetches)] # accumulate the gradients for idx, grad in enumerate(grads): if no_accumulation: self.accumulated_gradients[idx] = grad else: self.accumulated_gradients[idx] += grad return total_loss, losses, norm_unclipped_grads, fetched_tensors else: self.optimizer.minimize(session=self.sess, feed_dict=feed_dict) return [0] def create_feed_dict(self, inputs): feed_dict = {} for input_name, input_value in inputs.items(): if isinstance(input_name, str): if input_name not in self.inputs: raise ValueError(( 'input name {input_name} was provided to create a feed ' 'dictionary, but there is no placeholder with that name. ' 'placeholder names available include: {placeholder_names}' ).format( input_name=input_name, placeholder_names=', '.join(self.inputs.keys()) )) feed_dict[self.inputs[input_name]] = input_value elif isinstance(input_name, tf.Tensor) and input_name.op.type == 'Placeholder': feed_dict[input_name] = input_value else: raise ValueError(( 'input dictionary expects strings or placeholders as keys, ' 'but found key {key} of type {type}' ).format( key=input_name, type=type(input_name), )) return feed_dict def apply_and_reset_gradients(self, gradients, scaler=1.): """ Applies the given gradients to the network weights and resets the accumulation placeholder :param gradients: The gradients to use for the update :param scaler: A scaling factor that allows rescaling the gradients before applying them """ self.apply_gradients(gradients, scaler) self.reset_accumulated_gradients() def wait_for_all_workers_to_lock(self, lock: str, include_only_training_workers: bool=False): """ Waits for all the workers to lock a certain lock and then continues :param lock: the name of the lock to use :param include_only_training_workers: wait only for training workers or for all the workers? :return: None """ if include_only_training_workers: num_workers_to_wait_for = self.ap.task_parameters.num_training_tasks else: num_workers_to_wait_for = self.ap.task_parameters.num_tasks # lock if hasattr(self, '{}_counter'.format(lock)): self.sess.run(getattr(self, lock)) while self.sess.run(getattr(self, '{}_counter'.format(lock))) % num_workers_to_wait_for != 0: time.sleep(0.00001) # self.sess.run(getattr(self, '{}_init'.format(lock))) else: raise ValueError("no counter was defined for the lock {}".format(lock)) def wait_for_all_workers_barrier(self, include_only_training_workers: bool=False): """ A barrier that allows waiting for all the workers to finish a certain block of commands :param include_only_training_workers: wait only for training workers or for all the workers? :return: None """ self.wait_for_all_workers_to_lock('lock', include_only_training_workers=include_only_training_workers) self.sess.run(self.lock_init) # we need to lock again (on a different lock) in order to prevent a situation where one of the workers continue # and then was able to first increase the lock again by one, only to have a late worker to reset it again. # so we want to make sure that all workers are done resetting the lock before continuting to reuse that lock. self.wait_for_all_workers_to_lock('release', include_only_training_workers=include_only_training_workers) self.sess.run(self.release_init) def apply_gradients(self, gradients, scaler=1.): """ Applies the given gradients to the network weights :param gradients: The gradients to use for the update :param scaler: A scaling factor that allows rescaling the gradients before applying them. The gradients will be MULTIPLIED by this factor """ if self.network_parameters.async_training or not isinstance(self.ap.task_parameters, DistributedTaskParameters): if hasattr(self, 'global_step') and not self.network_is_local: self.sess.run(self.inc_step) if self.optimizer_type != 'LBFGS': if self.distributed_training and not self.network_parameters.async_training: # rescale the gradients so that they average out with the gradients from the other workers if self.network_parameters.scale_down_gradients_by_number_of_workers_for_sync_training: scaler /= float(self.ap.task_parameters.num_training_tasks) # rescale the gradients if scaler != 1.: for gradient in gradients: gradient *= scaler # apply the gradients feed_dict = dict(zip(self.weights_placeholders, gradients)) if self.distributed_training and self.network_parameters.shared_optimizer \ and not self.network_parameters.async_training: # synchronous distributed training with shared optimizer: # - each worker adds its gradients to the shared gradients accumulators # - we wait for all the workers to add their gradients # - the chief worker (worker with task index = 0) applies the gradients once and resets the accumulators self.sess.run(self.accumulate_shared_gradients, feed_dict=feed_dict) self.wait_for_all_workers_barrier(include_only_training_workers=True) if self.is_chief: self.sess.run(self.update_weights_from_shared_gradients) self.sess.run(self.init_shared_accumulated_gradients) else: # async distributed training / distributed training with independent optimizer # / non-distributed training - just apply the gradients feed_dict = dict(zip(self.weights_placeholders, gradients)) self.sess.run(self.update_weights_from_batch_gradients, feed_dict=feed_dict) # release barrier if self.distributed_training and not self.network_parameters.async_training: self.wait_for_all_workers_barrier(include_only_training_workers=True) def predict(self, inputs, outputs=None, squeeze_output=True, initial_feed_dict=None): """ Run a forward pass of the network using the given input :param inputs: The input for the network :param outputs: The output for the network, defaults to self.outputs :param squeeze_output: call squeeze_list on output :param initial_feed_dict: a dictionary to use as the initial feed_dict. other inputs will be added to this dict :return: The network output WARNING: must only call once per state since each call is assumed by LSTM to be a new time step. """ feed_dict = self.create_feed_dict(inputs) if initial_feed_dict: feed_dict.update(initial_feed_dict) if outputs is None: outputs = self.outputs if self.middleware.__class__.__name__ == 'LSTMMiddleware': feed_dict[self.middleware.c_in] = self.curr_rnn_c_in feed_dict[self.middleware.h_in] = self.curr_rnn_h_in output, (self.curr_rnn_c_in, self.curr_rnn_h_in) = self.sess.run([outputs, self.middleware.state_out], feed_dict=feed_dict) else: output = self.sess.run(outputs, feed_dict) if squeeze_output: output = squeeze_list(output) return output def train_on_batch(self, inputs, targets, scaler=1., additional_fetches=None, importance_weights=None): """ Given a batch of examples and targets, runs a forward pass & backward pass and then applies the gradients :param additional_fetches: Optional tensors to fetch during the training process :param inputs: The input for the network :param targets: The targets corresponding to the input batch :param scaler: A scaling factor that allows rescaling the gradients before applying them :param importance_weights: A coefficient for each sample in the batch, which will be used to rescale the loss error of this sample. If it is not given, the samples losses won't be scaled :return: The loss of the network """ if additional_fetches is None: additional_fetches = [] force_list(additional_fetches) loss = self.accumulate_gradients(inputs, targets, additional_fetches=additional_fetches, importance_weights=importance_weights) self.apply_and_reset_gradients(self.accumulated_gradients, scaler) return loss def get_weights(self): """ :return: a list of tensors containing the network weights for each layer """ return self.weights def set_weights(self, weights, new_rate=1.0): """ Sets the network weights from the given list of weights tensors """ feed_dict = {} old_weights, new_weights = self.sess.run([self.get_weights(), weights]) for placeholder_idx, new_weight in enumerate(new_weights): feed_dict[self.weights_placeholders[placeholder_idx]]\ = new_rate * new_weight + (1 - new_rate) * old_weights[placeholder_idx] self.sess.run(self.update_weights_from_list, feed_dict) def get_variable_value(self, variable): """ Get the value of a variable from the graph :param variable: the variable :return: the value of the variable """ return self.sess.run(variable) def set_variable_value(self, assign_op, value, placeholder=None): """ Updates the value of a variable. This requires having an assign operation for the variable, and a placeholder which will provide the value :param assign_op: an assign operation for the variable :param value: a value to set the variable to :param placeholder: a placeholder to hold the given value for injecting it into the variable """ self.sess.run(assign_op, feed_dict={placeholder: value}) def reset_internal_memory(self): """ Reset any internal memory used by the network. For example, an LSTM internal state :return: None """ # initialize LSTM hidden states if self.middleware.__class__.__name__ == 'LSTMMiddleware': self.curr_rnn_c_in = self.middleware.c_init self.curr_rnn_h_in = self.middleware.h_init