mirror of
https://github.com/gryf/coach.git
synced 2025-12-17 19:20:19 +01:00
Till now, most of the modules were importing all of the module objects (variables, classes, functions, other imports) into module namespace, which potentially could (and was) cause of unintentional use of class or methods, which was indirect imported. With this patch, all the star imports were substituted with top-level module, which provides desired class or function. Besides, all imports where sorted (where possible) in a way pep8[1] suggests - first are imports from standard library, than goes third party imports (like numpy, tensorflow etc) and finally coach modules. All of those sections are separated by one empty line. [1] https://www.python.org/dev/peps/pep-0008/#imports
365 lines
16 KiB
Python
365 lines
16 KiB
Python
#
|
|
# Copyright (c) 2017 Intel Corporation
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
#
|
|
import time
|
|
|
|
import tensorflow as tf
|
|
|
|
from architectures import architecture
|
|
import configurations as conf
|
|
import utils
|
|
|
|
def variable_summaries(var):
|
|
"""Attach a lot of summaries to a Tensor (for TensorBoard visualization)."""
|
|
with tf.name_scope('summaries'):
|
|
layer_weight_name = '_'.join(var.name.split('/')[-3:])[:-2]
|
|
|
|
with tf.name_scope(layer_weight_name):
|
|
mean = tf.reduce_mean(var)
|
|
tf.summary.scalar('mean', mean)
|
|
with tf.name_scope('stddev'):
|
|
stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
|
|
tf.summary.scalar('stddev', stddev)
|
|
tf.summary.scalar('max', tf.reduce_max(var))
|
|
tf.summary.scalar('min', tf.reduce_min(var))
|
|
tf.summary.histogram('histogram', var)
|
|
|
|
class TensorFlowArchitecture(architecture.Architecture):
|
|
def __init__(self, tuning_parameters, name="", global_network=None, network_is_local=True):
|
|
"""
|
|
:param tuning_parameters: The parameters used for running the algorithm
|
|
:type tuning_parameters: Preset
|
|
:param name: The name of the network
|
|
"""
|
|
architecture.Architecture.__init__(self, tuning_parameters, name)
|
|
self.middleware_embedder = None
|
|
self.network_is_local = network_is_local
|
|
assert tuning_parameters.agent.tensorflow_support, 'TensorFlow is not supported for this agent'
|
|
self.sess = tuning_parameters.sess
|
|
self.inputs = {}
|
|
self.outputs = []
|
|
self.targets = []
|
|
self.losses = []
|
|
self.total_loss = None
|
|
self.trainable_weights = []
|
|
self.weights_placeholders = []
|
|
self.curr_rnn_c_in = None
|
|
self.curr_rnn_h_in = None
|
|
self.gradients_wrt_inputs = []
|
|
self.train_writer = None
|
|
|
|
self.optimizer_type = self.tp.agent.optimizer_type
|
|
if self.tp.seed is not None:
|
|
tf.set_random_seed(self.tp.seed)
|
|
with tf.variable_scope(self.name, initializer=tf.contrib.layers.xavier_initializer()):
|
|
self.global_step = tf.train.get_or_create_global_step()
|
|
|
|
# build the network
|
|
self.get_model(tuning_parameters)
|
|
|
|
# model weights
|
|
self.trainable_weights = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=self.name)
|
|
|
|
# locks for synchronous training
|
|
if self.tp.distributed and not self.tp.agent.async_training and not self.network_is_local:
|
|
self.lock_counter = tf.get_variable("lock_counter", [], tf.int32,
|
|
initializer=tf.constant_initializer(0, dtype=tf.int32),
|
|
trainable=False)
|
|
self.lock = self.lock_counter.assign_add(1, use_locking=True)
|
|
self.lock_init = self.lock_counter.assign(0)
|
|
|
|
self.release_counter = tf.get_variable("release_counter", [], tf.int32,
|
|
initializer=tf.constant_initializer(0, dtype=tf.int32),
|
|
trainable=False)
|
|
self.release = self.release_counter.assign_add(1, use_locking=True)
|
|
self.release_init = self.release_counter.assign(0)
|
|
|
|
# local network does the optimization so we need to create all the ops we are going to use to optimize
|
|
for idx, var in enumerate(self.trainable_weights):
|
|
placeholder = tf.placeholder(tf.float32, shape=var.get_shape(), name=str(idx) + '_holder')
|
|
self.weights_placeholders.append(placeholder)
|
|
if self.tp.visualization.tensorboard:
|
|
variable_summaries(var)
|
|
|
|
self.update_weights_from_list = [weights.assign(holder) for holder, weights in
|
|
zip(self.weights_placeholders, self.trainable_weights)]
|
|
|
|
# gradients ops
|
|
self.tensor_gradients = tf.gradients(self.total_loss, self.trainable_weights)
|
|
self.gradients_norm = tf.global_norm(self.tensor_gradients)
|
|
if self.tp.clip_gradients is not None and self.tp.clip_gradients != 0:
|
|
self.clipped_grads, self.grad_norms = tf.clip_by_global_norm(self.tensor_gradients,
|
|
tuning_parameters.clip_gradients)
|
|
|
|
# gradients of the outputs w.r.t. the inputs
|
|
# at the moment, this is only used by ddpg
|
|
if len(self.outputs) == 1:
|
|
self.gradients_wrt_inputs = {name: tf.gradients(self.outputs[0], input_ph) for name, input_ph in self.inputs.items()}
|
|
self.gradients_weights_ph = tf.placeholder('float32', self.outputs[0].shape, 'output_gradient_weights')
|
|
self.weighted_gradients = tf.gradients(self.outputs[0], self.trainable_weights, self.gradients_weights_ph)
|
|
|
|
# L2 regularization
|
|
if self.tp.agent.l2_regularization != 0:
|
|
self.l2_regularization = [tf.add_n([tf.nn.l2_loss(v) for v in self.trainable_weights])
|
|
* self.tp.agent.l2_regularization]
|
|
tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, self.l2_regularization)
|
|
|
|
self.inc_step = self.global_step.assign_add(1)
|
|
|
|
# defining the optimization process (for LBFGS we have less control over the optimizer)
|
|
if self.optimizer_type != 'LBFGS':
|
|
# no global network, this is a plain simple centralized training
|
|
self.update_weights_from_batch_gradients = self.optimizer.apply_gradients(
|
|
zip(self.weights_placeholders, self.trainable_weights), global_step=self.global_step)
|
|
|
|
current_scope_summaries = tf.get_collection(tf.GraphKeys.SUMMARIES,
|
|
scope=tf.contrib.framework.get_name_scope())
|
|
self.merged = tf.summary.merge(current_scope_summaries)
|
|
|
|
# initialize or restore model
|
|
if not self.tp.distributed:
|
|
# Merge all the summaries
|
|
|
|
self.init_op = tf.global_variables_initializer()
|
|
|
|
if self.sess:
|
|
if self.tp.visualization.tensorboard:
|
|
# Write the merged summaries to the current experiment directory
|
|
self.train_writer = tf.summary.FileWriter(self.tp.experiment_path + '/tensorboard',
|
|
self.sess.graph)
|
|
self.sess.run(self.init_op)
|
|
|
|
self.accumulated_gradients = None
|
|
|
|
def reset_accumulated_gradients(self):
|
|
"""
|
|
Reset the gradients accumulation placeholder
|
|
"""
|
|
if self.accumulated_gradients is None:
|
|
self.accumulated_gradients = self.tp.sess.run(self.trainable_weights)
|
|
|
|
for ix, grad in enumerate(self.accumulated_gradients):
|
|
self.accumulated_gradients[ix] = grad * 0
|
|
|
|
def accumulate_gradients(self, inputs, targets, additional_fetches=None):
|
|
"""
|
|
Runs a forward pass & backward pass, clips gradients if needed and accumulates them into the accumulation
|
|
placeholders
|
|
:param additional_fetches: Optional tensors to fetch during gradients calculation
|
|
:param inputs: The input batch for the network
|
|
:param targets: The targets corresponding to the input batch
|
|
:return: A list containing the total loss and the individual network heads losses
|
|
"""
|
|
|
|
if self.accumulated_gradients is None:
|
|
self.reset_accumulated_gradients()
|
|
|
|
# feed inputs
|
|
if additional_fetches is None:
|
|
additional_fetches = []
|
|
|
|
feed_dict = self._feed_dict(inputs)
|
|
|
|
# feed targets
|
|
targets = utils.force_list(targets)
|
|
for placeholder_idx, target in enumerate(targets):
|
|
feed_dict[self.targets[placeholder_idx]] = target
|
|
|
|
if self.optimizer_type != 'LBFGS':
|
|
# set the fetches
|
|
fetches = [self.gradients_norm]
|
|
if self.tp.clip_gradients:
|
|
fetches.append(self.clipped_grads)
|
|
else:
|
|
fetches.append(self.tensor_gradients)
|
|
fetches += [self.total_loss, self.losses]
|
|
if self.tp.agent.middleware_type == conf.MiddlewareTypes.LSTM:
|
|
fetches.append(self.middleware_embedder.state_out)
|
|
additional_fetches_start_idx = len(fetches)
|
|
fetches += additional_fetches
|
|
|
|
# feed the lstm state if necessary
|
|
if self.tp.agent.middleware_type == conf.MiddlewareTypes.LSTM:
|
|
# we can't always assume that we are starting from scratch here can we?
|
|
feed_dict[self.middleware_embedder.c_in] = self.middleware_embedder.c_init
|
|
feed_dict[self.middleware_embedder.h_in] = self.middleware_embedder.h_init
|
|
|
|
fetches += [self.merged]
|
|
|
|
# get grads
|
|
result = self.tp.sess.run(fetches, feed_dict=feed_dict)
|
|
if hasattr(self, 'train_writer') and self.train_writer is not None:
|
|
self.train_writer.add_summary(result[-1], self.tp.current_episode)
|
|
|
|
# extract the fetches
|
|
norm_unclipped_grads, grads, total_loss, losses = result[:4]
|
|
if self.tp.agent.middleware_type == conf.MiddlewareTypes.LSTM:
|
|
(self.curr_rnn_c_in, self.curr_rnn_h_in) = result[4]
|
|
fetched_tensors = []
|
|
if len(additional_fetches) > 0:
|
|
fetched_tensors = result[additional_fetches_start_idx:additional_fetches_start_idx +
|
|
len(additional_fetches)]
|
|
|
|
# accumulate the gradients
|
|
for idx, grad in enumerate(grads):
|
|
self.accumulated_gradients[idx] += grad
|
|
|
|
return total_loss, losses, norm_unclipped_grads, fetched_tensors
|
|
|
|
else:
|
|
self.optimizer.minimize(session=self.tp.sess, feed_dict=feed_dict)
|
|
|
|
return [0]
|
|
|
|
def apply_and_reset_gradients(self, gradients, scaler=1.):
|
|
"""
|
|
Applies the given gradients to the network weights and resets the accumulation placeholder
|
|
:param gradients: The gradients to use for the update
|
|
:param scaler: A scaling factor that allows rescaling the gradients before applying them
|
|
"""
|
|
self.apply_gradients(gradients, scaler)
|
|
self.reset_accumulated_gradients()
|
|
|
|
def apply_gradients(self, gradients, scaler=1.):
|
|
"""
|
|
Applies the given gradients to the network weights
|
|
:param gradients: The gradients to use for the update
|
|
:param scaler: A scaling factor that allows rescaling the gradients before applying them
|
|
"""
|
|
if self.tp.agent.async_training or not self.tp.distributed:
|
|
if hasattr(self, 'global_step') and not self.network_is_local:
|
|
self.tp.sess.run(self.inc_step)
|
|
|
|
if self.optimizer_type != 'LBFGS':
|
|
|
|
# lock barrier
|
|
if hasattr(self, 'lock_counter'):
|
|
self.tp.sess.run(self.lock)
|
|
while self.tp.sess.run(self.lock_counter) % self.tp.num_threads != 0:
|
|
time.sleep(0.00001)
|
|
# rescale the gradients so that they average out with the gradients from the other workers
|
|
scaler /= float(self.tp.num_threads)
|
|
|
|
# apply gradients
|
|
if scaler != 1.:
|
|
for gradient in gradients:
|
|
gradient /= scaler
|
|
feed_dict = dict(zip(self.weights_placeholders, gradients))
|
|
_ = self.tp.sess.run(self.update_weights_from_batch_gradients, feed_dict=feed_dict)
|
|
|
|
# release barrier
|
|
if hasattr(self, 'release_counter'):
|
|
self.tp.sess.run(self.release)
|
|
while self.tp.sess.run(self.release_counter) % self.tp.num_threads != 0:
|
|
time.sleep(0.00001)
|
|
|
|
def _feed_dict(self, inputs):
|
|
feed_dict = {}
|
|
for input_name, input_value in inputs.items():
|
|
if isinstance(input_name, str):
|
|
if input_name not in self.inputs:
|
|
raise ValueError((
|
|
'input name {input_name} was provided to create a feed '
|
|
'dictionary, but there is no placeholder with that name. '
|
|
'placeholder names available include: {placeholder_names}'
|
|
).format(
|
|
input_name=input_name,
|
|
placeholder_names=', '.join(self.inputs.keys())
|
|
))
|
|
|
|
feed_dict[self.inputs[input_name]] = input_value
|
|
elif isinstance(input_name, tf.Tensor) and input_name.op.type == 'Placeholder':
|
|
feed_dict[input_name] = input_value
|
|
else:
|
|
raise ValueError((
|
|
'input dictionary expects strings or placeholders as keys, '
|
|
'but found key {key} of type {type}'
|
|
).format(
|
|
key=input_name,
|
|
type=type(input_name),
|
|
))
|
|
|
|
return feed_dict
|
|
|
|
def predict(self, inputs, outputs=None, squeeze_output=True):
|
|
"""
|
|
Run a forward pass of the network using the given input
|
|
:param inputs: The input for the network
|
|
:param outputs: The output for the network, defaults to self.outputs
|
|
:param squeeze_output: call squeeze_list on output
|
|
:return: The network output
|
|
|
|
WARNING: must only call once per state since each call is assumed by LSTM to be a new time step.
|
|
"""
|
|
feed_dict = self._feed_dict(inputs)
|
|
if outputs is None:
|
|
outputs = self.outputs
|
|
|
|
if self.tp.agent.middleware_type == conf.MiddlewareTypes.LSTM:
|
|
feed_dict[self.middleware_embedder.c_in] = self.curr_rnn_c_in
|
|
feed_dict[self.middleware_embedder.h_in] = self.curr_rnn_h_in
|
|
|
|
output, (self.curr_rnn_c_in, self.curr_rnn_h_in) = self.tp.sess.run([outputs, self.middleware_embedder.state_out], feed_dict=feed_dict)
|
|
else:
|
|
output = self.tp.sess.run(outputs, feed_dict)
|
|
|
|
if squeeze_output:
|
|
output = utils.squeeze_list(output)
|
|
|
|
return output
|
|
|
|
def get_weights(self):
|
|
"""
|
|
:return: a list of tensors containing the network weights for each layer
|
|
"""
|
|
return self.trainable_weights
|
|
|
|
def set_weights(self, weights, new_rate=1.0):
|
|
"""
|
|
Sets the network weights from the given list of weights tensors
|
|
"""
|
|
feed_dict = {}
|
|
old_weights, new_weights = self.tp.sess.run([self.get_weights(), weights])
|
|
for placeholder_idx, new_weight in enumerate(new_weights):
|
|
feed_dict[self.weights_placeholders[placeholder_idx]]\
|
|
= new_rate * new_weight + (1 - new_rate) * old_weights[placeholder_idx]
|
|
self.tp.sess.run(self.update_weights_from_list, feed_dict)
|
|
|
|
def write_graph_to_logdir(self, summary_dir):
|
|
"""
|
|
Writes the tensorflow graph to the logdir for tensorboard visualization
|
|
:param summary_dir: the path to the logdir
|
|
"""
|
|
summary_writer = tf.summary.FileWriter(summary_dir)
|
|
summary_writer.add_graph(self.sess.graph)
|
|
|
|
def get_variable_value(self, variable):
|
|
"""
|
|
Get the value of a variable from the graph
|
|
:param variable: the variable
|
|
:return: the value of the variable
|
|
"""
|
|
return self.sess.run(variable)
|
|
|
|
def set_variable_value(self, assign_op, value, placeholder=None):
|
|
"""
|
|
Updates the value of a variable.
|
|
This requires having an assign operation for the variable, and a placeholder which will provide the value
|
|
:param assign_op: an assign operation for the variable
|
|
:param value: a value to set the variable to
|
|
:param placeholder: a placeholder to hold the given value for injecting it into the variable
|
|
"""
|
|
self.sess.run(assign_op, feed_dict={placeholder: value})
|