mirror of https://github.com/gryf/coach.git synced 2025-12-17 11:10:20 +01:00
coach/architectures/tensorflow_components/architecture.py
Roman Dobosz 1b095aeeca Cleanup imports.
Until now, most of the modules imported all of another module's objects
(variables, classes, functions, and even its own imports) into their own
namespace, which could cause (and in places did cause) unintentional use
of indirectly imported classes or methods.

With this patch, all star imports are replaced with an import of the
top-level module that provides the desired class or function.

In addition, all imports are sorted (where possible) the way PEP 8 [1]
suggests: standard-library imports first, then third-party imports
(numpy, tensorflow, etc.), and finally coach modules, with each group
separated by one blank line.

[1] https://www.python.org/dev/peps/pep-0008/#imports
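For illustration, a hypothetical snippet before and after this change
(the assignment is made up; only the configurations module and
conf.MiddlewareTypes come from the real code):

    # before: a star import pulls everything into the local namespace
    from configurations import *
    middleware = MiddlewareTypes.LSTM

    # after: the providing module is imported and referenced explicitly
    import configurations as conf
    middleware = conf.MiddlewareTypes.LSTM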
2018-04-13 09:58:40 +02:00

365 lines
16 KiB
Python

#
# Copyright (c) 2017 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import time

import tensorflow as tf

from architectures import architecture
import configurations as conf
import utils


def variable_summaries(var):
"""Attach a lot of summaries to a Tensor (for TensorBoard visualization)."""
with tf.name_scope('summaries'):
layer_weight_name = '_'.join(var.name.split('/')[-3:])[:-2]
with tf.name_scope(layer_weight_name):
mean = tf.reduce_mean(var)
tf.summary.scalar('mean', mean)
with tf.name_scope('stddev'):
stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
tf.summary.scalar('stddev', stddev)
tf.summary.scalar('max', tf.reduce_max(var))
tf.summary.scalar('min', tf.reduce_min(var))
tf.summary.histogram('histogram', var)
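

# Note: the summaries created above land in the default tf.GraphKeys.SUMMARIES
# collection; TensorFlowArchitecture.__init__ below merges the summaries
# created under its own variable scope for the TensorBoard writer.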
class TensorFlowArchitecture(architecture.Architecture):
    def __init__(self, tuning_parameters, name="", global_network=None, network_is_local=True):
        """
        :param tuning_parameters: The parameters used for running the algorithm
        :type tuning_parameters: Preset
        :param name: The name of the network
        :param global_network: the global network this one synchronizes with, if any
        :param network_is_local: whether this network is a local worker copy or the global one
        """
architecture.Architecture.__init__(self, tuning_parameters, name)
self.middleware_embedder = None
self.network_is_local = network_is_local
assert tuning_parameters.agent.tensorflow_support, 'TensorFlow is not supported for this agent'
self.sess = tuning_parameters.sess
self.inputs = {}
self.outputs = []
self.targets = []
self.losses = []
self.total_loss = None
self.trainable_weights = []
self.weights_placeholders = []
self.curr_rnn_c_in = None
self.curr_rnn_h_in = None
self.gradients_wrt_inputs = []
self.train_writer = None
self.optimizer_type = self.tp.agent.optimizer_type
if self.tp.seed is not None:
tf.set_random_seed(self.tp.seed)
with tf.variable_scope(self.name, initializer=tf.contrib.layers.xavier_initializer()):
self.global_step = tf.train.get_or_create_global_step()
# build the network
self.get_model(tuning_parameters)
# model weights
self.trainable_weights = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=self.name)
# locks for synchronous training
if self.tp.distributed and not self.tp.agent.async_training and not self.network_is_local:
self.lock_counter = tf.get_variable("lock_counter", [], tf.int32,
initializer=tf.constant_initializer(0, dtype=tf.int32),
trainable=False)
self.lock = self.lock_counter.assign_add(1, use_locking=True)
self.lock_init = self.lock_counter.assign(0)
self.release_counter = tf.get_variable("release_counter", [], tf.int32,
initializer=tf.constant_initializer(0, dtype=tf.int32),
trainable=False)
self.release = self.release_counter.assign_add(1, use_locking=True)
self.release_init = self.release_counter.assign(0)
# local network does the optimization so we need to create all the ops we are going to use to optimize
for idx, var in enumerate(self.trainable_weights):
placeholder = tf.placeholder(tf.float32, shape=var.get_shape(), name=str(idx) + '_holder')
self.weights_placeholders.append(placeholder)
if self.tp.visualization.tensorboard:
variable_summaries(var)
self.update_weights_from_list = [weights.assign(holder) for holder, weights in
zip(self.weights_placeholders, self.trainable_weights)]
# gradients ops
self.tensor_gradients = tf.gradients(self.total_loss, self.trainable_weights)
self.gradients_norm = tf.global_norm(self.tensor_gradients)
if self.tp.clip_gradients is not None and self.tp.clip_gradients != 0:
self.clipped_grads, self.grad_norms = tf.clip_by_global_norm(self.tensor_gradients,
tuning_parameters.clip_gradients)
# gradients of the outputs w.r.t. the inputs
# at the moment, this is only used by ddpg
if len(self.outputs) == 1:
self.gradients_wrt_inputs = {name: tf.gradients(self.outputs[0], input_ph) for name, input_ph in self.inputs.items()}
self.gradients_weights_ph = tf.placeholder('float32', self.outputs[0].shape, 'output_gradient_weights')
self.weighted_gradients = tf.gradients(self.outputs[0], self.trainable_weights, self.gradients_weights_ph)
# L2 regularization
if self.tp.agent.l2_regularization != 0:
self.l2_regularization = [tf.add_n([tf.nn.l2_loss(v) for v in self.trainable_weights])
* self.tp.agent.l2_regularization]
tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, self.l2_regularization)
self.inc_step = self.global_step.assign_add(1)
# defining the optimization process (for LBFGS we have less control over the optimizer)
if self.optimizer_type != 'LBFGS':
# no global network, this is a plain simple centralized training
self.update_weights_from_batch_gradients = self.optimizer.apply_gradients(
zip(self.weights_placeholders, self.trainable_weights), global_step=self.global_step)
current_scope_summaries = tf.get_collection(tf.GraphKeys.SUMMARIES,
scope=tf.contrib.framework.get_name_scope())
self.merged = tf.summary.merge(current_scope_summaries)
# initialize or restore model
if not self.tp.distributed:
# Merge all the summaries
self.init_op = tf.global_variables_initializer()
if self.sess:
if self.tp.visualization.tensorboard:
# Write the merged summaries to the current experiment directory
self.train_writer = tf.summary.FileWriter(self.tp.experiment_path + '/tensorboard',
self.sess.graph)
self.sess.run(self.init_op)
self.accumulated_gradients = None

    def reset_accumulated_gradients(self):
        """
        Reset the gradient accumulation placeholders
        """
if self.accumulated_gradients is None:
self.accumulated_gradients = self.tp.sess.run(self.trainable_weights)
for ix, grad in enumerate(self.accumulated_gradients):
self.accumulated_gradients[ix] = grad * 0

    def accumulate_gradients(self, inputs, targets, additional_fetches=None):
"""
Runs a forward pass & backward pass, clips gradients if needed and accumulates them into the accumulation
placeholders
:param additional_fetches: Optional tensors to fetch during gradients calculation
:param inputs: The input batch for the network
:param targets: The targets corresponding to the input batch
:return: A list containing the total loss and the individual network heads losses
"""
if self.accumulated_gradients is None:
self.reset_accumulated_gradients()
# feed inputs
if additional_fetches is None:
additional_fetches = []
feed_dict = self._feed_dict(inputs)
# feed targets
targets = utils.force_list(targets)
for placeholder_idx, target in enumerate(targets):
feed_dict[self.targets[placeholder_idx]] = target
if self.optimizer_type != 'LBFGS':
# set the fetches
fetches = [self.gradients_norm]
if self.tp.clip_gradients:
fetches.append(self.clipped_grads)
else:
fetches.append(self.tensor_gradients)
fetches += [self.total_loss, self.losses]
if self.tp.agent.middleware_type == conf.MiddlewareTypes.LSTM:
fetches.append(self.middleware_embedder.state_out)
additional_fetches_start_idx = len(fetches)
fetches += additional_fetches
# feed the lstm state if necessary
if self.tp.agent.middleware_type == conf.MiddlewareTypes.LSTM:
# we can't always assume that we are starting from scratch here can we?
feed_dict[self.middleware_embedder.c_in] = self.middleware_embedder.c_init
feed_dict[self.middleware_embedder.h_in] = self.middleware_embedder.h_init
fetches += [self.merged]
# get grads
result = self.tp.sess.run(fetches, feed_dict=feed_dict)
if hasattr(self, 'train_writer') and self.train_writer is not None:
self.train_writer.add_summary(result[-1], self.tp.current_episode)
# extract the fetches
norm_unclipped_grads, grads, total_loss, losses = result[:4]
if self.tp.agent.middleware_type == conf.MiddlewareTypes.LSTM:
(self.curr_rnn_c_in, self.curr_rnn_h_in) = result[4]
fetched_tensors = []
if len(additional_fetches) > 0:
fetched_tensors = result[additional_fetches_start_idx:additional_fetches_start_idx +
len(additional_fetches)]
# accumulate the gradients
for idx, grad in enumerate(grads):
self.accumulated_gradients[idx] += grad
return total_loss, losses, norm_unclipped_grads, fetched_tensors
else:
self.optimizer.minimize(session=self.tp.sess, feed_dict=feed_dict)
return [0]
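        # A typical caller-side update step (hypothetical names, for
        # illustration only):
        #     total_loss, losses, grad_norm, _ = network.accumulate_gradients(batch_inputs, batch_targets)
        #     network.apply_and_reset_gradients(network.accumulated_gradients)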

    def apply_and_reset_gradients(self, gradients, scaler=1.):
"""
Applies the given gradients to the network weights and resets the accumulation placeholder
:param gradients: The gradients to use for the update
:param scaler: A scaling factor that allows rescaling the gradients before applying them
"""
self.apply_gradients(gradients, scaler)
self.reset_accumulated_gradients()

    def apply_gradients(self, gradients, scaler=1.):
"""
Applies the given gradients to the network weights
:param gradients: The gradients to use for the update
:param scaler: A scaling factor that allows rescaling the gradients before applying them
"""
if self.tp.agent.async_training or not self.tp.distributed:
if hasattr(self, 'global_step') and not self.network_is_local:
self.tp.sess.run(self.inc_step)
if self.optimizer_type != 'LBFGS':
# lock barrier
if hasattr(self, 'lock_counter'):
self.tp.sess.run(self.lock)
while self.tp.sess.run(self.lock_counter) % self.tp.num_threads != 0:
time.sleep(0.00001)
# rescale the gradients so that they average out with the gradients from the other workers
scaler /= float(self.tp.num_threads)
# apply gradients
if scaler != 1.:
for gradient in gradients:
gradient /= scaler
feed_dict = dict(zip(self.weights_placeholders, gradients))
_ = self.tp.sess.run(self.update_weights_from_batch_gradients, feed_dict=feed_dict)
# release barrier
if hasattr(self, 'release_counter'):
self.tp.sess.run(self.release)
while self.tp.sess.run(self.release_counter) % self.tp.num_threads != 0:
time.sleep(0.00001)
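        # Note on the barriers above: in synchronous distributed training each
        # worker increments lock_counter and spins until all
        # self.tp.num_threads workers have arrived, applies its rescaled
        # gradients, and then meets the other workers again at release_counter
        # before continuing.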

    def _feed_dict(self, inputs):
feed_dict = {}
for input_name, input_value in inputs.items():
if isinstance(input_name, str):
if input_name not in self.inputs:
raise ValueError((
'input name {input_name} was provided to create a feed '
'dictionary, but there is no placeholder with that name. '
'placeholder names available include: {placeholder_names}'
).format(
input_name=input_name,
placeholder_names=', '.join(self.inputs.keys())
))
feed_dict[self.inputs[input_name]] = input_value
elif isinstance(input_name, tf.Tensor) and input_name.op.type == 'Placeholder':
feed_dict[input_name] = input_value
else:
raise ValueError((
'input dictionary expects strings or placeholders as keys, '
'but found key {key} of type {type}'
).format(
key=input_name,
type=type(input_name),
))
return feed_dict

    def predict(self, inputs, outputs=None, squeeze_output=True):
        """
        Run a forward pass of the network using the given input
        :param inputs: The input for the network
        :param outputs: The outputs to fetch, defaults to self.outputs
        :param squeeze_output: whether to call squeeze_list on the output
        :return: The network output
        WARNING: must only be called once per state, since each call is assumed
                 by the LSTM middleware to be a new time step.
        """
feed_dict = self._feed_dict(inputs)
if outputs is None:
outputs = self.outputs
if self.tp.agent.middleware_type == conf.MiddlewareTypes.LSTM:
feed_dict[self.middleware_embedder.c_in] = self.curr_rnn_c_in
feed_dict[self.middleware_embedder.h_in] = self.curr_rnn_h_in
output, (self.curr_rnn_c_in, self.curr_rnn_h_in) = self.tp.sess.run([outputs, self.middleware_embedder.state_out], feed_dict=feed_dict)
else:
output = self.tp.sess.run(outputs, feed_dict)
if squeeze_output:
output = utils.squeeze_list(output)
return output
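        # Note: with an LSTM middleware this call also advances the recurrent
        # state (curr_rnn_c_in / curr_rnn_h_in), which is why the docstring
        # warns against calling it more than once per state.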

    def get_weights(self):
"""
:return: a list of tensors containing the network weights for each layer
"""
return self.trainable_weights

    def set_weights(self, weights, new_rate=1.0):
        """
        Sets the network weights from the given list of weight tensors
        :param weights: the list of weight tensors to copy from
        :param new_rate: the rate at which the new weights are mixed into the
                         current ones (1.0 replaces them entirely)
        """
feed_dict = {}
old_weights, new_weights = self.tp.sess.run([self.get_weights(), weights])
for placeholder_idx, new_weight in enumerate(new_weights):
feed_dict[self.weights_placeholders[placeholder_idx]]\
= new_rate * new_weight + (1 - new_rate) * old_weights[placeholder_idx]
self.tp.sess.run(self.update_weights_from_list, feed_dict)
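        # Note: new_rate performs a soft (Polyak-style) mix,
        # w = new_rate * w_new + (1 - new_rate) * w_old, so the default
        # new_rate=1.0 simply copies the given weights verbatim.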

    def write_graph_to_logdir(self, summary_dir):
"""
Writes the tensorflow graph to the logdir for tensorboard visualization
:param summary_dir: the path to the logdir
"""
summary_writer = tf.summary.FileWriter(summary_dir)
summary_writer.add_graph(self.sess.graph)

    def get_variable_value(self, variable):
"""
Get the value of a variable from the graph
:param variable: the variable
:return: the value of the variable
"""
return self.sess.run(variable)

    def set_variable_value(self, assign_op, value, placeholder=None):
"""
Updates the value of a variable.
This requires having an assign operation for the variable, and a placeholder which will provide the value
:param assign_op: an assign operation for the variable
:param value: a value to set the variable to
:param placeholder: a placeholder to hold the given value for injecting it into the variable
"""
self.sess.run(assign_op, feed_dict={placeholder: value})
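        # Example (hypothetical names, sketching the expected calling
        # convention):
        #     value_ph = tf.placeholder(tf.float32, shape=())
        #     assign_op = some_variable.assign(value_ph)
        #     network.set_variable_value(assign_op, 0.5, placeholder=value_ph)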