
coach v0.8.0

Gal Leibovich
2017-10-19 13:10:15 +03:00
parent 7f77813a39
commit 1d4c3455e7
123 changed files with 10996 additions and 203 deletions

architectures/__init__.py Normal file

@@ -0,0 +1,31 @@
#
# Copyright (c) 2017 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from architectures.architecture import *
from logger import failed_imports
try:
from architectures.tensorflow_components.general_network import *
from architectures.tensorflow_components.architecture import *
except ImportError:
failed_imports.append("TensorFlow")
try:
from architectures.neon_components.general_network import *
from architectures.neon_components.architecture import *
except ImportError:
failed_imports.append("Neon")
from architectures.network_wrapper import *
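The try/except gating above lets Coach keep running when a backend is missing and report the failure later; a minimal standalone sketch of the same pattern (illustrative only, not Coach's API):

failed_imports = []
try:
    import tensorflow  # noqa: F401
except ImportError:
    failed_imports.append("TensorFlow")
try:
    import ngraph  # noqa: F401  (the Neon backend)
except ImportError:
    failed_imports.append("Neon")
if failed_imports:
    print("Backends unavailable: {}".format(", ".join(failed_imports)))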

architectures/architecture.py Normal file

@@ -0,0 +1,70 @@
#
# Copyright (c) 2017 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from configurations import Preset
class Architecture:
def __init__(self, tuning_parameters, name=""):
"""
        :param tuning_parameters: A Preset class instance with all the running parameters
        :type tuning_parameters: Preset
        :param name: The name of the network
        :type name: string
"""
self.batch_size = tuning_parameters.batch_size
self.input_depth = tuning_parameters.env.observation_stack_size
self.input_height = tuning_parameters.env.desired_observation_height
self.input_width = tuning_parameters.env.desired_observation_width
self.num_actions = tuning_parameters.env.action_space_size
self.measurements_size = tuning_parameters.env.measurements_size \
if tuning_parameters.env.measurements_size else 0
self.learning_rate = tuning_parameters.learning_rate
self.optimizer = None
self.name = name
self.tp = tuning_parameters
def get_model(self, tuning_parameters):
"""
:param tuning_parameters: A Preset class instance with all the running parameters
:type tuning_parameters: Preset
:return: A model
"""
pass
def predict(self, inputs):
pass
def train_on_batch(self, inputs, targets):
pass
def get_weights(self):
pass
def set_weights(self, weights, rate=1.0):
pass
def reset_accumulated_gradients(self):
pass
def accumulate_gradients(self, inputs, targets):
pass
def apply_and_reset_gradients(self, gradients):
pass
def apply_gradients(self, gradients):
pass
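Architecture only defines the backend contract; the TensorFlow and Neon subclasses below fill in every method. A toy instantiation, using types.SimpleNamespace to stand in for the Preset fields that __init__ reads (the values here are assumptions for illustration):

from types import SimpleNamespace

env = SimpleNamespace(observation_stack_size=4,
                      desired_observation_height=84,
                      desired_observation_width=84,
                      action_space_size=6,
                      measurements_size=None)
tp = SimpleNamespace(batch_size=32, env=env, learning_rate=0.00025)

net = Architecture(tp, name="base")
print(net.num_actions, net.input_height, net.measurements_size)  # 6 84 0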

architectures/neon_components/architecture.py Normal file

@@ -0,0 +1,129 @@
#
# Copyright (c) 2017 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import sys
import copy
from ngraph.frontends.neon import *
import ngraph as ng
from architectures.architecture import *
import numpy as np
from utils import *
class NeonArchitecture(Architecture):
def __init__(self, tuning_parameters, name="", global_network=None, network_is_local=True):
Architecture.__init__(self, tuning_parameters, name)
assert tuning_parameters.agent.neon_support, 'Neon is not supported for this agent'
self.clip_error = tuning_parameters.clip_gradients
self.total_loss = None
self.epoch = 0
self.inputs = []
self.outputs = []
self.targets = []
self.losses = []
self.transformer = tuning_parameters.sess
self.network = self.get_model(tuning_parameters)
self.accumulated_gradients = []
# training and inference ops
train_output = ng.sequential([
self.optimizer(self.total_loss),
self.total_loss
])
placeholders = self.inputs + self.targets
self.train_op = self.transformer.add_computation(
ng.computation(
train_output, *placeholders
)
)
self.predict_op = self.transformer.add_computation(
ng.computation(
self.outputs, self.inputs[0]
)
)
# update weights from array op
self.weights = [ng.placeholder(w.axes) for w in self.total_loss.variables()]
self.set_weights_ops = []
for target_variable, variable in zip(self.total_loss.variables(), self.weights):
self.set_weights_ops.append(self.transformer.add_computation(
ng.computation(
ng.assign(target_variable, variable), variable
)
))
# get weights op
self.get_variables = self.transformer.add_computation(
ng.computation(
self.total_loss.variables()
)
)
def predict(self, inputs):
batch_size = inputs.shape[0]
# move batch axis to the end
inputs = inputs.swapaxes(0, -1)
prediction = self.predict_op(inputs) # TODO: problem with multiple inputs
        if type(prediction) != tuple:
            prediction = (prediction,)
# process all the outputs from the network
output = []
for p in prediction:
output.append(p.transpose()[:batch_size].copy())
# if there is only one output then we don't need a list
if len(output) == 1:
output = output[0]
return output
def train_on_batch(self, inputs, targets):
loss = self.accumulate_gradients(inputs, targets)
self.apply_and_reset_gradients(self.accumulated_gradients)
return loss
def get_weights(self):
return self.get_variables()
def set_weights(self, weights, rate=1.0):
if rate != 1:
current_weights = self.get_weights()
updated_weights = [(1 - rate) * t + rate * o for t, o in zip(current_weights, weights)]
else:
updated_weights = weights
for update_function, variable in zip(self.set_weights_ops, updated_weights):
update_function(variable)
def accumulate_gradients(self, inputs, targets):
        # Neon doesn't currently allow separating the grads calculation and grad apply operations,
        # so this feature is not currently available. Instead, we do a full training iteration.
inputs = force_list(inputs)
targets = force_list(targets)
for idx, input in enumerate(inputs):
inputs[idx] = input.swapaxes(0, -1)
for idx, target in enumerate(targets):
targets[idx] = np.rollaxis(target, 0, len(target.shape))
all_inputs = inputs + targets
loss = np.mean(self.train_op(*all_inputs))
return [loss]
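set_weights above blends weights as updated = (1 - rate) * current + rate * other, so rate=1.0 copies the other network exactly and smaller rates give a soft (Polyak-style) update; a quick numpy check of the blend (standalone, not Coach code):

import numpy as np

rate = 0.1
current = np.array([1.0, 2.0, 4.0])
other = np.array([3.0, 2.0, 0.0])
blended = (1 - rate) * current + rate * other
print(blended)  # [1.2 2.  3.6]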

architectures/neon_components/embedders.py Normal file

@@ -0,0 +1,88 @@
#
# Copyright (c) 2017 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import ngraph.frontends.neon as neon
import ngraph as ng
from ngraph.util.names import name_scope
class InputEmbedder:
def __init__(self, input_size, batch_size=None, activation_function=neon.Rectlin(), name="embedder"):
self.name = name
self.input_size = input_size
self.batch_size = batch_size
self.activation_function = activation_function
self.weights_init = neon.GlorotInit()
self.biases_init = neon.ConstantInit()
self.input = None
self.output = None
def __call__(self, prev_input_placeholder=None):
with name_scope(self.get_name()):
# create the input axes
axes = []
if len(self.input_size) == 2:
axis_names = ['H', 'W']
else:
axis_names = ['C', 'H', 'W']
for axis_size, axis_name in zip(self.input_size, axis_names):
axes.append(ng.make_axis(axis_size, name=axis_name))
batch_axis_full = ng.make_axis(self.batch_size, name='N')
input_axes = ng.make_axes(axes)
if prev_input_placeholder is None:
self.input = ng.placeholder(input_axes + [batch_axis_full])
else:
self.input = prev_input_placeholder
self._build_module()
return self.input, self.output(self.input)
def _build_module(self):
pass
def get_name(self):
return self.name
class ImageEmbedder(InputEmbedder):
def __init__(self, input_size, batch_size=None, input_rescaler=255.0, activation_function=neon.Rectlin(), name="embedder"):
InputEmbedder.__init__(self, input_size, batch_size, activation_function, name)
self.input_rescaler = input_rescaler
def _build_module(self):
# image observation
self.output = neon.Sequential([
neon.Preprocess(functor=lambda x: x / self.input_rescaler),
neon.Convolution((8, 8, 32), strides=4, activation=self.activation_function,
filter_init=self.weights_init, bias_init=self.biases_init),
neon.Convolution((4, 4, 64), strides=2, activation=self.activation_function,
filter_init=self.weights_init, bias_init=self.biases_init),
neon.Convolution((3, 3, 64), strides=1, activation=self.activation_function,
filter_init=self.weights_init, bias_init=self.biases_init)
])
class VectorEmbedder(InputEmbedder):
def __init__(self, input_size, batch_size=None, activation_function=neon.Rectlin(), name="embedder"):
InputEmbedder.__init__(self, input_size, batch_size, activation_function, name)
def _build_module(self):
# vector observation
self.output = neon.Sequential([
neon.Affine(nout=256, activation=self.activation_function,
weight_init=self.weights_init, bias_init=self.biases_init)
])

architectures/neon_components/general_network.py Normal file

@@ -0,0 +1,191 @@
#
# Copyright (c) 2017 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from architectures.neon_components.embedders import *
from architectures.neon_components.heads import *
from architectures.neon_components.middleware import *
from architectures.neon_components.architecture import *
from configurations import InputTypes, OutputTypes, MiddlewareTypes
class GeneralNeonNetwork(NeonArchitecture):
def __init__(self, tuning_parameters, name="", global_network=None, network_is_local=True):
self.global_network = global_network
self.network_is_local = network_is_local
self.num_heads_per_network = 1 if tuning_parameters.agent.use_separate_networks_per_head else \
len(tuning_parameters.agent.output_types)
self.num_networks = 1 if not tuning_parameters.agent.use_separate_networks_per_head else \
len(tuning_parameters.agent.output_types)
self.input_embedders = []
self.output_heads = []
self.activation_function = self.get_activation_function(
tuning_parameters.agent.hidden_layers_activation_function)
NeonArchitecture.__init__(self, tuning_parameters, name, global_network, network_is_local)
def get_activation_function(self, activation_function_string):
activation_functions = {
'relu': neon.Rectlin(),
'tanh': neon.Tanh(),
'sigmoid': neon.Logistic(),
'elu': neon.Explin(),
'none': None
}
assert activation_function_string in activation_functions.keys(), \
"Activation function must be one of the following {}".format(activation_functions.keys())
return activation_functions[activation_function_string]
def get_input_embedder(self, embedder_type):
# the observation can be either an image or a vector
def get_observation_embedding(with_timestep=False):
if self.input_height > 1:
return ImageEmbedder((self.input_depth, self.input_height, self.input_width), self.batch_size,
name="observation")
else:
return VectorEmbedder((self.input_depth, self.input_width + int(with_timestep)), self.batch_size,
name="observation")
input_mapping = {
InputTypes.Observation: get_observation_embedding(),
InputTypes.Measurements: VectorEmbedder(self.measurements_size, self.batch_size, name="measurements"),
InputTypes.GoalVector: VectorEmbedder(self.measurements_size, self.batch_size, name="goal_vector"),
InputTypes.Action: VectorEmbedder((self.num_actions,), self.batch_size, name="action"),
InputTypes.TimedObservation: get_observation_embedding(with_timestep=True),
}
return input_mapping[embedder_type]
def get_middleware_embedder(self, middleware_type):
return {MiddlewareTypes.LSTM: None, # LSTM over Neon is currently not supported in Coach
MiddlewareTypes.FC: FC_Embedder}.get(middleware_type)(self.activation_function)
def get_output_head(self, head_type, head_idx, loss_weight=1.):
output_mapping = {
OutputTypes.Q: QHead,
OutputTypes.DuelingQ: DuelingQHead,
OutputTypes.V: None, # Policy Optimization algorithms over Neon are currently not supported in Coach
OutputTypes.Pi: None, # Policy Optimization algorithms over Neon are currently not supported in Coach
OutputTypes.MeasurementsPrediction: None, # DFP over Neon is currently not supported in Coach
OutputTypes.DNDQ: None, # NEC over Neon is currently not supported in Coach
OutputTypes.NAF: None, # NAF over Neon is currently not supported in Coach
OutputTypes.PPO: None, # PPO over Neon is currently not supported in Coach
OutputTypes.PPO_V: None # PPO over Neon is currently not supported in Coach
}
return output_mapping[head_type](self.tp, head_idx, loss_weight, self.network_is_local)
def get_model(self, tuning_parameters):
"""
        :param tuning_parameters: A Preset class instance with all the running parameters
:type tuning_parameters: Preset
:return: A model
"""
assert len(self.tp.agent.input_types) > 0, "At least one input type should be defined"
assert len(self.tp.agent.output_types) > 0, "At least one output type should be defined"
assert self.tp.agent.middleware_type is not None, "Exactly one middleware type should be defined"
assert len(self.tp.agent.loss_weights) > 0, "At least one loss weight should be defined"
assert len(self.tp.agent.output_types) == len(self.tp.agent.loss_weights), \
"Number of loss weights should match the number of output types"
local_network_in_distributed_training = self.global_network is not None and self.network_is_local
tuning_parameters.activation_function = self.activation_function
done_creating_input_placeholders = False
for network_idx in range(self.num_networks):
with name_scope('network_{}'.format(network_idx)):
####################
# Input Embeddings #
####################
state_embedding = []
for idx, input_type in enumerate(self.tp.agent.input_types):
# get the class of the input embedder
self.input_embedders.append(self.get_input_embedder(input_type))
                    # in the case where each head uses a different network, we still reuse the input placeholders
prev_network_input_placeholder = self.inputs[idx] if done_creating_input_placeholders else None
# create the input embedder instance and store the input placeholder and the embedding
input_placeholder, embedding = self.input_embedders[-1](prev_network_input_placeholder)
if len(self.inputs) < len(self.tp.agent.input_types):
self.inputs.append(input_placeholder)
state_embedding.append(embedding)
done_creating_input_placeholders = True
##############
# Middleware #
##############
state_embedding = ng.concat_along_axis(state_embedding, state_embedding[0].axes[0]) \
if len(state_embedding) > 1 else state_embedding[0]
self.middleware_embedder = self.get_middleware_embedder(self.tp.agent.middleware_type)
_, self.state_embedding = self.middleware_embedder(state_embedding)
################
# Output Heads #
################
for head_idx in range(self.num_heads_per_network):
for head_copy_idx in range(self.tp.agent.num_output_head_copies):
if self.tp.agent.use_separate_networks_per_head:
                            # if we use separate networks per head, then the head type corresponds to the network idx
head_type_idx = network_idx
else:
# if we use a single network with multiple heads, then the head type is the current head idx
head_type_idx = head_idx
self.output_heads.append(self.get_output_head(self.tp.agent.output_types[head_type_idx],
head_copy_idx,
self.tp.agent.loss_weights[head_type_idx]))
if self.network_is_local:
output, target_placeholder, input_placeholder = self.output_heads[-1](self.state_embedding)
self.targets.extend(target_placeholder)
else:
output, input_placeholder = self.output_heads[-1](self.state_embedding)
self.outputs.extend(output)
self.inputs.extend(input_placeholder)
# Losses
self.losses = []
for output_head in self.output_heads:
self.losses += output_head.loss
self.total_loss = sum(self.losses)
# Learning rate
if self.tp.learning_rate_decay_rate != 0:
raise Exception("learning rate decay is not supported in neon")
# Optimizer
if local_network_in_distributed_training and \
hasattr(self.tp.agent, "shared_optimizer") and self.tp.agent.shared_optimizer:
# distributed training and this is the local network instantiation
self.optimizer = self.global_network.optimizer
else:
if tuning_parameters.agent.optimizer_type == 'Adam':
self.optimizer = neon.Adam(
learning_rate=tuning_parameters.learning_rate,
gradient_clip_norm=tuning_parameters.clip_gradients
)
elif tuning_parameters.agent.optimizer_type == 'RMSProp':
self.optimizer = neon.RMSProp(
learning_rate=tuning_parameters.learning_rate,
gradient_clip_norm=tuning_parameters.clip_gradients,
decay_rate=0.9,
epsilon=0.01
)
elif tuning_parameters.agent.optimizer_type == 'LBFGS':
raise Exception("LBFGS optimizer is not supported in neon")
else:
raise Exception("{} is not a valid optimizer type".format(tuning_parameters.agent.optimizer_type))

architectures/neon_components/heads.py Normal file

@@ -0,0 +1,194 @@
#
# Copyright (c) 2017 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import ngraph as ng
from ngraph.util.names import name_scope
import ngraph.frontends.neon as neon
import numpy as np
from utils import force_list
from architectures.neon_components.losses import *
class Head:
def __init__(self, tuning_parameters, head_idx=0, loss_weight=1., is_local=True):
self.head_idx = head_idx
self.name = "head"
self.output = []
self.loss = []
self.loss_type = []
self.regularizations = []
self.loss_weight = force_list(loss_weight)
self.weights_init = neon.GlorotInit()
self.biases_init = neon.ConstantInit()
self.target = []
self.input = []
self.is_local = is_local
self.batch_size = tuning_parameters.batch_size
def __call__(self, input_layer):
"""
Wrapper for building the module graph including scoping and loss creation
:param input_layer: the input to the graph
:return: the output of the last layer and the target placeholder
"""
with name_scope(self.get_name()):
self._build_module(input_layer)
self.output = force_list(self.output)
self.target = force_list(self.target)
self.input = force_list(self.input)
self.loss_type = force_list(self.loss_type)
self.loss = force_list(self.loss)
self.regularizations = force_list(self.regularizations)
if self.is_local:
self.set_loss()
if self.is_local:
return self.output, self.target, self.input
else:
return self.output, self.input
def _build_module(self, input_layer):
"""
Builds the graph of the module
:param input_layer: the input to the graph
:return: None
"""
pass
def get_name(self):
"""
Get a formatted name for the module
:return: the formatted name
"""
return '{}_{}'.format(self.name, self.head_idx)
def set_loss(self):
"""
Creates a target placeholder and loss function for each loss_type and regularization
:return: None
"""
# add losses and target placeholder
for idx in range(len(self.loss_type)):
# output_axis = ng.make_axis(self.num_actions, name='q_values')
batch_axis_full = ng.make_axis(self.batch_size, name='N')
target = ng.placeholder(ng.make_axes([self.output[0].axes[0], batch_axis_full]))
self.target.append(target)
loss = self.loss_type[idx](self.target[-1], self.output[idx],
weights=self.loss_weight[idx], scope=self.get_name())
self.loss.append(loss)
# add regularizations
for regularization in self.regularizations:
self.loss.append(regularization)
class QHead(Head):
def __init__(self, tuning_parameters, head_idx=0, loss_weight=1., is_local=True):
Head.__init__(self, tuning_parameters, head_idx, loss_weight, is_local)
self.name = 'q_values_head'
self.num_actions = tuning_parameters.env_instance.action_space_size
if tuning_parameters.agent.replace_mse_with_huber_loss:
raise Exception("huber loss is not supported in neon")
else:
self.loss_type = mean_squared_error
def _build_module(self, input_layer):
# Standard Q Network
self.output = neon.Sequential([
neon.Affine(nout=self.num_actions,
weight_init=self.weights_init, bias_init=self.biases_init)
])(input_layer)
class DuelingQHead(QHead):
def __init__(self, tuning_parameters, head_idx=0, loss_weight=1., is_local=True):
QHead.__init__(self, tuning_parameters, head_idx, loss_weight, is_local)
def _build_module(self, input_layer):
# Dueling Network
# state value tower - V
output_axis = ng.make_axis(self.num_actions, name='q_values')
state_value = neon.Sequential([
neon.Affine(nout=256, activation=neon.Rectlin(),
weight_init=self.weights_init, bias_init=self.biases_init),
neon.Affine(nout=1,
weight_init=self.weights_init, bias_init=self.biases_init)
])(input_layer)
# action advantage tower - A
action_advantage_unnormalized = neon.Sequential([
neon.Affine(nout=256, activation=neon.Rectlin(),
weight_init=self.weights_init, bias_init=self.biases_init),
neon.Affine(axes=output_axis,
weight_init=self.weights_init, bias_init=self.biases_init)
])(input_layer)
action_advantage = action_advantage_unnormalized - ng.mean(action_advantage_unnormalized)
repeated_state_value = ng.expand_dims(ng.slice_along_axis(state_value, state_value.axes[0], 0), output_axis, 0)
# merge to state-action value function Q
self.output = repeated_state_value + action_advantage
class MeasurementsPredictionHead(Head):
def __init__(self, tuning_parameters, head_idx=0, loss_weight=1., is_local=True):
Head.__init__(self, tuning_parameters, head_idx, loss_weight, is_local)
self.name = 'future_measurements_head'
self.num_actions = tuning_parameters.env_instance.action_space_size
self.num_measurements = tuning_parameters.env.measurements_size[0] \
if tuning_parameters.env.measurements_size else 0
self.num_prediction_steps = tuning_parameters.agent.num_predicted_steps_ahead
self.multi_step_measurements_size = self.num_measurements * self.num_prediction_steps
if tuning_parameters.agent.replace_mse_with_huber_loss:
raise Exception("huber loss is not supported in neon")
else:
self.loss_type = mean_squared_error
def _build_module(self, input_layer):
# This is almost exactly the same as Dueling Network but we predict the future measurements for each action
        multistep_measurements_size = self.multi_step_measurements_size  # precomputed in __init__
# actions expectation tower (expectation stream) - E
with name_scope("expectation_stream"):
expectation_stream = neon.Sequential([
neon.Affine(nout=256, activation=neon.Rectlin(),
weight_init=self.weights_init, bias_init=self.biases_init),
neon.Affine(nout=multistep_measurements_size,
weight_init=self.weights_init, bias_init=self.biases_init)
])(input_layer)
# action fine differences tower (action stream) - A
with name_scope("action_stream"):
action_stream_unnormalized = neon.Sequential([
neon.Affine(nout=256, activation=neon.Rectlin(),
weight_init=self.weights_init, bias_init=self.biases_init),
neon.Affine(nout=self.num_actions * multistep_measurements_size,
weight_init=self.weights_init, bias_init=self.biases_init),
neon.Reshape((self.num_actions, multistep_measurements_size))
])(input_layer)
action_stream = action_stream_unnormalized - ng.mean(action_stream_unnormalized)
        output_axis = ng.make_axis(self.num_actions, name='q_values')
        repeated_expectation_stream = ng.slice_along_axis(expectation_stream, expectation_stream.axes[0], 0)
        repeated_expectation_stream = ng.expand_dims(repeated_expectation_stream, output_axis, 0)
# merge to future measurements predictions
self.output = repeated_expectation_stream + action_stream
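DuelingQHead above merges its two towers as Q(s, a) = V(s) + (A(s, a) - mean(A)), subtracting the mean advantage to keep the decomposition identifiable; a numpy illustration of that aggregation (standalone, not Coach code):

import numpy as np

state_value = 2.0                        # V(s), one scalar per state
advantages = np.array([1.0, -1.0, 0.0])  # A(s, a), one entry per action
q_values = state_value + (advantages - advantages.mean())
print(q_values)  # [3. 1. 2.]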

architectures/neon_components/losses.py Normal file

@@ -0,0 +1,28 @@
#
# Copyright (c) 2017 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import ngraph as ng
import ngraph.frontends.neon as neon
from ngraph.util.names import name_scope
import numpy as np
def mean_squared_error(targets, outputs, weights=1.0, scope=""):
with name_scope(scope):
# TODO: reduce mean over the action axis
loss = ng.squared_L2(targets - outputs)
weighted_loss = loss * weights
return weighted_loss
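For reference, and assuming ng.squared_L2 reduces by summing squared elements, the unweighted loss above is the squared L2 distance between targets and outputs; a numpy equivalent (standalone):

import numpy as np

targets = np.array([1.0, 0.0, 2.0])
outputs = np.array([0.5, 0.0, 1.0])
loss = np.sum((targets - outputs) ** 2)  # sum of squared differences
print(loss)  # 1.25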

architectures/neon_components/middleware.py Normal file

@@ -0,0 +1,50 @@
#
# Copyright (c) 2017 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import ngraph as ng
import ngraph.frontends.neon as neon
from ngraph.util.names import name_scope
import numpy as np
class MiddlewareEmbedder:
def __init__(self, activation_function=neon.Rectlin(), name="middleware_embedder"):
self.name = name
self.input = None
self.output = None
self.weights_init = neon.GlorotInit()
self.biases_init = neon.ConstantInit()
self.activation_function = activation_function
def __call__(self, input_layer):
with name_scope(self.get_name()):
self.input = input_layer
self._build_module()
return self.input, self.output(self.input)
def _build_module(self):
pass
def get_name(self):
return self.name
class FC_Embedder(MiddlewareEmbedder):
def _build_module(self):
self.output = neon.Sequential([
neon.Affine(nout=512, activation=self.activation_function,
weight_init=self.weights_init, bias_init=self.biases_init)])

architectures/network_wrapper.py Normal file

@@ -0,0 +1,179 @@
#
# Copyright (c) 2017 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from collections import OrderedDict
from configurations import Preset, Frameworks
from logger import *
try:
import tensorflow as tf
from architectures.tensorflow_components.general_network import GeneralTensorFlowNetwork
except ImportError:
failed_imports.append("TensorFlow")
try:
from architectures.neon_components.general_network import GeneralNeonNetwork
except ImportError:
failed_imports.append("Neon")
class NetworkWrapper:
def __init__(self, tuning_parameters, has_target, has_global, name, replicated_device=None, worker_device=None):
"""
:param tuning_parameters:
:type tuning_parameters: Preset
:param has_target:
:param has_global:
:param name:
:param replicated_device:
:param worker_device:
"""
self.tp = tuning_parameters
self.has_target = has_target
self.has_global = has_global
self.name = name
self.sess = tuning_parameters.sess
if self.tp.framework == Frameworks.TensorFlow:
general_network = GeneralTensorFlowNetwork
elif self.tp.framework == Frameworks.Neon:
general_network = GeneralNeonNetwork
else:
raise Exception("{} Framework is not supported".format(Frameworks().to_string(self.tp.framework)))
# Global network - the main network shared between threads
self.global_network = None
if self.has_global:
with tf.device(replicated_device):
self.global_network = general_network(tuning_parameters, '{}/global'.format(name),
network_is_local=False)
# Online network - local copy of the main network used for playing
self.online_network = None
with tf.device(worker_device):
self.online_network = general_network(tuning_parameters, '{}/online'.format(name),
self.global_network, network_is_local=True)
# Target network - a local, slow updating network used for stabilizing the learning
self.target_network = None
if self.has_target:
with tf.device(worker_device):
self.target_network = general_network(tuning_parameters, '{}/target'.format(name),
network_is_local=True)
if not self.tp.distributed and self.tp.framework == Frameworks.TensorFlow:
self.model_saver = tf.train.Saver()
if self.tp.sess and self.tp.checkpoint_restore_dir:
checkpoint = tf.train.latest_checkpoint(self.tp.checkpoint_restore_dir)
screen.log_title("Loading checkpoint: {}".format(checkpoint))
self.model_saver.restore(self.tp.sess, checkpoint)
def sync(self):
"""
Initializes the weights of the networks to match each other
:return:
"""
self.update_online_network()
self.update_target_network()
def update_target_network(self, rate=1.0):
"""
Copy weights: online network >>> target network
:param rate: the rate of copying the weights - 1 for copying exactly
"""
if self.target_network:
self.target_network.set_weights(self.online_network.get_weights(), rate)
def update_online_network(self, rate=1.0):
"""
Copy weights: global network >>> online network
:param rate: the rate of copying the weights - 1 for copying exactly
"""
if self.global_network:
self.online_network.set_weights(self.global_network.get_weights(), rate)
def apply_gradients_to_global_network(self):
"""
Apply gradients from the online network on the global network
:return:
"""
self.global_network.apply_gradients(self.online_network.accumulated_gradients)
def apply_gradients_to_online_network(self):
"""
Apply gradients from the online network on itself
:return:
"""
self.online_network.apply_gradients(self.online_network.accumulated_gradients)
def train_and_sync_networks(self, inputs, targets):
"""
        A generic training function that enables multi-threaded training using a global network if necessary.
:param inputs: The inputs for the network.
:param targets: The targets corresponding to the given inputs
:return: The loss of the training iteration
"""
result = self.online_network.accumulate_gradients(inputs, targets)
self.apply_gradients_and_sync_networks()
return result
def apply_gradients_and_sync_networks(self):
"""
Applies the gradients accumulated in the online network to the global network or to itself and syncs the
networks if necessary
"""
if self.global_network:
self.apply_gradients_to_global_network()
self.online_network.reset_accumulated_gradients()
self.update_online_network()
else:
self.online_network.apply_and_reset_gradients(self.online_network.accumulated_gradients)
def get_local_variables(self):
"""
Get all the variables that are local to the thread
:return: a list of all the variables that are local to the thread
"""
local_variables = [v for v in tf.global_variables() if self.online_network.name in v.name]
if self.has_target:
local_variables += [v for v in tf.global_variables() if self.target_network.name in v.name]
return local_variables
def get_global_variables(self):
"""
Get all the variables that are shared between threads
:return: a list of all the variables that are shared between threads
"""
global_variables = [v for v in tf.global_variables() if self.global_network.name in v.name]
return global_variables
def set_session(self, sess):
self.sess = sess
self.online_network.sess = sess
if self.global_network:
self.global_network.sess = sess
if self.target_network:
self.target_network.sess = sess
def save_model(self, model_id):
saved_model_path = self.model_saver.save(self.tp.sess, os.path.join(self.tp.save_model_dir,
str(model_id) + '.ckpt'))
screen.log_dict(
OrderedDict([
("Saving model", saved_model_path),
]),
prefix="Checkpoint"
)
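update_online_network and update_target_network above only copy weights in one direction (global network to online network, then online network to target network); a tiny mock of that sync order, using a hypothetical DummyNet rather than Coach classes:

class DummyNet:
    def __init__(self, w):
        self.w = list(w)
    def get_weights(self):
        return self.w
    def set_weights(self, weights, rate=1.0):
        # rate=1.0 copies exactly, matching the wrapper's default
        self.w = [(1 - rate) * c + rate * o for c, o in zip(self.w, weights)]

global_net, online, target = DummyNet([1.0]), DummyNet([0.0]), DummyNet([0.0])
online.set_weights(global_net.get_weights())  # update_online_network
target.set_weights(online.get_weights())      # update_target_network
print(online.w, target.w)  # [1.0] [1.0]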

architectures/tensorflow_components/architecture.py Normal file

@@ -0,0 +1,290 @@
#
# Copyright (c) 2017 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from architectures.architecture import Architecture
import tensorflow as tf
from utils import force_list, squeeze_list
from configurations import Preset, MiddlewareTypes
import numpy as np
import time
class TensorFlowArchitecture(Architecture):
def __init__(self, tuning_parameters, name="", global_network=None, network_is_local=True):
"""
:param tuning_parameters: The parameters used for running the algorithm
:type tuning_parameters: Preset
:param name: The name of the network
"""
Architecture.__init__(self, tuning_parameters, name)
self.middleware_embedder = None
self.network_is_local = network_is_local
assert tuning_parameters.agent.tensorflow_support, 'TensorFlow is not supported for this agent'
self.sess = tuning_parameters.sess
self.inputs = []
self.outputs = []
self.targets = []
self.losses = []
self.total_loss = None
self.trainable_weights = []
self.weights_placeholders = []
self.curr_rnn_c_in = None
self.curr_rnn_h_in = None
self.gradients_wrt_inputs = []
self.optimizer_type = self.tp.agent.optimizer_type
if self.tp.seed is not None:
tf.set_random_seed(self.tp.seed)
with tf.variable_scope(self.name, initializer=tf.contrib.layers.xavier_initializer()):
self.global_step = tf.contrib.framework.get_or_create_global_step()
# build the network
self.get_model(tuning_parameters)
# model weights
self.trainable_weights = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=self.name)
# locks for synchronous training
if self.tp.distributed and not self.tp.agent.async_training and not self.network_is_local:
self.lock_counter = tf.get_variable("lock_counter", [], tf.int32,
initializer=tf.constant_initializer(0, dtype=tf.int32),
trainable=False)
self.lock = self.lock_counter.assign_add(1, use_locking=True)
self.lock_init = self.lock_counter.assign(0)
self.release_counter = tf.get_variable("release_counter", [], tf.int32,
initializer=tf.constant_initializer(0, dtype=tf.int32),
trainable=False)
self.release = self.release_counter.assign_add(1, use_locking=True)
self.release_init = self.release_counter.assign(0)
# local network does the optimization so we need to create all the ops we are going to use to optimize
for idx, var in enumerate(self.trainable_weights):
placeholder = tf.placeholder(tf.float32, shape=var.get_shape(), name=str(idx) + '_holder')
self.weights_placeholders.append(placeholder)
self.update_weights_from_list = [weights.assign(holder) for holder, weights in
zip(self.weights_placeholders, self.trainable_weights)]
# gradients ops
self.tensor_gradients = tf.gradients(self.total_loss, self.trainable_weights)
self.gradients_norm = tf.global_norm(self.tensor_gradients)
if self.tp.clip_gradients is not None and self.tp.clip_gradients != 0:
self.clipped_grads, self.grad_norms = tf.clip_by_global_norm(self.tensor_gradients,
tuning_parameters.clip_gradients)
# gradients of the outputs w.r.t. the inputs
if len(self.outputs) == 1:
self.gradients_wrt_inputs = [tf.gradients(self.outputs[0], input_ph) for input_ph in self.inputs]
self.gradients_weights_ph = tf.placeholder('float32', self.outputs[0].shape, 'output_gradient_weights')
self.weighted_gradients = tf.gradients(self.outputs[0], self.trainable_weights, self.gradients_weights_ph)
# L2 regularization
if self.tp.agent.l2_regularization != 0:
self.l2_regularization = [tf.add_n([tf.nn.l2_loss(v) for v in self.trainable_weights])
* self.tp.agent.l2_regularization]
tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, self.l2_regularization)
self.inc_step = self.global_step.assign_add(1)
# defining the optimization process (for LBFGS we have less control over the optimizer)
if self.optimizer_type != 'LBFGS':
# no global network, this is a plain simple centralized training
self.update_weights_from_batch_gradients = self.optimizer.apply_gradients(
zip(self.weights_placeholders, self.trainable_weights), global_step=self.global_step)
# initialize or restore model
if not self.tp.distributed:
self.init_op = tf.global_variables_initializer()
if self.sess:
self.sess.run(self.init_op)
self.accumulated_gradients = None
def reset_accumulated_gradients(self):
"""
Reset the gradients accumulation placeholder
"""
if self.accumulated_gradients is None:
self.accumulated_gradients = self.tp.sess.run(self.trainable_weights)
for ix, grad in enumerate(self.accumulated_gradients):
self.accumulated_gradients[ix] = grad * 0
def accumulate_gradients(self, inputs, targets, additional_fetches=None):
"""
Runs a forward pass & backward pass, clips gradients if needed and accumulates them into the accumulation
placeholders
:param additional_fetches: Optional tensors to fetch during gradients calculation
:param inputs: The input batch for the network
:param targets: The targets corresponding to the input batch
:return: A list containing the total loss and the individual network heads losses
"""
if self.accumulated_gradients is None:
self.reset_accumulated_gradients()
# feed inputs
if additional_fetches is None:
additional_fetches = []
inputs = force_list(inputs)
feed_dict = dict(zip(self.inputs, inputs))
# feed targets
targets = force_list(targets)
for placeholder_idx, target in enumerate(targets):
feed_dict[self.targets[placeholder_idx]] = target
if self.optimizer_type != 'LBFGS':
# set the fetches
fetches = [self.gradients_norm]
if self.tp.clip_gradients:
fetches.append(self.clipped_grads)
else:
fetches.append(self.tensor_gradients)
fetches += [self.total_loss, self.losses]
if self.tp.agent.middleware_type == MiddlewareTypes.LSTM:
fetches.append(self.middleware_embedder.state_out)
additional_fetches_start_idx = len(fetches)
fetches += additional_fetches
# feed the lstm state if necessary
if self.tp.agent.middleware_type == MiddlewareTypes.LSTM:
feed_dict[self.middleware_embedder.c_in] = self.middleware_embedder.c_init
feed_dict[self.middleware_embedder.h_in] = self.middleware_embedder.h_init
# get grads
result = self.tp.sess.run(fetches, feed_dict=feed_dict)
# extract the fetches
norm_unclipped_grads, grads, total_loss, losses = result[:4]
if self.tp.agent.middleware_type == MiddlewareTypes.LSTM:
(self.curr_rnn_c_in, self.curr_rnn_h_in) = result[4]
fetched_tensors = []
if len(additional_fetches) > 0:
fetched_tensors = result[additional_fetches_start_idx:]
# accumulate the gradients
for idx, grad in enumerate(grads):
self.accumulated_gradients[idx] += grad
return total_loss, losses, norm_unclipped_grads, fetched_tensors
else:
self.optimizer.minimize(session=self.tp.sess, feed_dict=feed_dict)
return [0]
def apply_and_reset_gradients(self, gradients, scaler=1.):
"""
Applies the given gradients to the network weights and resets the accumulation placeholder
:param gradients: The gradients to use for the update
:param scaler: A scaling factor that allows rescaling the gradients before applying them
"""
self.apply_gradients(gradients, scaler)
self.reset_accumulated_gradients()
def apply_gradients(self, gradients, scaler=1.):
"""
Applies the given gradients to the network weights
:param gradients: The gradients to use for the update
:param scaler: A scaling factor that allows rescaling the gradients before applying them
"""
if self.tp.agent.async_training or not self.tp.distributed:
if hasattr(self, 'global_step') and not self.network_is_local:
self.tp.sess.run(self.inc_step)
if self.optimizer_type != 'LBFGS':
# lock barrier
if hasattr(self, 'lock_counter'):
self.tp.sess.run(self.lock)
while self.tp.sess.run(self.lock_counter) % self.tp.num_threads != 0:
time.sleep(0.00001)
# rescale the gradients so that they average out with the gradients from the other workers
scaler /= float(self.tp.num_threads)
# apply gradients
if scaler != 1.:
for gradient in gradients:
gradient /= scaler
feed_dict = dict(zip(self.weights_placeholders, gradients))
_ = self.tp.sess.run(self.update_weights_from_batch_gradients, feed_dict=feed_dict)
# release barrier
if hasattr(self, 'release_counter'):
self.tp.sess.run(self.release)
while self.tp.sess.run(self.release_counter) % self.tp.num_threads != 0:
time.sleep(0.00001)
def predict(self, inputs):
"""
Run a forward pass of the network using the given input
:param inputs: The input for the network
:return: The network output
"""
feed_dict = dict(zip(self.inputs, force_list(inputs)))
if self.tp.agent.middleware_type == MiddlewareTypes.LSTM:
feed_dict[self.middleware_embedder.c_in] = self.curr_rnn_c_in
feed_dict[self.middleware_embedder.h_in] = self.curr_rnn_h_in
output, (self.curr_rnn_c_in, self.curr_rnn_h_in) = self.tp.sess.run([self.outputs, self.middleware_embedder.state_out], feed_dict=feed_dict)
else:
output = self.tp.sess.run(self.outputs, feed_dict)
return squeeze_list(output)
def train_on_batch(self, inputs, targets, scaler=1., additional_fetches=None):
"""
Given a batch of examples and targets, runs a forward pass & backward pass and then applies the gradients
:param additional_fetches: Optional tensors to fetch during the training process
:param inputs: The input for the network
:param targets: The targets corresponding to the input batch
:param scaler: A scaling factor that allows rescaling the gradients before applying them
:return: The loss of the network
"""
if additional_fetches is None:
additional_fetches = []
        additional_fetches = force_list(additional_fetches)
loss = self.accumulate_gradients(inputs, targets, additional_fetches=additional_fetches)
self.apply_and_reset_gradients(self.accumulated_gradients, scaler)
return loss
def get_weights(self):
"""
:return: a list of tensors containing the network weights for each layer
"""
return self.trainable_weights
def set_weights(self, weights, new_rate=1.0):
"""
Sets the network weights from the given list of weights tensors
"""
feed_dict = {}
old_weights, new_weights = self.tp.sess.run([self.get_weights(), weights])
for placeholder_idx, new_weight in enumerate(new_weights):
feed_dict[self.weights_placeholders[placeholder_idx]]\
= new_rate * new_weight + (1 - new_rate) * old_weights[placeholder_idx]
self.tp.sess.run(self.update_weights_from_list, feed_dict)
def write_graph_to_logdir(self, summary_dir):
"""
Writes the tensorflow graph to the logdir for tensorboard visualization
:param summary_dir: the path to the logdir
"""
summary_writer = tf.summary.FileWriter(summary_dir)
summary_writer.add_graph(self.sess.graph)
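The accumulate_gradients/apply_gradients split in this class boils down to summing per-batch gradients into a buffer, applying the buffered sum in one update, then zeroing the buffer; a minimal numpy analogue of that cycle (illustrative only, not Coach code):

import numpy as np

weights = np.array([0.5, -0.25])
accumulated = np.zeros_like(weights)
learning_rate = 0.1

for grad in (np.array([0.2, 0.0]), np.array([0.1, -0.4])):
    accumulated += grad                 # accumulate_gradients
weights -= learning_rate * accumulated  # apply_gradients
accumulated[:] = 0.0                    # reset_accumulated_gradients
print(weights)  # [ 0.47 -0.21]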

architectures/tensorflow_components/embedders.py Normal file

@@ -0,0 +1,73 @@
#
# Copyright (c) 2017 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import tensorflow as tf
class InputEmbedder:
def __init__(self, input_size, activation_function=tf.nn.relu, name="embedder"):
self.name = name
self.input_size = input_size
self.activation_function = activation_function
self.input = None
self.output = None
def __call__(self, prev_input_placeholder=None):
with tf.variable_scope(self.get_name()):
if prev_input_placeholder is None:
self.input = tf.placeholder("float", shape=(None,) + self.input_size, name=self.get_name())
else:
self.input = prev_input_placeholder
self._build_module()
return self.input, self.output
def _build_module(self):
pass
def get_name(self):
return self.name
class ImageEmbedder(InputEmbedder):
def __init__(self, input_size, input_rescaler=255.0, activation_function=tf.nn.relu, name="embedder"):
InputEmbedder.__init__(self, input_size, activation_function, name)
self.input_rescaler = input_rescaler
def _build_module(self):
# image observation
rescaled_observation_stack = self.input / self.input_rescaler
self.observation_conv1 = tf.layers.conv2d(rescaled_observation_stack,
filters=32, kernel_size=(8, 8), strides=(4, 4),
activation=self.activation_function, data_format='channels_last')
self.observation_conv2 = tf.layers.conv2d(self.observation_conv1,
filters=64, kernel_size=(4, 4), strides=(2, 2),
activation=self.activation_function, data_format='channels_last')
self.observation_conv3 = tf.layers.conv2d(self.observation_conv2,
filters=64, kernel_size=(3, 3), strides=(1, 1),
activation=self.activation_function, data_format='channels_last')
self.output = tf.contrib.layers.flatten(self.observation_conv3)
class VectorEmbedder(InputEmbedder):
def __init__(self, input_size, activation_function=tf.nn.relu, name="embedder"):
InputEmbedder.__init__(self, input_size, activation_function, name)
def _build_module(self):
# vector observation
input_layer = tf.contrib.layers.flatten(self.input)
self.output = tf.layers.dense(input_layer, 256, activation=self.activation_function)
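With an 84x84 input, ImageEmbedder above is the classic DQN convolution stack; under TensorFlow's default VALID padding each layer shrinks the spatial size as floor((size - kernel) / stride) + 1, which the snippet below works out (standalone arithmetic, not Coach code):

def conv_out(size, kernel, stride):
    # VALID padding: floor((size - kernel) / stride) + 1
    return (size - kernel) // stride + 1

side = conv_out(conv_out(conv_out(84, 8, 4), 4, 2), 3, 1)
print(side, side * side * 64)  # 7 3136, i.e. a 3136-feature flattened embedding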

architectures/tensorflow_components/general_network.py Normal file

@@ -0,0 +1,190 @@
#
# Copyright (c) 2017 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from architectures.tensorflow_components.embedders import *
from architectures.tensorflow_components.heads import *
from architectures.tensorflow_components.middleware import *
from architectures.tensorflow_components.architecture import *
from configurations import InputTypes, OutputTypes, MiddlewareTypes
class GeneralTensorFlowNetwork(TensorFlowArchitecture):
def __init__(self, tuning_parameters, name="", global_network=None, network_is_local=True):
self.global_network = global_network
self.network_is_local = network_is_local
self.num_heads_per_network = 1 if tuning_parameters.agent.use_separate_networks_per_head else \
len(tuning_parameters.agent.output_types)
self.num_networks = 1 if not tuning_parameters.agent.use_separate_networks_per_head else \
len(tuning_parameters.agent.output_types)
self.input_embedders = []
self.output_heads = []
self.activation_function = self.get_activation_function(
tuning_parameters.agent.hidden_layers_activation_function)
TensorFlowArchitecture.__init__(self, tuning_parameters, name, global_network, network_is_local)
def get_activation_function(self, activation_function_string):
activation_functions = {
'relu': tf.nn.relu,
'tanh': tf.nn.tanh,
'sigmoid': tf.nn.sigmoid,
'elu': tf.nn.elu,
'none': None
}
assert activation_function_string in activation_functions.keys(), \
"Activation function must be one of the following {}".format(activation_functions.keys())
return activation_functions[activation_function_string]
def get_input_embedder(self, embedder_type):
# the observation can be either an image or a vector
def get_observation_embedding(with_timestep=False):
if self.input_height > 1:
return ImageEmbedder((self.input_height, self.input_width, self.input_depth), name="observation")
else:
return VectorEmbedder((self.input_width + int(with_timestep), self.input_depth), name="observation")
input_mapping = {
InputTypes.Observation: get_observation_embedding(),
InputTypes.Measurements: VectorEmbedder(self.measurements_size, name="measurements"),
InputTypes.GoalVector: VectorEmbedder(self.measurements_size, name="goal_vector"),
InputTypes.Action: VectorEmbedder((self.num_actions,), name="action"),
InputTypes.TimedObservation: get_observation_embedding(with_timestep=True),
}
return input_mapping[embedder_type]
def get_middleware_embedder(self, middleware_type):
return {MiddlewareTypes.LSTM: LSTM_Embedder,
MiddlewareTypes.FC: FC_Embedder}.get(middleware_type)(self.activation_function)
def get_output_head(self, head_type, head_idx, loss_weight=1.):
output_mapping = {
OutputTypes.Q: QHead,
OutputTypes.DuelingQ: DuelingQHead,
OutputTypes.V: VHead,
OutputTypes.Pi: PolicyHead,
OutputTypes.MeasurementsPrediction: MeasurementsPredictionHead,
OutputTypes.DNDQ: DNDQHead,
OutputTypes.NAF: NAFHead,
OutputTypes.PPO: PPOHead,
            OutputTypes.PPO_V: PPOVHead,
OutputTypes.DistributionalQ: DistributionalQHead
}
return output_mapping[head_type](self.tp, head_idx, loss_weight, self.network_is_local)
def get_model(self, tuning_parameters):
"""
        :param tuning_parameters: A Preset class instance with all the running parameters
:type tuning_parameters: Preset
:return: A model
"""
assert len(self.tp.agent.input_types) > 0, "At least one input type should be defined"
assert len(self.tp.agent.output_types) > 0, "At least one output type should be defined"
assert self.tp.agent.middleware_type is not None, "Exactly one middleware type should be defined"
assert len(self.tp.agent.loss_weights) > 0, "At least one loss weight should be defined"
assert len(self.tp.agent.output_types) == len(self.tp.agent.loss_weights), \
"Number of loss weights should match the number of output types"
local_network_in_distributed_training = self.global_network is not None and self.network_is_local
tuning_parameters.activation_function = self.activation_function
done_creating_input_placeholders = False
for network_idx in range(self.num_networks):
with tf.variable_scope('network_{}'.format(network_idx)):
####################
# Input Embeddings #
####################
state_embedding = []
for idx, input_type in enumerate(self.tp.agent.input_types):
# get the class of the input embedder
self.input_embedders.append(self.get_input_embedder(input_type))
                    # in the case where each head uses a different network, we still reuse the input placeholders
prev_network_input_placeholder = self.inputs[idx] if done_creating_input_placeholders else None
# create the input embedder instance and store the input placeholder and the embedding
input_placeholder, embedding = self.input_embedders[-1](prev_network_input_placeholder)
if len(self.inputs) < len(self.tp.agent.input_types):
self.inputs.append(input_placeholder)
state_embedding.append(embedding)
done_creating_input_placeholders = True
##############
# Middleware #
##############
state_embedding = tf.concat(state_embedding, axis=-1) if len(state_embedding) > 1 else state_embedding[0]
self.middleware_embedder = self.get_middleware_embedder(self.tp.agent.middleware_type)
_, self.state_embedding = self.middleware_embedder(state_embedding)
################
# Output Heads #
################
for head_idx in range(self.num_heads_per_network):
for head_copy_idx in range(self.tp.agent.num_output_head_copies):
if self.tp.agent.use_separate_networks_per_head:
                            # if we use separate networks per head, then the head type corresponds to the network idx
head_type_idx = network_idx
else:
# if we use a single network with multiple heads, then the head type is the current head idx
head_type_idx = head_idx
self.output_heads.append(self.get_output_head(self.tp.agent.output_types[head_type_idx],
head_copy_idx,
self.tp.agent.loss_weights[head_type_idx]))
if self.tp.agent.stop_gradients_from_head[head_idx]:
head_input = tf.stop_gradient(self.state_embedding)
else:
head_input = self.state_embedding
# build the head
if self.network_is_local:
output, target_placeholder, input_placeholder = self.output_heads[-1](head_input)
self.targets.extend(target_placeholder)
else:
output, input_placeholder = self.output_heads[-1](head_input)
self.outputs.extend(output)
self.inputs.extend(input_placeholder)
# Losses
self.losses = tf.losses.get_losses(self.name)
self.losses += tf.losses.get_regularization_losses(self.name)
self.total_loss = tf.losses.compute_weighted_loss(self.losses, scope=self.name)
# Learning rate
if self.tp.learning_rate_decay_rate != 0:
self.tp.learning_rate = tf.train.exponential_decay(
self.tp.learning_rate, self.global_step, decay_steps=self.tp.learning_rate_decay_steps,
decay_rate=self.tp.learning_rate_decay_rate, staircase=True)
# Optimizer
if local_network_in_distributed_training and \
hasattr(self.tp.agent, "shared_optimizer") and self.tp.agent.shared_optimizer:
# distributed training and this is the local network instantiation
self.optimizer = self.global_network.optimizer
else:
if tuning_parameters.agent.optimizer_type == 'Adam':
self.optimizer = tf.train.AdamOptimizer(learning_rate=tuning_parameters.learning_rate)
elif tuning_parameters.agent.optimizer_type == 'RMSProp':
self.optimizer = tf.train.RMSPropOptimizer(self.tp.learning_rate, decay=0.9, epsilon=0.01)
elif tuning_parameters.agent.optimizer_type == 'LBFGS':
self.optimizer = tf.contrib.opt.ScipyOptimizerInterface(self.total_loss, method='L-BFGS-B',
options={'maxiter': 25})
else:
raise Exception("{} is not a valid optimizer type".format(tuning_parameters.agent.optimizer_type))

architectures/tensorflow_components/heads.py Normal file

@@ -0,0 +1,481 @@
#
# Copyright (c) 2017 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import tensorflow as tf
import numpy as np
from utils import force_list
# Used to initialize weights for policy and value output layers
def normalized_columns_initializer(std=1.0):
def _initializer(shape, dtype=None, partition_info=None):
out = np.random.randn(*shape).astype(np.float32)
out *= std / np.sqrt(np.square(out).sum(axis=0, keepdims=True))
return tf.constant(out)
return _initializer
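normalized_columns_initializer rescales each column of a Gaussian matrix so that its L2 norm equals std; a numpy check of that property (standalone):

import numpy as np

std = 1.0
out = np.random.randn(4, 3).astype(np.float32)
out *= std / np.sqrt(np.square(out).sum(axis=0, keepdims=True))
print(np.linalg.norm(out, axis=0))  # ~[1. 1. 1.]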
class Head:
def __init__(self, tuning_parameters, head_idx=0, loss_weight=1., is_local=True):
self.head_idx = head_idx
self.name = "head"
self.output = []
self.loss = []
self.loss_type = []
self.regularizations = []
self.loss_weight = force_list(loss_weight)
self.target = []
self.input = []
self.is_local = is_local
def __call__(self, input_layer):
"""
Wrapper for building the module graph including scoping and loss creation
:param input_layer: the input to the graph
:return: the output of the last layer and the target placeholder
"""
with tf.variable_scope(self.get_name(), initializer=tf.contrib.layers.xavier_initializer()):
self._build_module(input_layer)
self.output = force_list(self.output)
self.target = force_list(self.target)
self.input = force_list(self.input)
self.loss_type = force_list(self.loss_type)
self.loss = force_list(self.loss)
self.regularizations = force_list(self.regularizations)
if self.is_local:
self.set_loss()
if self.is_local:
return self.output, self.target, self.input
else:
return self.output, self.input
def _build_module(self, input_layer):
"""
Builds the graph of the module
:param input_layer: the input to the graph
:return: None
"""
pass
def get_name(self):
"""
Get a formatted name for the module
:return: the formatted name
"""
return '{}_{}'.format(self.name, self.head_idx)
def set_loss(self):
"""
Creates a target placeholder and loss function for each loss_type and regularization
:return: None
"""
# add losses and target placeholder
for idx in range(len(self.loss_type)):
target = tf.placeholder('float', self.output[idx].shape, '{}_target'.format(self.get_name()))
self.target.append(target)
loss = self.loss_type[idx](self.target[-1], self.output[idx],
weights=self.loss_weight[idx], scope=self.get_name())
self.loss.append(loss)
# add regularizations
for regularization in self.regularizations:
self.loss.append(regularization)
class QHead(Head):
def __init__(self, tuning_parameters, head_idx=0, loss_weight=1., is_local=True):
Head.__init__(self, tuning_parameters, head_idx, loss_weight, is_local)
self.name = 'q_values_head'
self.num_actions = tuning_parameters.env_instance.action_space_size
if tuning_parameters.agent.replace_mse_with_huber_loss:
self.loss_type = tf.losses.huber_loss
else:
self.loss_type = tf.losses.mean_squared_error
def _build_module(self, input_layer):
# Standard Q Network
self.output = tf.layers.dense(input_layer, self.num_actions, name='output')
class DuelingQHead(QHead):
def __init__(self, tuning_parameters, head_idx=0, loss_weight=1., is_local=True):
QHead.__init__(self, tuning_parameters, head_idx, loss_weight, is_local)
def _build_module(self, input_layer):
# state value tower - V
with tf.variable_scope("state_value"):
state_value = tf.layers.dense(input_layer, 256, activation=tf.nn.relu)
state_value = tf.layers.dense(state_value, 1)
# action advantage tower - A
with tf.variable_scope("action_advantage"):
action_advantage = tf.layers.dense(input_layer, 256, activation=tf.nn.relu)
action_advantage = tf.layers.dense(action_advantage, self.num_actions)
            # center the advantages per state (over the actions axis), not across the whole batch
            action_advantage = action_advantage - tf.reduce_mean(action_advantage, axis=1, keep_dims=True)
# merge to state-action value function Q
self.output = tf.add(state_value, action_advantage, name='output')
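# Illustrative numpy sketch (not part of the original file) of the dueling merge
# above: Q(s, a) = V(s) + (A(s, a) - mean_a A(s, a)), i.e. the advantages are
# centered per state before being added to the state value.
def _demo_dueling_aggregation():
    v = np.array([[1.0], [2.0]])               # V(s), shape (batch, 1)
    a = np.array([[0.5, 1.5], [3.0, 1.0]])     # A(s, a), shape (batch, num_actions)
    return v + (a - a.mean(axis=1, keepdims=True))  # -> [[0.5, 1.5], [3.0, 1.0]]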
class VHead(Head):
def __init__(self, tuning_parameters, head_idx=0, loss_weight=1., is_local=True):
Head.__init__(self, tuning_parameters, head_idx, loss_weight, is_local)
self.name = 'v_values_head'
if tuning_parameters.agent.replace_mse_with_huber_loss:
self.loss_type = tf.losses.huber_loss
else:
self.loss_type = tf.losses.mean_squared_error
def _build_module(self, input_layer):
# Standard V Network
self.output = tf.layers.dense(input_layer, 1, name='output',
kernel_initializer=normalized_columns_initializer(1.0))
class PolicyHead(Head):
def __init__(self, tuning_parameters, head_idx=0, loss_weight=1., is_local=True):
Head.__init__(self, tuning_parameters, head_idx, loss_weight, is_local)
self.name = 'policy_values_head'
self.num_actions = tuning_parameters.env_instance.action_space_size
self.output_scale = np.max(tuning_parameters.env_instance.action_space_abs_range)
self.discrete_controls = tuning_parameters.env_instance.discrete_controls
self.exploration_policy = tuning_parameters.exploration.policy
        self.exploration_variance = 2 * self.output_scale * tuning_parameters.exploration.initial_noise_variance_percentage
if not self.discrete_controls and not self.output_scale:
raise ValueError("For continuous controls, an output scale for the network must be specified")
self.beta = tuning_parameters.agent.beta_entropy
def _build_module(self, input_layer):
eps = 1e-15
if self.discrete_controls:
self.actions = tf.placeholder(tf.int32, [None], name="actions")
else:
self.actions = tf.placeholder(tf.float32, [None, self.num_actions], name="actions")
self.input = [self.actions]
# Policy Head
if self.discrete_controls:
policy_values = tf.layers.dense(input_layer, self.num_actions)
self.policy_mean = tf.nn.softmax(policy_values, name="policy")
# define the distributions for the policy and the old policy
self.policy_distribution = tf.contrib.distributions.Categorical(probs=self.policy_mean)
self.output = self.policy_mean
else:
# mean
policy_values_mean = tf.layers.dense(input_layer, self.num_actions, activation=tf.nn.tanh)
self.policy_mean = tf.multiply(policy_values_mean, self.output_scale, name='output_mean')
self.output = [self.policy_mean]
# std
if self.exploration_policy == 'ContinuousEntropy':
policy_values_std = tf.layers.dense(input_layer, self.num_actions,
kernel_initializer=normalized_columns_initializer(0.01))
self.policy_std = tf.nn.softplus(policy_values_std, name='output_variance') + eps
self.output.append(self.policy_std)
else:
self.policy_std = tf.constant(self.exploration_variance, dtype='float32', shape=(self.num_actions,))
# define the distributions for the policy and the old policy
self.policy_distribution = tf.contrib.distributions.MultivariateNormalDiag(self.policy_mean,
self.policy_std)
if self.is_local:
# add entropy regularization
if self.beta:
self.entropy = tf.reduce_mean(self.policy_distribution.entropy())
self.regularizations = -tf.multiply(self.beta, self.entropy, name='entropy_regularization')
tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, self.regularizations)
# calculate loss
self.action_log_probs_wrt_policy = self.policy_distribution.log_prob(self.actions)
self.advantages = tf.placeholder(tf.float32, [None], name="advantages")
self.target = self.advantages
self.loss = -tf.reduce_mean(self.action_log_probs_wrt_policy * self.advantages)
tf.losses.add_loss(self.loss_weight[0] * self.loss)
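# Illustrative numpy sketch (not part of the original file) of the loss above:
# loss = -E[log pi(a_t|s_t) * advantage_t], so positive advantages raise the
# probability of the sampled action and negative advantages lower it.
def _demo_policy_gradient_loss():
    action_probs = np.array([0.2, 0.7])   # pi(a_t|s_t) for two sampled transitions
    advantages = np.array([1.0, -0.5])
    return -np.mean(np.log(action_probs) * advantages)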
class MeasurementsPredictionHead(Head):
def __init__(self, tuning_parameters, head_idx=0, loss_weight=1., is_local=True):
Head.__init__(self, tuning_parameters, head_idx, loss_weight, is_local)
self.name = 'future_measurements_head'
self.num_actions = tuning_parameters.env_instance.action_space_size
self.num_measurements = tuning_parameters.env.measurements_size[0] \
if tuning_parameters.env.measurements_size else 0
self.num_prediction_steps = tuning_parameters.agent.num_predicted_steps_ahead
self.multi_step_measurements_size = self.num_measurements * self.num_prediction_steps
if tuning_parameters.agent.replace_mse_with_huber_loss:
self.loss_type = tf.losses.huber_loss
else:
self.loss_type = tf.losses.mean_squared_error
def _build_module(self, input_layer):
        # This is almost identical to the dueling network, except that we predict the future measurements for each action
# actions expectation tower (expectation stream) - E
with tf.variable_scope("expectation_stream"):
expectation_stream = tf.layers.dense(input_layer, 256, activation=tf.nn.elu)
expectation_stream = tf.layers.dense(expectation_stream, self.multi_step_measurements_size)
expectation_stream = tf.expand_dims(expectation_stream, axis=1)
# action fine differences tower (action stream) - A
with tf.variable_scope("action_stream"):
action_stream = tf.layers.dense(input_layer, 256, activation=tf.nn.elu)
action_stream = tf.layers.dense(action_stream, self.num_actions * self.multi_step_measurements_size)
action_stream = tf.reshape(action_stream,
(tf.shape(action_stream)[0], self.num_actions, self.multi_step_measurements_size))
action_stream = action_stream - tf.reduce_mean(action_stream, reduction_indices=1, keep_dims=True)
# merge to future measurements predictions
self.output = tf.add(expectation_stream, action_stream, name='output')
class DNDQHead(Head):
def __init__(self, tuning_parameters, head_idx=0, loss_weight=1., is_local=True):
Head.__init__(self, tuning_parameters, head_idx, loss_weight, is_local)
self.name = 'dnd_q_values_head'
self.num_actions = tuning_parameters.env_instance.action_space_size
self.DND_size = tuning_parameters.agent.dnd_size
self.DND_key_error_threshold = tuning_parameters.agent.DND_key_error_threshold
self.l2_norm_added_delta = tuning_parameters.agent.l2_norm_added_delta
self.new_value_shift_coefficient = tuning_parameters.agent.new_value_shift_coefficient
self.number_of_nn = tuning_parameters.agent.number_of_knn
if tuning_parameters.agent.replace_mse_with_huber_loss:
self.loss_type = tf.losses.huber_loss
else:
self.loss_type = tf.losses.mean_squared_error
def _build_module(self, input_layer):
# DND based Q head
from memories import differentiable_neural_dictionary
        self.DND = differentiable_neural_dictionary.QDND(
self.DND_size, input_layer.get_shape()[-1], self.num_actions, self.new_value_shift_coefficient,
key_error_threshold=self.DND_key_error_threshold)
# Retrieve info from DND dictionary
self.action = tf.placeholder(tf.int8, [None], name="action")
self.input = self.action
result = tf.py_func(self.DND.query,
[input_layer, self.action, self.number_of_nn],
[tf.float64, tf.float64])
self.dnd_embeddings = tf.to_float(result[0])
self.dnd_values = tf.to_float(result[1])
# DND calculation
square_diff = tf.square(self.dnd_embeddings - tf.expand_dims(input_layer, 1))
        distances = tf.reduce_sum(square_diff, axis=2) + self.l2_norm_added_delta
weights = 1.0 / distances
normalised_weights = weights / tf.reduce_sum(weights, axis=1, keep_dims=True)
self.output = tf.reduce_sum(self.dnd_values * normalised_weights, axis=1)
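# Illustrative numpy sketch (not part of the original file) of the DND readout
# above: Q = sum_i w_i * v_i, with inverse-distance kernel weights
# w_i ~ 1 / (||h - h_i||^2 + delta) normalized to sum to one.
def _demo_dnd_readout(delta=1e-3):
    distances = np.array([0.1, 0.4, 1.0])  # squared L2 distances to 3 neighbours
    values = np.array([5.0, 3.0, 1.0])     # Q values stored for those neighbours
    weights = 1.0 / (distances + delta)
    weights /= weights.sum()
    return (weights * values).sum()        # ~4.3, dominated by the nearest neighbour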
class NAFHead(Head):
def __init__(self, tuning_parameters, head_idx=0, loss_weight=1., is_local=True):
Head.__init__(self, tuning_parameters, head_idx, loss_weight, is_local)
self.name = 'naf_q_values_head'
self.num_actions = tuning_parameters.env_instance.action_space_size
self.output_scale = np.max(tuning_parameters.env_instance.action_space_abs_range)
if tuning_parameters.agent.replace_mse_with_huber_loss:
self.loss_type = tf.losses.huber_loss
else:
self.loss_type = tf.losses.mean_squared_error
def _build_module(self, input_layer):
# NAF
self.action = tf.placeholder(tf.float32, [None, self.num_actions], name="action")
self.input = self.action
# V Head
self.V = tf.layers.dense(input_layer, 1, name='V')
# mu Head
mu_unscaled = tf.layers.dense(input_layer, self.num_actions, activation=tf.nn.tanh, name='mu_unscaled')
self.mu = tf.multiply(mu_unscaled, self.output_scale, name='mu')
# A Head
        # l_vector holds the entries of a lower-triangular matrix
        self.l_vector = tf.layers.dense(input_layer, (self.num_actions * (self.num_actions + 1)) // 2,
                                        name='l_vector')
# Convert l to a lower triangular matrix and exponentiate its diagonal
i = 0
columns = []
for col in range(self.num_actions):
start_row = col
num_non_zero_elements = self.num_actions - start_row
zeros_column_part = tf.zeros_like(self.l_vector[:, 0:start_row])
diag_element = tf.expand_dims(tf.exp(self.l_vector[:, i]), 1)
non_zeros_non_diag_column_part = self.l_vector[:, (i + 1):(i + num_non_zero_elements)]
columns.append(tf.concat([zeros_column_part, diag_element, non_zeros_non_diag_column_part], axis=1))
i += num_non_zero_elements
self.L = tf.transpose(tf.stack(columns, axis=1), (0, 2, 1))
# P = L*L^T
self.P = tf.matmul(self.L, tf.transpose(self.L, (0, 2, 1)))
# A = -1/2 * (u - mu)^T * P * (u - mu)
action_diff = tf.expand_dims(self.action - self.mu, -1)
a_matrix_form = -0.5 * tf.matmul(tf.transpose(action_diff, (0, 2, 1)), tf.matmul(self.P, action_diff))
self.A = tf.reshape(a_matrix_form, [-1, 1])
# Q Head
self.Q = tf.add(self.V, self.A, name='Q')
self.output = self.Q
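# Illustrative numpy sketch (not part of the original file) of the NAF advantage
# above: P = L L^T is positive-semidefinite by construction, so A(u) <= 0
# everywhere and A(u) == 0 exactly when the action u equals the policy mean mu.
def _demo_naf_advantage():
    L = np.array([[1.0, 0.0], [0.5, 2.0]])  # lower triangular, positive diagonal
    P = L.dot(L.T)
    diff = np.array([0.3, -0.2])            # u - mu
    return -0.5 * diff.dot(P).dot(diff)     # -> -0.1, always <= 0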
class PPOHead(Head):
def __init__(self, tuning_parameters, head_idx=0, loss_weight=1., is_local=True):
Head.__init__(self, tuning_parameters, head_idx, loss_weight, is_local)
self.name = 'ppo_head'
self.num_actions = tuning_parameters.env_instance.action_space_size
self.discrete_controls = tuning_parameters.env_instance.discrete_controls
self.output_scale = np.max(tuning_parameters.env_instance.action_space_abs_range)
self.kl_coefficient = tf.Variable(tuning_parameters.agent.initial_kl_coefficient,
trainable=False, name='kl_coefficient')
self.kl_cutoff = 2*tuning_parameters.agent.target_kl_divergence
self.high_kl_penalty_coefficient = tuning_parameters.agent.high_kl_penalty_coefficient
self.clip_likelihood_ratio_using_epsilon = tuning_parameters.agent.clip_likelihood_ratio_using_epsilon
self.use_kl_regularization = tuning_parameters.agent.use_kl_regularization
self.beta = tuning_parameters.agent.beta_entropy
def _build_module(self, input_layer):
eps = 1e-15
if self.discrete_controls:
self.actions = tf.placeholder(tf.int32, [None], name="actions")
else:
self.actions = tf.placeholder(tf.float32, [None, self.num_actions], name="actions")
self.old_policy_mean = tf.placeholder(tf.float32, [None, self.num_actions], "old_policy_mean")
self.old_policy_std = tf.placeholder(tf.float32, [None, self.num_actions], "old_policy_std")
# Policy Head
if self.discrete_controls:
self.input = [self.actions, self.old_policy_mean]
policy_values = tf.layers.dense(input_layer, self.num_actions)
self.policy_mean = tf.nn.softmax(policy_values, name="policy")
# define the distributions for the policy and the old policy
self.policy_distribution = tf.contrib.distributions.Categorical(probs=self.policy_mean)
self.old_policy_distribution = tf.contrib.distributions.Categorical(probs=self.old_policy_mean)
self.output = self.policy_mean
else:
self.input = [self.actions, self.old_policy_mean, self.old_policy_std]
self.policy_mean = tf.layers.dense(input_layer, self.num_actions, name='policy_mean')
self.policy_logstd = tf.Variable(np.zeros((1, self.num_actions)), dtype='float32')
self.policy_std = tf.tile(tf.exp(self.policy_logstd), [tf.shape(input_layer)[0], 1], name='policy_std')
# define the distributions for the policy and the old policy
self.policy_distribution = tf.contrib.distributions.MultivariateNormalDiag(self.policy_mean,
self.policy_std)
self.old_policy_distribution = tf.contrib.distributions.MultivariateNormalDiag(self.old_policy_mean,
self.old_policy_std)
self.output = [self.policy_mean, self.policy_std]
self.action_probs_wrt_policy = tf.exp(self.policy_distribution.log_prob(self.actions))
self.action_probs_wrt_old_policy = tf.exp(self.old_policy_distribution.log_prob(self.actions))
self.entropy = tf.reduce_mean(self.policy_distribution.entropy())
# add kl divergence regularization
self.kl_divergence = tf.reduce_mean(tf.contrib.distributions.kl_divergence(self.old_policy_distribution,
self.policy_distribution))
if self.use_kl_regularization:
# no clipping => use kl regularization
self.weighted_kl_divergence = tf.multiply(self.kl_coefficient, self.kl_divergence)
self.regularizations = self.weighted_kl_divergence + self.high_kl_penalty_coefficient * \
tf.square(tf.maximum(0.0, self.kl_divergence - self.kl_cutoff))
tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, self.regularizations)
# calculate surrogate loss
self.advantages = tf.placeholder(tf.float32, [None], name="advantages")
self.target = self.advantages
        # eps guards against division by zero when the old policy assigns a sampled action zero probability
        self.likelihood_ratio = self.action_probs_wrt_policy / (self.action_probs_wrt_old_policy + eps)
if self.clip_likelihood_ratio_using_epsilon is not None:
max_value = 1 + self.clip_likelihood_ratio_using_epsilon
min_value = 1 - self.clip_likelihood_ratio_using_epsilon
self.clipped_likelihood_ratio = tf.clip_by_value(self.likelihood_ratio, min_value, max_value)
self.scaled_advantages = tf.minimum(self.likelihood_ratio * self.advantages,
self.clipped_likelihood_ratio * self.advantages)
else:
self.scaled_advantages = self.likelihood_ratio * self.advantages
        # the minus sign turns the surrogate objective, which we want to maximize, into a loss to be minimized
self.surrogate_loss = -tf.reduce_mean(self.scaled_advantages)
if self.is_local:
# add entropy regularization
if self.beta:
self.entropy = tf.reduce_mean(self.policy_distribution.entropy())
self.regularizations = -tf.multiply(self.beta, self.entropy, name='entropy_regularization')
tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, self.regularizations)
self.loss = self.surrogate_loss
tf.losses.add_loss(self.loss)
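# Illustrative numpy sketch (not part of the original file) of the clipped
# surrogate above: min(r * A, clip(r, 1 - eps, 1 + eps) * A) removes any
# incentive to push the likelihood ratio r outside [1 - eps, 1 + eps].
def _demo_clipped_surrogate(epsilon=0.2):
    ratio = np.array([0.5, 1.5, 1.1])       # pi_new(a|s) / pi_old(a|s)
    advantages = np.array([1.0, 1.0, -1.0])
    clipped = np.clip(ratio, 1 - epsilon, 1 + epsilon)
    # the 1.5 ratio is capped at 1.2, so its gradient incentive vanishes
    return -np.minimum(ratio * advantages, clipped * advantages).mean()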
class PPOVHead(Head):
def __init__(self, tuning_parameters, head_idx=0, loss_weight=1., is_local=True):
Head.__init__(self, tuning_parameters, head_idx, loss_weight, is_local)
self.name = 'ppo_v_head'
self.clip_likelihood_ratio_using_epsilon = tuning_parameters.agent.clip_likelihood_ratio_using_epsilon
def _build_module(self, input_layer):
self.old_policy_value = tf.placeholder(tf.float32, [None], "old_policy_values")
self.input = [self.old_policy_value]
self.output = tf.layers.dense(input_layer, 1, name='output',
kernel_initializer=normalized_columns_initializer(1.0))
self.target = self.total_return = tf.placeholder(tf.float32, [None], name="total_return")
        # squeeze the value prediction from shape [batch, 1] to [batch] so it matches the targets
        # instead of silently broadcasting to [batch, batch]
        value = tf.squeeze(self.output, axis=-1)
        value_loss_1 = tf.square(value - self.target)
        value_loss_2 = tf.square(self.old_policy_value +
                                 tf.clip_by_value(value - self.old_policy_value,
                                                  -self.clip_likelihood_ratio_using_epsilon,
                                                  self.clip_likelihood_ratio_using_epsilon) - self.target)
self.vf_loss = tf.reduce_mean(tf.maximum(value_loss_1, value_loss_2))
self.loss = self.vf_loss
tf.losses.add_loss(self.loss)
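# Illustrative numpy sketch (not part of the original file) of the pessimistic
# value loss above: the update is penalized by the worse (larger) of the
# unclipped and clipped squared errors.
def _demo_clipped_value_loss(epsilon=0.2):
    v_new, v_old, total_return = 1.8, 1.0, 2.0
    v_clipped = v_old + np.clip(v_new - v_old, -epsilon, epsilon)  # -> 1.2
    return max((v_new - total_return) ** 2, (v_clipped - total_return) ** 2)  # -> 0.64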
class DistributionalQHead(Head):
def __init__(self, tuning_parameters, head_idx=0, loss_weight=1., is_local=True):
Head.__init__(self, tuning_parameters, head_idx, loss_weight, is_local)
self.name = 'distributional_dqn_head'
self.num_actions = tuning_parameters.env_instance.action_space_size
self.num_atoms = tuning_parameters.agent.atoms
def _build_module(self, input_layer):
self.actions = tf.placeholder(tf.int32, [None], name="actions")
self.input = [self.actions]
values_distribution = tf.layers.dense(input_layer, self.num_actions * self.num_atoms)
values_distribution = tf.reshape(values_distribution, (tf.shape(values_distribution)[0], self.num_actions, self.num_atoms))
# softmax on atoms dimension
self.output = tf.nn.softmax(values_distribution)
# calculate cross entropy loss
self.distributions = tf.placeholder(tf.float32, shape=(None, self.num_actions, self.num_atoms), name="distributions")
self.target = self.distributions
        # reduce the per-sample cross entropy to the scalar loss expected by tf.losses.add_loss
        self.loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(labels=self.target, logits=values_distribution))
tf.losses.add_loss(self.loss)
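# Illustrative numpy sketch (not part of the original file): the head above
# outputs a categorical distribution over a fixed support of atoms per action;
# a scalar Q value is recovered as the expectation over that support.
def _demo_distributional_q():
    atoms = np.linspace(-2.0, 2.0, 5)              # hypothetical support
    logits = np.array([0.1, 0.2, 1.5, 0.2, 0.1])   # atom logits for one action
    probs = np.exp(logits) / np.exp(logits).sum()  # softmax over the atoms axis
    return (probs * atoms).sum()                   # -> 0.0 for this symmetric case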

View File

@@ -0,0 +1,65 @@
#
# Copyright (c) 2017 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import tensorflow as tf
import numpy as np
class MiddlewareEmbedder:
def __init__(self, activation_function=tf.nn.relu, name="middleware_embedder"):
self.name = name
self.input = None
self.output = None
self.activation_function = activation_function
def __call__(self, input_layer):
with tf.variable_scope(self.get_name()):
self.input = input_layer
self._build_module()
return self.input, self.output
def _build_module(self):
pass
def get_name(self):
return self.name
class LSTM_Embedder(MiddlewareEmbedder):
def _build_module(self):
middleware = tf.layers.dense(self.input, 512, activation=self.activation_function)
lstm_cell = tf.contrib.rnn.BasicLSTMCell(256, state_is_tuple=True)
self.c_init = np.zeros((1, lstm_cell.state_size.c), np.float32)
self.h_init = np.zeros((1, lstm_cell.state_size.h), np.float32)
self.state_init = [self.c_init, self.h_init]
self.c_in = tf.placeholder(tf.float32, [1, lstm_cell.state_size.c])
self.h_in = tf.placeholder(tf.float32, [1, lstm_cell.state_size.h])
self.state_in = (self.c_in, self.h_in)
rnn_in = tf.expand_dims(middleware, [0])
step_size = tf.shape(middleware)[:1]
state_in = tf.contrib.rnn.LSTMStateTuple(self.c_in, self.h_in)
lstm_outputs, lstm_state = tf.nn.dynamic_rnn(
lstm_cell, rnn_in, initial_state=state_in, sequence_length=step_size, time_major=False)
lstm_c, lstm_h = lstm_state
self.state_out = (lstm_c[:1, :], lstm_h[:1, :])
self.output = tf.reshape(lstm_outputs, [-1, 256])
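# Illustrative usage sketch (not part of the original file): the recurrent state
# lives outside the graph, so the caller feeds state_in on every step and carries
# state_out forward. `embedder`, `sess` and `obs_batch` are hypothetical; TF 1.x
# allows feeding the intermediate `embedder.input` tensor directly.
def _demo_lstm_state_carrying(embedder, sess, obs_batch):
    c, h = embedder.state_init               # zero state at the start of an episode
    output, (c, h) = sess.run([embedder.output, embedder.state_out],
                              feed_dict={embedder.input: obs_batch,
                                         embedder.c_in: c, embedder.h_in: h})
    return output, (c, h)                     # reuse (c, h) on the next step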
class FC_Embedder(MiddlewareEmbedder):
def _build_module(self):
self.output = tf.layers.dense(self.input, 512, activation=self.activation_function)

View File

@@ -0,0 +1,81 @@
#
# Copyright (c) 2017 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import tensorflow as tf
import numpy as np
class SharedRunningStats(object):
def __init__(self, tuning_parameters, replicated_device, epsilon=1e-2, shape=(), name=""):
self.tp = tuning_parameters
with tf.device(replicated_device):
with tf.variable_scope(name):
self._sum = tf.get_variable(
dtype=tf.float64,
shape=shape,
initializer=tf.constant_initializer(0.0),
name="running_sum", trainable=False)
self._sum_squared = tf.get_variable(
dtype=tf.float64,
shape=shape,
initializer=tf.constant_initializer(epsilon),
name="running_sum_squared", trainable=False)
self._count = tf.get_variable(
dtype=tf.float64,
shape=(),
initializer=tf.constant_initializer(epsilon),
name="count", trainable=False)
self._shape = shape
self._mean = tf.to_float(self._sum / self._count)
self._std = tf.sqrt(tf.maximum(tf.to_float(self._sum_squared / self._count) - tf.square(self._mean), 1e-2))
self.new_sum = tf.placeholder(shape=self.shape, dtype=tf.float64, name='sum')
                self.new_sum_squared = tf.placeholder(shape=self.shape, dtype=tf.float64, name='sum_squared')
self.newcount = tf.placeholder(shape=[], dtype=tf.float64, name='count')
self._inc_sum = tf.assign_add(self._sum, self.new_sum, use_locking=True)
self._inc_sum_squared = tf.assign_add(self._sum_squared, self.new_sum_squared, use_locking=True)
self._inc_count = tf.assign_add(self._count, self.newcount, use_locking=True)
def push(self, x):
x = x.astype('float64')
self.tp.sess.run([self._inc_sum, self._inc_sum_squared, self._inc_count],
feed_dict={
self.new_sum: x.sum(axis=0).ravel(),
self.new_sum_squared: np.square(x).sum(axis=0).ravel(),
self.newcount: np.array(len(x), dtype='float64')
})
@property
def n(self):
return self.tp.sess.run(self._count)
@property
def mean(self):
return self.tp.sess.run(self._mean)
@property
def var(self):
return self.std ** 2
@property
def std(self):
return self.tp.sess.run(self._std)
@property
def shape(self):
return self._shape
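# Illustrative numpy sketch (not part of the original file) of the statistics the
# class above maintains: mean = sum / count and std = sqrt(E[x^2] - mean^2), with
# the same lower clipping, so workers only ever push sums and counts.
def _demo_running_stats():
    x = np.random.randn(1000) * 3.0 + 5.0
    total, total_sq, count = x.sum(), np.square(x).sum(), len(x)
    mean = total / count
    std = np.sqrt(max(total_sq / count - mean ** 2, 1e-2))
    return mean, std  # ~(5.0, 3.0)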