1
0
mirror of https://github.com/gryf/coach.git synced 2026-04-09 22:53:40 +02:00

coach v0.8.0

This commit is contained in:
Gal Leibovich
2017-10-19 13:10:15 +03:00
parent 7f77813a39
commit 1d4c3455e7
123 changed files with 10996 additions and 203 deletions

View File

@@ -0,0 +1,129 @@
#
# Copyright (c) 2017 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import sys
import copy
from ngraph.frontends.neon import *
import ngraph as ng
from architectures.architecture import *
import numpy as np
from utils import *
class NeonArchitecture(Architecture):
    """
    Base class for network architectures implemented with Intel Nervana ngraph/neon.

    Builds the training, inference and weight-synchronization computations on top
    of the graph created by get_model() (implemented by subclasses), and exposes
    predict / train_on_batch / get_weights / set_weights to the agents.
    """
    def __init__(self, tuning_parameters, name="", global_network=None, network_is_local=True):
        """
        :param tuning_parameters: a Preset instance holding all the run parameters
        :param name: the name scope of the network
        :param global_network: the shared network in distributed training
        :param network_is_local: whether this instance is a local worker copy
        """
        Architecture.__init__(self, tuning_parameters, name)
        assert tuning_parameters.agent.neon_support, 'Neon is not supported for this agent'
        self.clip_error = tuning_parameters.clip_gradients
        self.total_loss = None
        self.epoch = 0
        self.inputs = []
        self.outputs = []
        self.targets = []
        self.losses = []
        # for neon, tuning_parameters.sess holds the ngraph transformer
        self.transformer = tuning_parameters.sess
        self.network = self.get_model(tuning_parameters)
        self.accumulated_gradients = []

        # training and inference ops: run the optimizer, then return the loss value
        train_output = ng.sequential([
            self.optimizer(self.total_loss),
            self.total_loss
        ])
        placeholders = self.inputs + self.targets
        self.train_op = self.transformer.add_computation(
            ng.computation(train_output, *placeholders)
        )
        self.predict_op = self.transformer.add_computation(
            ng.computation(self.outputs, self.inputs[0])
        )

        # update weights from array op: one assign computation per variable
        self.weights = [ng.placeholder(w.axes) for w in self.total_loss.variables()]
        self.set_weights_ops = []
        for target_variable, variable in zip(self.total_loss.variables(), self.weights):
            self.set_weights_ops.append(self.transformer.add_computation(
                ng.computation(
                    ng.assign(target_variable, variable), variable
                )
            ))

        # get weights op
        self.get_variables = self.transformer.add_computation(
            ng.computation(self.total_loss.variables())
        )

    def predict(self, inputs):
        """
        Run a feed-forward pass through the network.
        :param inputs: a batch of inputs with the batch axis first
        :return: a single output array, or a list of arrays for multi-head networks
        """
        batch_size = inputs.shape[0]
        # neon expects the batch axis to be last
        inputs = inputs.swapaxes(0, -1)
        prediction = self.predict_op(inputs)  # TODO: problem with multiple inputs
        if not isinstance(prediction, tuple):
            # BUGFIX: was `prediction = (prediction)`, which is NOT a tuple, so the
            # loop below iterated over the rows of the single output array
            prediction = (prediction,)
        # move the batch axis back to the front for each output and trim padding
        output = [p.transpose()[:batch_size].copy() for p in prediction]
        # if there is only one output then we don't need a list
        if len(output) == 1:
            output = output[0]
        return output

    def train_on_batch(self, inputs, targets):
        """Run a single training iteration and return the loss."""
        loss = self.accumulate_gradients(inputs, targets)
        self.apply_and_reset_gradients(self.accumulated_gradients)
        return loss

    def get_weights(self):
        """:return: the current values of all the network variables"""
        return self.get_variables()

    def set_weights(self, weights, rate=1.0):
        """
        Update the network variables towards the given weights.
        :param weights: the target weight values
        :param rate: interpolation rate; 1.0 copies the weights directly, smaller
                     values move the current weights only part of the way
        """
        if rate != 1:
            current_weights = self.get_weights()
            updated_weights = [(1 - rate) * t + rate * o for t, o in zip(current_weights, weights)]
        else:
            updated_weights = weights
        for update_function, variable in zip(self.set_weights_ops, updated_weights):
            update_function(variable)

    def accumulate_gradients(self, inputs, targets):
        """
        Neon doesn't currently allow separating the grads calculation and grad apply
        operations, so gradient accumulation is not available. Instead we do a full
        training iteration and return its loss.
        :param inputs: a batch of inputs (single array or list of arrays)
        :param targets: a batch of targets (single array or list of arrays)
        :return: a one-element list holding the mean loss
        """
        # build new lists instead of mutating the caller's arguments in place
        # (neon expects the batch axis to be last)
        inputs = [single_input.swapaxes(0, -1) for single_input in force_list(inputs)]
        targets = [np.rollaxis(target, 0, len(target.shape)) for target in force_list(targets)]
        all_inputs = inputs + targets
        loss = np.mean(self.train_op(*all_inputs))
        return [loss]

View File

@@ -0,0 +1,88 @@
#
# Copyright (c) 2017 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import ngraph.frontends.neon as neon
import ngraph as ng
from ngraph.util.names import name_scope
class InputEmbedder:
    """
    Base class for embedding a single network input (observation, measurements,
    goal vector, ...) into a feature tensor. Subclasses define the actual layers
    by implementing _build_module().
    """
    def __init__(self, input_size, batch_size=None, activation_function=neon.Rectlin(), name="embedder"):
        self.name = name
        self.input_size = input_size
        self.batch_size = batch_size
        self.activation_function = activation_function
        self.weights_init = neon.GlorotInit()
        self.biases_init = neon.ConstantInit()
        self.input = None
        self.output = None

    def __call__(self, prev_input_placeholder=None):
        """
        Build the embedder graph, creating a fresh input placeholder when one is
        not supplied.
        :param prev_input_placeholder: an existing input placeholder to reuse, or None
        :return: (input placeholder, embedding output tensor)
        """
        with name_scope(self.get_name()):
            # 2D inputs are (H, W); anything else is treated as (C, H, W)
            labels = ['H', 'W'] if len(self.input_size) == 2 else ['C', 'H', 'W']
            axes = [ng.make_axis(axis_size, name=axis_name)
                    for axis_size, axis_name in zip(self.input_size, labels)]
            batch_axis = ng.make_axis(self.batch_size, name='N')
            if prev_input_placeholder is None:
                self.input = ng.placeholder(ng.make_axes(axes) + [batch_axis])
            else:
                self.input = prev_input_placeholder
            self._build_module()
            return self.input, self.output(self.input)

    def _build_module(self):
        """Define self.output; implemented by subclasses."""
        pass

    def get_name(self):
        """:return: the name of this embedder"""
        return self.name
class ImageEmbedder(InputEmbedder):
    """
    Input embedder for image observations: rescales the pixel values and applies
    a stack of three convolutional layers.
    """
    def __init__(self, input_size, batch_size=None, input_rescaler=255.0, activation_function=neon.Rectlin(), name="embedder"):
        InputEmbedder.__init__(self, input_size, batch_size, activation_function, name)
        # the raw input is divided by this value before the convolutions
        self.input_rescaler = input_rescaler

    def _build_module(self):
        # image observation: normalize then convolve; all layers share inits/activation
        shared = dict(activation=self.activation_function,
                      filter_init=self.weights_init,
                      bias_init=self.biases_init)
        self.output = neon.Sequential([
            neon.Preprocess(functor=lambda x: x / self.input_rescaler),
            neon.Convolution((8, 8, 32), strides=4, **shared),
            neon.Convolution((4, 4, 64), strides=2, **shared),
            neon.Convolution((3, 3, 64), strides=1, **shared)
        ])
class VectorEmbedder(InputEmbedder):
    """Input embedder for flat vector observations: a single 256-unit affine layer."""
    def __init__(self, input_size, batch_size=None, activation_function=neon.Rectlin(), name="embedder"):
        InputEmbedder.__init__(self, input_size, batch_size, activation_function, name)

    def _build_module(self):
        # vector observation
        hidden = neon.Affine(nout=256,
                             activation=self.activation_function,
                             weight_init=self.weights_init,
                             bias_init=self.biases_init)
        self.output = neon.Sequential([hidden])

View File

@@ -0,0 +1,191 @@
#
# Copyright (c) 2017 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from architectures.neon_components.embedders import *
from architectures.neon_components.heads import *
from architectures.neon_components.middleware import *
from architectures.neon_components.architecture import *
from configurations import InputTypes, OutputTypes, MiddlewareTypes
class GeneralNeonNetwork(NeonArchitecture):
    """
    A generic network assembled from configurable parts: input embedders, a
    middleware embedder and output heads, as described by the agent's tuning
    parameters.
    """
    def __init__(self, tuning_parameters, name="", global_network=None, network_is_local=True):
        """
        :param tuning_parameters: a Preset instance with all the running parameters
        :param name: the name scope of the network
        :param global_network: the shared network in distributed training
        :param network_is_local: whether this instance is a local worker copy
        """
        self.global_network = global_network
        self.network_is_local = network_is_local
        # with separate networks per head we build several single-head networks;
        # otherwise one network carries all the heads
        self.num_heads_per_network = 1 if tuning_parameters.agent.use_separate_networks_per_head else \
            len(tuning_parameters.agent.output_types)
        self.num_networks = 1 if not tuning_parameters.agent.use_separate_networks_per_head else \
            len(tuning_parameters.agent.output_types)
        self.input_embedders = []
        self.output_heads = []
        self.activation_function = self.get_activation_function(
            tuning_parameters.agent.hidden_layers_activation_function)
        NeonArchitecture.__init__(self, tuning_parameters, name, global_network, network_is_local)

    def get_activation_function(self, activation_function_string):
        """
        Map an activation function name to the matching neon activation object.
        :param activation_function_string: one of 'relu', 'tanh', 'sigmoid', 'elu', 'none'
        :return: the neon activation transform, or None for 'none'
        """
        activation_functions = {
            'relu': neon.Rectlin(),
            'tanh': neon.Tanh(),
            'sigmoid': neon.Logistic(),
            'elu': neon.Explin(),
            'none': None
        }
        assert activation_function_string in activation_functions.keys(), \
            "Activation function must be one of the following {}".format(activation_functions.keys())
        return activation_functions[activation_function_string]

    def get_input_embedder(self, embedder_type):
        """
        Create the input embedder matching the given input type.
        :param embedder_type: an InputTypes enum value
        :return: a new InputEmbedder instance
        """
        # the observation can be either an image or a vector
        # NOTE(review): self.input_height etc. are presumably set by the base
        # architecture - confirm against Architecture.__init__
        def get_observation_embedding(with_timestep=False):
            if self.input_height > 1:
                return ImageEmbedder((self.input_depth, self.input_height, self.input_width), self.batch_size,
                                     name="observation")
            else:
                return VectorEmbedder((self.input_depth, self.input_width + int(with_timestep)), self.batch_size,
                                      name="observation")

        # PERF FIX: map to factories instead of instances, so only the requested
        # embedder is constructed (previously every embedder in the dict was built
        # on each call and all but one were discarded)
        input_mapping = {
            InputTypes.Observation: lambda: get_observation_embedding(),
            InputTypes.Measurements: lambda: VectorEmbedder(self.measurements_size, self.batch_size,
                                                            name="measurements"),
            InputTypes.GoalVector: lambda: VectorEmbedder(self.measurements_size, self.batch_size,
                                                          name="goal_vector"),
            InputTypes.Action: lambda: VectorEmbedder((self.num_actions,), self.batch_size, name="action"),
            InputTypes.TimedObservation: lambda: get_observation_embedding(with_timestep=True),
        }
        return input_mapping[embedder_type]()

    def get_middleware_embedder(self, middleware_type):
        """
        Create the middleware embedder matching the given middleware type.
        :param middleware_type: a MiddlewareTypes enum value
        :return: a new MiddlewareEmbedder instance
        :raises Exception: if the middleware type is not supported over Neon
        """
        middleware_mapping = {
            MiddlewareTypes.LSTM: None,  # LSTM over Neon is currently not supported in Coach
            MiddlewareTypes.FC: FC_Embedder
        }
        middleware_class = middleware_mapping.get(middleware_type)
        # ROBUSTNESS FIX: previously an unsupported type led to calling None(...)
        # and an opaque TypeError; fail with a clear message instead
        if middleware_class is None:
            raise Exception("{} middleware is not supported over Neon".format(middleware_type))
        return middleware_class(self.activation_function)

    def get_output_head(self, head_type, head_idx, loss_weight=1.):
        """
        Create an output head of the given type.
        :param head_type: an OutputTypes enum value
        :param head_idx: the index of the head (used for naming scopes)
        :param loss_weight: the weight to assign to the head's loss
        :return: a new Head instance
        :raises Exception: if the head type is not supported over Neon
        """
        output_mapping = {
            OutputTypes.Q: QHead,
            OutputTypes.DuelingQ: DuelingQHead,
            OutputTypes.V: None,  # Policy Optimization algorithms over Neon are currently not supported in Coach
            OutputTypes.Pi: None,  # Policy Optimization algorithms over Neon are currently not supported in Coach
            OutputTypes.MeasurementsPrediction: None,  # DFP over Neon is currently not supported in Coach
            OutputTypes.DNDQ: None,  # NEC over Neon is currently not supported in Coach
            OutputTypes.NAF: None,  # NAF over Neon is currently not supported in Coach
            OutputTypes.PPO: None,  # PPO over Neon is currently not supported in Coach
            OutputTypes.PPO_V: None  # PPO over Neon is currently not supported in Coach
        }
        head_class = output_mapping[head_type]
        # ROBUSTNESS FIX: previously an unsupported type led to calling None(...)
        # and an opaque TypeError; fail with a clear message instead
        if head_class is None:
            raise Exception("{} head is not supported over Neon".format(head_type))
        return head_class(self.tp, head_idx, loss_weight, self.network_is_local)

    def get_model(self, tuning_parameters):
        """
        Build the full graph: input embedders -> middleware -> output heads,
        followed by the losses and the optimizer.
        :param tuning_parameters: A Preset class instance with all the running parameters
        :type tuning_parameters: Preset
        :return: A model
        """
        assert len(self.tp.agent.input_types) > 0, "At least one input type should be defined"
        assert len(self.tp.agent.output_types) > 0, "At least one output type should be defined"
        assert self.tp.agent.middleware_type is not None, "Exactly one middleware type should be defined"
        assert len(self.tp.agent.loss_weights) > 0, "At least one loss weight should be defined"
        assert len(self.tp.agent.output_types) == len(self.tp.agent.loss_weights), \
            "Number of loss weights should match the number of output types"
        local_network_in_distributed_training = self.global_network is not None and self.network_is_local

        tuning_parameters.activation_function = self.activation_function
        done_creating_input_placeholders = False
        for network_idx in range(self.num_networks):
            with name_scope('network_{}'.format(network_idx)):
                ####################
                # Input Embeddings #
                ####################
                state_embedding = []
                for idx, input_type in enumerate(self.tp.agent.input_types):
                    # get the class of the input embedder
                    self.input_embedders.append(self.get_input_embedder(input_type))
                    # in the case each head uses a different network, we still reuse the input placeholders
                    prev_network_input_placeholder = self.inputs[idx] if done_creating_input_placeholders else None
                    # create the input embedder instance and store the input placeholder and the embedding
                    input_placeholder, embedding = self.input_embedders[-1](prev_network_input_placeholder)
                    if len(self.inputs) < len(self.tp.agent.input_types):
                        self.inputs.append(input_placeholder)
                    state_embedding.append(embedding)
                done_creating_input_placeholders = True

                ##############
                # Middleware #
                ##############
                # merge all the input embeddings into a single tensor before the middleware
                state_embedding = ng.concat_along_axis(state_embedding, state_embedding[0].axes[0]) \
                    if len(state_embedding) > 1 else state_embedding[0]
                self.middleware_embedder = self.get_middleware_embedder(self.tp.agent.middleware_type)
                _, self.state_embedding = self.middleware_embedder(state_embedding)

                ################
                # Output Heads #
                ################
                for head_idx in range(self.num_heads_per_network):
                    for head_copy_idx in range(self.tp.agent.num_output_head_copies):
                        if self.tp.agent.use_separate_networks_per_head:
                            # if we use separate networks per head, then the head type corresponds to the network idx
                            head_type_idx = network_idx
                        else:
                            # if we use a single network with multiple heads, then the head type is the current head idx
                            head_type_idx = head_idx
                        self.output_heads.append(self.get_output_head(self.tp.agent.output_types[head_type_idx],
                                                                      head_copy_idx,
                                                                      self.tp.agent.loss_weights[head_type_idx]))
                        # only local networks expose target placeholders for the loss
                        if self.network_is_local:
                            output, target_placeholder, input_placeholder = self.output_heads[-1](self.state_embedding)
                            self.targets.extend(target_placeholder)
                        else:
                            output, input_placeholder = self.output_heads[-1](self.state_embedding)
                        self.outputs.extend(output)
                        self.inputs.extend(input_placeholder)

        # Losses
        self.losses = []
        for output_head in self.output_heads:
            self.losses += output_head.loss
        self.total_loss = sum(self.losses)

        # Learning rate
        if self.tp.learning_rate_decay_rate != 0:
            raise Exception("learning rate decay is not supported in neon")

        # Optimizer
        if local_network_in_distributed_training and \
                hasattr(self.tp.agent, "shared_optimizer") and self.tp.agent.shared_optimizer:
            # distributed training and this is the local network instantiation
            self.optimizer = self.global_network.optimizer
        else:
            if tuning_parameters.agent.optimizer_type == 'Adam':
                self.optimizer = neon.Adam(
                    learning_rate=tuning_parameters.learning_rate,
                    gradient_clip_norm=tuning_parameters.clip_gradients
                )
            elif tuning_parameters.agent.optimizer_type == 'RMSProp':
                self.optimizer = neon.RMSProp(
                    learning_rate=tuning_parameters.learning_rate,
                    gradient_clip_norm=tuning_parameters.clip_gradients,
                    decay_rate=0.9,
                    epsilon=0.01
                )
            elif tuning_parameters.agent.optimizer_type == 'LBFGS':
                raise Exception("LBFGS optimizer is not supported in neon")
            else:
                raise Exception("{} is not a valid optimizer type".format(tuning_parameters.agent.optimizer_type))

View File

@@ -0,0 +1,194 @@
#
# Copyright (c) 2017 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import ngraph as ng
from ngraph.util.names import name_scope
import ngraph.frontends.neon as neon
import numpy as np
from utils import force_list
from architectures.neon_components.losses import *
class Head:
    """
    Base class for an output head: the final layers of a network that produce one
    kind of output (e.g. Q-values) together with its target placeholders and loss.
    Subclasses implement _build_module() to define the layers.
    """
    def __init__(self, tuning_parameters, head_idx=0, loss_weight=1., is_local=True):
        # index of this head within the network, used for scoping/naming
        self.head_idx = head_idx
        self.name = "head"
        # the following lists are populated by _build_module() / set_loss()
        self.output = []
        self.loss = []
        self.loss_type = []
        self.regularizations = []
        self.loss_weight = force_list(loss_weight)
        self.weights_init = neon.GlorotInit()
        self.biases_init = neon.ConstantInit()
        self.target = []
        self.input = []
        # only local (worker) networks create targets and losses
        self.is_local = is_local
        self.batch_size = tuning_parameters.batch_size

    def __call__(self, input_layer):
        """
        Wrapper for building the module graph including scoping and loss creation
        :param input_layer: the input to the graph
        :return: (outputs, targets, inputs) for a local network,
                 (outputs, inputs) otherwise
        """
        with name_scope(self.get_name()):
            self._build_module(input_layer)

            # normalize everything to lists so callers can extend() uniformly
            self.output = force_list(self.output)
            self.target = force_list(self.target)
            self.input = force_list(self.input)
            self.loss_type = force_list(self.loss_type)
            self.loss = force_list(self.loss)
            self.regularizations = force_list(self.regularizations)
            if self.is_local:
                self.set_loss()

        if self.is_local:
            return self.output, self.target, self.input
        else:
            return self.output, self.input

    def _build_module(self, input_layer):
        """
        Builds the graph of the module
        :param input_layer: the input to the graph
        :return: None
        """
        pass

    def get_name(self):
        """
        Get a formatted name for the module
        :return: the formatted name
        """
        return '{}_{}'.format(self.name, self.head_idx)

    def set_loss(self):
        """
        Creates a target placeholder and loss op for each loss_type, then appends
        the regularization terms. The target placeholders share the first axis of
        the head's first output plus a batch axis.
        :return: None
        """
        # add losses and target placeholder
        for idx in range(len(self.loss_type)):
            # output_axis = ng.make_axis(self.num_actions, name='q_values')
            batch_axis_full = ng.make_axis(self.batch_size, name='N')
            # NOTE(review): the target axes are taken from output[0] even when there
            # are several loss types - confirm this is intended for multi-loss heads
            target = ng.placeholder(ng.make_axes([self.output[0].axes[0], batch_axis_full]))
            self.target.append(target)
            loss = self.loss_type[idx](self.target[-1], self.output[idx],
                                       weights=self.loss_weight[idx], scope=self.get_name())
            self.loss.append(loss)

        # add regularizations
        for regularization in self.regularizations:
            self.loss.append(regularization)
class QHead(Head):
    """A plain Q-value head: one affine layer producing a value per action."""
    def __init__(self, tuning_parameters, head_idx=0, loss_weight=1., is_local=True):
        Head.__init__(self, tuning_parameters, head_idx, loss_weight, is_local)
        self.name = 'q_values_head'
        self.num_actions = tuning_parameters.env_instance.action_space_size
        if tuning_parameters.agent.replace_mse_with_huber_loss:
            raise Exception("huber loss is not supported in neon")
        self.loss_type = mean_squared_error

    def _build_module(self, input_layer):
        # Standard Q network: a single linear layer mapping the embedding to action values
        q_values_layer = neon.Affine(nout=self.num_actions,
                                     weight_init=self.weights_init,
                                     bias_init=self.biases_init)
        self.output = neon.Sequential([q_values_layer])(input_layer)
class DuelingQHead(QHead):
    """
    Dueling Q head: splits the embedding into a state-value stream V and an
    action-advantage stream A, then combines them as
    Q(s, a) = V(s) + (A(s, a) - mean(A(s, ·))).
    """
    def __init__(self, tuning_parameters, head_idx=0, loss_weight=1., is_local=True):
        QHead.__init__(self, tuning_parameters, head_idx, loss_weight, is_local)

    def _build_module(self, input_layer):
        # Dueling Network
        # the axis along which the per-action values are laid out
        output_axis = ng.make_axis(self.num_actions, name='q_values')

        # state value tower - V: 256-unit hidden layer, then a single scalar value
        state_value = neon.Sequential([
            neon.Affine(nout=256, activation=neon.Rectlin(),
                        weight_init=self.weights_init, bias_init=self.biases_init),
            neon.Affine(nout=1,
                        weight_init=self.weights_init, bias_init=self.biases_init)
        ])(input_layer)

        # action advantage tower - A: 256-unit hidden layer, then one value per action
        action_advantage_unnormalized = neon.Sequential([
            neon.Affine(nout=256, activation=neon.Rectlin(),
                        weight_init=self.weights_init, bias_init=self.biases_init),
            neon.Affine(axes=output_axis,
                        weight_init=self.weights_init, bias_init=self.biases_init)
        ])(input_layer)
        # center the advantages so they sum to ~0 across actions
        action_advantage = action_advantage_unnormalized - ng.mean(action_advantage_unnormalized)

        # broadcast the scalar state value along the action axis so it can be
        # added to every advantage
        repeated_state_value = ng.expand_dims(ng.slice_along_axis(state_value, state_value.axes[0], 0), output_axis, 0)

        # merge to state-action value function Q
        self.output = repeated_state_value + action_advantage
class MeasurementsPredictionHead(Head):
    """
    Output head that predicts future measurements for each action, combining an
    expectation stream with a per-action differences stream (same structure as a
    dueling head, but over measurement predictions).
    """
    def __init__(self, tuning_parameters, head_idx=0, loss_weight=1., is_local=True):
        Head.__init__(self, tuning_parameters, head_idx, loss_weight, is_local)
        self.name = 'future_measurements_head'
        self.num_actions = tuning_parameters.env_instance.action_space_size
        self.num_measurements = tuning_parameters.env.measurements_size[0] \
            if tuning_parameters.env.measurements_size else 0
        self.num_prediction_steps = tuning_parameters.agent.num_predicted_steps_ahead
        # total size of the flattened predictions: one vector of measurements per step
        self.multi_step_measurements_size = self.num_measurements * self.num_prediction_steps
        if tuning_parameters.agent.replace_mse_with_huber_loss:
            raise Exception("huber loss is not supported in neon")
        else:
            self.loss_type = mean_squared_error

    def _build_module(self, input_layer):
        # This is almost exactly the same as Dueling Network but we predict the future
        # measurements for each action
        # BUGFIX: the original read self.measurements_size and self.num_predicted_steps_ahead,
        # neither of which is ever defined (an AttributeError at build time); use the
        # value precomputed in __init__ instead
        multistep_measurements_size = self.multi_step_measurements_size
        # BUGFIX: output_axis was never defined in this scope (a NameError at build
        # time); create it the same way DuelingQHead does
        output_axis = ng.make_axis(self.num_actions, name='q_values')

        # actions expectation tower (expectation stream) - E
        with name_scope("expectation_stream"):
            expectation_stream = neon.Sequential([
                neon.Affine(nout=256, activation=neon.Rectlin(),
                            weight_init=self.weights_init, bias_init=self.biases_init),
                neon.Affine(nout=multistep_measurements_size,
                            weight_init=self.weights_init, bias_init=self.biases_init)
            ])(input_layer)

        # action fine differences tower (action stream) - A
        with name_scope("action_stream"):
            action_stream_unnormalized = neon.Sequential([
                neon.Affine(nout=256, activation=neon.Rectlin(),
                            weight_init=self.weights_init, bias_init=self.biases_init),
                neon.Affine(nout=self.num_actions * multistep_measurements_size,
                            weight_init=self.weights_init, bias_init=self.biases_init),
                neon.Reshape((self.num_actions, multistep_measurements_size))
            ])(input_layer)
            # center the per-action stream so it sums to ~0 across actions
            action_stream = action_stream_unnormalized - ng.mean(action_stream_unnormalized)

        # broadcast the expectation stream along the action axis
        repeated_expectation_stream = ng.slice_along_axis(expectation_stream, expectation_stream.axes[0], 0)
        repeated_expectation_stream = ng.expand_dims(repeated_expectation_stream, output_axis, 0)

        # merge to future measurements predictions
        self.output = repeated_expectation_stream + action_stream

View File

@@ -0,0 +1,28 @@
#
# Copyright (c) 2017 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import ngraph as ng
import ngraph.frontends.neon as neon
from ngraph.util.names import name_scope
import numpy as np
def mean_squared_error(targets, outputs, weights=1.0, scope=""):
    """
    Weighted squared-L2 loss between the targets and the network outputs.
    :param targets: the target values placeholder
    :param outputs: the network output tensor
    :param weights: a scalar weight applied to the loss
    :param scope: the name scope in which to create the loss ops
    :return: the weighted loss tensor
    """
    with name_scope(scope):
        # TODO: reduce mean over the action axis
        squared_difference = ng.squared_L2(targets - outputs)
        return squared_difference * weights

View File

@@ -0,0 +1,50 @@
#
# Copyright (c) 2017 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import ngraph as ng
import ngraph.frontends.neon as neon
from ngraph.util.names import name_scope
import numpy as np
class MiddlewareEmbedder:
    """
    Base class for the middleware part of a network, sitting between the input
    embedders and the output heads. Subclasses implement _build_module() to
    define the layers.
    """
    def __init__(self, activation_function=neon.Rectlin(), name="middleware_embedder"):
        self.name = name
        self.activation_function = activation_function
        self.weights_init = neon.GlorotInit()
        self.biases_init = neon.ConstantInit()
        # populated when the embedder is called
        self.input = None
        self.output = None

    def __call__(self, input_layer):
        """
        Wire the middleware layers on top of the given input.
        :param input_layer: the tensor to feed into the middleware
        :return: (input tensor, middleware output tensor)
        """
        with name_scope(self.get_name()):
            self.input = input_layer
            self._build_module()
            return self.input, self.output(self.input)

    def _build_module(self):
        """Define self.output; implemented by subclasses."""
        pass

    def get_name(self):
        """:return: the name of this middleware embedder"""
        return self.name
class FC_Embedder(MiddlewareEmbedder):
    """Fully-connected middleware: a single 512-unit affine layer."""
    def _build_module(self):
        hidden_layer = neon.Affine(nout=512,
                                   activation=self.activation_function,
                                   weight_init=self.weights_init,
                                   bias_init=self.biases_init)
        self.output = neon.Sequential([hidden_layer])