mirror of
https://github.com/gryf/coach.git
synced 2026-04-09 22:53:40 +02:00
coach v0.8.0
This commit is contained in:
0
architectures/neon_components/__init__.py
Normal file
0
architectures/neon_components/__init__.py
Normal file
129
architectures/neon_components/architecture.py
Normal file
129
architectures/neon_components/architecture.py
Normal file
@@ -0,0 +1,129 @@
|
||||
#
|
||||
# Copyright (c) 2017 Intel Corporation
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
import sys
|
||||
import copy
|
||||
from ngraph.frontends.neon import *
|
||||
import ngraph as ng
|
||||
from architectures.architecture import *
|
||||
import numpy as np
|
||||
from utils import *
|
||||
|
||||
|
||||
class NeonArchitecture(Architecture):
    """Neon (ngraph) based network architecture.

    Builds the model graph, the train/predict computations and the
    weight get/set computations on top of an ngraph transformer
    (supplied through ``tuning_parameters.sess``).
    """

    def __init__(self, tuning_parameters, name="", global_network=None, network_is_local=True):
        """
        :param tuning_parameters: a Preset instance with all the run parameters
        :param name: the name of the network
        :param global_network: the shared global network when training distributed
        :param network_is_local: True if this instance is a worker-local network
        """
        Architecture.__init__(self, tuning_parameters, name)
        assert tuning_parameters.agent.neon_support, 'Neon is not supported for this agent'
        self.clip_error = tuning_parameters.clip_gradients
        self.total_loss = None
        self.epoch = 0
        self.inputs = []
        self.outputs = []
        self.targets = []
        self.losses = []

        # for the neon backend, tuning_parameters.sess carries the ngraph transformer
        self.transformer = tuning_parameters.sess
        self.network = self.get_model(tuning_parameters)
        self.accumulated_gradients = []

        # training and inference ops: the train op applies the optimizer and
        # then evaluates (and returns) the loss
        train_output = ng.sequential([
            self.optimizer(self.total_loss),
            self.total_loss
        ])
        placeholders = self.inputs + self.targets
        self.train_op = self.transformer.add_computation(
            ng.computation(
                train_output, *placeholders
            )
        )
        self.predict_op = self.transformer.add_computation(
            ng.computation(
                self.outputs, self.inputs[0]
            )
        )

        # update weights from array op: one assign computation per variable
        self.weights = [ng.placeholder(w.axes) for w in self.total_loss.variables()]
        self.set_weights_ops = []
        for target_variable, variable in zip(self.total_loss.variables(), self.weights):
            self.set_weights_ops.append(self.transformer.add_computation(
                ng.computation(
                    ng.assign(target_variable, variable), variable
                )
            ))

        # get weights op
        self.get_variables = self.transformer.add_computation(
            ng.computation(
                self.total_loss.variables()
            )
        )

    def predict(self, inputs):
        """Run a forward pass of the network.

        :param inputs: a batch of inputs with the batch axis first
        :return: a single output array, or a list of arrays when the network
                 has multiple outputs
        """
        batch_size = inputs.shape[0]

        # move batch axis to the end - neon expects the batch axis last
        inputs = inputs.swapaxes(0, -1)
        prediction = self.predict_op(inputs)  # TODO: problem with multiple inputs

        # BUGFIX: the original wrote `prediction = (prediction)`, which is NOT a
        # tuple - a single ndarray output would then be iterated row by row
        # below. Wrap single outputs in a real one-element tuple.
        if not isinstance(prediction, tuple):
            prediction = (prediction,)

        # process all the outputs from the network: move the batch axis back to
        # the front and trim any padding beyond the actual batch size
        output = [p.transpose()[:batch_size].copy() for p in prediction]

        # if there is only one output then we don't need a list
        if len(output) == 1:
            output = output[0]
        return output

    def train_on_batch(self, inputs, targets):
        """Run a single training iteration and return the loss."""
        loss = self.accumulate_gradients(inputs, targets)
        self.apply_and_reset_gradients(self.accumulated_gradients)
        return loss

    def get_weights(self):
        """:return: the current values of all the network variables"""
        return self.get_variables()

    def set_weights(self, weights, rate=1.0):
        """Assign the given values to the network variables.

        :param weights: the new weight values
        :param rate: interpolation rate - 1.0 replaces the weights entirely,
                     smaller values blend them with the current weights
        """
        if rate != 1:
            current_weights = self.get_weights()
            updated_weights = [(1 - rate) * t + rate * o for t, o in zip(current_weights, weights)]
        else:
            updated_weights = weights
        for update_function, variable in zip(self.set_weights_ops, updated_weights):
            update_function(variable)

    def accumulate_gradients(self, inputs, targets):
        # Neon doesn't currently allow separating the grads calculation and grad apply operations
        # so this feature is not currently available. instead we do a full training iteration
        inputs = force_list(inputs)
        targets = force_list(targets)

        # move the batch axis to the end, as neon expects
        # (renamed loop variable: the original shadowed the builtin `input`)
        for idx, single_input in enumerate(inputs):
            inputs[idx] = single_input.swapaxes(0, -1)

        for idx, target in enumerate(targets):
            targets[idx] = np.rollaxis(target, 0, len(target.shape))

        all_inputs = inputs + targets

        loss = np.mean(self.train_op(*all_inputs))

        return [loss]
||||
88
architectures/neon_components/embedders.py
Normal file
88
architectures/neon_components/embedders.py
Normal file
@@ -0,0 +1,88 @@
|
||||
#
|
||||
# Copyright (c) 2017 Intel Corporation
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
import ngraph.frontends.neon as neon
|
||||
import ngraph as ng
|
||||
from ngraph.util.names import name_scope
|
||||
|
||||
|
||||
class InputEmbedder:
    """Base class for the input embedding part of the network.

    Maps a raw input (observation, measurements, ...) into an embedding.
    Subclasses implement ``_build_module`` to define the actual layers.
    """

    def __init__(self, input_size, batch_size=None, activation_function=neon.Rectlin(), name="embedder"):
        self.name = name
        self.input_size = input_size
        self.batch_size = batch_size
        self.activation_function = activation_function
        self.weights_init = neon.GlorotInit()
        self.biases_init = neon.ConstantInit()
        self.input = None
        self.output = None

    def __call__(self, prev_input_placeholder=None):
        """Build the embedder graph.

        :param prev_input_placeholder: an existing input placeholder to reuse,
                                       or None to create a fresh one
        :return: the input placeholder and the output of the embedding
        """
        with name_scope(self.get_name()):
            # a 2D input is (H, W); anything else is assumed to be (C, H, W)
            axis_names = ['H', 'W'] if len(self.input_size) == 2 else ['C', 'H', 'W']
            spatial_axes = [ng.make_axis(axis_size, name=axis_name)
                            for axis_size, axis_name in zip(self.input_size, axis_names)]
            input_axes = ng.make_axes(spatial_axes)
            batch_axis_full = ng.make_axis(self.batch_size, name='N')

            if prev_input_placeholder is not None:
                self.input = prev_input_placeholder
            else:
                self.input = ng.placeholder(input_axes + [batch_axis_full])
            self._build_module()

        return self.input, self.output(self.input)

    def _build_module(self):
        # implemented by subclasses
        pass

    def get_name(self):
        """:return: the name of the embedder"""
        return self.name
|
||||
|
||||
|
||||
class ImageEmbedder(InputEmbedder):
    """Embedder for image observations: rescale and apply a conv stack."""

    def __init__(self, input_size, batch_size=None, input_rescaler=255.0, activation_function=neon.Rectlin(), name="embedder"):
        InputEmbedder.__init__(self, input_size, batch_size, activation_function, name)
        # the raw pixel values are divided by this before the conv layers
        self.input_rescaler = input_rescaler

    def _build_module(self):
        # image observation: normalize, then the standard DQN conv tower
        conv_specs = [((8, 8, 32), 4), ((4, 4, 64), 2), ((3, 3, 64), 1)]
        layers = [neon.Preprocess(functor=lambda x: x / self.input_rescaler)]
        for fshape, stride in conv_specs:
            layers.append(neon.Convolution(fshape, strides=stride, activation=self.activation_function,
                                           filter_init=self.weights_init, bias_init=self.biases_init))
        self.output = neon.Sequential(layers)
|
||||
|
||||
|
||||
class VectorEmbedder(InputEmbedder):
    """Embedder for flat vector observations: a single affine layer."""

    def __init__(self, input_size, batch_size=None, activation_function=neon.Rectlin(), name="embedder"):
        InputEmbedder.__init__(self, input_size, batch_size, activation_function, name)

    def _build_module(self):
        # vector observation: embed with one fully connected layer
        hidden_layer = neon.Affine(nout=256, activation=self.activation_function,
                                   weight_init=self.weights_init, bias_init=self.biases_init)
        self.output = neon.Sequential([hidden_layer])
|
||||
191
architectures/neon_components/general_network.py
Normal file
191
architectures/neon_components/general_network.py
Normal file
@@ -0,0 +1,191 @@
|
||||
#
|
||||
# Copyright (c) 2017 Intel Corporation
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
from architectures.neon_components.embedders import *
|
||||
from architectures.neon_components.heads import *
|
||||
from architectures.neon_components.middleware import *
|
||||
from architectures.neon_components.architecture import *
|
||||
from configurations import InputTypes, OutputTypes, MiddlewareTypes
|
||||
|
||||
|
||||
class GeneralNeonNetwork(NeonArchitecture):
    """A composable Neon network: input embedders -> middleware -> output heads.

    The concrete parts are selected from the preset's agent configuration
    (input_types, middleware_type, output_types).
    """

    def __init__(self, tuning_parameters, name="", global_network=None, network_is_local=True):
        """
        :param tuning_parameters: a Preset instance with all the run parameters
        :param name: the name of the network
        :param global_network: the shared global network when training distributed
        :param network_is_local: True if this instance is a worker-local network
        """
        self.global_network = global_network
        self.network_is_local = network_is_local
        # with separate networks per head, each network holds a single head;
        # otherwise one network holds all the heads
        self.num_heads_per_network = 1 if tuning_parameters.agent.use_separate_networks_per_head else \
            len(tuning_parameters.agent.output_types)
        self.num_networks = 1 if not tuning_parameters.agent.use_separate_networks_per_head else \
            len(tuning_parameters.agent.output_types)
        self.input_embedders = []
        self.output_heads = []
        self.activation_function = self.get_activation_function(
            tuning_parameters.agent.hidden_layers_activation_function)

        # the base constructor calls get_model(), so all of the above must be set first
        NeonArchitecture.__init__(self, tuning_parameters, name, global_network, network_is_local)

    def get_activation_function(self, activation_function_string):
        """Map an activation function name to the matching neon activation.

        :param activation_function_string: one of 'relu'/'tanh'/'sigmoid'/'elu'/'none'
        :return: the neon activation object (or None for 'none')
        """
        activation_functions = {
            'relu': neon.Rectlin(),
            'tanh': neon.Tanh(),
            'sigmoid': neon.Logistic(),
            'elu': neon.Explin(),
            'none': None
        }
        assert activation_function_string in activation_functions.keys(), \
            "Activation function must be one of the following {}".format(activation_functions.keys())
        return activation_functions[activation_function_string]

    def get_input_embedder(self, embedder_type):
        """Create the input embedder matching the given InputTypes value.

        :param embedder_type: an InputTypes enum value
        :return: an InputEmbedder instance
        """
        # the observation can be either an image or a vector
        def get_observation_embedding(with_timestep=False):
            if self.input_height > 1:
                return ImageEmbedder((self.input_depth, self.input_height, self.input_width), self.batch_size,
                                     name="observation")
            else:
                return VectorEmbedder((self.input_depth, self.input_width + int(with_timestep)), self.batch_size,
                                      name="observation")

        # NOTE(review): this dict eagerly constructs an embedder for every
        # input type on each call, even though only one is returned
        input_mapping = {
            InputTypes.Observation: get_observation_embedding(),
            InputTypes.Measurements: VectorEmbedder(self.measurements_size, self.batch_size, name="measurements"),
            InputTypes.GoalVector: VectorEmbedder(self.measurements_size, self.batch_size, name="goal_vector"),
            InputTypes.Action: VectorEmbedder((self.num_actions,), self.batch_size, name="action"),
            InputTypes.TimedObservation: get_observation_embedding(with_timestep=True),
        }
        return input_mapping[embedder_type]

    def get_middleware_embedder(self, middleware_type):
        """Create the middleware embedder matching the given MiddlewareTypes value."""
        return {MiddlewareTypes.LSTM: None,  # LSTM over Neon is currently not supported in Coach
                MiddlewareTypes.FC: FC_Embedder}.get(middleware_type)(self.activation_function)

    def get_output_head(self, head_type, head_idx, loss_weight=1.):
        """Create the output head matching the given OutputTypes value.

        :param head_type: an OutputTypes enum value
        :param head_idx: the index of this head copy
        :param loss_weight: the weight of this head's loss in the total loss
        :return: a Head instance
        """
        # only Q and DuelingQ are implemented for the neon backend; the rest
        # map to None and will raise TypeError if requested
        output_mapping = {
            OutputTypes.Q: QHead,
            OutputTypes.DuelingQ: DuelingQHead,
            OutputTypes.V: None,  # Policy Optimization algorithms over Neon are currently not supported in Coach
            OutputTypes.Pi: None,  # Policy Optimization algorithms over Neon are currently not supported in Coach
            OutputTypes.MeasurementsPrediction: None,  # DFP over Neon is currently not supported in Coach
            OutputTypes.DNDQ: None,  # NEC over Neon is currently not supported in Coach
            OutputTypes.NAF: None,  # NAF over Neon is currently not supported in Coach
            OutputTypes.PPO: None,  # PPO over Neon is currently not supported in Coach
            OutputTypes.PPO_V: None  # PPO over Neon is currently not supported in Coach
        }
        return output_mapping[head_type](self.tp, head_idx, loss_weight, self.network_is_local)

    def get_model(self, tuning_parameters):
        """
        Build the full model graph: input embedders, middleware, output heads,
        the combined loss and the optimizer.

        :param tuning_parameters: A Preset class instance with all the running parameters
        :type tuning_parameters: Preset
        :return: A model
        """
        assert len(self.tp.agent.input_types) > 0, "At least one input type should be defined"
        assert len(self.tp.agent.output_types) > 0, "At least one output type should be defined"
        assert self.tp.agent.middleware_type is not None, "Exactly one middleware type should be defined"
        assert len(self.tp.agent.loss_weights) > 0, "At least one loss weight should be defined"
        assert len(self.tp.agent.output_types) == len(self.tp.agent.loss_weights), \
            "Number of loss weights should match the number of output types"
        local_network_in_distributed_training = self.global_network is not None and self.network_is_local

        tuning_parameters.activation_function = self.activation_function
        done_creating_input_placeholders = False

        for network_idx in range(self.num_networks):
            with name_scope('network_{}'.format(network_idx)):
                ####################
                # Input Embeddings #
                ####################

                state_embedding = []
                for idx, input_type in enumerate(self.tp.agent.input_types):
                    # get the class of the input embedder
                    self.input_embedders.append(self.get_input_embedder(input_type))

                    # in the case each head uses a different network, we still reuse the input placeholders
                    prev_network_input_placeholder = self.inputs[idx] if done_creating_input_placeholders else None

                    # create the input embedder instance and store the input placeholder and the embedding
                    input_placeholder, embedding = self.input_embedders[-1](prev_network_input_placeholder)
                    if len(self.inputs) < len(self.tp.agent.input_types):
                        self.inputs.append(input_placeholder)
                    state_embedding.append(embedding)

                done_creating_input_placeholders = True

                ##############
                # Middleware #
                ##############

                # multiple embeddings are concatenated along their first axis
                state_embedding = ng.concat_along_axis(state_embedding, state_embedding[0].axes[0]) \
                    if len(state_embedding) > 1 else state_embedding[0]
                self.middleware_embedder = self.get_middleware_embedder(self.tp.agent.middleware_type)
                _, self.state_embedding = self.middleware_embedder(state_embedding)

                ################
                # Output Heads #
                ################

                for head_idx in range(self.num_heads_per_network):
                    for head_copy_idx in range(self.tp.agent.num_output_head_copies):
                        if self.tp.agent.use_separate_networks_per_head:
                            # if we use separate networks per head, then the head type corresponds to the network idx
                            head_type_idx = network_idx
                        else:
                            # if we use a single network with multiple heads, then the head type is the current head idx
                            head_type_idx = head_idx
                        self.output_heads.append(self.get_output_head(self.tp.agent.output_types[head_type_idx],
                                                                      head_copy_idx,
                                                                      self.tp.agent.loss_weights[head_type_idx]))
                        # only local networks get target placeholders and losses
                        if self.network_is_local:
                            output, target_placeholder, input_placeholder = self.output_heads[-1](self.state_embedding)
                            self.targets.extend(target_placeholder)
                        else:
                            output, input_placeholder = self.output_heads[-1](self.state_embedding)

                        self.outputs.extend(output)
                        self.inputs.extend(input_placeholder)

        # Losses - the total loss is the sum of all the head losses
        self.losses = []
        for output_head in self.output_heads:
            self.losses += output_head.loss
        self.total_loss = sum(self.losses)

        # Learning rate
        if self.tp.learning_rate_decay_rate != 0:
            raise Exception("learning rate decay is not supported in neon")

        # Optimizer
        if local_network_in_distributed_training and \
                hasattr(self.tp.agent, "shared_optimizer") and self.tp.agent.shared_optimizer:
            # distributed training and this is the local network instantiation
            self.optimizer = self.global_network.optimizer
        else:
            if tuning_parameters.agent.optimizer_type == 'Adam':
                self.optimizer = neon.Adam(
                    learning_rate=tuning_parameters.learning_rate,
                    gradient_clip_norm=tuning_parameters.clip_gradients
                )
            elif tuning_parameters.agent.optimizer_type == 'RMSProp':
                self.optimizer = neon.RMSProp(
                    learning_rate=tuning_parameters.learning_rate,
                    gradient_clip_norm=tuning_parameters.clip_gradients,
                    decay_rate=0.9,
                    epsilon=0.01
                )
            elif tuning_parameters.agent.optimizer_type == 'LBFGS':
                raise Exception("LBFGS optimizer is not supported in neon")
            else:
                raise Exception("{} is not a valid optimizer type".format(tuning_parameters.agent.optimizer_type))
|
||||
194
architectures/neon_components/heads.py
Normal file
194
architectures/neon_components/heads.py
Normal file
@@ -0,0 +1,194 @@
|
||||
#
|
||||
# Copyright (c) 2017 Intel Corporation
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
import ngraph as ng
|
||||
from ngraph.util.names import name_scope
|
||||
import ngraph.frontends.neon as neon
|
||||
import numpy as np
|
||||
from utils import force_list
|
||||
from architectures.neon_components.losses import *
|
||||
|
||||
|
||||
class Head:
    """Base class for an output head of the network.

    A head turns the middleware embedding into one or more outputs and, for
    local networks, wires up the matching target placeholders and losses.
    Subclasses implement ``_build_module`` to define the layers.
    """

    def __init__(self, tuning_parameters, head_idx=0, loss_weight=1., is_local=True):
        self.head_idx = head_idx
        self.name = "head"
        self.is_local = is_local
        self.batch_size = tuning_parameters.batch_size
        self.loss_weight = force_list(loss_weight)
        self.weights_init = neon.GlorotInit()
        self.biases_init = neon.ConstantInit()
        # graph pieces populated by _build_module / set_loss
        self.output = []
        self.loss = []
        self.loss_type = []
        self.regularizations = []
        self.target = []
        self.input = []

    def __call__(self, input_layer):
        """
        Wrapper for building the module graph including scoping and loss creation
        :param input_layer: the input to the graph
        :return: the output of the last layer and the target placeholder
        """
        with name_scope(self.get_name()):
            self._build_module(input_layer)

            # normalize everything the subclass produced into lists
            for attr in ('output', 'target', 'input', 'loss_type', 'loss', 'regularizations'):
                setattr(self, attr, force_list(getattr(self, attr)))
            if self.is_local:
                self.set_loss()

        if self.is_local:
            return self.output, self.target, self.input
        return self.output, self.input

    def _build_module(self, input_layer):
        """
        Builds the graph of the module
        :param input_layer: the input to the graph
        :return: None
        """
        pass

    def get_name(self):
        """
        Get a formatted name for the module
        :return: the formatted name
        """
        return '{}_{}'.format(self.name, self.head_idx)

    def set_loss(self):
        """
        Creates a target placeholder and loss function for each loss_type and regularization
        :return: None
        """
        # add losses and target placeholders
        for idx, loss_function in enumerate(self.loss_type):
            # output_axis = ng.make_axis(self.num_actions, name='q_values')
            batch_axis_full = ng.make_axis(self.batch_size, name='N')
            target = ng.placeholder(ng.make_axes([self.output[0].axes[0], batch_axis_full]))
            self.target.append(target)
            self.loss.append(loss_function(target, self.output[idx],
                                           weights=self.loss_weight[idx], scope=self.get_name()))

        # add regularizations
        self.loss.extend(self.regularizations)
|
||||
|
||||
|
||||
class QHead(Head):
    """A Q-values head: one linear layer emitting a value per action."""

    def __init__(self, tuning_parameters, head_idx=0, loss_weight=1., is_local=True):
        Head.__init__(self, tuning_parameters, head_idx, loss_weight, is_local)
        self.name = 'q_values_head'
        self.num_actions = tuning_parameters.env_instance.action_space_size
        # huber loss is only available for the tensorflow backend
        if tuning_parameters.agent.replace_mse_with_huber_loss:
            raise Exception("huber loss is not supported in neon")
        self.loss_type = mean_squared_error

    def _build_module(self, input_layer):
        # standard Q network: a single affine layer over the middleware output
        q_layer = neon.Affine(nout=self.num_actions,
                              weight_init=self.weights_init, bias_init=self.biases_init)
        self.output = neon.Sequential([q_layer])(input_layer)
|
||||
|
||||
|
||||
class DuelingQHead(QHead):
    """A dueling Q head: separate state-value (V) and advantage (A) towers
    that are merged into the Q values (Wang et al., dueling architecture)."""

    def __init__(self, tuning_parameters, head_idx=0, loss_weight=1., is_local=True):
        QHead.__init__(self, tuning_parameters, head_idx, loss_weight, is_local)

    def _build_module(self, input_layer):
        # Dueling Network
        # state value tower - V
        output_axis = ng.make_axis(self.num_actions, name='q_values')

        # V tower: 256-unit hidden layer, then a single scalar value
        state_value = neon.Sequential([
            neon.Affine(nout=256, activation=neon.Rectlin(),
                        weight_init=self.weights_init, bias_init=self.biases_init),
            neon.Affine(nout=1,
                        weight_init=self.weights_init, bias_init=self.biases_init)
        ])(input_layer)

        # action advantage tower - A: 256-unit hidden layer, then one value per action
        action_advantage_unnormalized = neon.Sequential([
            neon.Affine(nout=256, activation=neon.Rectlin(),
                        weight_init=self.weights_init, bias_init=self.biases_init),
            neon.Affine(axes=output_axis,
                        weight_init=self.weights_init, bias_init=self.biases_init)
        ])(input_layer)
        # center the advantages by subtracting their mean
        action_advantage = action_advantage_unnormalized - ng.mean(action_advantage_unnormalized)

        # broadcast the scalar V along the action axis so it can be added to A
        # NOTE(review): relies on ngraph implicit broadcasting of the expanded
        # single-element slice over output_axis - confirm against ngraph docs
        repeated_state_value = ng.expand_dims(ng.slice_along_axis(state_value, state_value.axes[0], 0), output_axis, 0)

        # merge to state-action value function Q
        self.output = repeated_state_value + action_advantage
|
||||
|
||||
|
||||
class MeasurementsPredictionHead(Head):
    """A head that predicts the future measurements for each action (DFP-style).

    NOTE(review): GeneralNeonNetwork maps OutputTypes.MeasurementsPrediction
    to None ("DFP over Neon is currently not supported"), so this head is not
    reachable from there - which is presumably why its bugs went unnoticed.
    """

    def __init__(self, tuning_parameters, head_idx=0, loss_weight=1., is_local=True):
        Head.__init__(self, tuning_parameters, head_idx, loss_weight, is_local)
        self.name = 'future_measurements_head'
        self.num_actions = tuning_parameters.env_instance.action_space_size
        # number of measurements per step (0 when the environment has none)
        self.num_measurements = tuning_parameters.env.measurements_size[0] \
            if tuning_parameters.env.measurements_size else 0
        self.num_prediction_steps = tuning_parameters.agent.num_predicted_steps_ahead
        self.multi_step_measurements_size = self.num_measurements * self.num_prediction_steps
        # huber loss is only available for the tensorflow backend
        if tuning_parameters.agent.replace_mse_with_huber_loss:
            raise Exception("huber loss is not supported in neon")
        else:
            self.loss_type = mean_squared_error

    def _build_module(self, input_layer):
        # This is almost exactly the same as Dueling Network but we predict the future measurements for each action

        # BUGFIX: the original read self.measurements_size[0] * self.num_predicted_steps_ahead,
        # but neither attribute is ever set on this object (they live on the
        # tuning parameters) - __init__ already computes the combined size
        multistep_measurements_size = self.multi_step_measurements_size

        # BUGFIX: output_axis was used below without ever being defined;
        # create it the same way DuelingQHead does
        output_axis = ng.make_axis(self.num_actions, name='q_values')

        # actions expectation tower (expectation stream) - E
        with name_scope("expectation_stream"):
            expectation_stream = neon.Sequential([
                neon.Affine(nout=256, activation=neon.Rectlin(),
                            weight_init=self.weights_init, bias_init=self.biases_init),
                neon.Affine(nout=multistep_measurements_size,
                            weight_init=self.weights_init, bias_init=self.biases_init)
            ])(input_layer)

        # action fine differences tower (action stream) - A
        with name_scope("action_stream"):
            action_stream_unnormalized = neon.Sequential([
                neon.Affine(nout=256, activation=neon.Rectlin(),
                            weight_init=self.weights_init, bias_init=self.biases_init),
                neon.Affine(nout=self.num_actions * multistep_measurements_size,
                            weight_init=self.weights_init, bias_init=self.biases_init),
                neon.Reshape((self.num_actions, multistep_measurements_size))
            ])(input_layer)
            # center the per-action streams by subtracting their mean
            action_stream = action_stream_unnormalized - ng.mean(action_stream_unnormalized)

        # broadcast the expectation stream along the action axis
        repeated_expectation_stream = ng.slice_along_axis(expectation_stream, expectation_stream.axes[0], 0)
        repeated_expectation_stream = ng.expand_dims(repeated_expectation_stream, output_axis, 0)

        # merge to future measurements predictions
        self.output = repeated_expectation_stream + action_stream
|
||||
|
||||
28
architectures/neon_components/losses.py
Normal file
28
architectures/neon_components/losses.py
Normal file
@@ -0,0 +1,28 @@
|
||||
#
|
||||
# Copyright (c) 2017 Intel Corporation
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
import ngraph as ng
|
||||
import ngraph.frontends.neon as neon
|
||||
from ngraph.util.names import name_scope
|
||||
import numpy as np
|
||||
|
||||
|
||||
def mean_squared_error(targets, outputs, weights=1.0, scope=""):
    """Weighted squared-L2 loss between targets and outputs.

    :param targets: the target values placeholder
    :param outputs: the network outputs
    :param weights: a scalar (or tensor) weighting applied to the loss
    :param scope: the name scope to build the loss ops under
    :return: the weighted loss op
    """
    with name_scope(scope):
        # TODO: reduce mean over the action axis
        return ng.squared_L2(targets - outputs) * weights
|
||||
50
architectures/neon_components/middleware.py
Normal file
50
architectures/neon_components/middleware.py
Normal file
@@ -0,0 +1,50 @@
|
||||
#
|
||||
# Copyright (c) 2017 Intel Corporation
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
import ngraph as ng
|
||||
import ngraph.frontends.neon as neon
|
||||
from ngraph.util.names import name_scope
|
||||
import numpy as np
|
||||
|
||||
|
||||
class MiddlewareEmbedder:
    """Base class for the middleware part of the network.

    Connects the (merged) input embeddings to the output heads.
    Subclasses implement ``_build_module`` to define the layers.
    """

    def __init__(self, activation_function=neon.Rectlin(), name="middleware_embedder"):
        self.name = name
        self.activation_function = activation_function
        self.weights_init = neon.GlorotInit()
        self.biases_init = neon.ConstantInit()
        # populated when the embedder is called
        self.input = self.output = None

    def __call__(self, input_layer):
        """Build the middleware graph on top of the given input layer.

        :param input_layer: the merged embeddings from the input embedders
        :return: the input layer and the middleware output
        """
        with name_scope(self.get_name()):
            self.input = input_layer
            self._build_module()

        return self.input, self.output(self.input)

    def _build_module(self):
        # implemented by subclasses
        pass

    def get_name(self):
        """:return: the name of the middleware embedder"""
        return self.name
|
||||
|
||||
|
||||
class FC_Embedder(MiddlewareEmbedder):
    """Fully connected middleware: a single 512-unit affine layer."""

    def _build_module(self):
        fc_layer = neon.Affine(nout=512, activation=self.activation_function,
                               weight_init=self.weights_init, bias_init=self.biases_init)
        self.output = neon.Sequential([fc_layer])
|
||||
Reference in New Issue
Block a user