mirror of
https://github.com/gryf/coach.git
synced 2025-12-17 19:20:19 +01:00
coach v0.8.0
This commit is contained in:
194
architectures/neon_components/heads.py
Normal file
194
architectures/neon_components/heads.py
Normal file
@@ -0,0 +1,194 @@
|
||||
#
|
||||
# Copyright (c) 2017 Intel Corporation
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
import ngraph as ng
|
||||
from ngraph.util.names import name_scope
|
||||
import ngraph.frontends.neon as neon
|
||||
import numpy as np
|
||||
from utils import force_list
|
||||
from architectures.neon_components.losses import *
|
||||
|
||||
|
||||
class Head:
    """
    Base class for the network heads built with the ngraph neon frontend.

    Calling a head builds its sub-graph under a name scope derived from the head name and index.
    When the head is local, one target placeholder and one loss are also created per loss type.
    Subclasses are expected to override _build_module and assign self.output (and self.loss_type).
    """
    def __init__(self, tuning_parameters, head_idx=0, loss_weight=1., is_local=True):
        """
        :param tuning_parameters: the configuration object. only batch_size is read by the base class
        :param head_idx: the index of the head, used as a suffix of the scope name
        :param loss_weight: a single weight or a list of weights, indexed per loss type in set_loss
        :param is_local: when True, calling the head also creates the targets and the losses
        """
        self.head_idx = head_idx
        self.name = "head"
        self.output = []
        self.loss = []
        self.loss_type = []
        self.regularizations = []
        self.loss_weight = force_list(loss_weight)
        self.weights_init = neon.GlorotInit()
        self.biases_init = neon.ConstantInit()
        self.target = []
        self.input = []
        self.is_local = is_local
        self.batch_size = tuning_parameters.batch_size

    def __call__(self, input_layer):
        """
        Wrapper for building the module graph including scoping and loss creation
        :param input_layer: the input to the graph
        :return: (output, target, input) for a local head, (output, input) otherwise.
                 each element has been passed through force_list
        """
        with name_scope(self.get_name()):
            self._build_module(input_layer)

            # subclasses may assign single objects, so normalize everything through force_list
            for attribute in ('output', 'target', 'input', 'loss_type', 'loss', 'regularizations'):
                setattr(self, attribute, force_list(getattr(self, attribute)))

            # the targets and losses are created while the head's name scope is still active
            if self.is_local:
                self.set_loss()

        if self.is_local:
            return self.output, self.target, self.input
        return self.output, self.input

    def _build_module(self, input_layer):
        """
        Builds the graph of the module
        :param input_layer: the input to the graph
        :return: None
        """
        pass

    def get_name(self):
        """
        Get a formatted name for the module
        :return: the formatted name, '<name>_<head_idx>'
        """
        return '{}_{}'.format(self.name, self.head_idx)

    def set_loss(self):
        """
        Creates a target placeholder and a loss for each entry in loss_type, and then appends
        all the regularizations to the list of losses
        :return: None
        """
        # add losses and target placeholder
        for idx, loss_function in enumerate(self.loss_type):
            batch_axis_full = ng.make_axis(self.batch_size, name='N')
            # the target takes its leading axis from the same output that the loss compares it with
            target = ng.placeholder(ng.make_axes([self.output[idx].axes[0], batch_axis_full]))
            self.target.append(target)
            loss = loss_function(target, self.output[idx],
                                 weights=self.loss_weight[idx], scope=self.get_name())
            self.loss.append(loss)

        # add regularizations
        self.loss.extend(self.regularizations)
|
||||
|
||||
|
||||
class QHead(Head):
    """
    A head whose output is produced by a single affine layer with one unit per action.
    """
    def __init__(self, tuning_parameters, head_idx=0, loss_weight=1., is_local=True):
        """
        :param tuning_parameters: the configuration object. the action space size and the
                                  replace_mse_with_huber_loss flag are read from it
        :param head_idx: the index of the head
        :param loss_weight: the weight (or list of weights) of the loss
        :param is_local: forwarded to Head
        :raises Exception: if the huber loss was requested
        """
        Head.__init__(self, tuning_parameters, head_idx=head_idx, loss_weight=loss_weight, is_local=is_local)
        self.name = 'q_values_head'
        self.num_actions = tuning_parameters.env_instance.action_space_size

        # guard clause - the huber loss request is rejected before a loss type is assigned
        if tuning_parameters.agent.replace_mse_with_huber_loss:
            raise Exception("huber loss is not supported in neon")
        self.loss_type = mean_squared_error

    def _build_module(self, input_layer):
        """
        Builds the standard Q network output layer
        :param input_layer: the input to the graph
        :return: None. the result is stored in self.output
        """
        q_values_layer = neon.Affine(nout=self.num_actions,
                                     weight_init=self.weights_init, bias_init=self.biases_init)
        q_network = neon.Sequential([q_values_layer])
        self.output = q_network(input_layer)
|
||||
|
||||
|
||||
class DuelingQHead(QHead):
    """
    A Q head which is split into a state value tower (V) and an action advantage tower (A),
    merged back into a state-action value output.
    """
    def __init__(self, tuning_parameters, head_idx=0, loss_weight=1., is_local=True):
        """
        All the arguments are forwarded unchanged to QHead
        """
        QHead.__init__(self, tuning_parameters, head_idx=head_idx, loss_weight=loss_weight, is_local=is_local)

    def _build_module(self, input_layer):
        """
        Builds the dueling network towers and merges them
        :param input_layer: the input to the graph
        :return: None. the result is stored in self.output
        """
        # the axis shared by the advantage output and the repeated state value
        output_axis = ng.make_axis(self.num_actions, name='q_values')

        # state value tower - V
        value_hidden = neon.Affine(nout=256, activation=neon.Rectlin(),
                                   weight_init=self.weights_init, bias_init=self.biases_init)
        value_out = neon.Affine(nout=1,
                                weight_init=self.weights_init, bias_init=self.biases_init)
        state_value = neon.Sequential([value_hidden, value_out])(input_layer)

        # action advantage tower - A
        advantage_hidden = neon.Affine(nout=256, activation=neon.Rectlin(),
                                       weight_init=self.weights_init, bias_init=self.biases_init)
        advantage_out = neon.Affine(axes=output_axis,
                                    weight_init=self.weights_init, bias_init=self.biases_init)
        action_advantage_unnormalized = neon.Sequential([advantage_hidden, advantage_out])(input_layer)
        advantage_mean = ng.mean(action_advantage_unnormalized)
        action_advantage = action_advantage_unnormalized - advantage_mean

        # take index 0 along the first axis of V and then expand it over the q_values axis
        sliced_state_value = ng.slice_along_axis(state_value, state_value.axes[0], 0)
        repeated_state_value = ng.expand_dims(sliced_state_value, output_axis, 0)

        # merge to state-action value function Q
        self.output = repeated_state_value + action_advantage
|
||||
|
||||
|
||||
class MeasurementsPredictionHead(Head):
    """
    A head which predicts the future measurements for each action, using an expectation stream
    and a per-action stream which are merged similarly to the dueling network.
    """
    def __init__(self, tuning_parameters, head_idx=0, loss_weight=1., is_local=True):
        """
        :param tuning_parameters: the configuration object. the action space size, the measurements
                                  size, the number of predicted steps ahead and the
                                  replace_mse_with_huber_loss flag are read from it
        :param head_idx: the index of the head
        :param loss_weight: the weight (or list of weights) of the loss
        :param is_local: forwarded to Head
        :raises Exception: if the huber loss was requested
        """
        Head.__init__(self, tuning_parameters, head_idx, loss_weight, is_local)
        self.name = 'future_measurements_head'
        self.num_actions = tuning_parameters.env_instance.action_space_size
        # falls back to 0 measurements when measurements_size is empty or None
        self.num_measurements = tuning_parameters.env.measurements_size[0] \
            if tuning_parameters.env.measurements_size else 0
        self.num_prediction_steps = tuning_parameters.agent.num_predicted_steps_ahead
        self.multi_step_measurements_size = self.num_measurements * self.num_prediction_steps
        if tuning_parameters.agent.replace_mse_with_huber_loss:
            raise Exception("huber loss is not supported in neon")
        else:
            self.loss_type = mean_squared_error

    def _build_module(self, input_layer):
        """
        Builds the expectation and action streams and merges them into the predictions
        :param input_layer: the input to the graph
        :return: None. the result is stored in self.output
        """
        # This is almost exactly the same as Dueling Network but we predict the future measurements for each action

        # use the size computed in __init__. the attributes that were read here before
        # (measurements_size, num_predicted_steps_ahead) are never set on the head
        multistep_measurements_size = self.multi_step_measurements_size

        # actions expectation tower (expectation stream) - E
        with name_scope("expectation_stream"):
            expectation_stream = neon.Sequential([
                neon.Affine(nout=256, activation=neon.Rectlin(),
                            weight_init=self.weights_init, bias_init=self.biases_init),
                neon.Affine(nout=multistep_measurements_size,
                            weight_init=self.weights_init, bias_init=self.biases_init)
            ])(input_layer)

        # action fine differences tower (action stream) - A
        with name_scope("action_stream"):
            action_stream_unnormalized = neon.Sequential([
                neon.Affine(nout=256, activation=neon.Rectlin(),
                            weight_init=self.weights_init, bias_init=self.biases_init),
                neon.Affine(nout=self.num_actions * multistep_measurements_size,
                            weight_init=self.weights_init, bias_init=self.biases_init),
                neon.Reshape((self.num_actions, multistep_measurements_size))
            ])(input_layer)
            # NOTE(review): ng.mean is called without explicit reduction axes - confirm that it
            # averages over the action axis only
            action_stream = action_stream_unnormalized - ng.mean(action_stream_unnormalized)

        # the axis to repeat the expectation over. previously an undefined output_axis was used.
        # assumes the reshaped action stream leads with the action axis - TODO confirm
        output_axis = action_stream.axes[0]

        # NOTE(review): slicing index 0 along the first axis mirrors the dueling head, where that
        # axis has length 1. here it presumably keeps only the first predicted measurement - verify
        repeated_expectation_stream = ng.slice_along_axis(expectation_stream, expectation_stream.axes[0], 0)
        repeated_expectation_stream = ng.expand_dims(repeated_expectation_stream, output_axis, 0)

        # merge to future measurements predictions
        self.output = repeated_expectation_stream + action_stream
|
||||
|
||||
Reference in New Issue
Block a user