#
# Copyright (c) 2017 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import ngraph as ng
from ngraph.util.names import name_scope
import ngraph.frontends.neon as neon
import numpy as np

from utils import force_list
from architectures.neon_components.losses import *
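

# A "head" is the final, task-specific stage of a Coach network: it receives
# the middleware output, produces the network predictions, and defines the
# matching target placeholders and loss terms for training.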
class Head:
    def __init__(self, tuning_parameters, head_idx=0, loss_weight=1., is_local=True):
        self.head_idx = head_idx
        self.name = "head"
        self.output = []
        self.loss = []
        self.loss_type = []
        self.regularizations = []
        self.loss_weight = force_list(loss_weight)
        self.weights_init = neon.GlorotInit()
        self.biases_init = neon.ConstantInit()
        self.target = []
        self.input = []
        self.is_local = is_local
        self.batch_size = tuning_parameters.batch_size
    def __call__(self, input_layer):
        """
        Wrapper for building the module graph, including scoping and loss creation
        :param input_layer: the input to the graph
        :return: the output, target placeholders and input of the head
                 (the targets are only returned for a local network)
        """
        with name_scope(self.get_name()):
            self._build_module(input_layer)

            self.output = force_list(self.output)
            self.target = force_list(self.target)
            self.input = force_list(self.input)
            self.loss_type = force_list(self.loss_type)
            self.loss = force_list(self.loss)
            self.regularizations = force_list(self.regularizations)
            if self.is_local:
                self.set_loss()

        if self.is_local:
            return self.output, self.target, self.input
        else:
            return self.output, self.input
    def _build_module(self, input_layer):
        """
        Builds the graph of the module. Subclasses override this and are
        expected to set self.output (and optionally self.target, self.input,
        self.loss_type and self.regularizations).
        :param input_layer: the input to the graph
        :return: None
        """
        pass
    def get_name(self):
        """
        Get a formatted name for the module
        :return: the formatted name
        """
        return '{}_{}'.format(self.name, self.head_idx)
    def set_loss(self):
        """
        Creates a target placeholder and a loss term for each loss_type, and
        appends the regularization terms to the loss list
        :return: None
        """
        # add losses and target placeholders
        for idx in range(len(self.loss_type)):
            batch_axis_full = ng.make_axis(self.batch_size, name='N')
            target = ng.placeholder(ng.make_axes([self.output[0].axes[0], batch_axis_full]))
            self.target.append(target)
            loss = self.loss_type[idx](self.target[-1], self.output[idx],
                                       weights=self.loss_weight[idx], scope=self.get_name())
            self.loss.append(loss)

        # add regularizations
        for regularization in self.regularizations:
            self.loss.append(regularization)
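

# The loss functions imported from losses.py are expected to follow the
# signature loss(target, prediction, weights, scope), which is how set_loss()
# above invokes them.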


class QHead(Head):
    def __init__(self, tuning_parameters, head_idx=0, loss_weight=1., is_local=True):
        Head.__init__(self, tuning_parameters, head_idx, loss_weight, is_local)
        self.name = 'q_values_head'
        self.num_actions = tuning_parameters.env_instance.action_space_size
        if tuning_parameters.agent.replace_mse_with_huber_loss:
            raise Exception("huber loss is not supported in neon")
        else:
            self.loss_type = mean_squared_error
    def _build_module(self, input_layer):
        # Standard Q Network
        self.output = neon.Sequential([
            neon.Affine(nout=self.num_actions,
                        weight_init=self.weights_init, bias_init=self.biases_init)
        ])(input_layer)


class DuelingQHead(QHead):
    def __init__(self, tuning_parameters, head_idx=0, loss_weight=1., is_local=True):
        QHead.__init__(self, tuning_parameters, head_idx, loss_weight, is_local)

    def _build_module(self, input_layer):
        # Dueling Network
        # state value tower - V
        output_axis = ng.make_axis(self.num_actions, name='q_values')
        state_value = neon.Sequential([
            neon.Affine(nout=256, activation=neon.Rectlin(),
                        weight_init=self.weights_init, bias_init=self.biases_init),
            neon.Affine(nout=1,
                        weight_init=self.weights_init, bias_init=self.biases_init)
        ])(input_layer)

        # action advantage tower - A
        action_advantage_unnormalized = neon.Sequential([
            neon.Affine(nout=256, activation=neon.Rectlin(),
                        weight_init=self.weights_init, bias_init=self.biases_init),
            neon.Affine(axes=output_axis,
                        weight_init=self.weights_init, bias_init=self.biases_init)
        ])(input_layer)
        action_advantage = action_advantage_unnormalized - ng.mean(action_advantage_unnormalized)
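
        # Subtracting the mean advantage above is the identifiability
        # constraint of the dueling architecture (Wang et al., 2016); the
        # merge below forms Q(s, a) = V(s) + (A(s, a) - mean_a A(s, a)).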
        repeated_state_value = ng.expand_dims(ng.slice_along_axis(state_value, state_value.axes[0], 0),
                                              output_axis, 0)

        # merge to state-action value function Q
        self.output = repeated_state_value + action_advantage
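

# Predicts the future measurements for each action, in the spirit of Direct
# Future Prediction ("Learning to Act by Predicting the Future", Dosovitskiy
# & Koltun, 2017), which Coach's DFP-style agents build on.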
class MeasurementsPredictionHead(Head):
    def __init__(self, tuning_parameters, head_idx=0, loss_weight=1., is_local=True):
        Head.__init__(self, tuning_parameters, head_idx, loss_weight, is_local)
        self.name = 'future_measurements_head'
        self.num_actions = tuning_parameters.env_instance.action_space_size
        self.num_measurements = tuning_parameters.env.measurements_size[0] \
            if tuning_parameters.env.measurements_size else 0
        self.num_prediction_steps = tuning_parameters.agent.num_predicted_steps_ahead
        self.multi_step_measurements_size = self.num_measurements * self.num_prediction_steps
        if tuning_parameters.agent.replace_mse_with_huber_loss:
            raise Exception("huber loss is not supported in neon")
        else:
            self.loss_type = mean_squared_error
    def _build_module(self, input_layer):
        # This is almost exactly the same as Dueling Network, but we predict
        # the future measurements for each action
        multistep_measurements_size = self.multi_step_measurements_size
        output_axis = ng.make_axis(self.num_actions, name='q_values')

        # actions expectation tower (expectation stream) - E
        with name_scope("expectation_stream"):
            expectation_stream = neon.Sequential([
                neon.Affine(nout=256, activation=neon.Rectlin(),
                            weight_init=self.weights_init, bias_init=self.biases_init),
                neon.Affine(nout=multistep_measurements_size,
                            weight_init=self.weights_init, bias_init=self.biases_init)
            ])(input_layer)

        # action fine differences tower (action stream) - A
        with name_scope("action_stream"):
            action_stream_unnormalized = neon.Sequential([
                neon.Affine(nout=256, activation=neon.Rectlin(),
                            weight_init=self.weights_init, bias_init=self.biases_init),
                neon.Affine(nout=self.num_actions * multistep_measurements_size,
                            weight_init=self.weights_init, bias_init=self.biases_init),
                neon.Reshape((self.num_actions, multistep_measurements_size))
            ])(input_layer)
            action_stream = action_stream_unnormalized - ng.mean(action_stream_unnormalized)

        repeated_expectation_stream = ng.slice_along_axis(expectation_stream, expectation_stream.axes[0], 0)
        repeated_expectation_stream = ng.expand_dims(repeated_expectation_stream, output_axis, 0)

        # merge to future measurements predictions
        self.output = repeated_expectation_stream + action_stream
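

# Example wiring (a minimal sketch, assuming a Coach tuning_parameters object
# carrying batch_size, env_instance.action_space_size and the agent settings
# used above, and `middleware` being the output of the shared embedder /
# middleware stack):
#
#     head = QHead(tuning_parameters, head_idx=0, is_local=True)
#     output, target, input_ = head(middleware)
#
# For a local (trainable) network, the returned target placeholders are fed
# with the training targets, and head.loss then holds the weighted loss terms.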