#
# Copyright (c) 2017 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import ngraph as ng
from ngraph.frontends import neon
from ngraph.util import names as ngraph_names

import utils
from architectures.neon_components import losses


class Head(object):
    def __init__(self, tuning_parameters, head_idx=0, loss_weight=1., is_local=True):
        self.head_idx = head_idx
        self.name = "head"
        self.output = []
        self.loss = []
        self.loss_type = []
        self.regularizations = []
        self.loss_weight = utils.force_list(loss_weight)
        self.weights_init = neon.GlorotInit()
        self.biases_init = neon.ConstantInit()
        self.target = []
        self.input = []
        self.is_local = is_local
        self.batch_size = tuning_parameters.batch_size

    def __call__(self, input_layer):
        """
        Wrapper for building the module graph, including scoping and loss creation
        :param input_layer: the input to the graph
        :return: the output of the last layer, plus the target placeholder if the head is local
        """
        with ngraph_names.name_scope(self.get_name()):
            self._build_module(input_layer)

            self.output = utils.force_list(self.output)
            self.target = utils.force_list(self.target)
            self.input = utils.force_list(self.input)
            self.loss_type = utils.force_list(self.loss_type)
            self.loss = utils.force_list(self.loss)
            self.regularizations = utils.force_list(self.regularizations)
            if self.is_local:
                self.set_loss()

        if self.is_local:
            return self.output, self.target, self.input
        else:
            return self.output, self.input

    def _build_module(self, input_layer):
        """
        Builds the graph of the module
        :param input_layer: the input to the graph
        :return: None
        """
        pass

    def get_name(self):
        """
        Get a formatted name for the module
        :return: the formatted name
        """
        return '{}_{}'.format(self.name, self.head_idx)

    def set_loss(self):
        """
        Creates a target placeholder and a loss function for each loss type, and appends
        the regularization losses
        :return: None
        """
        # add losses and target placeholders
        for idx in range(len(self.loss_type)):
            batch_axis_full = ng.make_axis(self.batch_size, name='N')
            target = ng.placeholder(ng.make_axes([self.output[0].axes[0], batch_axis_full]))
            self.target.append(target)
            loss = self.loss_type[idx](self.target[-1], self.output[idx],
                                       weights=self.loss_weight[idx], scope=self.get_name())
            self.loss.append(loss)

        # add regularizations
        for regularization in self.regularizations:
            self.loss.append(regularization)


class QHead(Head):
    def __init__(self, tuning_parameters, head_idx=0, loss_weight=1., is_local=True):
        Head.__init__(self, tuning_parameters, head_idx, loss_weight, is_local)
        self.name = 'q_values_head'
        self.num_actions = tuning_parameters.env_instance.action_space_size
        if tuning_parameters.agent.replace_mse_with_huber_loss:
            raise Exception("huber loss is not supported in neon")
        else:
            self.loss_type = losses.mean_squared_error

    def _build_module(self, input_layer):
        # Standard Q Network - a single affine layer producing one Q value per action
        self.output = neon.Sequential([
            neon.Affine(nout=self.num_actions,
                        weight_init=self.weights_init,
                        bias_init=self.biases_init)
        ])(input_layer)
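
# A minimal usage sketch (illustrative, not part of the original file):
# `tuning_parameters` and `middleware_output` below are hypothetical stand-ins
# for the objects the surrounding architecture normally provides.
#
#     q_head = QHead(tuning_parameters, head_idx=0, loss_weight=1., is_local=True)
#     output, target, input_ = q_head(middleware_output)
#     total_loss = sum(q_head.loss)  # ngraph ops overload '+', so Python's sum combines them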

class DuelingQHead(QHead):
    def __init__(self, tuning_parameters, head_idx=0, loss_weight=1., is_local=True):
        QHead.__init__(self, tuning_parameters, head_idx, loss_weight, is_local)

    def _build_module(self, input_layer):
        # Dueling Network
        output_axis = ng.make_axis(self.num_actions, name='q_values')

        # state value tower - V
        state_value = neon.Sequential([
            neon.Affine(nout=256, activation=neon.Rectlin(),
                        weight_init=self.weights_init,
                        bias_init=self.biases_init),
            neon.Affine(nout=1,
                        weight_init=self.weights_init,
                        bias_init=self.biases_init)
        ])(input_layer)

        # action advantage tower - A
        action_advantage_unnormalized = neon.Sequential([
            neon.Affine(nout=256, activation=neon.Rectlin(),
                        weight_init=self.weights_init,
                        bias_init=self.biases_init),
            neon.Affine(axes=output_axis,
                        weight_init=self.weights_init,
                        bias_init=self.biases_init)
        ])(input_layer)
        # subtract the mean advantage so that V stays identifiable
        action_advantage = action_advantage_unnormalized - ng.mean(action_advantage_unnormalized)

        # broadcast the scalar state value across the action axis
        repeated_state_value = ng.expand_dims(ng.slice_along_axis(state_value, state_value.axes[0], 0),
                                              output_axis, 0)

        # merge to state-action value function Q
        self.output = repeated_state_value + action_advantage


class MeasurementsPredictionHead(Head):
    def __init__(self, tuning_parameters, head_idx=0, loss_weight=1., is_local=True):
        Head.__init__(self, tuning_parameters, head_idx, loss_weight, is_local)
        self.name = 'future_measurements_head'
        self.num_actions = tuning_parameters.env_instance.action_space_size
        self.num_measurements = tuning_parameters.env.measurements_size[0] \
            if tuning_parameters.env.measurements_size else 0
        self.num_prediction_steps = tuning_parameters.agent.num_predicted_steps_ahead
        self.multi_step_measurements_size = self.num_measurements * self.num_prediction_steps
        if tuning_parameters.agent.replace_mse_with_huber_loss:
            raise Exception("huber loss is not supported in neon")
        else:
            self.loss_type = losses.mean_squared_error

    def _build_module(self, input_layer):
        # This is almost exactly the same as the Dueling Network, but we predict the
        # future measurements for each action
        multistep_measurements_size = self.multi_step_measurements_size
        # action axis for the per-action predictions (assumed; mirrors DuelingQHead,
        # which the original's undefined output_axis reference most plausibly intended)
        output_axis = ng.make_axis(self.num_actions, name='q_values')

        # actions expectation tower (expectation stream) - E
        with ngraph_names.name_scope("expectation_stream"):
            expectation_stream = neon.Sequential([
                neon.Affine(nout=256, activation=neon.Rectlin(),
                            weight_init=self.weights_init,
                            bias_init=self.biases_init),
                neon.Affine(nout=multistep_measurements_size,
                            weight_init=self.weights_init,
                            bias_init=self.biases_init)
            ])(input_layer)

        # action fine differences tower (action stream) - A
        with ngraph_names.name_scope("action_stream"):
            action_stream_unnormalized = neon.Sequential([
                neon.Affine(nout=256, activation=neon.Rectlin(),
                            weight_init=self.weights_init,
                            bias_init=self.biases_init),
                neon.Affine(nout=self.num_actions * multistep_measurements_size,
                            weight_init=self.weights_init,
                            bias_init=self.biases_init),
                neon.Reshape((self.num_actions, multistep_measurements_size))
            ])(input_layer)
            # subtract the mean over actions so the expectation stream stays identifiable
            action_stream = action_stream_unnormalized - ng.mean(action_stream_unnormalized)

        # broadcast the expectation stream across the action axis
        repeated_expectation_stream = ng.slice_along_axis(expectation_stream, expectation_stream.axes[0], 0)
        repeated_expectation_stream = ng.expand_dims(repeated_expectation_stream, output_axis, 0)

        # merge to future measurements predictions
        self.output = repeated_expectation_stream + action_stream
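
# Illustration (a plain-numpy sketch, not part of the original file): DuelingQHead and
# MeasurementsPredictionHead share the same aggregation. A scalar stream (V or E) is
# broadcast across the action axis and added to a mean-centered per-action stream (A),
# so the per-action stream only encodes differences between actions:
#
#     import numpy as np
#     V = np.array([3.0])              # state value / expectation stream
#     A = np.array([1.0, 2.0, 6.0])    # one entry per action
#     Q = V + (A - A.mean())           # -> [1., 2., 6.]; note mean(Q) == V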