1
0
mirror of https://github.com/gryf/coach.git synced 2026-04-09 22:53:40 +02:00

coach v0.8.0

This commit is contained in:
Gal Leibovich
2017-10-19 13:10:15 +03:00
parent 7f77813a39
commit 1d4c3455e7
123 changed files with 10996 additions and 203 deletions

View File

@@ -0,0 +1,129 @@
#
# Copyright (c) 2017 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import sys
import copy
from ngraph.frontends.neon import *
import ngraph as ng
from architectures.architecture import *
import numpy as np
from utils import *
class NeonArchitecture(Architecture):
    """
    Base class for network architectures implemented with Intel Nervana ngraph/neon.

    Builds the training, inference and weight-synchronization computations on top
    of the graph created by get_model() (implemented by subclasses), and exposes
    predict / train_on_batch / get_weights / set_weights to the agents.
    """
    def __init__(self, tuning_parameters, name="", global_network=None, network_is_local=True):
        """
        :param tuning_parameters: a Preset instance holding all the run parameters
        :param name: the name scope of the network
        :param global_network: the shared network in distributed training
        :param network_is_local: whether this instance is a local worker copy
        """
        Architecture.__init__(self, tuning_parameters, name)
        assert tuning_parameters.agent.neon_support, 'Neon is not supported for this agent'
        self.clip_error = tuning_parameters.clip_gradients
        self.total_loss = None
        self.epoch = 0
        self.inputs = []
        self.outputs = []
        self.targets = []
        self.losses = []
        # for neon, tuning_parameters.sess holds the ngraph transformer
        self.transformer = tuning_parameters.sess
        self.network = self.get_model(tuning_parameters)
        self.accumulated_gradients = []

        # training and inference ops: run the optimizer, then return the loss value
        train_output = ng.sequential([
            self.optimizer(self.total_loss),
            self.total_loss
        ])
        placeholders = self.inputs + self.targets
        self.train_op = self.transformer.add_computation(
            ng.computation(train_output, *placeholders)
        )
        self.predict_op = self.transformer.add_computation(
            ng.computation(self.outputs, self.inputs[0])
        )

        # update weights from array op: one assign computation per variable
        self.weights = [ng.placeholder(w.axes) for w in self.total_loss.variables()]
        self.set_weights_ops = []
        for target_variable, variable in zip(self.total_loss.variables(), self.weights):
            self.set_weights_ops.append(self.transformer.add_computation(
                ng.computation(
                    ng.assign(target_variable, variable), variable
                )
            ))

        # get weights op
        self.get_variables = self.transformer.add_computation(
            ng.computation(self.total_loss.variables())
        )

    def predict(self, inputs):
        """
        Run a feed-forward pass through the network.
        :param inputs: a batch of inputs with the batch axis first
        :return: a single output array, or a list of arrays for multi-head networks
        """
        batch_size = inputs.shape[0]
        # neon expects the batch axis to be last
        inputs = inputs.swapaxes(0, -1)
        prediction = self.predict_op(inputs)  # TODO: problem with multiple inputs
        if not isinstance(prediction, tuple):
            # BUGFIX: was `prediction = (prediction)`, which is NOT a tuple, so the
            # loop below iterated over the rows of the single output array
            prediction = (prediction,)
        # move the batch axis back to the front for each output and trim padding
        output = [p.transpose()[:batch_size].copy() for p in prediction]
        # if there is only one output then we don't need a list
        if len(output) == 1:
            output = output[0]
        return output

    def train_on_batch(self, inputs, targets):
        """Run a single training iteration and return the loss."""
        loss = self.accumulate_gradients(inputs, targets)
        self.apply_and_reset_gradients(self.accumulated_gradients)
        return loss

    def get_weights(self):
        """:return: the current values of all the network variables"""
        return self.get_variables()

    def set_weights(self, weights, rate=1.0):
        """
        Update the network variables towards the given weights.
        :param weights: the target weight values
        :param rate: interpolation rate; 1.0 copies the weights directly, smaller
                     values move the current weights only part of the way
        """
        if rate != 1:
            current_weights = self.get_weights()
            updated_weights = [(1 - rate) * t + rate * o for t, o in zip(current_weights, weights)]
        else:
            updated_weights = weights
        for update_function, variable in zip(self.set_weights_ops, updated_weights):
            update_function(variable)

    def accumulate_gradients(self, inputs, targets):
        """
        Neon doesn't currently allow separating the grads calculation and grad apply
        operations, so gradient accumulation is not available. Instead we do a full
        training iteration and return its loss.
        :param inputs: a batch of inputs (single array or list of arrays)
        :param targets: a batch of targets (single array or list of arrays)
        :return: a one-element list holding the mean loss
        """
        # build new lists instead of mutating the caller's arguments in place
        # (neon expects the batch axis to be last)
        inputs = [single_input.swapaxes(0, -1) for single_input in force_list(inputs)]
        targets = [np.rollaxis(target, 0, len(target.shape)) for target in force_list(targets)]
        all_inputs = inputs + targets
        loss = np.mean(self.train_op(*all_inputs))
        return [loss]

View File

@@ -0,0 +1,88 @@
#
# Copyright (c) 2017 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import ngraph.frontends.neon as neon
import ngraph as ng
from ngraph.util.names import name_scope
class InputEmbedder:
    """
    Base class for embedding a single network input (observation, measurements,
    goal vector, ...) into a feature tensor. Subclasses define the actual layers
    by implementing _build_module().
    """
    def __init__(self, input_size, batch_size=None, activation_function=neon.Rectlin(), name="embedder"):
        self.name = name
        self.input_size = input_size
        self.batch_size = batch_size
        self.activation_function = activation_function
        self.weights_init = neon.GlorotInit()
        self.biases_init = neon.ConstantInit()
        self.input = None
        self.output = None

    def __call__(self, prev_input_placeholder=None):
        """
        Build the embedder graph, creating a fresh input placeholder when one is
        not supplied.
        :param prev_input_placeholder: an existing input placeholder to reuse, or None
        :return: (input placeholder, embedding output tensor)
        """
        with name_scope(self.get_name()):
            # 2D inputs are (H, W); anything else is treated as (C, H, W)
            labels = ['H', 'W'] if len(self.input_size) == 2 else ['C', 'H', 'W']
            axes = [ng.make_axis(axis_size, name=axis_name)
                    for axis_size, axis_name in zip(self.input_size, labels)]
            batch_axis = ng.make_axis(self.batch_size, name='N')
            if prev_input_placeholder is None:
                self.input = ng.placeholder(ng.make_axes(axes) + [batch_axis])
            else:
                self.input = prev_input_placeholder
            self._build_module()
            return self.input, self.output(self.input)

    def _build_module(self):
        """Define self.output; implemented by subclasses."""
        pass

    def get_name(self):
        """:return: the name of this embedder"""
        return self.name
class ImageEmbedder(InputEmbedder):
    """
    Input embedder for image observations: rescales the pixel values and applies
    a stack of three convolutional layers.
    """
    def __init__(self, input_size, batch_size=None, input_rescaler=255.0, activation_function=neon.Rectlin(), name="embedder"):
        InputEmbedder.__init__(self, input_size, batch_size, activation_function, name)
        # the raw input is divided by this value before the convolutions
        self.input_rescaler = input_rescaler

    def _build_module(self):
        # image observation: normalize then convolve; all layers share inits/activation
        shared = dict(activation=self.activation_function,
                      filter_init=self.weights_init,
                      bias_init=self.biases_init)
        self.output = neon.Sequential([
            neon.Preprocess(functor=lambda x: x / self.input_rescaler),
            neon.Convolution((8, 8, 32), strides=4, **shared),
            neon.Convolution((4, 4, 64), strides=2, **shared),
            neon.Convolution((3, 3, 64), strides=1, **shared)
        ])
class VectorEmbedder(InputEmbedder):
    """Input embedder for flat vector observations: a single 256-unit affine layer."""
    def __init__(self, input_size, batch_size=None, activation_function=neon.Rectlin(), name="embedder"):
        InputEmbedder.__init__(self, input_size, batch_size, activation_function, name)

    def _build_module(self):
        # vector observation
        hidden = neon.Affine(nout=256,
                             activation=self.activation_function,
                             weight_init=self.weights_init,
                             bias_init=self.biases_init)
        self.output = neon.Sequential([hidden])

View File

@@ -0,0 +1,191 @@
#
# Copyright (c) 2017 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from architectures.neon_components.embedders import *
from architectures.neon_components.heads import *
from architectures.neon_components.middleware import *
from architectures.neon_components.architecture import *
from configurations import InputTypes, OutputTypes, MiddlewareTypes
class GeneralNeonNetwork(NeonArchitecture):
    """
    A generic network assembled from configurable parts: input embedders, a
    middleware embedder and output heads, as described by the agent's tuning
    parameters.
    """
    def __init__(self, tuning_parameters, name="", global_network=None, network_is_local=True):
        """
        :param tuning_parameters: a Preset instance with all the running parameters
        :param name: the name scope of the network
        :param global_network: the shared network in distributed training
        :param network_is_local: whether this instance is a local worker copy
        """
        self.global_network = global_network
        self.network_is_local = network_is_local
        # with separate networks per head we build several single-head networks;
        # otherwise one network carries all the heads
        self.num_heads_per_network = 1 if tuning_parameters.agent.use_separate_networks_per_head else \
            len(tuning_parameters.agent.output_types)
        self.num_networks = 1 if not tuning_parameters.agent.use_separate_networks_per_head else \
            len(tuning_parameters.agent.output_types)
        self.input_embedders = []
        self.output_heads = []
        self.activation_function = self.get_activation_function(
            tuning_parameters.agent.hidden_layers_activation_function)
        NeonArchitecture.__init__(self, tuning_parameters, name, global_network, network_is_local)

    def get_activation_function(self, activation_function_string):
        """
        Map an activation function name to the matching neon activation object.
        :param activation_function_string: one of 'relu', 'tanh', 'sigmoid', 'elu', 'none'
        :return: the neon activation transform, or None for 'none'
        """
        activation_functions = {
            'relu': neon.Rectlin(),
            'tanh': neon.Tanh(),
            'sigmoid': neon.Logistic(),
            'elu': neon.Explin(),
            'none': None
        }
        assert activation_function_string in activation_functions.keys(), \
            "Activation function must be one of the following {}".format(activation_functions.keys())
        return activation_functions[activation_function_string]

    def get_input_embedder(self, embedder_type):
        """
        Create the input embedder matching the given input type.
        :param embedder_type: an InputTypes enum value
        :return: a new InputEmbedder instance
        """
        # the observation can be either an image or a vector
        # NOTE(review): self.input_height etc. are presumably set by the base
        # architecture - confirm against Architecture.__init__
        def get_observation_embedding(with_timestep=False):
            if self.input_height > 1:
                return ImageEmbedder((self.input_depth, self.input_height, self.input_width), self.batch_size,
                                     name="observation")
            else:
                return VectorEmbedder((self.input_depth, self.input_width + int(with_timestep)), self.batch_size,
                                      name="observation")

        # PERF FIX: map to factories instead of instances, so only the requested
        # embedder is constructed (previously every embedder in the dict was built
        # on each call and all but one were discarded)
        input_mapping = {
            InputTypes.Observation: lambda: get_observation_embedding(),
            InputTypes.Measurements: lambda: VectorEmbedder(self.measurements_size, self.batch_size,
                                                            name="measurements"),
            InputTypes.GoalVector: lambda: VectorEmbedder(self.measurements_size, self.batch_size,
                                                          name="goal_vector"),
            InputTypes.Action: lambda: VectorEmbedder((self.num_actions,), self.batch_size, name="action"),
            InputTypes.TimedObservation: lambda: get_observation_embedding(with_timestep=True),
        }
        return input_mapping[embedder_type]()

    def get_middleware_embedder(self, middleware_type):
        """
        Create the middleware embedder matching the given middleware type.
        :param middleware_type: a MiddlewareTypes enum value
        :return: a new MiddlewareEmbedder instance
        :raises Exception: if the middleware type is not supported over Neon
        """
        middleware_mapping = {
            MiddlewareTypes.LSTM: None,  # LSTM over Neon is currently not supported in Coach
            MiddlewareTypes.FC: FC_Embedder
        }
        middleware_class = middleware_mapping.get(middleware_type)
        # ROBUSTNESS FIX: previously an unsupported type led to calling None(...)
        # and an opaque TypeError; fail with a clear message instead
        if middleware_class is None:
            raise Exception("{} middleware is not supported over Neon".format(middleware_type))
        return middleware_class(self.activation_function)

    def get_output_head(self, head_type, head_idx, loss_weight=1.):
        """
        Create an output head of the given type.
        :param head_type: an OutputTypes enum value
        :param head_idx: the index of the head (used for naming scopes)
        :param loss_weight: the weight to assign to the head's loss
        :return: a new Head instance
        :raises Exception: if the head type is not supported over Neon
        """
        output_mapping = {
            OutputTypes.Q: QHead,
            OutputTypes.DuelingQ: DuelingQHead,
            OutputTypes.V: None,  # Policy Optimization algorithms over Neon are currently not supported in Coach
            OutputTypes.Pi: None,  # Policy Optimization algorithms over Neon are currently not supported in Coach
            OutputTypes.MeasurementsPrediction: None,  # DFP over Neon is currently not supported in Coach
            OutputTypes.DNDQ: None,  # NEC over Neon is currently not supported in Coach
            OutputTypes.NAF: None,  # NAF over Neon is currently not supported in Coach
            OutputTypes.PPO: None,  # PPO over Neon is currently not supported in Coach
            OutputTypes.PPO_V: None  # PPO over Neon is currently not supported in Coach
        }
        head_class = output_mapping[head_type]
        # ROBUSTNESS FIX: previously an unsupported type led to calling None(...)
        # and an opaque TypeError; fail with a clear message instead
        if head_class is None:
            raise Exception("{} head is not supported over Neon".format(head_type))
        return head_class(self.tp, head_idx, loss_weight, self.network_is_local)

    def get_model(self, tuning_parameters):
        """
        Build the full graph: input embedders -> middleware -> output heads,
        followed by the losses and the optimizer.
        :param tuning_parameters: A Preset class instance with all the running parameters
        :type tuning_parameters: Preset
        :return: A model
        """
        assert len(self.tp.agent.input_types) > 0, "At least one input type should be defined"
        assert len(self.tp.agent.output_types) > 0, "At least one output type should be defined"
        assert self.tp.agent.middleware_type is not None, "Exactly one middleware type should be defined"
        assert len(self.tp.agent.loss_weights) > 0, "At least one loss weight should be defined"
        assert len(self.tp.agent.output_types) == len(self.tp.agent.loss_weights), \
            "Number of loss weights should match the number of output types"
        local_network_in_distributed_training = self.global_network is not None and self.network_is_local

        tuning_parameters.activation_function = self.activation_function
        done_creating_input_placeholders = False
        for network_idx in range(self.num_networks):
            with name_scope('network_{}'.format(network_idx)):
                ####################
                # Input Embeddings #
                ####################
                state_embedding = []
                for idx, input_type in enumerate(self.tp.agent.input_types):
                    # get the class of the input embedder
                    self.input_embedders.append(self.get_input_embedder(input_type))
                    # in the case each head uses a different network, we still reuse the input placeholders
                    prev_network_input_placeholder = self.inputs[idx] if done_creating_input_placeholders else None
                    # create the input embedder instance and store the input placeholder and the embedding
                    input_placeholder, embedding = self.input_embedders[-1](prev_network_input_placeholder)
                    if len(self.inputs) < len(self.tp.agent.input_types):
                        self.inputs.append(input_placeholder)
                    state_embedding.append(embedding)
                done_creating_input_placeholders = True

                ##############
                # Middleware #
                ##############
                # merge all the input embeddings into a single tensor before the middleware
                state_embedding = ng.concat_along_axis(state_embedding, state_embedding[0].axes[0]) \
                    if len(state_embedding) > 1 else state_embedding[0]
                self.middleware_embedder = self.get_middleware_embedder(self.tp.agent.middleware_type)
                _, self.state_embedding = self.middleware_embedder(state_embedding)

                ################
                # Output Heads #
                ################
                for head_idx in range(self.num_heads_per_network):
                    for head_copy_idx in range(self.tp.agent.num_output_head_copies):
                        if self.tp.agent.use_separate_networks_per_head:
                            # if we use separate networks per head, then the head type corresponds to the network idx
                            head_type_idx = network_idx
                        else:
                            # if we use a single network with multiple heads, then the head type is the current head idx
                            head_type_idx = head_idx
                        self.output_heads.append(self.get_output_head(self.tp.agent.output_types[head_type_idx],
                                                                      head_copy_idx,
                                                                      self.tp.agent.loss_weights[head_type_idx]))
                        # only local networks expose target placeholders for the loss
                        if self.network_is_local:
                            output, target_placeholder, input_placeholder = self.output_heads[-1](self.state_embedding)
                            self.targets.extend(target_placeholder)
                        else:
                            output, input_placeholder = self.output_heads[-1](self.state_embedding)
                        self.outputs.extend(output)
                        self.inputs.extend(input_placeholder)

        # Losses
        self.losses = []
        for output_head in self.output_heads:
            self.losses += output_head.loss
        self.total_loss = sum(self.losses)

        # Learning rate
        if self.tp.learning_rate_decay_rate != 0:
            raise Exception("learning rate decay is not supported in neon")

        # Optimizer
        if local_network_in_distributed_training and \
                hasattr(self.tp.agent, "shared_optimizer") and self.tp.agent.shared_optimizer:
            # distributed training and this is the local network instantiation
            self.optimizer = self.global_network.optimizer
        else:
            if tuning_parameters.agent.optimizer_type == 'Adam':
                self.optimizer = neon.Adam(
                    learning_rate=tuning_parameters.learning_rate,
                    gradient_clip_norm=tuning_parameters.clip_gradients
                )
            elif tuning_parameters.agent.optimizer_type == 'RMSProp':
                self.optimizer = neon.RMSProp(
                    learning_rate=tuning_parameters.learning_rate,
                    gradient_clip_norm=tuning_parameters.clip_gradients,
                    decay_rate=0.9,
                    epsilon=0.01
                )
            elif tuning_parameters.agent.optimizer_type == 'LBFGS':
                raise Exception("LBFGS optimizer is not supported in neon")
            else:
                raise Exception("{} is not a valid optimizer type".format(tuning_parameters.agent.optimizer_type))

View File

@@ -0,0 +1,194 @@
#
# Copyright (c) 2017 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import ngraph as ng
from ngraph.util.names import name_scope
import ngraph.frontends.neon as neon
import numpy as np
from utils import force_list
from architectures.neon_components.losses import *
class Head:
    """
    Base class for an output head: the final layers of a network that produce one
    kind of output (e.g. Q-values) together with its target placeholders and loss.
    Subclasses implement _build_module() to define the layers.
    """
    def __init__(self, tuning_parameters, head_idx=0, loss_weight=1., is_local=True):
        # index of this head within the network, used for scoping/naming
        self.head_idx = head_idx
        self.name = "head"
        # the following lists are populated by _build_module() / set_loss()
        self.output = []
        self.loss = []
        self.loss_type = []
        self.regularizations = []
        self.loss_weight = force_list(loss_weight)
        self.weights_init = neon.GlorotInit()
        self.biases_init = neon.ConstantInit()
        self.target = []
        self.input = []
        # only local (worker) networks create targets and losses
        self.is_local = is_local
        self.batch_size = tuning_parameters.batch_size

    def __call__(self, input_layer):
        """
        Wrapper for building the module graph including scoping and loss creation
        :param input_layer: the input to the graph
        :return: (outputs, targets, inputs) for a local network,
                 (outputs, inputs) otherwise
        """
        with name_scope(self.get_name()):
            self._build_module(input_layer)

            # normalize everything to lists so callers can extend() uniformly
            self.output = force_list(self.output)
            self.target = force_list(self.target)
            self.input = force_list(self.input)
            self.loss_type = force_list(self.loss_type)
            self.loss = force_list(self.loss)
            self.regularizations = force_list(self.regularizations)
            if self.is_local:
                self.set_loss()

        if self.is_local:
            return self.output, self.target, self.input
        else:
            return self.output, self.input

    def _build_module(self, input_layer):
        """
        Builds the graph of the module
        :param input_layer: the input to the graph
        :return: None
        """
        pass

    def get_name(self):
        """
        Get a formatted name for the module
        :return: the formatted name
        """
        return '{}_{}'.format(self.name, self.head_idx)

    def set_loss(self):
        """
        Creates a target placeholder and loss op for each loss_type, then appends
        the regularization terms. The target placeholders share the first axis of
        the head's first output plus a batch axis.
        :return: None
        """
        # add losses and target placeholder
        for idx in range(len(self.loss_type)):
            # output_axis = ng.make_axis(self.num_actions, name='q_values')
            batch_axis_full = ng.make_axis(self.batch_size, name='N')
            # NOTE(review): the target axes are taken from output[0] even when there
            # are several loss types - confirm this is intended for multi-loss heads
            target = ng.placeholder(ng.make_axes([self.output[0].axes[0], batch_axis_full]))
            self.target.append(target)
            loss = self.loss_type[idx](self.target[-1], self.output[idx],
                                       weights=self.loss_weight[idx], scope=self.get_name())
            self.loss.append(loss)

        # add regularizations
        for regularization in self.regularizations:
            self.loss.append(regularization)
class QHead(Head):
    """A plain Q-value head: one affine layer producing a value per action."""
    def __init__(self, tuning_parameters, head_idx=0, loss_weight=1., is_local=True):
        Head.__init__(self, tuning_parameters, head_idx, loss_weight, is_local)
        self.name = 'q_values_head'
        self.num_actions = tuning_parameters.env_instance.action_space_size
        if tuning_parameters.agent.replace_mse_with_huber_loss:
            raise Exception("huber loss is not supported in neon")
        self.loss_type = mean_squared_error

    def _build_module(self, input_layer):
        # Standard Q network: a single linear layer mapping the embedding to action values
        q_values_layer = neon.Affine(nout=self.num_actions,
                                     weight_init=self.weights_init,
                                     bias_init=self.biases_init)
        self.output = neon.Sequential([q_values_layer])(input_layer)
class DuelingQHead(QHead):
    """
    Dueling Q head: splits the embedding into a state-value stream V and an
    action-advantage stream A, then combines them as
    Q(s, a) = V(s) + (A(s, a) - mean(A(s, ·))).
    """
    def __init__(self, tuning_parameters, head_idx=0, loss_weight=1., is_local=True):
        QHead.__init__(self, tuning_parameters, head_idx, loss_weight, is_local)

    def _build_module(self, input_layer):
        # Dueling Network
        # the axis along which the per-action values are laid out
        output_axis = ng.make_axis(self.num_actions, name='q_values')

        # state value tower - V: 256-unit hidden layer, then a single scalar value
        state_value = neon.Sequential([
            neon.Affine(nout=256, activation=neon.Rectlin(),
                        weight_init=self.weights_init, bias_init=self.biases_init),
            neon.Affine(nout=1,
                        weight_init=self.weights_init, bias_init=self.biases_init)
        ])(input_layer)

        # action advantage tower - A: 256-unit hidden layer, then one value per action
        action_advantage_unnormalized = neon.Sequential([
            neon.Affine(nout=256, activation=neon.Rectlin(),
                        weight_init=self.weights_init, bias_init=self.biases_init),
            neon.Affine(axes=output_axis,
                        weight_init=self.weights_init, bias_init=self.biases_init)
        ])(input_layer)
        # center the advantages so they sum to ~0 across actions
        action_advantage = action_advantage_unnormalized - ng.mean(action_advantage_unnormalized)

        # broadcast the scalar state value along the action axis so it can be
        # added to every advantage
        repeated_state_value = ng.expand_dims(ng.slice_along_axis(state_value, state_value.axes[0], 0), output_axis, 0)

        # merge to state-action value function Q
        self.output = repeated_state_value + action_advantage
class MeasurementsPredictionHead(Head):
    """
    Output head that predicts future measurements for each action, combining an
    expectation stream with a per-action differences stream (same structure as a
    dueling head, but over measurement predictions).
    """
    def __init__(self, tuning_parameters, head_idx=0, loss_weight=1., is_local=True):
        Head.__init__(self, tuning_parameters, head_idx, loss_weight, is_local)
        self.name = 'future_measurements_head'
        self.num_actions = tuning_parameters.env_instance.action_space_size
        self.num_measurements = tuning_parameters.env.measurements_size[0] \
            if tuning_parameters.env.measurements_size else 0
        self.num_prediction_steps = tuning_parameters.agent.num_predicted_steps_ahead
        # total size of the flattened predictions: one vector of measurements per step
        self.multi_step_measurements_size = self.num_measurements * self.num_prediction_steps
        if tuning_parameters.agent.replace_mse_with_huber_loss:
            raise Exception("huber loss is not supported in neon")
        else:
            self.loss_type = mean_squared_error

    def _build_module(self, input_layer):
        # This is almost exactly the same as Dueling Network but we predict the future
        # measurements for each action
        # BUGFIX: the original read self.measurements_size and self.num_predicted_steps_ahead,
        # neither of which is ever defined (an AttributeError at build time); use the
        # value precomputed in __init__ instead
        multistep_measurements_size = self.multi_step_measurements_size
        # BUGFIX: output_axis was never defined in this scope (a NameError at build
        # time); create it the same way DuelingQHead does
        output_axis = ng.make_axis(self.num_actions, name='q_values')

        # actions expectation tower (expectation stream) - E
        with name_scope("expectation_stream"):
            expectation_stream = neon.Sequential([
                neon.Affine(nout=256, activation=neon.Rectlin(),
                            weight_init=self.weights_init, bias_init=self.biases_init),
                neon.Affine(nout=multistep_measurements_size,
                            weight_init=self.weights_init, bias_init=self.biases_init)
            ])(input_layer)

        # action fine differences tower (action stream) - A
        with name_scope("action_stream"):
            action_stream_unnormalized = neon.Sequential([
                neon.Affine(nout=256, activation=neon.Rectlin(),
                            weight_init=self.weights_init, bias_init=self.biases_init),
                neon.Affine(nout=self.num_actions * multistep_measurements_size,
                            weight_init=self.weights_init, bias_init=self.biases_init),
                neon.Reshape((self.num_actions, multistep_measurements_size))
            ])(input_layer)
            # center the per-action stream so it sums to ~0 across actions
            action_stream = action_stream_unnormalized - ng.mean(action_stream_unnormalized)

        # broadcast the expectation stream along the action axis
        repeated_expectation_stream = ng.slice_along_axis(expectation_stream, expectation_stream.axes[0], 0)
        repeated_expectation_stream = ng.expand_dims(repeated_expectation_stream, output_axis, 0)

        # merge to future measurements predictions
        self.output = repeated_expectation_stream + action_stream

View File

@@ -0,0 +1,28 @@
#
# Copyright (c) 2017 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import ngraph as ng
import ngraph.frontends.neon as neon
from ngraph.util.names import name_scope
import numpy as np
def mean_squared_error(targets, outputs, weights=1.0, scope=""):
    """
    Weighted squared-L2 loss between the targets and the network outputs.
    :param targets: the target values placeholder
    :param outputs: the network output tensor
    :param weights: a scalar weight applied to the loss
    :param scope: the name scope in which to create the loss ops
    :return: the weighted loss tensor
    """
    with name_scope(scope):
        # TODO: reduce mean over the action axis
        squared_difference = ng.squared_L2(targets - outputs)
        return squared_difference * weights

View File

@@ -0,0 +1,50 @@
#
# Copyright (c) 2017 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import ngraph as ng
import ngraph.frontends.neon as neon
from ngraph.util.names import name_scope
import numpy as np
class MiddlewareEmbedder:
    """
    Base class for the middleware part of a network, sitting between the input
    embedders and the output heads. Subclasses implement _build_module() to
    define the layers.
    """
    def __init__(self, activation_function=neon.Rectlin(), name="middleware_embedder"):
        self.name = name
        self.activation_function = activation_function
        self.weights_init = neon.GlorotInit()
        self.biases_init = neon.ConstantInit()
        # populated when the embedder is called
        self.input = None
        self.output = None

    def __call__(self, input_layer):
        """
        Wire the middleware layers on top of the given input.
        :param input_layer: the tensor to feed into the middleware
        :return: (input tensor, middleware output tensor)
        """
        with name_scope(self.get_name()):
            self.input = input_layer
            self._build_module()
            return self.input, self.output(self.input)

    def _build_module(self):
        """Define self.output; implemented by subclasses."""
        pass

    def get_name(self):
        """:return: the name of this middleware embedder"""
        return self.name
class FC_Embedder(MiddlewareEmbedder):
    """Fully-connected middleware: a single 512-unit affine layer."""
    def _build_module(self):
        hidden_layer = neon.Affine(nout=512,
                                   activation=self.activation_function,
                                   weight_init=self.weights_init,
                                   bias_init=self.biases_init)
        self.output = neon.Sequential([hidden_layer])