
coach v0.8.0

Gal Leibovich
2017-10-19 13:10:15 +03:00
parent 7f77813a39
commit 1d4c3455e7
123 changed files with 10996 additions and 203 deletions

architectures/__init__.py Normal file

@@ -0,0 +1,31 @@
#
# Copyright (c) 2017 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from architectures.architecture import *
from logger import failed_imports
try:
from architectures.tensorflow_components.general_network import *
from architectures.tensorflow_components.architecture import *
except ImportError:
failed_imports.append("TensorFlow")
try:
from architectures.neon_components.general_network import *
from architectures.neon_components.architecture import *
except ImportError:
failed_imports.append("Neon")
from architectures.network_wrapper import *
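The try/except gating above lets Coach keep running when a backend is missing and report the failure later; a minimal standalone sketch of the same pattern (illustrative only, not Coach's API):

failed_imports = []
try:
    import tensorflow  # noqa: F401
except ImportError:
    failed_imports.append("TensorFlow")
try:
    import ngraph  # noqa: F401  (the Neon backend)
except ImportError:
    failed_imports.append("Neon")
if failed_imports:
    print("Backends unavailable: {}".format(", ".join(failed_imports)))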

architectures/architecture.py Normal file

@@ -0,0 +1,70 @@
#
# Copyright (c) 2017 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from configurations import Preset
class Architecture:
def __init__(self, tuning_parameters, name=""):
"""
        :param tuning_parameters: A Preset class instance with all the running parameters
        :type tuning_parameters: Preset
        :param name: The name of the network
        :type name: string
"""
self.batch_size = tuning_parameters.batch_size
self.input_depth = tuning_parameters.env.observation_stack_size
self.input_height = tuning_parameters.env.desired_observation_height
self.input_width = tuning_parameters.env.desired_observation_width
self.num_actions = tuning_parameters.env.action_space_size
self.measurements_size = tuning_parameters.env.measurements_size \
if tuning_parameters.env.measurements_size else 0
self.learning_rate = tuning_parameters.learning_rate
self.optimizer = None
self.name = name
self.tp = tuning_parameters
def get_model(self, tuning_parameters):
"""
:param tuning_parameters: A Preset class instance with all the running parameters
:type tuning_parameters: Preset
:return: A model
"""
pass
def predict(self, inputs):
pass
def train_on_batch(self, inputs, targets):
pass
def get_weights(self):
pass
def set_weights(self, weights, rate=1.0):
pass
def reset_accumulated_gradients(self):
pass
def accumulate_gradients(self, inputs, targets):
pass
def apply_and_reset_gradients(self, gradients):
pass
def apply_gradients(self, gradients):
pass
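Architecture only defines the backend contract; the TensorFlow and Neon subclasses below fill in every method. A toy instantiation, using types.SimpleNamespace to stand in for the Preset fields that __init__ reads (the values here are assumptions for illustration):

from types import SimpleNamespace

env = SimpleNamespace(observation_stack_size=4,
                      desired_observation_height=84,
                      desired_observation_width=84,
                      action_space_size=6,
                      measurements_size=None)
tp = SimpleNamespace(batch_size=32, env=env, learning_rate=0.00025)

net = Architecture(tp, name="base")
print(net.num_actions, net.input_height, net.measurements_size)  # 6 84 0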

architectures/neon_components/architecture.py Normal file

@@ -0,0 +1,129 @@
#
# Copyright (c) 2017 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import sys
import copy
from ngraph.frontends.neon import *
import ngraph as ng
from architectures.architecture import *
import numpy as np
from utils import *
class NeonArchitecture(Architecture):
def __init__(self, tuning_parameters, name="", global_network=None, network_is_local=True):
Architecture.__init__(self, tuning_parameters, name)
assert tuning_parameters.agent.neon_support, 'Neon is not supported for this agent'
self.clip_error = tuning_parameters.clip_gradients
self.total_loss = None
self.epoch = 0
self.inputs = []
self.outputs = []
self.targets = []
self.losses = []
self.transformer = tuning_parameters.sess
self.network = self.get_model(tuning_parameters)
self.accumulated_gradients = []
# training and inference ops
train_output = ng.sequential([
self.optimizer(self.total_loss),
self.total_loss
])
placeholders = self.inputs + self.targets
self.train_op = self.transformer.add_computation(
ng.computation(
train_output, *placeholders
)
)
self.predict_op = self.transformer.add_computation(
ng.computation(
self.outputs, self.inputs[0]
)
)
# update weights from array op
self.weights = [ng.placeholder(w.axes) for w in self.total_loss.variables()]
self.set_weights_ops = []
for target_variable, variable in zip(self.total_loss.variables(), self.weights):
self.set_weights_ops.append(self.transformer.add_computation(
ng.computation(
ng.assign(target_variable, variable), variable
)
))
# get weights op
self.get_variables = self.transformer.add_computation(
ng.computation(
self.total_loss.variables()
)
)
def predict(self, inputs):
batch_size = inputs.shape[0]
# move batch axis to the end
inputs = inputs.swapaxes(0, -1)
prediction = self.predict_op(inputs) # TODO: problem with multiple inputs
        if type(prediction) != tuple:
            prediction = (prediction,)
# process all the outputs from the network
output = []
for p in prediction:
output.append(p.transpose()[:batch_size].copy())
# if there is only one output then we don't need a list
if len(output) == 1:
output = output[0]
return output
def train_on_batch(self, inputs, targets):
loss = self.accumulate_gradients(inputs, targets)
self.apply_and_reset_gradients(self.accumulated_gradients)
return loss
def get_weights(self):
return self.get_variables()
def set_weights(self, weights, rate=1.0):
if rate != 1:
current_weights = self.get_weights()
updated_weights = [(1 - rate) * t + rate * o for t, o in zip(current_weights, weights)]
else:
updated_weights = weights
for update_function, variable in zip(self.set_weights_ops, updated_weights):
update_function(variable)
def accumulate_gradients(self, inputs, targets):
        # Neon doesn't currently allow separating the grads calculation and grad apply operations,
        # so this feature is not currently available. Instead, we do a full training iteration.
inputs = force_list(inputs)
targets = force_list(targets)
for idx, input in enumerate(inputs):
inputs[idx] = input.swapaxes(0, -1)
for idx, target in enumerate(targets):
targets[idx] = np.rollaxis(target, 0, len(target.shape))
all_inputs = inputs + targets
loss = np.mean(self.train_op(*all_inputs))
return [loss]
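set_weights above blends weights as updated = (1 - rate) * current + rate * other, so rate=1.0 copies the other network exactly and smaller rates give a soft (Polyak-style) update; a quick numpy check of the blend (standalone, not Coach code):

import numpy as np

rate = 0.1
current = np.array([1.0, 2.0, 4.0])
other = np.array([3.0, 2.0, 0.0])
blended = (1 - rate) * current + rate * other
print(blended)  # [1.2 2.  3.6]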

architectures/neon_components/embedders.py Normal file

@@ -0,0 +1,88 @@
#
# Copyright (c) 2017 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import ngraph.frontends.neon as neon
import ngraph as ng
from ngraph.util.names import name_scope
class InputEmbedder:
def __init__(self, input_size, batch_size=None, activation_function=neon.Rectlin(), name="embedder"):
self.name = name
self.input_size = input_size
self.batch_size = batch_size
self.activation_function = activation_function
self.weights_init = neon.GlorotInit()
self.biases_init = neon.ConstantInit()
self.input = None
self.output = None
def __call__(self, prev_input_placeholder=None):
with name_scope(self.get_name()):
# create the input axes
axes = []
if len(self.input_size) == 2:
axis_names = ['H', 'W']
else:
axis_names = ['C', 'H', 'W']
for axis_size, axis_name in zip(self.input_size, axis_names):
axes.append(ng.make_axis(axis_size, name=axis_name))
batch_axis_full = ng.make_axis(self.batch_size, name='N')
input_axes = ng.make_axes(axes)
if prev_input_placeholder is None:
self.input = ng.placeholder(input_axes + [batch_axis_full])
else:
self.input = prev_input_placeholder
self._build_module()
return self.input, self.output(self.input)
def _build_module(self):
pass
def get_name(self):
return self.name
class ImageEmbedder(InputEmbedder):
def __init__(self, input_size, batch_size=None, input_rescaler=255.0, activation_function=neon.Rectlin(), name="embedder"):
InputEmbedder.__init__(self, input_size, batch_size, activation_function, name)
self.input_rescaler = input_rescaler
def _build_module(self):
# image observation
self.output = neon.Sequential([
neon.Preprocess(functor=lambda x: x / self.input_rescaler),
neon.Convolution((8, 8, 32), strides=4, activation=self.activation_function,
filter_init=self.weights_init, bias_init=self.biases_init),
neon.Convolution((4, 4, 64), strides=2, activation=self.activation_function,
filter_init=self.weights_init, bias_init=self.biases_init),
neon.Convolution((3, 3, 64), strides=1, activation=self.activation_function,
filter_init=self.weights_init, bias_init=self.biases_init)
])
class VectorEmbedder(InputEmbedder):
def __init__(self, input_size, batch_size=None, activation_function=neon.Rectlin(), name="embedder"):
InputEmbedder.__init__(self, input_size, batch_size, activation_function, name)
def _build_module(self):
# vector observation
self.output = neon.Sequential([
neon.Affine(nout=256, activation=self.activation_function,
weight_init=self.weights_init, bias_init=self.biases_init)
])

architectures/neon_components/general_network.py Normal file

@@ -0,0 +1,191 @@
#
# Copyright (c) 2017 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from architectures.neon_components.embedders import *
from architectures.neon_components.heads import *
from architectures.neon_components.middleware import *
from architectures.neon_components.architecture import *
from configurations import InputTypes, OutputTypes, MiddlewareTypes
class GeneralNeonNetwork(NeonArchitecture):
def __init__(self, tuning_parameters, name="", global_network=None, network_is_local=True):
self.global_network = global_network
self.network_is_local = network_is_local
self.num_heads_per_network = 1 if tuning_parameters.agent.use_separate_networks_per_head else \
len(tuning_parameters.agent.output_types)
self.num_networks = 1 if not tuning_parameters.agent.use_separate_networks_per_head else \
len(tuning_parameters.agent.output_types)
self.input_embedders = []
self.output_heads = []
self.activation_function = self.get_activation_function(
tuning_parameters.agent.hidden_layers_activation_function)
NeonArchitecture.__init__(self, tuning_parameters, name, global_network, network_is_local)
def get_activation_function(self, activation_function_string):
activation_functions = {
'relu': neon.Rectlin(),
'tanh': neon.Tanh(),
'sigmoid': neon.Logistic(),
'elu': neon.Explin(),
'none': None
}
assert activation_function_string in activation_functions.keys(), \
"Activation function must be one of the following {}".format(activation_functions.keys())
return activation_functions[activation_function_string]
def get_input_embedder(self, embedder_type):
# the observation can be either an image or a vector
def get_observation_embedding(with_timestep=False):
if self.input_height > 1:
return ImageEmbedder((self.input_depth, self.input_height, self.input_width), self.batch_size,
name="observation")
else:
return VectorEmbedder((self.input_depth, self.input_width + int(with_timestep)), self.batch_size,
name="observation")
input_mapping = {
InputTypes.Observation: get_observation_embedding(),
InputTypes.Measurements: VectorEmbedder(self.measurements_size, self.batch_size, name="measurements"),
InputTypes.GoalVector: VectorEmbedder(self.measurements_size, self.batch_size, name="goal_vector"),
InputTypes.Action: VectorEmbedder((self.num_actions,), self.batch_size, name="action"),
InputTypes.TimedObservation: get_observation_embedding(with_timestep=True),
}
return input_mapping[embedder_type]
def get_middleware_embedder(self, middleware_type):
return {MiddlewareTypes.LSTM: None, # LSTM over Neon is currently not supported in Coach
MiddlewareTypes.FC: FC_Embedder}.get(middleware_type)(self.activation_function)
def get_output_head(self, head_type, head_idx, loss_weight=1.):
output_mapping = {
OutputTypes.Q: QHead,
OutputTypes.DuelingQ: DuelingQHead,
OutputTypes.V: None, # Policy Optimization algorithms over Neon are currently not supported in Coach
OutputTypes.Pi: None, # Policy Optimization algorithms over Neon are currently not supported in Coach
OutputTypes.MeasurementsPrediction: None, # DFP over Neon is currently not supported in Coach
OutputTypes.DNDQ: None, # NEC over Neon is currently not supported in Coach
OutputTypes.NAF: None, # NAF over Neon is currently not supported in Coach
OutputTypes.PPO: None, # PPO over Neon is currently not supported in Coach
OutputTypes.PPO_V: None # PPO over Neon is currently not supported in Coach
}
return output_mapping[head_type](self.tp, head_idx, loss_weight, self.network_is_local)
def get_model(self, tuning_parameters):
"""
        :param tuning_parameters: A Preset class instance with all the running parameters
:type tuning_parameters: Preset
:return: A model
"""
assert len(self.tp.agent.input_types) > 0, "At least one input type should be defined"
assert len(self.tp.agent.output_types) > 0, "At least one output type should be defined"
assert self.tp.agent.middleware_type is not None, "Exactly one middleware type should be defined"
assert len(self.tp.agent.loss_weights) > 0, "At least one loss weight should be defined"
assert len(self.tp.agent.output_types) == len(self.tp.agent.loss_weights), \
"Number of loss weights should match the number of output types"
local_network_in_distributed_training = self.global_network is not None and self.network_is_local
tuning_parameters.activation_function = self.activation_function
done_creating_input_placeholders = False
for network_idx in range(self.num_networks):
with name_scope('network_{}'.format(network_idx)):
####################
# Input Embeddings #
####################
state_embedding = []
for idx, input_type in enumerate(self.tp.agent.input_types):
# get the class of the input embedder
self.input_embedders.append(self.get_input_embedder(input_type))
                    # in the case where each head uses a different network, we still reuse the input placeholders
prev_network_input_placeholder = self.inputs[idx] if done_creating_input_placeholders else None
# create the input embedder instance and store the input placeholder and the embedding
input_placeholder, embedding = self.input_embedders[-1](prev_network_input_placeholder)
if len(self.inputs) < len(self.tp.agent.input_types):
self.inputs.append(input_placeholder)
state_embedding.append(embedding)
done_creating_input_placeholders = True
##############
# Middleware #
##############
state_embedding = ng.concat_along_axis(state_embedding, state_embedding[0].axes[0]) \
if len(state_embedding) > 1 else state_embedding[0]
self.middleware_embedder = self.get_middleware_embedder(self.tp.agent.middleware_type)
_, self.state_embedding = self.middleware_embedder(state_embedding)
################
# Output Heads #
################
for head_idx in range(self.num_heads_per_network):
for head_copy_idx in range(self.tp.agent.num_output_head_copies):
if self.tp.agent.use_separate_networks_per_head:
                            # if we use separate networks per head, then the head type corresponds to the network idx
head_type_idx = network_idx
else:
# if we use a single network with multiple heads, then the head type is the current head idx
head_type_idx = head_idx
self.output_heads.append(self.get_output_head(self.tp.agent.output_types[head_type_idx],
head_copy_idx,
self.tp.agent.loss_weights[head_type_idx]))
if self.network_is_local:
output, target_placeholder, input_placeholder = self.output_heads[-1](self.state_embedding)
self.targets.extend(target_placeholder)
else:
output, input_placeholder = self.output_heads[-1](self.state_embedding)
self.outputs.extend(output)
self.inputs.extend(input_placeholder)
# Losses
self.losses = []
for output_head in self.output_heads:
self.losses += output_head.loss
self.total_loss = sum(self.losses)
# Learning rate
if self.tp.learning_rate_decay_rate != 0:
raise Exception("learning rate decay is not supported in neon")
# Optimizer
if local_network_in_distributed_training and \
hasattr(self.tp.agent, "shared_optimizer") and self.tp.agent.shared_optimizer:
# distributed training and this is the local network instantiation
self.optimizer = self.global_network.optimizer
else:
if tuning_parameters.agent.optimizer_type == 'Adam':
self.optimizer = neon.Adam(
learning_rate=tuning_parameters.learning_rate,
gradient_clip_norm=tuning_parameters.clip_gradients
)
elif tuning_parameters.agent.optimizer_type == 'RMSProp':
self.optimizer = neon.RMSProp(
learning_rate=tuning_parameters.learning_rate,
gradient_clip_norm=tuning_parameters.clip_gradients,
decay_rate=0.9,
epsilon=0.01
)
elif tuning_parameters.agent.optimizer_type == 'LBFGS':
raise Exception("LBFGS optimizer is not supported in neon")
else:
raise Exception("{} is not a valid optimizer type".format(tuning_parameters.agent.optimizer_type))

architectures/neon_components/heads.py Normal file

@@ -0,0 +1,194 @@
#
# Copyright (c) 2017 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import ngraph as ng
from ngraph.util.names import name_scope
import ngraph.frontends.neon as neon
import numpy as np
from utils import force_list
from architectures.neon_components.losses import *
class Head:
def __init__(self, tuning_parameters, head_idx=0, loss_weight=1., is_local=True):
self.head_idx = head_idx
self.name = "head"
self.output = []
self.loss = []
self.loss_type = []
self.regularizations = []
self.loss_weight = force_list(loss_weight)
self.weights_init = neon.GlorotInit()
self.biases_init = neon.ConstantInit()
self.target = []
self.input = []
self.is_local = is_local
self.batch_size = tuning_parameters.batch_size
def __call__(self, input_layer):
"""
Wrapper for building the module graph including scoping and loss creation
:param input_layer: the input to the graph
:return: the output of the last layer and the target placeholder
"""
with name_scope(self.get_name()):
self._build_module(input_layer)
self.output = force_list(self.output)
self.target = force_list(self.target)
self.input = force_list(self.input)
self.loss_type = force_list(self.loss_type)
self.loss = force_list(self.loss)
self.regularizations = force_list(self.regularizations)
if self.is_local:
self.set_loss()
if self.is_local:
return self.output, self.target, self.input
else:
return self.output, self.input
def _build_module(self, input_layer):
"""
Builds the graph of the module
:param input_layer: the input to the graph
:return: None
"""
pass
def get_name(self):
"""
Get a formatted name for the module
:return: the formatted name
"""
return '{}_{}'.format(self.name, self.head_idx)
def set_loss(self):
"""
Creates a target placeholder and loss function for each loss_type and regularization
:return: None
"""
# add losses and target placeholder
for idx in range(len(self.loss_type)):
# output_axis = ng.make_axis(self.num_actions, name='q_values')
batch_axis_full = ng.make_axis(self.batch_size, name='N')
target = ng.placeholder(ng.make_axes([self.output[0].axes[0], batch_axis_full]))
self.target.append(target)
loss = self.loss_type[idx](self.target[-1], self.output[idx],
weights=self.loss_weight[idx], scope=self.get_name())
self.loss.append(loss)
# add regularizations
for regularization in self.regularizations:
self.loss.append(regularization)
class QHead(Head):
def __init__(self, tuning_parameters, head_idx=0, loss_weight=1., is_local=True):
Head.__init__(self, tuning_parameters, head_idx, loss_weight, is_local)
self.name = 'q_values_head'
self.num_actions = tuning_parameters.env_instance.action_space_size
if tuning_parameters.agent.replace_mse_with_huber_loss:
raise Exception("huber loss is not supported in neon")
else:
self.loss_type = mean_squared_error
def _build_module(self, input_layer):
# Standard Q Network
self.output = neon.Sequential([
neon.Affine(nout=self.num_actions,
weight_init=self.weights_init, bias_init=self.biases_init)
])(input_layer)
class DuelingQHead(QHead):
def __init__(self, tuning_parameters, head_idx=0, loss_weight=1., is_local=True):
QHead.__init__(self, tuning_parameters, head_idx, loss_weight, is_local)
def _build_module(self, input_layer):
# Dueling Network
# state value tower - V
output_axis = ng.make_axis(self.num_actions, name='q_values')
state_value = neon.Sequential([
neon.Affine(nout=256, activation=neon.Rectlin(),
weight_init=self.weights_init, bias_init=self.biases_init),
neon.Affine(nout=1,
weight_init=self.weights_init, bias_init=self.biases_init)
])(input_layer)
# action advantage tower - A
action_advantage_unnormalized = neon.Sequential([
neon.Affine(nout=256, activation=neon.Rectlin(),
weight_init=self.weights_init, bias_init=self.biases_init),
neon.Affine(axes=output_axis,
weight_init=self.weights_init, bias_init=self.biases_init)
])(input_layer)
action_advantage = action_advantage_unnormalized - ng.mean(action_advantage_unnormalized)
repeated_state_value = ng.expand_dims(ng.slice_along_axis(state_value, state_value.axes[0], 0), output_axis, 0)
# merge to state-action value function Q
self.output = repeated_state_value + action_advantage
class MeasurementsPredictionHead(Head):
def __init__(self, tuning_parameters, head_idx=0, loss_weight=1., is_local=True):
Head.__init__(self, tuning_parameters, head_idx, loss_weight, is_local)
self.name = 'future_measurements_head'
self.num_actions = tuning_parameters.env_instance.action_space_size
self.num_measurements = tuning_parameters.env.measurements_size[0] \
if tuning_parameters.env.measurements_size else 0
self.num_prediction_steps = tuning_parameters.agent.num_predicted_steps_ahead
self.multi_step_measurements_size = self.num_measurements * self.num_prediction_steps
if tuning_parameters.agent.replace_mse_with_huber_loss:
raise Exception("huber loss is not supported in neon")
else:
self.loss_type = mean_squared_error
def _build_module(self, input_layer):
# This is almost exactly the same as Dueling Network but we predict the future measurements for each action
        multistep_measurements_size = self.multi_step_measurements_size  # precomputed in __init__
# actions expectation tower (expectation stream) - E
with name_scope("expectation_stream"):
expectation_stream = neon.Sequential([
neon.Affine(nout=256, activation=neon.Rectlin(),
weight_init=self.weights_init, bias_init=self.biases_init),
neon.Affine(nout=multistep_measurements_size,
weight_init=self.weights_init, bias_init=self.biases_init)
])(input_layer)
# action fine differences tower (action stream) - A
with name_scope("action_stream"):
action_stream_unnormalized = neon.Sequential([
neon.Affine(nout=256, activation=neon.Rectlin(),
weight_init=self.weights_init, bias_init=self.biases_init),
neon.Affine(nout=self.num_actions * multistep_measurements_size,
weight_init=self.weights_init, bias_init=self.biases_init),
neon.Reshape((self.num_actions, multistep_measurements_size))
])(input_layer)
action_stream = action_stream_unnormalized - ng.mean(action_stream_unnormalized)
        output_axis = ng.make_axis(self.num_actions, name='q_values')
        repeated_expectation_stream = ng.slice_along_axis(expectation_stream, expectation_stream.axes[0], 0)
        repeated_expectation_stream = ng.expand_dims(repeated_expectation_stream, output_axis, 0)
# merge to future measurements predictions
self.output = repeated_expectation_stream + action_stream
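DuelingQHead above merges its two towers as Q(s, a) = V(s) + (A(s, a) - mean(A)), subtracting the mean advantage to keep the decomposition identifiable; a numpy illustration of that aggregation (standalone, not Coach code):

import numpy as np

state_value = 2.0                        # V(s), one scalar per state
advantages = np.array([1.0, -1.0, 0.0])  # A(s, a), one entry per action
q_values = state_value + (advantages - advantages.mean())
print(q_values)  # [3. 1. 2.]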

architectures/neon_components/losses.py Normal file

@@ -0,0 +1,28 @@
#
# Copyright (c) 2017 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import ngraph as ng
import ngraph.frontends.neon as neon
from ngraph.util.names import name_scope
import numpy as np
def mean_squared_error(targets, outputs, weights=1.0, scope=""):
with name_scope(scope):
# TODO: reduce mean over the action axis
loss = ng.squared_L2(targets - outputs)
weighted_loss = loss * weights
return weighted_loss
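For reference, and assuming ng.squared_L2 reduces by summing squared elements, the unweighted loss above is the squared L2 distance between targets and outputs; a numpy equivalent (standalone):

import numpy as np

targets = np.array([1.0, 0.0, 2.0])
outputs = np.array([0.5, 0.0, 1.0])
loss = np.sum((targets - outputs) ** 2)  # sum of squared differences
print(loss)  # 1.25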

architectures/neon_components/middleware.py Normal file

@@ -0,0 +1,50 @@
#
# Copyright (c) 2017 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import ngraph as ng
import ngraph.frontends.neon as neon
from ngraph.util.names import name_scope
import numpy as np
class MiddlewareEmbedder:
def __init__(self, activation_function=neon.Rectlin(), name="middleware_embedder"):
self.name = name
self.input = None
self.output = None
self.weights_init = neon.GlorotInit()
self.biases_init = neon.ConstantInit()
self.activation_function = activation_function
def __call__(self, input_layer):
with name_scope(self.get_name()):
self.input = input_layer
self._build_module()
return self.input, self.output(self.input)
def _build_module(self):
pass
def get_name(self):
return self.name
class FC_Embedder(MiddlewareEmbedder):
def _build_module(self):
self.output = neon.Sequential([
neon.Affine(nout=512, activation=self.activation_function,
weight_init=self.weights_init, bias_init=self.biases_init)])

architectures/network_wrapper.py Normal file

@@ -0,0 +1,179 @@
#
# Copyright (c) 2017 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from collections import OrderedDict
from configurations import Preset, Frameworks
from logger import *
try:
import tensorflow as tf
from architectures.tensorflow_components.general_network import GeneralTensorFlowNetwork
except ImportError:
failed_imports.append("TensorFlow")
try:
from architectures.neon_components.general_network import GeneralNeonNetwork
except ImportError:
failed_imports.append("Neon")
class NetworkWrapper:
def __init__(self, tuning_parameters, has_target, has_global, name, replicated_device=None, worker_device=None):
"""
:param tuning_parameters:
:type tuning_parameters: Preset
:param has_target:
:param has_global:
:param name:
:param replicated_device:
:param worker_device:
"""
self.tp = tuning_parameters
self.has_target = has_target
self.has_global = has_global
self.name = name
self.sess = tuning_parameters.sess
if self.tp.framework == Frameworks.TensorFlow:
general_network = GeneralTensorFlowNetwork
elif self.tp.framework == Frameworks.Neon:
general_network = GeneralNeonNetwork
else:
raise Exception("{} Framework is not supported".format(Frameworks().to_string(self.tp.framework)))
# Global network - the main network shared between threads
self.global_network = None
if self.has_global:
with tf.device(replicated_device):
self.global_network = general_network(tuning_parameters, '{}/global'.format(name),
network_is_local=False)
# Online network - local copy of the main network used for playing
self.online_network = None
with tf.device(worker_device):
self.online_network = general_network(tuning_parameters, '{}/online'.format(name),
self.global_network, network_is_local=True)
# Target network - a local, slow updating network used for stabilizing the learning
self.target_network = None
if self.has_target:
with tf.device(worker_device):
self.target_network = general_network(tuning_parameters, '{}/target'.format(name),
network_is_local=True)
if not self.tp.distributed and self.tp.framework == Frameworks.TensorFlow:
self.model_saver = tf.train.Saver()
if self.tp.sess and self.tp.checkpoint_restore_dir:
checkpoint = tf.train.latest_checkpoint(self.tp.checkpoint_restore_dir)
screen.log_title("Loading checkpoint: {}".format(checkpoint))
self.model_saver.restore(self.tp.sess, checkpoint)
def sync(self):
"""
Initializes the weights of the networks to match each other
:return:
"""
self.update_online_network()
self.update_target_network()
def update_target_network(self, rate=1.0):
"""
Copy weights: online network >>> target network
:param rate: the rate of copying the weights - 1 for copying exactly
"""
if self.target_network:
self.target_network.set_weights(self.online_network.get_weights(), rate)
def update_online_network(self, rate=1.0):
"""
Copy weights: global network >>> online network
:param rate: the rate of copying the weights - 1 for copying exactly
"""
if self.global_network:
self.online_network.set_weights(self.global_network.get_weights(), rate)
def apply_gradients_to_global_network(self):
"""
Apply gradients from the online network on the global network
:return:
"""
self.global_network.apply_gradients(self.online_network.accumulated_gradients)
def apply_gradients_to_online_network(self):
"""
Apply gradients from the online network on itself
:return:
"""
self.online_network.apply_gradients(self.online_network.accumulated_gradients)
def train_and_sync_networks(self, inputs, targets):
"""
        A generic training function that enables multi-threaded training using a global network if necessary.
:param inputs: The inputs for the network.
:param targets: The targets corresponding to the given inputs
:return: The loss of the training iteration
"""
result = self.online_network.accumulate_gradients(inputs, targets)
self.apply_gradients_and_sync_networks()
return result
def apply_gradients_and_sync_networks(self):
"""
Applies the gradients accumulated in the online network to the global network or to itself and syncs the
networks if necessary
"""
if self.global_network:
self.apply_gradients_to_global_network()
self.online_network.reset_accumulated_gradients()
self.update_online_network()
else:
self.online_network.apply_and_reset_gradients(self.online_network.accumulated_gradients)
def get_local_variables(self):
"""
Get all the variables that are local to the thread
:return: a list of all the variables that are local to the thread
"""
local_variables = [v for v in tf.global_variables() if self.online_network.name in v.name]
if self.has_target:
local_variables += [v for v in tf.global_variables() if self.target_network.name in v.name]
return local_variables
def get_global_variables(self):
"""
Get all the variables that are shared between threads
:return: a list of all the variables that are shared between threads
"""
global_variables = [v for v in tf.global_variables() if self.global_network.name in v.name]
return global_variables
def set_session(self, sess):
self.sess = sess
self.online_network.sess = sess
if self.global_network:
self.global_network.sess = sess
if self.target_network:
self.target_network.sess = sess
def save_model(self, model_id):
saved_model_path = self.model_saver.save(self.tp.sess, os.path.join(self.tp.save_model_dir,
str(model_id) + '.ckpt'))
screen.log_dict(
OrderedDict([
("Saving model", saved_model_path),
]),
prefix="Checkpoint"
)
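update_online_network and update_target_network above only copy weights in one direction (global network to online network, then online network to target network); a tiny mock of that sync order, using a hypothetical DummyNet rather than Coach classes:

class DummyNet:
    def __init__(self, w):
        self.w = list(w)
    def get_weights(self):
        return self.w
    def set_weights(self, weights, rate=1.0):
        # rate=1.0 copies exactly, matching the wrapper's default
        self.w = [(1 - rate) * c + rate * o for c, o in zip(self.w, weights)]

global_net, online, target = DummyNet([1.0]), DummyNet([0.0]), DummyNet([0.0])
online.set_weights(global_net.get_weights())  # update_online_network
target.set_weights(online.get_weights())      # update_target_network
print(online.w, target.w)  # [1.0] [1.0]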

architectures/tensorflow_components/architecture.py Normal file

@@ -0,0 +1,290 @@
#
# Copyright (c) 2017 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from architectures.architecture import Architecture
import tensorflow as tf
from utils import force_list, squeeze_list
from configurations import Preset, MiddlewareTypes
import numpy as np
import time
class TensorFlowArchitecture(Architecture):
def __init__(self, tuning_parameters, name="", global_network=None, network_is_local=True):
"""
:param tuning_parameters: The parameters used for running the algorithm
:type tuning_parameters: Preset
:param name: The name of the network
"""
Architecture.__init__(self, tuning_parameters, name)
self.middleware_embedder = None
self.network_is_local = network_is_local
assert tuning_parameters.agent.tensorflow_support, 'TensorFlow is not supported for this agent'
self.sess = tuning_parameters.sess
self.inputs = []
self.outputs = []
self.targets = []
self.losses = []
self.total_loss = None
self.trainable_weights = []
self.weights_placeholders = []
self.curr_rnn_c_in = None
self.curr_rnn_h_in = None
self.gradients_wrt_inputs = []
self.optimizer_type = self.tp.agent.optimizer_type
if self.tp.seed is not None:
tf.set_random_seed(self.tp.seed)
with tf.variable_scope(self.name, initializer=tf.contrib.layers.xavier_initializer()):
self.global_step = tf.contrib.framework.get_or_create_global_step()
# build the network
self.get_model(tuning_parameters)
# model weights
self.trainable_weights = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=self.name)
# locks for synchronous training
if self.tp.distributed and not self.tp.agent.async_training and not self.network_is_local:
self.lock_counter = tf.get_variable("lock_counter", [], tf.int32,
initializer=tf.constant_initializer(0, dtype=tf.int32),
trainable=False)
self.lock = self.lock_counter.assign_add(1, use_locking=True)
self.lock_init = self.lock_counter.assign(0)
self.release_counter = tf.get_variable("release_counter", [], tf.int32,
initializer=tf.constant_initializer(0, dtype=tf.int32),
trainable=False)
self.release = self.release_counter.assign_add(1, use_locking=True)
self.release_init = self.release_counter.assign(0)
# local network does the optimization so we need to create all the ops we are going to use to optimize
for idx, var in enumerate(self.trainable_weights):
placeholder = tf.placeholder(tf.float32, shape=var.get_shape(), name=str(idx) + '_holder')
self.weights_placeholders.append(placeholder)
self.update_weights_from_list = [weights.assign(holder) for holder, weights in
zip(self.weights_placeholders, self.trainable_weights)]
# gradients ops
self.tensor_gradients = tf.gradients(self.total_loss, self.trainable_weights)
self.gradients_norm = tf.global_norm(self.tensor_gradients)
if self.tp.clip_gradients is not None and self.tp.clip_gradients != 0:
self.clipped_grads, self.grad_norms = tf.clip_by_global_norm(self.tensor_gradients,
tuning_parameters.clip_gradients)
# gradients of the outputs w.r.t. the inputs
if len(self.outputs) == 1:
self.gradients_wrt_inputs = [tf.gradients(self.outputs[0], input_ph) for input_ph in self.inputs]
self.gradients_weights_ph = tf.placeholder('float32', self.outputs[0].shape, 'output_gradient_weights')
self.weighted_gradients = tf.gradients(self.outputs[0], self.trainable_weights, self.gradients_weights_ph)
# L2 regularization
if self.tp.agent.l2_regularization != 0:
self.l2_regularization = [tf.add_n([tf.nn.l2_loss(v) for v in self.trainable_weights])
* self.tp.agent.l2_regularization]
tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, self.l2_regularization)
self.inc_step = self.global_step.assign_add(1)
# defining the optimization process (for LBFGS we have less control over the optimizer)
if self.optimizer_type != 'LBFGS':
# no global network, this is a plain simple centralized training
self.update_weights_from_batch_gradients = self.optimizer.apply_gradients(
zip(self.weights_placeholders, self.trainable_weights), global_step=self.global_step)
# initialize or restore model
if not self.tp.distributed:
self.init_op = tf.global_variables_initializer()
if self.sess:
self.sess.run(self.init_op)
self.accumulated_gradients = None
def reset_accumulated_gradients(self):
"""
Reset the gradients accumulation placeholder
"""
if self.accumulated_gradients is None:
self.accumulated_gradients = self.tp.sess.run(self.trainable_weights)
for ix, grad in enumerate(self.accumulated_gradients):
self.accumulated_gradients[ix] = grad * 0
def accumulate_gradients(self, inputs, targets, additional_fetches=None):
"""
Runs a forward pass & backward pass, clips gradients if needed and accumulates them into the accumulation
placeholders
:param additional_fetches: Optional tensors to fetch during gradients calculation
:param inputs: The input batch for the network
:param targets: The targets corresponding to the input batch
:return: A list containing the total loss and the individual network heads losses
"""
if self.accumulated_gradients is None:
self.reset_accumulated_gradients()
# feed inputs
if additional_fetches is None:
additional_fetches = []
inputs = force_list(inputs)
feed_dict = dict(zip(self.inputs, inputs))
# feed targets
targets = force_list(targets)
for placeholder_idx, target in enumerate(targets):
feed_dict[self.targets[placeholder_idx]] = target
if self.optimizer_type != 'LBFGS':
# set the fetches
fetches = [self.gradients_norm]
if self.tp.clip_gradients:
fetches.append(self.clipped_grads)
else:
fetches.append(self.tensor_gradients)
fetches += [self.total_loss, self.losses]
if self.tp.agent.middleware_type == MiddlewareTypes.LSTM:
fetches.append(self.middleware_embedder.state_out)
additional_fetches_start_idx = len(fetches)
fetches += additional_fetches
# feed the lstm state if necessary
if self.tp.agent.middleware_type == MiddlewareTypes.LSTM:
feed_dict[self.middleware_embedder.c_in] = self.middleware_embedder.c_init
feed_dict[self.middleware_embedder.h_in] = self.middleware_embedder.h_init
# get grads
result = self.tp.sess.run(fetches, feed_dict=feed_dict)
# extract the fetches
norm_unclipped_grads, grads, total_loss, losses = result[:4]
if self.tp.agent.middleware_type == MiddlewareTypes.LSTM:
(self.curr_rnn_c_in, self.curr_rnn_h_in) = result[4]
fetched_tensors = []
if len(additional_fetches) > 0:
fetched_tensors = result[additional_fetches_start_idx:]
# accumulate the gradients
for idx, grad in enumerate(grads):
self.accumulated_gradients[idx] += grad
return total_loss, losses, norm_unclipped_grads, fetched_tensors
else:
self.optimizer.minimize(session=self.tp.sess, feed_dict=feed_dict)
return [0]
def apply_and_reset_gradients(self, gradients, scaler=1.):
"""
Applies the given gradients to the network weights and resets the accumulation placeholder
:param gradients: The gradients to use for the update
:param scaler: A scaling factor that allows rescaling the gradients before applying them
"""
self.apply_gradients(gradients, scaler)
self.reset_accumulated_gradients()
def apply_gradients(self, gradients, scaler=1.):
"""
Applies the given gradients to the network weights
:param gradients: The gradients to use for the update
:param scaler: A scaling factor that allows rescaling the gradients before applying them
"""
if self.tp.agent.async_training or not self.tp.distributed:
if hasattr(self, 'global_step') and not self.network_is_local:
self.tp.sess.run(self.inc_step)
if self.optimizer_type != 'LBFGS':
# lock barrier
if hasattr(self, 'lock_counter'):
self.tp.sess.run(self.lock)
while self.tp.sess.run(self.lock_counter) % self.tp.num_threads != 0:
time.sleep(0.00001)
# rescale the gradients so that they average out with the gradients from the other workers
scaler /= float(self.tp.num_threads)
# apply gradients
if scaler != 1.:
for gradient in gradients:
gradient /= scaler
feed_dict = dict(zip(self.weights_placeholders, gradients))
_ = self.tp.sess.run(self.update_weights_from_batch_gradients, feed_dict=feed_dict)
# release barrier
if hasattr(self, 'release_counter'):
self.tp.sess.run(self.release)
while self.tp.sess.run(self.release_counter) % self.tp.num_threads != 0:
time.sleep(0.00001)
def predict(self, inputs):
"""
Run a forward pass of the network using the given input
:param inputs: The input for the network
:return: The network output
"""
feed_dict = dict(zip(self.inputs, force_list(inputs)))
if self.tp.agent.middleware_type == MiddlewareTypes.LSTM:
feed_dict[self.middleware_embedder.c_in] = self.curr_rnn_c_in
feed_dict[self.middleware_embedder.h_in] = self.curr_rnn_h_in
output, (self.curr_rnn_c_in, self.curr_rnn_h_in) = self.tp.sess.run([self.outputs, self.middleware_embedder.state_out], feed_dict=feed_dict)
else:
output = self.tp.sess.run(self.outputs, feed_dict)
return squeeze_list(output)
def train_on_batch(self, inputs, targets, scaler=1., additional_fetches=None):
"""
Given a batch of examples and targets, runs a forward pass & backward pass and then applies the gradients
:param additional_fetches: Optional tensors to fetch during the training process
:param inputs: The input for the network
:param targets: The targets corresponding to the input batch
:param scaler: A scaling factor that allows rescaling the gradients before applying them
:return: The loss of the network
"""
if additional_fetches is None:
additional_fetches = []
        additional_fetches = force_list(additional_fetches)
loss = self.accumulate_gradients(inputs, targets, additional_fetches=additional_fetches)
self.apply_and_reset_gradients(self.accumulated_gradients, scaler)
return loss
def get_weights(self):
"""
:return: a list of tensors containing the network weights for each layer
"""
return self.trainable_weights
def set_weights(self, weights, new_rate=1.0):
"""
Sets the network weights from the given list of weights tensors
"""
feed_dict = {}
old_weights, new_weights = self.tp.sess.run([self.get_weights(), weights])
for placeholder_idx, new_weight in enumerate(new_weights):
feed_dict[self.weights_placeholders[placeholder_idx]]\
= new_rate * new_weight + (1 - new_rate) * old_weights[placeholder_idx]
self.tp.sess.run(self.update_weights_from_list, feed_dict)
def write_graph_to_logdir(self, summary_dir):
"""
Writes the tensorflow graph to the logdir for tensorboard visualization
:param summary_dir: the path to the logdir
"""
summary_writer = tf.summary.FileWriter(summary_dir)
summary_writer.add_graph(self.sess.graph)
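The accumulate_gradients/apply_gradients split in this class boils down to summing per-batch gradients into a buffer, applying the buffered sum in one update, then zeroing the buffer; a minimal numpy analogue of that cycle (illustrative only, not Coach code):

import numpy as np

weights = np.array([0.5, -0.25])
accumulated = np.zeros_like(weights)
learning_rate = 0.1

for grad in (np.array([0.2, 0.0]), np.array([0.1, -0.4])):
    accumulated += grad                 # accumulate_gradients
weights -= learning_rate * accumulated  # apply_gradients
accumulated[:] = 0.0                    # reset_accumulated_gradients
print(weights)  # [ 0.47 -0.21]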

architectures/tensorflow_components/embedders.py Normal file

@@ -0,0 +1,73 @@
#
# Copyright (c) 2017 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import tensorflow as tf
class InputEmbedder:
def __init__(self, input_size, activation_function=tf.nn.relu, name="embedder"):
self.name = name
self.input_size = input_size
self.activation_function = activation_function
self.input = None
self.output = None
def __call__(self, prev_input_placeholder=None):
with tf.variable_scope(self.get_name()):
if prev_input_placeholder is None:
self.input = tf.placeholder("float", shape=(None,) + self.input_size, name=self.get_name())
else:
self.input = prev_input_placeholder
self._build_module()
return self.input, self.output
def _build_module(self):
pass
def get_name(self):
return self.name
class ImageEmbedder(InputEmbedder):
def __init__(self, input_size, input_rescaler=255.0, activation_function=tf.nn.relu, name="embedder"):
InputEmbedder.__init__(self, input_size, activation_function, name)
self.input_rescaler = input_rescaler
def _build_module(self):
# image observation
rescaled_observation_stack = self.input / self.input_rescaler
self.observation_conv1 = tf.layers.conv2d(rescaled_observation_stack,
filters=32, kernel_size=(8, 8), strides=(4, 4),
activation=self.activation_function, data_format='channels_last')
self.observation_conv2 = tf.layers.conv2d(self.observation_conv1,
filters=64, kernel_size=(4, 4), strides=(2, 2),
activation=self.activation_function, data_format='channels_last')
self.observation_conv3 = tf.layers.conv2d(self.observation_conv2,
filters=64, kernel_size=(3, 3), strides=(1, 1),
activation=self.activation_function, data_format='channels_last')
self.output = tf.contrib.layers.flatten(self.observation_conv3)
class VectorEmbedder(InputEmbedder):
def __init__(self, input_size, activation_function=tf.nn.relu, name="embedder"):
InputEmbedder.__init__(self, input_size, activation_function, name)
def _build_module(self):
# vector observation
input_layer = tf.contrib.layers.flatten(self.input)
self.output = tf.layers.dense(input_layer, 256, activation=self.activation_function)
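With an 84x84 input, ImageEmbedder above is the classic DQN convolution stack; under TensorFlow's default VALID padding each layer shrinks the spatial size as floor((size - kernel) / stride) + 1, which the snippet below works out (standalone arithmetic, not Coach code):

def conv_out(size, kernel, stride):
    # VALID padding: floor((size - kernel) / stride) + 1
    return (size - kernel) // stride + 1

side = conv_out(conv_out(conv_out(84, 8, 4), 4, 2), 3, 1)
print(side, side * side * 64)  # 7 3136, i.e. a 3136-feature flattened embedding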

architectures/tensorflow_components/general_network.py Normal file

@@ -0,0 +1,190 @@
#
# Copyright (c) 2017 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from architectures.tensorflow_components.embedders import *
from architectures.tensorflow_components.heads import *
from architectures.tensorflow_components.middleware import *
from architectures.tensorflow_components.architecture import *
from configurations import InputTypes, OutputTypes, MiddlewareTypes
class GeneralTensorFlowNetwork(TensorFlowArchitecture):
def __init__(self, tuning_parameters, name="", global_network=None, network_is_local=True):
self.global_network = global_network
self.network_is_local = network_is_local
self.num_heads_per_network = 1 if tuning_parameters.agent.use_separate_networks_per_head else \
len(tuning_parameters.agent.output_types)
self.num_networks = 1 if not tuning_parameters.agent.use_separate_networks_per_head else \
len(tuning_parameters.agent.output_types)
self.input_embedders = []
self.output_heads = []
self.activation_function = self.get_activation_function(
tuning_parameters.agent.hidden_layers_activation_function)
TensorFlowArchitecture.__init__(self, tuning_parameters, name, global_network, network_is_local)
def get_activation_function(self, activation_function_string):
activation_functions = {
'relu': tf.nn.relu,
'tanh': tf.nn.tanh,
'sigmoid': tf.nn.sigmoid,
'elu': tf.nn.elu,
'none': None
}
assert activation_function_string in activation_functions.keys(), \
"Activation function must be one of the following {}".format(activation_functions.keys())
return activation_functions[activation_function_string]
def get_input_embedder(self, embedder_type):
# the observation can be either an image or a vector
def get_observation_embedding(with_timestep=False):
if self.input_height > 1:
return ImageEmbedder((self.input_height, self.input_width, self.input_depth), name="observation")
else:
return VectorEmbedder((self.input_width + int(with_timestep), self.input_depth), name="observation")
input_mapping = {
InputTypes.Observation: get_observation_embedding(),
InputTypes.Measurements: VectorEmbedder(self.measurements_size, name="measurements"),
InputTypes.GoalVector: VectorEmbedder(self.measurements_size, name="goal_vector"),
InputTypes.Action: VectorEmbedder((self.num_actions,), name="action"),
InputTypes.TimedObservation: get_observation_embedding(with_timestep=True),
}
return input_mapping[embedder_type]
def get_middleware_embedder(self, middleware_type):
return {MiddlewareTypes.LSTM: LSTM_Embedder,
MiddlewareTypes.FC: FC_Embedder}.get(middleware_type)(self.activation_function)
def get_output_head(self, head_type, head_idx, loss_weight=1.):
output_mapping = {
OutputTypes.Q: QHead,
OutputTypes.DuelingQ: DuelingQHead,
OutputTypes.V: VHead,
OutputTypes.Pi: PolicyHead,
OutputTypes.MeasurementsPrediction: MeasurementsPredictionHead,
OutputTypes.DNDQ: DNDQHead,
OutputTypes.NAF: NAFHead,
OutputTypes.PPO: PPOHead,
            OutputTypes.PPO_V: PPOVHead,
OutputTypes.DistributionalQ: DistributionalQHead
}
return output_mapping[head_type](self.tp, head_idx, loss_weight, self.network_is_local)
def get_model(self, tuning_parameters):
"""
        :param tuning_parameters: A Preset class instance with all the running parameters
:type tuning_parameters: Preset
:return: A model
"""
assert len(self.tp.agent.input_types) > 0, "At least one input type should be defined"
assert len(self.tp.agent.output_types) > 0, "At least one output type should be defined"
assert self.tp.agent.middleware_type is not None, "Exactly one middleware type should be defined"
assert len(self.tp.agent.loss_weights) > 0, "At least one loss weight should be defined"
assert len(self.tp.agent.output_types) == len(self.tp.agent.loss_weights), \
"Number of loss weights should match the number of output types"
local_network_in_distributed_training = self.global_network is not None and self.network_is_local
tuning_parameters.activation_function = self.activation_function
done_creating_input_placeholders = False
for network_idx in range(self.num_networks):
with tf.variable_scope('network_{}'.format(network_idx)):
####################
# Input Embeddings #
####################
state_embedding = []
for idx, input_type in enumerate(self.tp.agent.input_types):
# get the class of the input embedder
self.input_embedders.append(self.get_input_embedder(input_type))
                    # in the case where each head uses a different network, we still reuse the input placeholders
prev_network_input_placeholder = self.inputs[idx] if done_creating_input_placeholders else None
# create the input embedder instance and store the input placeholder and the embedding
input_placeholder, embedding = self.input_embedders[-1](prev_network_input_placeholder)
if len(self.inputs) < len(self.tp.agent.input_types):
self.inputs.append(input_placeholder)
state_embedding.append(embedding)
done_creating_input_placeholders = True
##############
# Middleware #
##############
state_embedding = tf.concat(state_embedding, axis=-1) if len(state_embedding) > 1 else state_embedding[0]
self.middleware_embedder = self.get_middleware_embedder(self.tp.agent.middleware_type)
_, self.state_embedding = self.middleware_embedder(state_embedding)
################
# Output Heads #
################
for head_idx in range(self.num_heads_per_network):
for head_copy_idx in range(self.tp.agent.num_output_head_copies):
if self.tp.agent.use_separate_networks_per_head:
                            # if we use separate networks per head, then the head type corresponds to the network idx
head_type_idx = network_idx
else:
# if we use a single network with multiple heads, then the head type is the current head idx
head_type_idx = head_idx
self.output_heads.append(self.get_output_head(self.tp.agent.output_types[head_type_idx],
head_copy_idx,
self.tp.agent.loss_weights[head_type_idx]))
if self.tp.agent.stop_gradients_from_head[head_idx]:
head_input = tf.stop_gradient(self.state_embedding)
else:
head_input = self.state_embedding
# build the head
if self.network_is_local:
output, target_placeholder, input_placeholder = self.output_heads[-1](head_input)
self.targets.extend(target_placeholder)
else:
output, input_placeholder = self.output_heads[-1](head_input)
self.outputs.extend(output)
self.inputs.extend(input_placeholder)
# Losses
self.losses = tf.losses.get_losses(self.name)
self.losses += tf.losses.get_regularization_losses(self.name)
self.total_loss = tf.losses.compute_weighted_loss(self.losses, scope=self.name)
# Learning rate
if self.tp.learning_rate_decay_rate != 0:
self.tp.learning_rate = tf.train.exponential_decay(
self.tp.learning_rate, self.global_step, decay_steps=self.tp.learning_rate_decay_steps,
decay_rate=self.tp.learning_rate_decay_rate, staircase=True)
# Optimizer
if local_network_in_distributed_training and \
hasattr(self.tp.agent, "shared_optimizer") and self.tp.agent.shared_optimizer:
# distributed training and this is the local network instantiation
self.optimizer = self.global_network.optimizer
else:
if tuning_parameters.agent.optimizer_type == 'Adam':
self.optimizer = tf.train.AdamOptimizer(learning_rate=tuning_parameters.learning_rate)
elif tuning_parameters.agent.optimizer_type == 'RMSProp':
self.optimizer = tf.train.RMSPropOptimizer(self.tp.learning_rate, decay=0.9, epsilon=0.01)
elif tuning_parameters.agent.optimizer_type == 'LBFGS':
self.optimizer = tf.contrib.opt.ScipyOptimizerInterface(self.total_loss, method='L-BFGS-B',
options={'maxiter': 25})
else:
raise Exception("{} is not a valid optimizer type".format(tuning_parameters.agent.optimizer_type))

architectures/tensorflow_components/heads.py Normal file

@@ -0,0 +1,481 @@
#
# Copyright (c) 2017 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import tensorflow as tf
import numpy as np
from utils import force_list
# Used to initialize weights for policy and value output layers
def normalized_columns_initializer(std=1.0):
def _initializer(shape, dtype=None, partition_info=None):
out = np.random.randn(*shape).astype(np.float32)
out *= std / np.sqrt(np.square(out).sum(axis=0, keepdims=True))
return tf.constant(out)
return _initializer
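normalized_columns_initializer rescales each column of a Gaussian matrix so that its L2 norm equals std; a numpy check of that property (standalone):

import numpy as np

std = 1.0
out = np.random.randn(4, 3).astype(np.float32)
out *= std / np.sqrt(np.square(out).sum(axis=0, keepdims=True))
print(np.linalg.norm(out, axis=0))  # ~[1. 1. 1.]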
class Head:
def __init__(self, tuning_parameters, head_idx=0, loss_weight=1., is_local=True):
self.head_idx = head_idx
self.name = "head"
self.output = []
self.loss = []
self.loss_type = []
self.regularizations = []
self.loss_weight = force_list(loss_weight)
self.target = []
self.input = []
self.is_local = is_local
def __call__(self, input_layer):
"""
Wrapper for building the module graph including scoping and loss creation
:param input_layer: the input to the graph
:return: the output of the last layer and the target placeholder
"""
with tf.variable_scope(self.get_name(), initializer=tf.contrib.layers.xavier_initializer()):
self._build_module(input_layer)
self.output = force_list(self.output)
self.target = force_list(self.target)
self.input = force_list(self.input)
self.loss_type = force_list(self.loss_type)
self.loss = force_list(self.loss)
self.regularizations = force_list(self.regularizations)
if self.is_local:
self.set_loss()
if self.is_local:
return self.output, self.target, self.input
else:
return self.output, self.input
def _build_module(self, input_layer):
"""
Builds the graph of the module
:param input_layer: the input to the graph
:return: None
"""
pass
def get_name(self):
"""
Get a formatted name for the module
:return: the formatted name
"""
return '{}_{}'.format(self.name, self.head_idx)
def set_loss(self):
"""
Creates a target placeholder and loss function for each loss_type and regularization
:return: None
"""
# add losses and target placeholder
for idx in range(len(self.loss_type)):
target = tf.placeholder('float', self.output[idx].shape, '{}_target'.format(self.get_name()))
self.target.append(target)
loss = self.loss_type[idx](self.target[-1], self.output[idx],
weights=self.loss_weight[idx], scope=self.get_name())
self.loss.append(loss)
# add regularizations
for regularization in self.regularizations:
self.loss.append(regularization)
class QHead(Head):
def __init__(self, tuning_parameters, head_idx=0, loss_weight=1., is_local=True):
Head.__init__(self, tuning_parameters, head_idx, loss_weight, is_local)
self.name = 'q_values_head'
self.num_actions = tuning_parameters.env_instance.action_space_size
if tuning_parameters.agent.replace_mse_with_huber_loss:
self.loss_type = tf.losses.huber_loss
else:
self.loss_type = tf.losses.mean_squared_error
def _build_module(self, input_layer):
# Standard Q Network
self.output = tf.layers.dense(input_layer, self.num_actions, name='output')
class DuelingQHead(QHead):
def __init__(self, tuning_parameters, head_idx=0, loss_weight=1., is_local=True):
QHead.__init__(self, tuning_parameters, head_idx, loss_weight, is_local)
def _build_module(self, input_layer):
# state value tower - V
with tf.variable_scope("state_value"):
state_value = tf.layers.dense(input_layer, 256, activation=tf.nn.relu)
state_value = tf.layers.dense(state_value, 1)
# action advantage tower - A
with tf.variable_scope("action_advantage"):
action_advantage = tf.layers.dense(input_layer, 256, activation=tf.nn.relu)
action_advantage = tf.layers.dense(action_advantage, self.num_actions)
            # center the advantages per state (over the actions axis), not across the whole batch
            action_advantage = action_advantage - tf.reduce_mean(action_advantage, axis=1, keep_dims=True)
# merge to state-action value function Q
self.output = tf.add(state_value, action_advantage, name='output')
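# Illustrative numpy sketch (not part of the original file) of the dueling merge
# above: Q(s, a) = V(s) + (A(s, a) - mean_a A(s, a)), i.e. the advantages are
# centered per state before being added to the state value.
def _demo_dueling_aggregation():
    v = np.array([[1.0], [2.0]])               # V(s), shape (batch, 1)
    a = np.array([[0.5, 1.5], [3.0, 1.0]])     # A(s, a), shape (batch, num_actions)
    return v + (a - a.mean(axis=1, keepdims=True))  # -> [[0.5, 1.5], [3.0, 1.0]]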
class VHead(Head):
def __init__(self, tuning_parameters, head_idx=0, loss_weight=1., is_local=True):
Head.__init__(self, tuning_parameters, head_idx, loss_weight, is_local)
self.name = 'v_values_head'
if tuning_parameters.agent.replace_mse_with_huber_loss:
self.loss_type = tf.losses.huber_loss
else:
self.loss_type = tf.losses.mean_squared_error
def _build_module(self, input_layer):
# Standard V Network
self.output = tf.layers.dense(input_layer, 1, name='output',
kernel_initializer=normalized_columns_initializer(1.0))
class PolicyHead(Head):
def __init__(self, tuning_parameters, head_idx=0, loss_weight=1., is_local=True):
Head.__init__(self, tuning_parameters, head_idx, loss_weight, is_local)
self.name = 'policy_values_head'
self.num_actions = tuning_parameters.env_instance.action_space_size
self.output_scale = np.max(tuning_parameters.env_instance.action_space_abs_range)
self.discrete_controls = tuning_parameters.env_instance.discrete_controls
self.exploration_policy = tuning_parameters.exploration.policy
        self.exploration_variance = 2 * self.output_scale * tuning_parameters.exploration.initial_noise_variance_percentage
if not self.discrete_controls and not self.output_scale:
raise ValueError("For continuous controls, an output scale for the network must be specified")
self.beta = tuning_parameters.agent.beta_entropy
def _build_module(self, input_layer):
eps = 1e-15
if self.discrete_controls:
self.actions = tf.placeholder(tf.int32, [None], name="actions")
else:
self.actions = tf.placeholder(tf.float32, [None, self.num_actions], name="actions")
self.input = [self.actions]
# Policy Head
if self.discrete_controls:
policy_values = tf.layers.dense(input_layer, self.num_actions)
self.policy_mean = tf.nn.softmax(policy_values, name="policy")
# define the distributions for the policy and the old policy
self.policy_distribution = tf.contrib.distributions.Categorical(probs=self.policy_mean)
self.output = self.policy_mean
else:
# mean
policy_values_mean = tf.layers.dense(input_layer, self.num_actions, activation=tf.nn.tanh)
self.policy_mean = tf.multiply(policy_values_mean, self.output_scale, name='output_mean')
self.output = [self.policy_mean]
# std
if self.exploration_policy == 'ContinuousEntropy':
policy_values_std = tf.layers.dense(input_layer, self.num_actions,
kernel_initializer=normalized_columns_initializer(0.01))
self.policy_std = tf.nn.softplus(policy_values_std, name='output_variance') + eps
self.output.append(self.policy_std)
else:
self.policy_std = tf.constant(self.exploration_variance, dtype='float32', shape=(self.num_actions,))
# define the distributions for the policy and the old policy
self.policy_distribution = tf.contrib.distributions.MultivariateNormalDiag(self.policy_mean,
self.policy_std)
if self.is_local:
# add entropy regularization
if self.beta:
self.entropy = tf.reduce_mean(self.policy_distribution.entropy())
self.regularizations = -tf.multiply(self.beta, self.entropy, name='entropy_regularization')
tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, self.regularizations)
# calculate loss
self.action_log_probs_wrt_policy = self.policy_distribution.log_prob(self.actions)
self.advantages = tf.placeholder(tf.float32, [None], name="advantages")
self.target = self.advantages
self.loss = -tf.reduce_mean(self.action_log_probs_wrt_policy * self.advantages)
tf.losses.add_loss(self.loss_weight[0] * self.loss)
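# Illustrative numpy sketch (not part of the original file) of the loss above:
# loss = -E[log pi(a_t|s_t) * advantage_t], so positive advantages raise the
# probability of the sampled action and negative advantages lower it.
def _demo_policy_gradient_loss():
    action_probs = np.array([0.2, 0.7])   # pi(a_t|s_t) for two sampled transitions
    advantages = np.array([1.0, -0.5])
    return -np.mean(np.log(action_probs) * advantages)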
class MeasurementsPredictionHead(Head):
def __init__(self, tuning_parameters, head_idx=0, loss_weight=1., is_local=True):
Head.__init__(self, tuning_parameters, head_idx, loss_weight, is_local)
self.name = 'future_measurements_head'
self.num_actions = tuning_parameters.env_instance.action_space_size
self.num_measurements = tuning_parameters.env.measurements_size[0] \
if tuning_parameters.env.measurements_size else 0
self.num_prediction_steps = tuning_parameters.agent.num_predicted_steps_ahead
self.multi_step_measurements_size = self.num_measurements * self.num_prediction_steps
if tuning_parameters.agent.replace_mse_with_huber_loss:
self.loss_type = tf.losses.huber_loss
else:
self.loss_type = tf.losses.mean_squared_error
def _build_module(self, input_layer):
        # This is almost identical to the dueling network, except that we predict the future measurements for each action
# actions expectation tower (expectation stream) - E
with tf.variable_scope("expectation_stream"):
expectation_stream = tf.layers.dense(input_layer, 256, activation=tf.nn.elu)
expectation_stream = tf.layers.dense(expectation_stream, self.multi_step_measurements_size)
expectation_stream = tf.expand_dims(expectation_stream, axis=1)
# action fine differences tower (action stream) - A
with tf.variable_scope("action_stream"):
action_stream = tf.layers.dense(input_layer, 256, activation=tf.nn.elu)
action_stream = tf.layers.dense(action_stream, self.num_actions * self.multi_step_measurements_size)
action_stream = tf.reshape(action_stream,
(tf.shape(action_stream)[0], self.num_actions, self.multi_step_measurements_size))
action_stream = action_stream - tf.reduce_mean(action_stream, reduction_indices=1, keep_dims=True)
# merge to future measurements predictions
self.output = tf.add(expectation_stream, action_stream, name='output')
class DNDQHead(Head):
def __init__(self, tuning_parameters, head_idx=0, loss_weight=1., is_local=True):
Head.__init__(self, tuning_parameters, head_idx, loss_weight, is_local)
self.name = 'dnd_q_values_head'
self.num_actions = tuning_parameters.env_instance.action_space_size
self.DND_size = tuning_parameters.agent.dnd_size
self.DND_key_error_threshold = tuning_parameters.agent.DND_key_error_threshold
self.l2_norm_added_delta = tuning_parameters.agent.l2_norm_added_delta
self.new_value_shift_coefficient = tuning_parameters.agent.new_value_shift_coefficient
self.number_of_nn = tuning_parameters.agent.number_of_knn
if tuning_parameters.agent.replace_mse_with_huber_loss:
self.loss_type = tf.losses.huber_loss
else:
self.loss_type = tf.losses.mean_squared_error
def _build_module(self, input_layer):
# DND based Q head
from memories import differentiable_neural_dictionary
        self.DND = differentiable_neural_dictionary.QDND(
self.DND_size, input_layer.get_shape()[-1], self.num_actions, self.new_value_shift_coefficient,
key_error_threshold=self.DND_key_error_threshold)
# Retrieve info from DND dictionary
self.action = tf.placeholder(tf.int8, [None], name="action")
self.input = self.action
result = tf.py_func(self.DND.query,
[input_layer, self.action, self.number_of_nn],
[tf.float64, tf.float64])
self.dnd_embeddings = tf.to_float(result[0])
self.dnd_values = tf.to_float(result[1])
# DND calculation
square_diff = tf.square(self.dnd_embeddings - tf.expand_dims(input_layer, 1))
        distances = tf.reduce_sum(square_diff, axis=2) + self.l2_norm_added_delta
weights = 1.0 / distances
normalised_weights = weights / tf.reduce_sum(weights, axis=1, keep_dims=True)
self.output = tf.reduce_sum(self.dnd_values * normalised_weights, axis=1)
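# Illustrative numpy sketch (not part of the original file) of the DND readout
# above: Q = sum_i w_i * v_i, with inverse-distance kernel weights
# w_i ~ 1 / (||h - h_i||^2 + delta) normalized to sum to one.
def _demo_dnd_readout(delta=1e-3):
    distances = np.array([0.1, 0.4, 1.0])  # squared L2 distances to 3 neighbours
    values = np.array([5.0, 3.0, 1.0])     # Q values stored for those neighbours
    weights = 1.0 / (distances + delta)
    weights /= weights.sum()
    return (weights * values).sum()        # ~4.3, dominated by the nearest neighbour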
class NAFHead(Head):
def __init__(self, tuning_parameters, head_idx=0, loss_weight=1., is_local=True):
Head.__init__(self, tuning_parameters, head_idx, loss_weight, is_local)
self.name = 'naf_q_values_head'
self.num_actions = tuning_parameters.env_instance.action_space_size
self.output_scale = np.max(tuning_parameters.env_instance.action_space_abs_range)
if tuning_parameters.agent.replace_mse_with_huber_loss:
self.loss_type = tf.losses.huber_loss
else:
self.loss_type = tf.losses.mean_squared_error
def _build_module(self, input_layer):
# NAF
self.action = tf.placeholder(tf.float32, [None, self.num_actions], name="action")
self.input = self.action
# V Head
self.V = tf.layers.dense(input_layer, 1, name='V')
# mu Head
mu_unscaled = tf.layers.dense(input_layer, self.num_actions, activation=tf.nn.tanh, name='mu_unscaled')
self.mu = tf.multiply(mu_unscaled, self.output_scale, name='mu')
# A Head
        # l_vector holds the entries of a lower-triangular matrix
        self.l_vector = tf.layers.dense(input_layer, (self.num_actions * (self.num_actions + 1)) // 2,
                                        name='l_vector')
# Convert l to a lower triangular matrix and exponentiate its diagonal
i = 0
columns = []
for col in range(self.num_actions):
start_row = col
num_non_zero_elements = self.num_actions - start_row
zeros_column_part = tf.zeros_like(self.l_vector[:, 0:start_row])
diag_element = tf.expand_dims(tf.exp(self.l_vector[:, i]), 1)
non_zeros_non_diag_column_part = self.l_vector[:, (i + 1):(i + num_non_zero_elements)]
columns.append(tf.concat([zeros_column_part, diag_element, non_zeros_non_diag_column_part], axis=1))
i += num_non_zero_elements
self.L = tf.transpose(tf.stack(columns, axis=1), (0, 2, 1))
# P = L*L^T
self.P = tf.matmul(self.L, tf.transpose(self.L, (0, 2, 1)))
# A = -1/2 * (u - mu)^T * P * (u - mu)
action_diff = tf.expand_dims(self.action - self.mu, -1)
a_matrix_form = -0.5 * tf.matmul(tf.transpose(action_diff, (0, 2, 1)), tf.matmul(self.P, action_diff))
self.A = tf.reshape(a_matrix_form, [-1, 1])
# Q Head
self.Q = tf.add(self.V, self.A, name='Q')
self.output = self.Q
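# Illustrative numpy sketch (not part of the original file) of the NAF advantage
# above: P = L L^T is positive-semidefinite by construction, so A(u) <= 0
# everywhere and A(u) == 0 exactly when the action u equals the policy mean mu.
def _demo_naf_advantage():
    L = np.array([[1.0, 0.0], [0.5, 2.0]])  # lower triangular, positive diagonal
    P = L.dot(L.T)
    diff = np.array([0.3, -0.2])            # u - mu
    return -0.5 * diff.dot(P).dot(diff)     # -> -0.1, always <= 0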
class PPOHead(Head):
def __init__(self, tuning_parameters, head_idx=0, loss_weight=1., is_local=True):
Head.__init__(self, tuning_parameters, head_idx, loss_weight, is_local)
self.name = 'ppo_head'
self.num_actions = tuning_parameters.env_instance.action_space_size
self.discrete_controls = tuning_parameters.env_instance.discrete_controls
self.output_scale = np.max(tuning_parameters.env_instance.action_space_abs_range)
self.kl_coefficient = tf.Variable(tuning_parameters.agent.initial_kl_coefficient,
trainable=False, name='kl_coefficient')
self.kl_cutoff = 2*tuning_parameters.agent.target_kl_divergence
self.high_kl_penalty_coefficient = tuning_parameters.agent.high_kl_penalty_coefficient
self.clip_likelihood_ratio_using_epsilon = tuning_parameters.agent.clip_likelihood_ratio_using_epsilon
self.use_kl_regularization = tuning_parameters.agent.use_kl_regularization
self.beta = tuning_parameters.agent.beta_entropy
def _build_module(self, input_layer):
eps = 1e-15
if self.discrete_controls:
self.actions = tf.placeholder(tf.int32, [None], name="actions")
else:
self.actions = tf.placeholder(tf.float32, [None, self.num_actions], name="actions")
self.old_policy_mean = tf.placeholder(tf.float32, [None, self.num_actions], "old_policy_mean")
self.old_policy_std = tf.placeholder(tf.float32, [None, self.num_actions], "old_policy_std")
# Policy Head
if self.discrete_controls:
self.input = [self.actions, self.old_policy_mean]
policy_values = tf.layers.dense(input_layer, self.num_actions)
self.policy_mean = tf.nn.softmax(policy_values, name="policy")
# define the distributions for the policy and the old policy
self.policy_distribution = tf.contrib.distributions.Categorical(probs=self.policy_mean)
self.old_policy_distribution = tf.contrib.distributions.Categorical(probs=self.old_policy_mean)
self.output = self.policy_mean
else:
self.input = [self.actions, self.old_policy_mean, self.old_policy_std]
self.policy_mean = tf.layers.dense(input_layer, self.num_actions, name='policy_mean')
self.policy_logstd = tf.Variable(np.zeros((1, self.num_actions)), dtype='float32')
self.policy_std = tf.tile(tf.exp(self.policy_logstd), [tf.shape(input_layer)[0], 1], name='policy_std')
# define the distributions for the policy and the old policy
self.policy_distribution = tf.contrib.distributions.MultivariateNormalDiag(self.policy_mean,
self.policy_std)
self.old_policy_distribution = tf.contrib.distributions.MultivariateNormalDiag(self.old_policy_mean,
self.old_policy_std)
self.output = [self.policy_mean, self.policy_std]
self.action_probs_wrt_policy = tf.exp(self.policy_distribution.log_prob(self.actions))
self.action_probs_wrt_old_policy = tf.exp(self.old_policy_distribution.log_prob(self.actions))
self.entropy = tf.reduce_mean(self.policy_distribution.entropy())
# add kl divergence regularization
self.kl_divergence = tf.reduce_mean(tf.contrib.distributions.kl_divergence(self.old_policy_distribution,
self.policy_distribution))
if self.use_kl_regularization:
# no clipping => use kl regularization
self.weighted_kl_divergence = tf.multiply(self.kl_coefficient, self.kl_divergence)
self.regularizations = self.weighted_kl_divergence + self.high_kl_penalty_coefficient * \
tf.square(tf.maximum(0.0, self.kl_divergence - self.kl_cutoff))
tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, self.regularizations)
# calculate surrogate loss
self.advantages = tf.placeholder(tf.float32, [None], name="advantages")
self.target = self.advantages
        # eps guards against division by zero when the old policy assigns a sampled action zero probability
        self.likelihood_ratio = self.action_probs_wrt_policy / (self.action_probs_wrt_old_policy + eps)
if self.clip_likelihood_ratio_using_epsilon is not None:
max_value = 1 + self.clip_likelihood_ratio_using_epsilon
min_value = 1 - self.clip_likelihood_ratio_using_epsilon
self.clipped_likelihood_ratio = tf.clip_by_value(self.likelihood_ratio, min_value, max_value)
self.scaled_advantages = tf.minimum(self.likelihood_ratio * self.advantages,
self.clipped_likelihood_ratio * self.advantages)
else:
self.scaled_advantages = self.likelihood_ratio * self.advantages
        # the minus sign turns the surrogate objective, which we want to maximize, into a loss to be minimized
self.surrogate_loss = -tf.reduce_mean(self.scaled_advantages)
if self.is_local:
# add entropy regularization
if self.beta:
self.entropy = tf.reduce_mean(self.policy_distribution.entropy())
self.regularizations = -tf.multiply(self.beta, self.entropy, name='entropy_regularization')
tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, self.regularizations)
self.loss = self.surrogate_loss
tf.losses.add_loss(self.loss)
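# Illustrative numpy sketch (not part of the original file) of the clipped
# surrogate above: min(r * A, clip(r, 1 - eps, 1 + eps) * A) removes any
# incentive to push the likelihood ratio r outside [1 - eps, 1 + eps].
def _demo_clipped_surrogate(epsilon=0.2):
    ratio = np.array([0.5, 1.5, 1.1])       # pi_new(a|s) / pi_old(a|s)
    advantages = np.array([1.0, 1.0, -1.0])
    clipped = np.clip(ratio, 1 - epsilon, 1 + epsilon)
    # the 1.5 ratio is capped at 1.2, so its gradient incentive vanishes
    return -np.minimum(ratio * advantages, clipped * advantages).mean()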
class PPOVHead(Head):
def __init__(self, tuning_parameters, head_idx=0, loss_weight=1., is_local=True):
Head.__init__(self, tuning_parameters, head_idx, loss_weight, is_local)
self.name = 'ppo_v_head'
self.clip_likelihood_ratio_using_epsilon = tuning_parameters.agent.clip_likelihood_ratio_using_epsilon
def _build_module(self, input_layer):
self.old_policy_value = tf.placeholder(tf.float32, [None], "old_policy_values")
self.input = [self.old_policy_value]
self.output = tf.layers.dense(input_layer, 1, name='output',
kernel_initializer=normalized_columns_initializer(1.0))
self.target = self.total_return = tf.placeholder(tf.float32, [None], name="total_return")
        # squeeze the value prediction from shape [batch, 1] to [batch] so it matches the targets
        # instead of silently broadcasting to [batch, batch]
        value = tf.squeeze(self.output, axis=-1)
        value_loss_1 = tf.square(value - self.target)
        value_loss_2 = tf.square(self.old_policy_value +
                                 tf.clip_by_value(value - self.old_policy_value,
                                                  -self.clip_likelihood_ratio_using_epsilon,
                                                  self.clip_likelihood_ratio_using_epsilon) - self.target)
self.vf_loss = tf.reduce_mean(tf.maximum(value_loss_1, value_loss_2))
self.loss = self.vf_loss
tf.losses.add_loss(self.loss)
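# Illustrative numpy sketch (not part of the original file) of the pessimistic
# value loss above: the update is penalized by the worse (larger) of the
# unclipped and clipped squared errors.
def _demo_clipped_value_loss(epsilon=0.2):
    v_new, v_old, total_return = 1.8, 1.0, 2.0
    v_clipped = v_old + np.clip(v_new - v_old, -epsilon, epsilon)  # -> 1.2
    return max((v_new - total_return) ** 2, (v_clipped - total_return) ** 2)  # -> 0.64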
class DistributionalQHead(Head):
def __init__(self, tuning_parameters, head_idx=0, loss_weight=1., is_local=True):
Head.__init__(self, tuning_parameters, head_idx, loss_weight, is_local)
self.name = 'distributional_dqn_head'
self.num_actions = tuning_parameters.env_instance.action_space_size
self.num_atoms = tuning_parameters.agent.atoms
def _build_module(self, input_layer):
self.actions = tf.placeholder(tf.int32, [None], name="actions")
self.input = [self.actions]
values_distribution = tf.layers.dense(input_layer, self.num_actions * self.num_atoms)
values_distribution = tf.reshape(values_distribution, (tf.shape(values_distribution)[0], self.num_actions, self.num_atoms))
# softmax on atoms dimension
self.output = tf.nn.softmax(values_distribution)
# calculate cross entropy loss
self.distributions = tf.placeholder(tf.float32, shape=(None, self.num_actions, self.num_atoms), name="distributions")
self.target = self.distributions
        # reduce the per-sample cross entropy to the scalar loss expected by tf.losses.add_loss
        self.loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(labels=self.target, logits=values_distribution))
tf.losses.add_loss(self.loss)
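# Illustrative numpy sketch (not part of the original file): the head above
# outputs a categorical distribution over a fixed support of atoms per action;
# a scalar Q value is recovered as the expectation over that support.
def _demo_distributional_q():
    atoms = np.linspace(-2.0, 2.0, 5)              # hypothetical support
    logits = np.array([0.1, 0.2, 1.5, 0.2, 0.1])   # atom logits for one action
    probs = np.exp(logits) / np.exp(logits).sum()  # softmax over the atoms axis
    return (probs * atoms).sum()                   # -> 0.0 for this symmetric case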

View File

@@ -0,0 +1,65 @@
#
# Copyright (c) 2017 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import tensorflow as tf
import numpy as np
class MiddlewareEmbedder:
def __init__(self, activation_function=tf.nn.relu, name="middleware_embedder"):
self.name = name
self.input = None
self.output = None
self.activation_function = activation_function
def __call__(self, input_layer):
with tf.variable_scope(self.get_name()):
self.input = input_layer
self._build_module()
return self.input, self.output
def _build_module(self):
pass
def get_name(self):
return self.name
class LSTM_Embedder(MiddlewareEmbedder):
def _build_module(self):
middleware = tf.layers.dense(self.input, 512, activation=self.activation_function)
lstm_cell = tf.contrib.rnn.BasicLSTMCell(256, state_is_tuple=True)
self.c_init = np.zeros((1, lstm_cell.state_size.c), np.float32)
self.h_init = np.zeros((1, lstm_cell.state_size.h), np.float32)
self.state_init = [self.c_init, self.h_init]
self.c_in = tf.placeholder(tf.float32, [1, lstm_cell.state_size.c])
self.h_in = tf.placeholder(tf.float32, [1, lstm_cell.state_size.h])
self.state_in = (self.c_in, self.h_in)
rnn_in = tf.expand_dims(middleware, [0])
step_size = tf.shape(middleware)[:1]
state_in = tf.contrib.rnn.LSTMStateTuple(self.c_in, self.h_in)
lstm_outputs, lstm_state = tf.nn.dynamic_rnn(
lstm_cell, rnn_in, initial_state=state_in, sequence_length=step_size, time_major=False)
lstm_c, lstm_h = lstm_state
self.state_out = (lstm_c[:1, :], lstm_h[:1, :])
self.output = tf.reshape(lstm_outputs, [-1, 256])
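# Illustrative usage sketch (not part of the original file): the recurrent state
# lives outside the graph, so the caller feeds state_in on every step and carries
# state_out forward. `embedder`, `sess` and `obs_batch` are hypothetical; TF 1.x
# allows feeding the intermediate `embedder.input` tensor directly.
def _demo_lstm_state_carrying(embedder, sess, obs_batch):
    c, h = embedder.state_init               # zero state at the start of an episode
    output, (c, h) = sess.run([embedder.output, embedder.state_out],
                              feed_dict={embedder.input: obs_batch,
                                         embedder.c_in: c, embedder.h_in: h})
    return output, (c, h)                     # reuse (c, h) on the next step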
class FC_Embedder(MiddlewareEmbedder):
def _build_module(self):
self.output = tf.layers.dense(self.input, 512, activation=self.activation_function)

View File

@@ -0,0 +1,81 @@
#
# Copyright (c) 2017 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import tensorflow as tf
import numpy as np
class SharedRunningStats(object):
def __init__(self, tuning_parameters, replicated_device, epsilon=1e-2, shape=(), name=""):
self.tp = tuning_parameters
with tf.device(replicated_device):
with tf.variable_scope(name):
self._sum = tf.get_variable(
dtype=tf.float64,
shape=shape,
initializer=tf.constant_initializer(0.0),
name="running_sum", trainable=False)
self._sum_squared = tf.get_variable(
dtype=tf.float64,
shape=shape,
initializer=tf.constant_initializer(epsilon),
name="running_sum_squared", trainable=False)
self._count = tf.get_variable(
dtype=tf.float64,
shape=(),
initializer=tf.constant_initializer(epsilon),
name="count", trainable=False)
self._shape = shape
self._mean = tf.to_float(self._sum / self._count)
self._std = tf.sqrt(tf.maximum(tf.to_float(self._sum_squared / self._count) - tf.square(self._mean), 1e-2))
self.new_sum = tf.placeholder(shape=self.shape, dtype=tf.float64, name='sum')
                self.new_sum_squared = tf.placeholder(shape=self.shape, dtype=tf.float64, name='sum_squared')
self.newcount = tf.placeholder(shape=[], dtype=tf.float64, name='count')
self._inc_sum = tf.assign_add(self._sum, self.new_sum, use_locking=True)
self._inc_sum_squared = tf.assign_add(self._sum_squared, self.new_sum_squared, use_locking=True)
self._inc_count = tf.assign_add(self._count, self.newcount, use_locking=True)
def push(self, x):
x = x.astype('float64')
self.tp.sess.run([self._inc_sum, self._inc_sum_squared, self._inc_count],
feed_dict={
self.new_sum: x.sum(axis=0).ravel(),
self.new_sum_squared: np.square(x).sum(axis=0).ravel(),
self.newcount: np.array(len(x), dtype='float64')
})
@property
def n(self):
return self.tp.sess.run(self._count)
@property
def mean(self):
return self.tp.sess.run(self._mean)
@property
def var(self):
return self.std ** 2
@property
def std(self):
return self.tp.sess.run(self._std)
@property
def shape(self):
return self._shape
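# Illustrative numpy sketch (not part of the original file) of the statistics the
# class above maintains: mean = sum / count and std = sqrt(E[x^2] - mean^2), with
# the same lower clipping, so workers only ever push sums and counts.
def _demo_running_stats():
    x = np.random.randn(1000) * 3.0 + 5.0
    total, total_sq, count = x.sum(), np.square(x).sum(), len(x)
    mean = total / count
    std = np.sqrt(max(total_sq / count - mean ** 2, 1e-2))
    return mean, std  # ~(5.0, 3.0)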