1
0
mirror of https://github.com/gryf/coach.git synced 2026-03-14 05:35:55 +01:00

network_imporvements branch merge (branch name sic; i.e. "network improvements")

This commit is contained in:
Shadi Endrawis
2018-10-02 13:41:46 +03:00
parent 72ea933384
commit 51726a5b80
110 changed files with 1639 additions and 1161 deletions

View File

@@ -16,7 +16,7 @@
import tensorflow as tf
from rl_coach.architectures.tensorflow_components.architecture import Dense
from rl_coach.architectures.tensorflow_components.layers import Dense
from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters
from rl_coach.base_parameters import AgentParameters
@@ -25,9 +25,13 @@ from rl_coach.spaces import SpacesDefinition
class CategoricalQHeadParameters(HeadParameters):
def __init__(self, activation_function: str ='relu', name: str='categorical_q_head_params', dense_layer=Dense):
def __init__(self, activation_function: str ='relu', name: str='categorical_q_head_params',
num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
loss_weight: float = 1.0, dense_layer=Dense):
super().__init__(parameterized_class=CategoricalQHead, activation_function=activation_function, name=name,
dense_layer=dense_layer)
dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
loss_weight=loss_weight)
class CategoricalQHead(Head):
@@ -54,3 +58,12 @@ class CategoricalQHead(Head):
self.target = self.distributions
self.loss = tf.nn.softmax_cross_entropy_with_logits(labels=self.target, logits=values_distribution)
tf.losses.add_loss(self.loss)
def __str__(self):
result = [
"Dense (num outputs = {})".format(self.num_actions * self.num_atoms),
"Reshape (output size = {} x {})".format(self.num_actions, self.num_atoms),
"Softmax"
]
return '\n'.join(result)

View File

@@ -16,27 +16,34 @@
import tensorflow as tf
from rl_coach.architectures.tensorflow_components.architecture import Dense
from rl_coach.architectures.tensorflow_components.layers import Dense, batchnorm_activation_dropout
from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters
from rl_coach.base_parameters import AgentParameters
from rl_coach.core_types import QActionStateValue
from rl_coach.spaces import SpacesDefinition, BoxActionSpace, DiscreteActionSpace
from rl_coach.utils import force_list
class RegressionHeadParameters(HeadParameters):
def __init__(self, activation_function: str ='relu', name: str='q_head_params', dense_layer=Dense):
def __init__(self, activation_function: str ='relu', name: str='q_head_params',
num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
loss_weight: float = 1.0, dense_layer=Dense, scheme=[Dense(256), Dense(256)]):
super().__init__(parameterized_class=RegressionHead, activation_function=activation_function, name=name,
dense_layer=dense_layer)
dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
loss_weight=loss_weight)
class RegressionHead(Head):
def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, network_name: str,
head_idx: int = 0, loss_weight: float = 1., is_local: bool = True, activation_function: str='relu',
dense_layer=Dense):
dense_layer=Dense, scheme=[Dense(256), Dense(256)]):
super().__init__(agent_parameters, spaces, network_name, head_idx, loss_weight, is_local, activation_function,
dense_layer=dense_layer)
self.name = 'regression_head'
self.scheme = scheme
self.layers = []
if isinstance(self.spaces.action, BoxActionSpace):
self.num_actions = self.spaces.action.shape[0]
elif isinstance(self.spaces.action, DiscreteActionSpace):
@@ -48,9 +55,18 @@ class RegressionHead(Head):
self.loss_type = tf.losses.mean_squared_error
def _build_module(self, input_layer):
self.fc1 = self.dense_layer(256)(input_layer)
self.fc2 = self.dense_layer(256)(self.fc1)
self.output = self.dense_layer(self.num_actions)(self.fc2, name='output')
self.layers.append(input_layer)
for idx, layer_params in enumerate(self.scheme):
self.layers.extend(force_list(
layer_params(input_layer=self.layers[-1], name='{}_{}'.format(layer_params.__class__.__name__, idx))
))
self.layers.append(self.dense_layer(self.num_actions)(self.layers[-1], name='output'))
self.output = self.layers[-1]
def __str__(self):
result = []
for layer in self.layers:
result.append(str(layer))
return '\n'.join(result)

View File

@@ -16,7 +16,7 @@
import tensorflow as tf
from rl_coach.architectures.tensorflow_components.architecture import batchnorm_activation_dropout, Dense
from rl_coach.architectures.tensorflow_components.layers import batchnorm_activation_dropout, Dense
from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters
from rl_coach.base_parameters import AgentParameters
from rl_coach.core_types import ActionProbabilities
@@ -25,9 +25,12 @@ from rl_coach.spaces import SpacesDefinition
class DDPGActorHeadParameters(HeadParameters):
def __init__(self, activation_function: str ='tanh', name: str='policy_head_params', batchnorm: bool=True,
dense_layer=Dense):
num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
loss_weight: float = 1.0, dense_layer=Dense):
super().__init__(parameterized_class=DDPGActor, activation_function=activation_function, name=name,
dense_layer=dense_layer)
dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
loss_weight=loss_weight)
self.batchnorm = batchnorm
@@ -56,7 +59,7 @@ class DDPGActor(Head):
pre_activation_policy_values_mean = self.dense_layer(self.num_actions)(input_layer, name='fc_mean')
policy_values_mean = batchnorm_activation_dropout(pre_activation_policy_values_mean, self.batchnorm,
self.activation_function,
False, 0, 0)[-1]
False, 0, is_training=False, name="BatchnormActivationDropout_0")[-1]
self.policy_mean = tf.multiply(policy_values_mean, self.output_scale, name='output_mean')
if self.is_local:
@@ -66,3 +69,9 @@ class DDPGActor(Head):
[self.action_penalty * tf.reduce_mean(tf.square(pre_activation_policy_values_mean))]
self.output = [self.policy_mean]
def __str__(self):
result = [
'Dense (num outputs = {})'.format(self.num_actions[0])
]
return '\n'.join(result)

View File

@@ -15,7 +15,7 @@
#
import tensorflow as tf
from rl_coach.architectures.tensorflow_components.architecture import Dense
from rl_coach.architectures.tensorflow_components.layers import Dense
from rl_coach.architectures.tensorflow_components.heads.head import HeadParameters
from rl_coach.architectures.tensorflow_components.heads.q_head import QHead
from rl_coach.base_parameters import AgentParameters
@@ -24,9 +24,13 @@ from rl_coach.spaces import SpacesDefinition
class DNDQHeadParameters(HeadParameters):
def __init__(self, activation_function: str ='relu', name: str='dnd_q_head_params', dense_layer=Dense):
def __init__(self, activation_function: str ='relu', name: str='dnd_q_head_params',
num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
loss_weight: float = 1.0, dense_layer=Dense):
super().__init__(parameterized_class=DNDQHead, activation_function=activation_function, name=name,
dense_layer=dense_layer)
dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
loss_weight=loss_weight)
class DNDQHead(QHead):
@@ -89,3 +93,9 @@ class DNDQHead(QHead):
# DND gradients
self.dnd_embeddings_grad = tf.gradients(self.loss[0], self.dnd_embeddings)
self.dnd_values_grad = tf.gradients(self.loss[0], self.dnd_values)
def __str__(self):
result = [
"DND fetch (num outputs = {})".format(self.num_actions)
]
return '\n'.join(result)

View File

@@ -16,7 +16,7 @@
import tensorflow as tf
from rl_coach.architectures.tensorflow_components.architecture import Dense
from rl_coach.architectures.tensorflow_components.layers import Dense
from rl_coach.architectures.tensorflow_components.heads.head import HeadParameters
from rl_coach.architectures.tensorflow_components.heads.q_head import QHead
from rl_coach.base_parameters import AgentParameters
@@ -24,9 +24,13 @@ from rl_coach.spaces import SpacesDefinition
class DuelingQHeadParameters(HeadParameters):
def __init__(self, activation_function: str ='relu', name: str='dueling_q_head_params', dense_layer=Dense):
super().__init__(parameterized_class=DuelingQHead, activation_function=activation_function, name=name, dense_layer=dense_layer)
def __init__(self, activation_function: str ='relu', name: str='dueling_q_head_params',
num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
loss_weight: float = 1.0, dense_layer=Dense):
super().__init__(parameterized_class=DuelingQHead, activation_function=activation_function, name=name,
dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
loss_weight=loss_weight)
class DuelingQHead(QHead):
def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, network_name: str,
@@ -51,3 +55,16 @@ class DuelingQHead(QHead):
# merge to state-action value function Q
self.output = tf.add(self.state_value, self.action_advantage, name='output')
def __str__(self):
result = [
"State Value Stream - V",
"\tDense (num outputs = 512)",
"\tDense (num outputs = 1)",
"Action Advantage Stream - A",
"\tDense (num outputs = 512)",
"\tDense (num outputs = {})".format(self.num_actions),
"\tSubtract(A, Mean(A))".format(self.num_actions),
"Add (V, A)"
]
return '\n'.join(result)

View File

@@ -18,7 +18,7 @@ from typing import Type
import numpy as np
import tensorflow as tf
from tensorflow.python.ops.losses.losses_impl import Reduction
from rl_coach.architectures.tensorflow_components.architecture import Dense
from rl_coach.architectures.tensorflow_components.layers import Dense
from rl_coach.base_parameters import AgentParameters, Parameters, NetworkComponentParameters
from rl_coach.spaces import SpacesDefinition
from rl_coach.utils import force_list
@@ -35,10 +35,14 @@ def normalized_columns_initializer(std=1.0):
class HeadParameters(NetworkComponentParameters):
def __init__(self, parameterized_class: Type['Head'], activation_function: str = 'relu', name: str= 'head',
dense_layer=Dense):
num_output_head_copies: int=1, rescale_gradient_from_head_by_factor: float=1.0,
loss_weight: float=1.0, dense_layer=Dense):
super().__init__(dense_layer=dense_layer)
self.activation_function = activation_function
self.name = name
self.num_output_head_copies = num_output_head_copies
self.rescale_gradient_from_head_by_factor = rescale_gradient_from_head_by_factor
self.loss_weight = loss_weight
self.parameterized_class_name = parameterized_class.__name__

View File

@@ -16,7 +16,7 @@
import tensorflow as tf
from rl_coach.architectures.tensorflow_components.architecture import Dense
from rl_coach.architectures.tensorflow_components.layers import Dense
from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters
from rl_coach.base_parameters import AgentParameters
@@ -26,9 +26,12 @@ from rl_coach.spaces import SpacesDefinition
class MeasurementsPredictionHeadParameters(HeadParameters):
def __init__(self, activation_function: str ='relu', name: str='measurements_prediction_head_params',
dense_layer=Dense):
super().__init__(parameterized_class=MeasurementsPredictionHead,
activation_function=activation_function, name=name, dense_layer=dense_layer)
num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
loss_weight: float = 1.0, dense_layer=Dense):
super().__init__(parameterized_class=MeasurementsPredictionHead, activation_function=activation_function, name=name,
dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
loss_weight=loss_weight)
class MeasurementsPredictionHead(Head):
@@ -68,3 +71,17 @@ class MeasurementsPredictionHead(Head):
targets_nonan = tf.where(tf.is_nan(self.target), self.output, self.target)
self.loss = tf.reduce_sum(tf.reduce_mean(tf.square(targets_nonan - self.output), reduction_indices=0))
tf.losses.add_loss(self.loss_weight[0] * self.loss)
def __str__(self):
result = [
"State Value Stream - V",
"\tDense (num outputs = 256)",
"\tDense (num outputs = {})".format(self.multi_step_measurements_size),
"Action Advantage Stream - A",
"\tDense (num outputs = 256)",
"\tDense (num outputs = {})".format(self.num_actions * self.multi_step_measurements_size),
"\tReshape (new size = {} x {})".format(self.num_actions, self.multi_step_measurements_size),
"\tSubtract(A, Mean(A))".format(self.num_actions),
"Add (V, A)"
]
return '\n'.join(result)

View File

@@ -16,7 +16,7 @@
import tensorflow as tf
from rl_coach.architectures.tensorflow_components.architecture import Dense
from rl_coach.architectures.tensorflow_components.layers import Dense
from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters
from rl_coach.base_parameters import AgentParameters
from rl_coach.core_types import QActionStateValue
@@ -25,9 +25,13 @@ from rl_coach.spaces import SpacesDefinition
class NAFHeadParameters(HeadParameters):
def __init__(self, activation_function: str ='tanh', name: str='naf_head_params', dense_layer=Dense):
def __init__(self, activation_function: str ='tanh', name: str='naf_head_params',
num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
loss_weight: float = 1.0, dense_layer=Dense):
super().__init__(parameterized_class=NAFHead, activation_function=activation_function, name=name,
dense_layer=dense_layer)
dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
loss_weight=loss_weight)
class NAFHead(Head):
@@ -90,3 +94,21 @@ class NAFHead(Head):
self.Q = tf.add(self.V, self.A, name='Q')
self.output = self.Q
def __str__(self):
result = [
"State Value Stream - V",
"\tDense (num outputs = 1)",
"Action Advantage Stream - A",
"\tDense (num outputs = {})".format((self.num_actions * (self.num_actions + 1)) / 2),
"\tReshape to lower triangular matrix L (new size = {} x {})".format(self.num_actions, self.num_actions),
"\tP = L*L^T",
"\tA = -1/2 * (u - mu)^T * P * (u - mu)",
"Action Stream - mu",
"\tDense (num outputs = {})".format(self.num_actions),
"\tActivation (type = {})".format(self.activation_function.__name__),
"\tMultiply (factor = {})".format(self.output_scale),
"State-Action Value Stream - Q",
"\tAdd (V, A)"
]
return '\n'.join(result)

View File

@@ -17,20 +17,25 @@
import numpy as np
import tensorflow as tf
from rl_coach.architectures.tensorflow_components.architecture import Dense
from rl_coach.architectures.tensorflow_components.layers import Dense
from rl_coach.architectures.tensorflow_components.heads.head import Head, normalized_columns_initializer, HeadParameters
from rl_coach.base_parameters import AgentParameters
from rl_coach.core_types import ActionProbabilities
from rl_coach.exploration_policies.continuous_entropy import ContinuousEntropyParameters
from rl_coach.spaces import DiscreteActionSpace, BoxActionSpace, CompoundActionSpace
from rl_coach.spaces import SpacesDefinition
from rl_coach.utils import eps
from rl_coach.utils import eps, indent_string
class PolicyHeadParameters(HeadParameters):
def __init__(self, activation_function: str ='tanh', name: str='policy_head_params', dense_layer=Dense):
def __init__(self, activation_function: str ='tanh', name: str='policy_head_params',
num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
loss_weight: float = 1.0, dense_layer=Dense):
super().__init__(parameterized_class=PolicyHead, activation_function=activation_function, name=name,
dense_layer=dense_layer)
dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
loss_weight=loss_weight)
class PolicyHead(Head):
@@ -112,7 +117,7 @@ class PolicyHead(Head):
self.actions.append(tf.placeholder(tf.float32, [None, num_actions], name="actions"))
# output activation function
if np.all(self.spaces.action.max_abs_range < np.inf):
if np.all(action_space.max_abs_range < np.inf):
# bounded actions
self.output_scale = action_space.max_abs_range
self.continuous_output_activation = self.activation_function
@@ -158,3 +163,45 @@ class PolicyHead(Head):
if self.action_penalty and self.action_penalty != 0:
self.regularizations += [
self.action_penalty * tf.reduce_mean(tf.square(pre_activation_policy_values_mean))]
def __str__(self):
action_spaces = [self.spaces.action]
if isinstance(self.spaces.action, CompoundActionSpace):
action_spaces = self.spaces.action.sub_action_spaces
result = []
for action_space_idx, action_space in enumerate(action_spaces):
action_head_mean_result = []
if isinstance(action_space, DiscreteActionSpace):
# create a discrete action network (softmax probabilities output)
action_head_mean_result.append("Dense (num outputs = {})".format(len(action_space.actions)))
action_head_mean_result.append("Softmax")
elif isinstance(action_space, BoxActionSpace):
# create a continuous action network (bounded mean and stdev outputs)
action_head_mean_result.append("Dense (num outputs = {})".format(action_space.shape))
if np.all(action_space.max_abs_range < np.inf):
# bounded actions
action_head_mean_result.append("Activation (type = {})".format(self.activation_function.__name__))
action_head_mean_result.append("Multiply (factor = {})".format(action_space.max_abs_range))
action_head_stdev_result = []
if isinstance(self.exploration_policy, ContinuousEntropyParameters):
action_head_stdev_result.append("Dense (num outputs = {})".format(action_space.shape))
action_head_stdev_result.append("Softplus")
action_head_result = []
if action_head_stdev_result:
action_head_result.append("Mean Stream")
action_head_result.append(indent_string('\n'.join(action_head_mean_result)))
action_head_result.append("Stdev Stream")
action_head_result.append(indent_string('\n'.join(action_head_stdev_result)))
else:
action_head_result.append('\n'.join(action_head_mean_result))
if len(action_spaces) > 1:
result.append("Action head {}".format(action_space_idx))
result.append(indent_string('\n'.join(action_head_result)))
else:
result.append('\n'.join(action_head_result))
return '\n'.join(result)

View File

@@ -17,7 +17,7 @@
import numpy as np
import tensorflow as tf
from rl_coach.architectures.tensorflow_components.architecture import Dense
from rl_coach.architectures.tensorflow_components.layers import Dense
from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters, normalized_columns_initializer
from rl_coach.base_parameters import AgentParameters
from rl_coach.core_types import ActionProbabilities
@@ -27,9 +27,13 @@ from rl_coach.utils import eps
class PPOHeadParameters(HeadParameters):
def __init__(self, activation_function: str ='tanh', name: str='ppo_head_params', dense_layer=Dense):
def __init__(self, activation_function: str ='tanh', name: str='ppo_head_params',
num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
loss_weight: float = 1.0, dense_layer=Dense):
super().__init__(parameterized_class=PPOHead, activation_function=activation_function, name=name,
dense_layer=dense_layer)
dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
loss_weight=loss_weight)
class PPOHead(Head):
@@ -146,3 +150,15 @@ class PPOHead(Head):
self.old_policy_distribution = tf.contrib.distributions.MultivariateNormalDiag(self.old_policy_mean, self.old_policy_std + eps)
self.output = [self.policy_mean, self.policy_std]
def __str__(self):
action_head_mean_result = []
if isinstance(self.spaces.action, DiscreteActionSpace):
# create a discrete action network (softmax probabilities output)
action_head_mean_result.append("Dense (num outputs = {})".format(len(self.spaces.action.actions)))
action_head_mean_result.append("Softmax")
elif isinstance(self.spaces.action, BoxActionSpace):
# create a continuous action network (bounded mean and stdev outputs)
action_head_mean_result.append("Dense (num outputs = {})".format(self.spaces.action.shape))
return '\n'.join(action_head_mean_result)

View File

@@ -16,7 +16,7 @@
import tensorflow as tf
from rl_coach.architectures.tensorflow_components.architecture import Dense
from rl_coach.architectures.tensorflow_components.layers import Dense
from rl_coach.architectures.tensorflow_components.heads.head import Head, normalized_columns_initializer, HeadParameters
from rl_coach.base_parameters import AgentParameters
@@ -25,9 +25,13 @@ from rl_coach.spaces import SpacesDefinition
class PPOVHeadParameters(HeadParameters):
def __init__(self, activation_function: str ='relu', name: str='ppo_v_head_params', dense_layer=Dense):
def __init__(self, activation_function: str ='relu', name: str='ppo_v_head_params',
num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
loss_weight: float = 1.0, dense_layer=Dense):
super().__init__(parameterized_class=PPOVHead, activation_function=activation_function, name=name,
dense_layer=dense_layer)
dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
loss_weight=loss_weight)
class PPOVHead(Head):
@@ -55,3 +59,9 @@ class PPOVHead(Head):
self.vf_loss = tf.reduce_mean(tf.maximum(value_loss_1, value_loss_2))
self.loss = self.vf_loss
tf.losses.add_loss(self.loss)
def __str__(self):
result = [
"Dense (num outputs = 1)"
]
return '\n'.join(result)

View File

@@ -16,7 +16,7 @@
import tensorflow as tf
from rl_coach.architectures.tensorflow_components.architecture import Dense
from rl_coach.architectures.tensorflow_components.layers import Dense
from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters
from rl_coach.base_parameters import AgentParameters
@@ -25,9 +25,13 @@ from rl_coach.spaces import SpacesDefinition, BoxActionSpace, DiscreteActionSpac
class QHeadParameters(HeadParameters):
def __init__(self, activation_function: str ='relu', name: str='q_head_params', dense_layer=Dense):
def __init__(self, activation_function: str ='relu', name: str='q_head_params',
num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
loss_weight: float = 1.0, dense_layer=Dense):
super().__init__(parameterized_class=QHead, activation_function=activation_function, name=name,
dense_layer=dense_layer)
dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
loss_weight=loss_weight)
class QHead(Head):
@@ -51,5 +55,10 @@ class QHead(Head):
# Standard Q Network
self.output = self.dense_layer(self.num_actions)(input_layer, name='output')
def __str__(self):
result = [
"Dense (num outputs = {})".format(self.num_actions)
]
return '\n'.join(result)

View File

@@ -16,7 +16,7 @@
import tensorflow as tf
from rl_coach.architectures.tensorflow_components.architecture import Dense
from rl_coach.architectures.tensorflow_components.layers import Dense
from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters
from rl_coach.base_parameters import AgentParameters
@@ -26,9 +26,12 @@ from rl_coach.spaces import SpacesDefinition
class QuantileRegressionQHeadParameters(HeadParameters):
def __init__(self, activation_function: str ='relu', name: str='quantile_regression_q_head_params',
dense_layer=Dense):
super().__init__(parameterized_class=QuantileRegressionQHead, activation_function=activation_function,
name=name, dense_layer=dense_layer)
num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
loss_weight: float = 1.0, dense_layer=Dense):
super().__init__(parameterized_class=QuantileRegressionQHead, activation_function=activation_function, name=name,
dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
loss_weight=loss_weight)
class QuantileRegressionQHead(Head):
@@ -79,3 +82,11 @@ class QuantileRegressionQHead(Head):
quantile_regression_loss = tf.reduce_sum(quantile_huber_loss) / float(self.num_atoms)
self.loss = quantile_regression_loss
tf.losses.add_loss(self.loss)
def __str__(self):
result = [
"Dense (num outputs = {})".format(self.num_actions * self.num_atoms),
"Reshape (new size = {} x {})".format(self.num_actions, self.num_atoms)
]
return '\n'.join(result)

View File

@@ -16,7 +16,7 @@
import tensorflow as tf
from rl_coach.architectures.tensorflow_components.architecture import Dense
from rl_coach.architectures.tensorflow_components.layers import Dense
from rl_coach.architectures.tensorflow_components.heads.head import HeadParameters, Head
from rl_coach.base_parameters import AgentParameters
from rl_coach.core_types import QActionStateValue
@@ -24,9 +24,13 @@ from rl_coach.spaces import SpacesDefinition
class RainbowQHeadParameters(HeadParameters):
def __init__(self, activation_function: str ='relu', name: str='rainbow_q_head_params', dense_layer=Dense):
def __init__(self, activation_function: str ='relu', name: str='rainbow_q_head_params',
num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
loss_weight: float = 1.0, dense_layer=Dense):
super().__init__(parameterized_class=RainbowQHead, activation_function=activation_function, name=name,
dense_layer=dense_layer)
dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
loss_weight=loss_weight)
class RainbowQHead(Head):
@@ -69,3 +73,17 @@ class RainbowQHead(Head):
self.loss = tf.nn.softmax_cross_entropy_with_logits(labels=self.target, logits=values_distribution)
tf.losses.add_loss(self.loss)
def __str__(self):
result = [
"State Value Stream - V",
"\tDense (num outputs = 512)",
"\tDense (num outputs = {})".format(self.num_atoms),
"Action Advantage Stream - A",
"\tDense (num outputs = 512)",
"\tDense (num outputs = {})".format(self.num_actions * self.num_atoms),
"\tReshape (new size = {} x {})".format(self.num_actions, self.num_atoms),
"\tSubtract(A, Mean(A))".format(self.num_actions),
"Add (V, A)",
"Softmax"
]
return '\n'.join(result)

View File

@@ -16,7 +16,7 @@
import tensorflow as tf
from rl_coach.architectures.tensorflow_components.architecture import Dense
from rl_coach.architectures.tensorflow_components.layers import Dense
from rl_coach.architectures.tensorflow_components.heads.head import Head, normalized_columns_initializer, HeadParameters
from rl_coach.base_parameters import AgentParameters
@@ -25,9 +25,13 @@ from rl_coach.spaces import SpacesDefinition
class VHeadParameters(HeadParameters):
def __init__(self, activation_function: str ='relu', name: str='v_head_params', dense_layer=Dense):
def __init__(self, activation_function: str ='relu', name: str='v_head_params',
num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
loss_weight: float = 1.0, dense_layer=Dense):
super().__init__(parameterized_class=VHead, activation_function=activation_function, name=name,
dense_layer=dense_layer)
dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
loss_weight=loss_weight)
class VHead(Head):
@@ -48,3 +52,9 @@ class VHead(Head):
# Standard V Network
self.output = self.dense_layer(1)(input_layer, name='output',
kernel_initializer=normalized_columns_initializer(1.0))
def __str__(self):
result = [
"Dense (num outputs = 1)"
]
return '\n'.join(result)