rl_coach/architectures/tensorflow_components/heads/head.py (new file, 165 lines)
@@ -0,0 +1,165 @@
#
# Copyright (c) 2017 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from typing import Type

import numpy as np
import tensorflow as tf
from tensorflow.python.ops.losses.losses_impl import Reduction

from rl_coach.base_parameters import AgentParameters, Parameters
from rl_coach.spaces import SpacesDefinition
from rl_coach.utils import force_list


# Used to initialize weights for policy and value output layers
def normalized_columns_initializer(std=1.0):
    def _initializer(shape, dtype=None, partition_info=None):
        out = np.random.randn(*shape).astype(np.float32)
        out *= std / np.sqrt(np.square(out).sum(axis=0, keepdims=True))
        return tf.constant(out)
    return _initializer
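
# Illustrative usage sketch (not part of the original file): the initializer can be passed to any
# TF 1.x layer that accepts a kernel initializer, for example a linear output layer built on top of
# the middleware embedding. `middleware_embedding` is a hypothetical tensor name used only here:
#
#   value_output = tf.layers.dense(middleware_embedding, 1,
#                                  kernel_initializer=normalized_columns_initializer(1.0))
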
class HeadParameters(Parameters):
    def __init__(self, parameterized_class: Type['Head'], activation_function: str = 'relu', name: str = 'head'):
        super().__init__()
        self.activation_function = activation_function
        self.name = name
        self.parameterized_class_name = parameterized_class.__name__
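
# Illustrative sketch (not part of the original file): each concrete head is paired with a
# HeadParameters subclass that points back at the head class, along the lines of (names hypothetical,
# referring to the head sketch at the end of this file):
#
#   class ValueSketchHeadParameters(HeadParameters):
#       def __init__(self, activation_function: str='relu', name: str='value_sketch_head_params'):
#           super().__init__(parameterized_class=ValueSketchHead,
#                            activation_function=activation_function, name=name)
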
class Head(object):
    """
    A head is the final part of the network. It takes the embedding from the middleware embedder and passes it through
    a neural network to produce the output of the network. There can be multiple heads in a network, and each one has
    an assigned loss function. The heads are algorithm dependent.
    """
    def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, network_name: str,
                 head_idx: int=0, loss_weight: float=1., is_local: bool=True, activation_function: str='relu'):
        self.head_idx = head_idx
        self.network_name = network_name
        self.network_parameters = agent_parameters.network_wrappers[self.network_name]
        self.name = "head"
        self.output = []
        self.loss = []
        self.loss_type = []
        self.regularizations = []
        self.loss_weight = force_list(loss_weight)
        self.target = []
        self.importance_weight = []
        self.input = []
        self.is_local = is_local
        self.ap = agent_parameters
        self.spaces = spaces
        self.return_type = None
        self.activation_function = activation_function
    def __call__(self, input_layer):
        """
        Wrapper for building the module graph including scoping and loss creation
        :param input_layer: the input to the graph
        :return: the output of the last layer and the target placeholder
        """
        with tf.variable_scope(self.get_name(), initializer=tf.contrib.layers.xavier_initializer()):
            self._build_module(input_layer)

            self.output = force_list(self.output)
            self.target = force_list(self.target)
            self.input = force_list(self.input)
            self.loss_type = force_list(self.loss_type)
            self.loss = force_list(self.loss)
            self.regularizations = force_list(self.regularizations)
            if self.is_local:
                self.set_loss()
            self._post_build()

        if self.is_local:
            return self.output, self.target, self.input, self.importance_weight
        else:
            return self.output, self.input
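
    # Illustrative usage sketch (not part of the original file): for a local network the head is
    # applied to the middleware output and returns the tensors needed for training; for a non-local
    # (e.g. target/global) network only the outputs and inputs are returned. `middleware_output` is a
    # hypothetical tensor name used only here:
    #
    #   output, target, input_ph, importance_weight = head(middleware_output)
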
    def _build_module(self, input_layer):
        """
        Builds the graph of the module
        This method is called early on from __call__. It is expected to store the graph in self.output.
        :param input_layer: the input to the graph
        :return: None
        """
        pass
    def _post_build(self):
        """
        Optional function that allows adding any extra definitions after the head has been fully defined
        For example, this allows doing additional calculations that are based on the loss
        :return: None
        """
        pass
    def get_name(self):
        """
        Get a formatted name for the module
        :return: the formatted name
        """
        return '{}_{}'.format(self.name, self.head_idx)
    def set_loss(self):
        """
        Creates a target placeholder and a weighted loss function for each loss type and regularization
        defined by the head
        :return: None
        """
        # there are heads that define the loss internally, but we still need to create importance weight
        # placeholders for them
        for idx in range(len(self.loss)):
            importance_weight = tf.placeholder('float',
                                               [None] + [1] * (len(self.target[idx].shape) - 1),
                                               '{}_importance_weight'.format(self.get_name()))
            self.importance_weight.append(importance_weight)

        # add losses and target placeholders
        for idx in range(len(self.loss_type)):
            # create target placeholder
            target = tf.placeholder('float', self.output[idx].shape, '{}_target'.format(self.get_name()))
            self.target.append(target)

            # create importance sampling weights placeholder
            num_target_dims = len(self.target[idx].shape)
            importance_weight = tf.placeholder('float', [None] + [1] * (num_target_dims - 1),
                                               '{}_importance_weight'.format(self.get_name()))
            self.importance_weight.append(importance_weight)

            # compute the weighted loss. importance_weight weights the samples in the batch, while
            # self.loss_weight weights this head's loss against the other losses in this head or in other heads
            loss_weight = self.loss_weight[idx]*importance_weight
            loss = self.loss_type[idx](self.target[-1], self.output[idx],
                                       scope=self.get_name(), reduction=Reduction.NONE, loss_collection=None)

            # the loss is first summed over each sample in the batch and then the mean over the batch is taken
            loss = tf.reduce_mean(loss_weight*tf.reduce_sum(loss, axis=list(range(1, num_target_dims))))

            # we add the loss to the losses collection and later we will extract it in general_network
            tf.losses.add_loss(loss)
            self.loss.append(loss)

        # add regularizations
        for regularization in self.regularizations:
            self.loss.append(regularization)
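
    # Illustrative note (not part of the original file): tf.losses.add_loss() above registers each head
    # loss in the default tf.GraphKeys.LOSSES collection, so the enclosing network can later collect
    # this head's losses with, for example:
    #
    #   head_losses = tf.losses.get_losses(scope=head.get_name())
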
    @classmethod
    def path(cls):
        # cls is already the class object, so its name is cls.__name__
        # (cls.__class__ would be the metaclass, i.e. type)
        return cls.__name__
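

# ----------------------------------------------------------------------------------------------------
# Illustrative sketch (not part of the original file): a minimal concrete head. The name
# `ValueSketchHead` is hypothetical; the real heads shipped with rl_coach (e.g. the V, Q and policy
# heads) follow the same pattern: set `self.name`, build the output tensors in `_build_module()` and
# declare a loss type for `set_loss()` to turn into a target placeholder and a weighted loss.
class ValueSketchHead(Head):
    def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, network_name: str,
                 head_idx: int=0, loss_weight: float=1., is_local: bool=True, activation_function: str='relu'):
        super().__init__(agent_parameters, spaces, network_name, head_idx, loss_weight, is_local,
                         activation_function)
        self.name = 'value_sketch_head'

    def _build_module(self, input_layer):
        # a single linear unit on top of the middleware embedding, using the initializer defined above
        self.output = tf.layers.dense(input_layer, 1,
                                      kernel_initializer=normalized_columns_initializer(1.0))
        # set_loss() (called from __call__ for local networks) will create the matching target placeholder
        self.loss_type = tf.losses.mean_squared_error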