1
0
mirror of https://github.com/gryf/coach.git synced 2026-03-31 00:53:32 +02:00

Adding mxnet components to rl_coach/architectures (#60)

Adding mxnet components to rl_coach architectures.

- Supports PPO and DQN
- Tested with CartPole_PPO and CartPole_DQN
- Normalizing filters don't work right now (see #49) and are disabled in CartPole_PPO preset
- Checkpointing is disabled for MXNet
This commit is contained in:
Sina Afrooze
2018-11-07 07:07:15 -08:00
committed by Itai Caspi
parent e7a91b4dc3
commit 5fadb9c18e
39 changed files with 3864 additions and 44 deletions

View File

@@ -0,0 +1,4 @@
from .image_embedder import ImageEmbedder
from .vector_embedder import VectorEmbedder
__all__ = ['ImageEmbedder', 'VectorEmbedder']

View File

@@ -0,0 +1,71 @@
from typing import Union
from types import ModuleType
import mxnet as mx
from mxnet.gluon import nn
from rl_coach.architectures.embedder_parameters import InputEmbedderParameters
from rl_coach.architectures.mxnet_components.layers import convert_layer
from rl_coach.base_parameters import EmbedderScheme
nd_sym_type = Union[mx.nd.NDArray, mx.sym.Symbol]
class InputEmbedder(nn.HybridBlock):
    def __init__(self, params: InputEmbedderParameters):
        """
        An input embedder is the first part of the network, which takes the input from the state and produces a vector
        embedding by passing it through a neural network. The embedder will mostly be input type dependent, and there
        can be multiple embedders in a single network.

        :param params: parameters object containing input_clipping, input_rescaling, batchnorm, activation_function
            and dropout properties.
        """
        super(InputEmbedder, self).__init__()
        self.embedder_name = params.name
        # Optional (min, max) pair applied to the input in hybrid_forward; None disables clipping.
        self.input_clipping = params.input_clipping
        self.scheme = params.scheme
        with self.name_scope():
            self.net = nn.HybridSequential()
            if isinstance(self.scheme, EmbedderScheme):
                # pre-defined scheme: look up the block list from the inheriting embedder
                blocks = self.schemes[self.scheme]
            else:
                # if scheme is specified directly, convert to MX layer if it's not a callable object
                # NOTE: if layer object is callable, it must return a gluon block when invoked
                blocks = [convert_layer(layer) for layer in self.scheme]
            for block in blocks:
                self.net.add(block())
                # batchnorm/activation/dropout are appended after every scheme block
                if params.batchnorm:
                    self.net.add(nn.BatchNorm())
                if params.activation_function:
                    self.net.add(nn.Activation(params.activation_function))
                if params.dropout:
                    self.net.add(nn.Dropout(rate=params.dropout))

    @property
    def schemes(self) -> dict:
        """
        Schemes are the pre-defined network architectures of various depths and complexities that can be used for the
        InputEmbedder. Should be implemented in child classes, and are used to create Block when InputEmbedder is
        initialised.

        :return: dictionary of schemes, with key of type EmbedderScheme enum and value being list of mxnet.gluon.Block.
        """
        raise NotImplementedError("Inheriting embedder must define schemes matching its allowed default "
                                  "configurations.")

    def hybrid_forward(self, F: ModuleType, x: nd_sym_type, *args, **kwargs) -> nd_sym_type:
        """
        Used for forward pass through embedder network.

        :param F: backend api, either `mxnet.nd` or `mxnet.sym` (if block has been hybridized).
        :param x: environment state, where first dimension is batch_size, then dimensions are data type dependent.
        :return: embedding of environment state, where shape is (batch_size, channels).
        """
        # `input_rescaling` and `input_offset` set on inheriting embedder
        x = x / self.input_rescaling
        x = x - self.input_offset
        if self.input_clipping is not None:
            # BUG FIX: clip() returns a new array rather than mutating in place,
            # so the result must be re-assigned or clipping silently never happens.
            x = x.clip(a_min=self.input_clipping[0], a_max=self.input_clipping[1])
        x = self.net(x)
        return x.flatten()

View File

@@ -0,0 +1,76 @@
from typing import Union
from types import ModuleType
import mxnet as mx
from rl_coach.architectures.embedder_parameters import InputEmbedderParameters
from rl_coach.architectures.mxnet_components.embedders.embedder import InputEmbedder
from rl_coach.architectures.mxnet_components.layers import Conv2d
from rl_coach.base_parameters import EmbedderScheme
nd_sym_type = Union[mx.nd.NDArray, mx.sym.Symbol]
class ImageEmbedder(InputEmbedder):
    def __init__(self, params: InputEmbedderParameters):
        """
        An image embedder is an input embedder that takes an image input from the state and produces a vector
        embedding by passing it through a neural network.

        :param params: parameters object containing input_clipping, input_rescaling, batchnorm, activation_function
            and dropout properties.
        """
        super(ImageEmbedder, self).__init__(params)
        # 'image'-specific rescaling/offset applied by the parent's hybrid_forward
        self.input_rescaling = params.input_rescaling['image']
        self.input_offset = params.input_offset['image']

    @property
    def schemes(self) -> dict:
        """
        Schemes are the pre-defined network architectures of various depths and complexities that can be used. Are used
        to create Block when ImageEmbedder is initialised.

        :return: dictionary of schemes, with key of type EmbedderScheme enum and value being list of mxnet.gluon.Block.
        """
        return {
            EmbedderScheme.Empty:
                [],

            EmbedderScheme.Shallow:
                [
                    Conv2d(num_filters=32, kernel_size=8, strides=4)
                ],

            # Use for Atari DQN
            EmbedderScheme.Medium:
                [
                    Conv2d(num_filters=32, kernel_size=8, strides=4),
                    Conv2d(num_filters=64, kernel_size=4, strides=2),
                    Conv2d(num_filters=64, kernel_size=3, strides=1)
                ],

            # Use for Carla
            EmbedderScheme.Deep:
                [
                    Conv2d(num_filters=32, kernel_size=5, strides=2),
                    Conv2d(num_filters=32, kernel_size=3, strides=1),
                    Conv2d(num_filters=64, kernel_size=3, strides=2),
                    Conv2d(num_filters=64, kernel_size=3, strides=1),
                    Conv2d(num_filters=128, kernel_size=3, strides=2),
                    Conv2d(num_filters=128, kernel_size=3, strides=1),
                    Conv2d(num_filters=256, kernel_size=3, strides=2),
                    Conv2d(num_filters=256, kernel_size=3, strides=1)
                ]
        }

    def hybrid_forward(self, F: ModuleType, x: nd_sym_type, *args, **kwargs) -> nd_sym_type:
        """
        Used for forward pass through embedder network.

        :param F: backend api, either `mxnet.nd` or `mxnet.sym` (if block has been hybridized).
        :param x: image representing environment state, of shape (batch_size, in_channels, height, width).
        :return: embedding of environment state, of shape (batch_size, channels).
        """
        # BUG FIX: only validate the shape in imperative (NDArray) mode, consistent with
        # VectorEmbedder. When the block is hybridized, x is an mx.sym.Symbol which has no
        # concrete .shape, so the unguarded check broke symbolic execution.
        if isinstance(x, mx.nd.NDArray) and len(x.shape) != 4 and self.scheme != EmbedderScheme.Empty:
            raise ValueError("Image embedders expect the input size to have 4 dimensions. The given size is: {}"
                             .format(x.shape))
        return super(ImageEmbedder, self).hybrid_forward(F, x, *args, **kwargs)

View File

@@ -0,0 +1,71 @@
from typing import Union
from types import ModuleType
import mxnet as mx
from mxnet import nd, sym
from rl_coach.architectures.embedder_parameters import InputEmbedderParameters
from rl_coach.architectures.mxnet_components.embedders.embedder import InputEmbedder
from rl_coach.architectures.mxnet_components.layers import Dense
from rl_coach.base_parameters import EmbedderScheme
nd_sym_type = Union[mx.nd.NDArray, mx.sym.Symbol]
class VectorEmbedder(InputEmbedder):
    def __init__(self, params: InputEmbedderParameters):
        """
        A vector embedder is an input embedder that takes a vector input from the state and produces a vector
        embedding by passing it through a neural network.

        :param params: parameters object containing input_clipping, input_rescaling, batchnorm, activation_function
            and dropout properties.
        """
        super(VectorEmbedder, self).__init__(params)
        # pick the 'vector' entries for rescaling/offset used by the parent's forward pass
        self.input_rescaling = params.input_rescaling['vector']
        self.input_offset = params.input_offset['vector']

    @property
    def schemes(self):
        """
        Pre-defined network architectures of various depths and complexities that this embedder supports.
        Consulted when the VectorEmbedder is constructed with an EmbedderScheme enum value.

        :return: dictionary of schemes, with key of type EmbedderScheme enum and value being list of mxnet.gluon.Block.
        """
        shallow = [Dense(units=128)]
        medium = [Dense(units=256)]  # Use for DQN
        deep = [Dense(units=128) for _ in range(3)]  # Use for Carla
        return {
            EmbedderScheme.Empty: [],
            EmbedderScheme.Shallow: shallow,
            EmbedderScheme.Medium: medium,
            EmbedderScheme.Deep: deep,
        }

    def hybrid_forward(self, F: ModuleType, x: nd_sym_type, *args, **kwargs) -> nd_sym_type:
        """
        Used for forward pass through embedder network.

        :param F: backend api, either `nd` or `sym` (if block has been hybridized).
        :type F: nd or sym
        :param x: vector representing environment state, of shape (batch_size, in_channels).
        :return: embedding of environment state, of shape (batch_size, channels).
        """
        # Shape can only be validated in imperative mode (symbols carry no concrete shape),
        # and the Empty scheme accepts inputs of any rank.
        bad_rank = isinstance(x, nd.NDArray) and len(x.shape) != 2
        if bad_rank and self.scheme != EmbedderScheme.Empty:
            raise ValueError("Vector embedders expect the input size to have 2 dimensions. The given size is: {}"
                             .format(x.shape))
        return super(VectorEmbedder, self).hybrid_forward(F, x, *args, **kwargs)