Adding mxnet components to rl_coach/architectures (#60)

Adding mxnet components to rl_coach architectures. - Supports PPO and DQN - Tested with CartPole_PPO and CartPole_DQN - Normalizing filters don't work right now (see #49) and are disabled in CartPole_PPO preset - Checkpointing is disabled for MXNet
2026-07-09 02:46:33 +02:00 · 2018-11-07 07:07:15 -08:00
parent e7a91b4dc3
commit 5fadb9c18e
39 changed files with 3864 additions and 44 deletions
@@ -0,0 +1,4 @@
+from .fc_middleware import FCMiddleware
+from .lstm_middleware import LSTMMiddleware
+
+__all__ = ["FCMiddleware", "LSTMMiddleware"]
@@ -0,0 +1,52 @@
+"""
+Module that defines the fully-connected middleware class
+"""
+
+from rl_coach.architectures.mxnet_components.layers import Dense
+from rl_coach.architectures.mxnet_components.middlewares.middleware import Middleware
+from rl_coach.architectures.middleware_parameters import FCMiddlewareParameters
+from rl_coach.base_parameters import MiddlewareScheme
+
+
+class FCMiddleware(Middleware):
+    def __init__(self, params: FCMiddlewareParameters):
+        """
+        FCMiddleware or Fully-Connected Middleware can be used in the middle part of the network. It takes the
+        embeddings from the input embedders, after they were aggregated in some method (for example, concatenation)
+        and passes it through a neural network  which can be customizable but shared between the heads of the network.
+
+        :param params: parameters object containing batchnorm, activation_function and dropout properties.
+        """
+        super(FCMiddleware, self).__init__(params)
+
+    @property
+    def schemes(self) -> dict:
+        """
+        Schemes are the pre-defined network architectures of various depths and complexities that can be used for the
+        Middleware. Are used to create Block when FCMiddleware is initialised.
+
+        :return: dictionary of schemes, with key of type MiddlewareScheme enum and value being list of mxnet.gluon.Block.
+        """
+        return {
+            MiddlewareScheme.Empty:
+                [],
+
+            # Use for PPO
+            MiddlewareScheme.Shallow:
+                [
+                    Dense(units=64)
+                ],
+
+            # Use for DQN
+            MiddlewareScheme.Medium:
+                [
+                    Dense(units=512)
+                ],
+
+            MiddlewareScheme.Deep:
+                [
+                    Dense(units=128),
+                    Dense(units=128),
+                    Dense(units=128)
+                ]
+        }
@@ -0,0 +1,80 @@
+"""
+Module that defines the LSTM middleware class
+"""
+
+from typing import Union
+from types import ModuleType
+
+import mxnet as mx
+from mxnet.gluon import rnn
+from rl_coach.architectures.mxnet_components.layers import Dense
+from rl_coach.architectures.mxnet_components.middlewares.middleware import Middleware
+from rl_coach.architectures.middleware_parameters import LSTMMiddlewareParameters
+from rl_coach.base_parameters import MiddlewareScheme
+
+nd_sym_type = Union[mx.nd.NDArray, mx.sym.Symbol]
+
+
+class LSTMMiddleware(Middleware):
+    def __init__(self, params: LSTMMiddlewareParameters):
+        """
+        LSTMMiddleware or Long Short Term Memory Middleware can be used in the middle part of the network. It takes the
+        embeddings from the input embedders, after they were aggregated in some method (for example, concatenation)
+        and passes it through a neural network  which can be customizable but shared between the heads of the network.
+
+        :param params: parameters object containing batchnorm, activation_function, dropout and
+            number_of_lstm_cells properties.
+        """
+        super(LSTMMiddleware, self).__init__(params)
+        self.number_of_lstm_cells = params.number_of_lstm_cells
+        with self.name_scope():
+            self.lstm = rnn.LSTM(hidden_size=self.number_of_lstm_cells)
+
+    @property
+    def schemes(self) -> dict:
+        """
+        Schemes are the pre-defined network architectures of various depths and complexities that can be used for the
+        Middleware. Are used to create Block when LSTMMiddleware is initialised, and are applied before the LSTM.
+
+        :return: dictionary of schemes, with key of type MiddlewareScheme enum and value being list of mxnet.gluon.Block.
+        """
+        return {
+            MiddlewareScheme.Empty:
+                [],
+
+            # Use for PPO
+            MiddlewareScheme.Shallow:
+                [
+                    Dense(units=64)
+                ],
+
+            # Use for DQN
+            MiddlewareScheme.Medium:
+                [
+                    Dense(units=512)
+                ],
+
+            MiddlewareScheme.Deep:
+                [
+                    Dense(units=128),
+                    Dense(units=128),
+                    Dense(units=128)
+                ]
+        }
+
+    def hybrid_forward(self,
+                       F: ModuleType,
+                       x: nd_sym_type,
+                       *args, **kwargs) -> nd_sym_type:
+        """
+        Used for forward pass through LSTM middleware network.
+        Applies dense layers from selected scheme before passing result to LSTM layer.
+
+        :param F: backend api, either `mxnet.nd` or `mxnet.sym` (if block has been hybridized).
+        :param x: state embedding, of shape (batch_size, in_channels).
+        :return: state middleware embedding, where shape is (batch_size, channels).
+        """
+        x_ntc = x.reshape(shape=(0, 0, -1))
+        emb_ntc = super(LSTMMiddleware, self).hybrid_forward(F, x_ntc, *args, **kwargs)
+        emb_tnc = emb_ntc.transpose(axes=(1, 0, 2))
+        return self.lstm(emb_tnc)
@@ -0,0 +1,61 @@
+from typing import Union
+from types import ModuleType
+
+import mxnet as mx
+from mxnet.gluon import nn
+from rl_coach.architectures.middleware_parameters import MiddlewareParameters
+from rl_coach.architectures.mxnet_components.layers import convert_layer
+from rl_coach.base_parameters import MiddlewareScheme
+
+nd_sym_type = Union[mx.nd.NDArray, mx.sym.Symbol]
+
+
+class Middleware(nn.HybridBlock):
+    def __init__(self, params: MiddlewareParameters):
+        """
+        Middleware is the middle part of the network. It takes the embeddings from the input embedders,
+        after they were aggregated in some method (for example, concatenation) and passes it through a neural network
+        which can be customizable but shared between the heads of the network.
+
+        :param params: parameters object containing batchnorm, activation_function and dropout properties.
+        """
+        super(Middleware, self).__init__()
+        self.scheme = params.scheme
+
+        with self.name_scope():
+            self.net = nn.HybridSequential()
+            if isinstance(self.scheme, MiddlewareScheme):
+                blocks = self.schemes[self.scheme]
+            else:
+                # if scheme is specified directly, convert to MX layer if it's not a callable object
+                # NOTE: if layer object is callable, it must return a gluon block when invoked
+                blocks = [convert_layer(l) for l in self.scheme]
+            for block in blocks:
+                self.net.add(block())
+                if params.batchnorm:
+                    self.net.add(nn.BatchNorm())
+                if params.activation_function:
+                    self.net.add(nn.Activation(params.activation_function))
+                if params.dropout:
+                    self.net.add(nn.Dropout(rate=params.dropout))
+
+    @property
+    def schemes(self) -> dict:
+        """
+        Schemes are the pre-defined network architectures of various depths and complexities that can be used for the
+        Middleware. Should be implemented in child classes, and are used to create Block when Middleware is initialised.
+
+        :return: dictionary of schemes, with key of type MiddlewareScheme enum and value being list of mxnet.gluon.Block.
+        """
+        raise NotImplementedError("Inheriting embedder must define schemes matching its allowed default "
+                                  "configurations.")
+
+    def hybrid_forward(self, F: ModuleType, x: nd_sym_type, *args, **kwargs) -> nd_sym_type:
+        """
+        Used for forward pass through middleware network.
+
+        :param F: backend api, either `mxnet.nd` or `mxnet.sym` (if block has been hybridized).
+        :param x: state embedding, of shape (batch_size, in_channels).
+        :return: state middleware embedding, where shape is (batch_size, channels).
+        """
+        return self.net(x)