mirror of
https://github.com/gryf/coach.git
synced 2026-04-15 20:13:33 +02:00
Adding mxnet components to rl_coach/architectures (#60)
Adding mxnet components to rl_coach architectures. - Supports PPO and DQN - Tested with CartPole_PPO and CartPole_DQN - Normalizing filters don't work right now (see #49) and are disabled in CartPole_PPO preset - Checkpointing is disabled for MXNet
This commit is contained in:
@@ -0,0 +1,4 @@
|
||||
from .fc_middleware import FCMiddleware
|
||||
from .lstm_middleware import LSTMMiddleware
|
||||
|
||||
__all__ = ["FCMiddleware", "LSTMMiddleware"]
|
||||
@@ -0,0 +1,52 @@
|
||||
"""
|
||||
Module that defines the fully-connected middleware class
|
||||
"""
|
||||
|
||||
from rl_coach.architectures.mxnet_components.layers import Dense
|
||||
from rl_coach.architectures.mxnet_components.middlewares.middleware import Middleware
|
||||
from rl_coach.architectures.middleware_parameters import FCMiddlewareParameters
|
||||
from rl_coach.base_parameters import MiddlewareScheme
|
||||
|
||||
|
||||
class FCMiddleware(Middleware):
    def __init__(self, params: FCMiddlewareParameters):
        """
        Fully-connected middleware: the middle section of the network. It receives the
        embeddings produced by the input embedders after they have been aggregated in some
        method (for example, concatenation), and passes them through a feed-forward network
        that is customizable but shared between the heads of the network.

        :param params: parameters object containing batchnorm, activation_function and dropout properties.
        """
        super(FCMiddleware, self).__init__(params)

    @property
    def schemes(self) -> dict:
        """
        Schemes are the pre-defined network architectures of various depths and complexities
        that can be used for the Middleware. They are used to create the Block when
        FCMiddleware is initialised.

        :return: dictionary of schemes, with key of type MiddlewareScheme enum and value being list of mxnet.gluon.Block.
        """
        shallow_scheme = [Dense(units=64)]     # use for PPO
        medium_scheme = [Dense(units=512)]     # use for DQN
        deep_scheme = [Dense(units=128) for _ in range(3)]
        return {
            MiddlewareScheme.Empty: [],
            MiddlewareScheme.Shallow: shallow_scheme,
            MiddlewareScheme.Medium: medium_scheme,
            MiddlewareScheme.Deep: deep_scheme,
        }
|
||||
@@ -0,0 +1,80 @@
|
||||
"""
|
||||
Module that defines the LSTM middleware class
|
||||
"""
|
||||
|
||||
from typing import Union
|
||||
from types import ModuleType
|
||||
|
||||
import mxnet as mx
|
||||
from mxnet.gluon import rnn
|
||||
from rl_coach.architectures.mxnet_components.layers import Dense
|
||||
from rl_coach.architectures.mxnet_components.middlewares.middleware import Middleware
|
||||
from rl_coach.architectures.middleware_parameters import LSTMMiddlewareParameters
|
||||
from rl_coach.base_parameters import MiddlewareScheme
|
||||
|
||||
nd_sym_type = Union[mx.nd.NDArray, mx.sym.Symbol]
|
||||
|
||||
|
||||
class LSTMMiddleware(Middleware):
    def __init__(self, params: LSTMMiddlewareParameters):
        """
        Long Short Term Memory middleware: the middle section of the network. It receives the
        embeddings produced by the input embedders after they have been aggregated in some
        method (for example, concatenation), and passes them through a network that is
        customizable but shared between the heads of the network.

        :param params: parameters object containing batchnorm, activation_function, dropout and
            number_of_lstm_cells properties.
        """
        super(LSTMMiddleware, self).__init__(params)
        self.number_of_lstm_cells = params.number_of_lstm_cells
        with self.name_scope():
            self.lstm = rnn.LSTM(hidden_size=self.number_of_lstm_cells)

    @property
    def schemes(self) -> dict:
        """
        Schemes are the pre-defined network architectures of various depths and complexities
        that can be used for the Middleware. They are used to create the Block when
        LSTMMiddleware is initialised, and are applied before the LSTM.

        :return: dictionary of schemes, with key of type MiddlewareScheme enum and value being list of mxnet.gluon.Block.
        """
        shallow_scheme = [Dense(units=64)]     # use for PPO
        medium_scheme = [Dense(units=512)]     # use for DQN
        deep_scheme = [Dense(units=128) for _ in range(3)]
        return {
            MiddlewareScheme.Empty: [],
            MiddlewareScheme.Shallow: shallow_scheme,
            MiddlewareScheme.Medium: medium_scheme,
            MiddlewareScheme.Deep: deep_scheme,
        }

    def hybrid_forward(self,
                       F: ModuleType,
                       x: nd_sym_type,
                       *args, **kwargs) -> nd_sym_type:
        """
        Forward pass through the LSTM middleware network. Applies the dense layers of the
        selected scheme before passing the result to the LSTM layer.

        :param F: backend api, either `mxnet.nd` or `mxnet.sym` (if block has been hybridized).
        :param x: state embedding, of shape (batch_size, in_channels).
        :return: state middleware embedding, where shape is (batch_size, channels).
        """
        # reshape to batch-major NTC layout; 0 copies an input dimension, -1 infers the rest
        batch_major = x.reshape(shape=(0, 0, -1))
        embedding = super(LSTMMiddleware, self).hybrid_forward(F, batch_major, *args, **kwargs)
        # transpose to time-major TNC layout, which rnn.LSTM expects by default
        time_major = embedding.transpose(axes=(1, 0, 2))
        return self.lstm(time_major)
|
||||
@@ -0,0 +1,61 @@
|
||||
from typing import Union
|
||||
from types import ModuleType
|
||||
|
||||
import mxnet as mx
|
||||
from mxnet.gluon import nn
|
||||
from rl_coach.architectures.middleware_parameters import MiddlewareParameters
|
||||
from rl_coach.architectures.mxnet_components.layers import convert_layer
|
||||
from rl_coach.base_parameters import MiddlewareScheme
|
||||
|
||||
nd_sym_type = Union[mx.nd.NDArray, mx.sym.Symbol]
|
||||
|
||||
|
||||
class Middleware(nn.HybridBlock):
    def __init__(self, params: MiddlewareParameters):
        """
        Middleware is the middle part of the network. It takes the embeddings from the input embedders,
        after they were aggregated in some method (for example, concatenation) and passes it through a neural network
        which can be customizable but shared between the heads of the network.

        :param params: parameters object containing batchnorm, activation_function and dropout properties.
        """
        super(Middleware, self).__init__()
        self.scheme = params.scheme

        with self.name_scope():
            self.net = nn.HybridSequential()
            if isinstance(self.scheme, MiddlewareScheme):
                blocks = self.schemes[self.scheme]
            else:
                # if scheme is specified directly, convert to MX layer if it's not a callable object
                # NOTE: if layer object is callable, it must return a gluon block when invoked
                blocks = [convert_layer(layer) for layer in self.scheme]
            for block in blocks:
                self.net.add(block())
                # each scheme layer is optionally followed by batchnorm, activation and dropout
                if params.batchnorm:
                    self.net.add(nn.BatchNorm())
                if params.activation_function:
                    self.net.add(nn.Activation(params.activation_function))
                if params.dropout:
                    self.net.add(nn.Dropout(rate=params.dropout))

    @property
    def schemes(self) -> dict:
        """
        Schemes are the pre-defined network architectures of various depths and complexities that can be used for the
        Middleware. Should be implemented in child classes, and are used to create Block when Middleware is initialised.

        :return: dictionary of schemes, with key of type MiddlewareScheme enum and value being list of mxnet.gluon.Block.
        """
        # message previously said "embedder" (copy-paste from the embedder module); this is the middleware base class
        raise NotImplementedError("Inheriting middleware must define schemes matching its allowed default "
                                  "configurations.")

    def hybrid_forward(self, F: ModuleType, x: nd_sym_type, *args, **kwargs) -> nd_sym_type:
        """
        Used for forward pass through middleware network.

        :param F: backend api, either `mxnet.nd` or `mxnet.sym` (if block has been hybridized).
        :param x: state embedding, of shape (batch_size, in_channels).
        :return: state middleware embedding, where shape is (batch_size, channels).
        """
        return self.net(x)
|
||||
Reference in New Issue
Block a user