1
0
mirror of https://github.com/gryf/coach.git synced 2026-04-15 20:13:33 +02:00

Adding mxnet components to rl_coach/architectures (#60)

Adding mxnet components to rl_coach architectures.

- Supports PPO and DQN
- Tested with CartPole_PPO and CartPole_DQN
- Normalizing filters don't work right now (see #49) and are disabled in CartPole_PPO preset
- Checkpointing is disabled for MXNet
This commit is contained in:
Sina Afrooze
2018-11-07 07:07:15 -08:00
committed by Itai Caspi
parent e7a91b4dc3
commit 5fadb9c18e
39 changed files with 3864 additions and 44 deletions

View File

@@ -0,0 +1,4 @@
from .fc_middleware import FCMiddleware
from .lstm_middleware import LSTMMiddleware
# Names exported when this package is star-imported; both are imported just above.
__all__ = ["FCMiddleware", "LSTMMiddleware"]

View File

@@ -0,0 +1,52 @@
"""
Module that defines the fully-connected middleware class
"""
from rl_coach.architectures.mxnet_components.layers import Dense
from rl_coach.architectures.mxnet_components.middlewares.middleware import Middleware
from rl_coach.architectures.middleware_parameters import FCMiddlewareParameters
from rl_coach.base_parameters import MiddlewareScheme
class FCMiddleware(Middleware):
    def __init__(self, params: FCMiddlewareParameters):
        """
        Fully-Connected Middleware, used in the middle part of the network. It receives the embeddings produced
        by the input embedders once they have been aggregated (for example by concatenation) and runs them
        through a customizable stack of layers that is shared between the heads of the network.

        All construction work is delegated to the base ``Middleware`` initialiser.

        :param params: parameters object containing batchnorm, activation_function and dropout properties.
        """
        super().__init__(params)

    @property
    def schemes(self) -> dict:
        """
        Pre-defined layer stacks of various depths that can be selected for this middleware.

        A fresh set of ``Dense`` layer objects is created on every access.

        :return: dictionary keyed by MiddlewareScheme enum member, each value being a list of ``Dense``
            layer objects.
        """
        no_layers = []
        # Use for PPO
        shallow_layers = [Dense(units=64)]
        # Use for DQN
        medium_layers = [Dense(units=512)]
        deep_layers = [Dense(units=128) for _ in range(3)]

        return {
            MiddlewareScheme.Empty: no_layers,
            MiddlewareScheme.Shallow: shallow_layers,
            MiddlewareScheme.Medium: medium_layers,
            MiddlewareScheme.Deep: deep_layers,
        }

View File

@@ -0,0 +1,80 @@
"""
Module that defines the LSTM middleware class
"""
from typing import Union
from types import ModuleType
import mxnet as mx
from mxnet.gluon import rnn
from rl_coach.architectures.mxnet_components.layers import Dense
from rl_coach.architectures.mxnet_components.middlewares.middleware import Middleware
from rl_coach.architectures.middleware_parameters import LSTMMiddlewareParameters
from rl_coach.base_parameters import MiddlewareScheme
# Type alias used to annotate hybrid_forward inputs/outputs: either an NDArray or a Symbol.
nd_sym_type = Union[mx.nd.NDArray, mx.sym.Symbol]
class LSTMMiddleware(Middleware):
    def __init__(self, params: LSTMMiddlewareParameters):
        """
        Long Short Term Memory Middleware, used in the middle part of the network. It receives the embeddings
        produced by the input embedders once they have been aggregated (for example by concatenation) and runs
        them through a customizable stack of layers, followed by an LSTM layer, shared between the heads of
        the network.

        :param params: parameters object containing batchnorm, activation_function, dropout and
            number_of_lstm_cells properties.
        """
        super().__init__(params)
        cell_count = params.number_of_lstm_cells
        self.number_of_lstm_cells = cell_count
        # Create the recurrent layer inside this block's name scope.
        with self.name_scope():
            self.lstm = rnn.LSTM(hidden_size=cell_count)

    @property
    def schemes(self) -> dict:
        """
        Pre-defined layer stacks of various depths that can be selected for this middleware. The selected
        stack is applied before the LSTM layer.

        A fresh set of ``Dense`` layer objects is created on every access.

        :return: dictionary keyed by MiddlewareScheme enum member, each value being a list of ``Dense``
            layer objects.
        """
        no_layers = []
        # Use for PPO
        shallow_layers = [Dense(units=64)]
        # Use for DQN
        medium_layers = [Dense(units=512)]
        deep_layers = [Dense(units=128) for _ in range(3)]

        return {
            MiddlewareScheme.Empty: no_layers,
            MiddlewareScheme.Shallow: shallow_layers,
            MiddlewareScheme.Medium: medium_layers,
            MiddlewareScheme.Deep: deep_layers,
        }

    def hybrid_forward(self,
                       F: ModuleType,
                       x: nd_sym_type,
                       *args, **kwargs) -> nd_sym_type:
        """
        Forward pass through the LSTM middleware network.

        The input is reshaped to three dimensions, passed through the layers of the selected scheme (via the
        parent class), has its first two axes swapped, and is finally fed to the LSTM layer.

        :param F: backend api, either `mxnet.nd` or `mxnet.sym` (if block has been hybridized).
        :param x: state embedding.
            NOTE(review): previously documented as (batch_size, in_channels); confirm the expected rank
            against the callers, given the reshape below.
        :return: output of the LSTM layer.
            NOTE(review): the resulting layout/shape is not established here - confirm.
        """
        # Keep the first two input dimensions and infer the third (intended as batch, time, channel layout).
        batch_major_input = x.reshape(shape=(0, 0, -1))
        batch_major_emb = super().hybrid_forward(F, batch_major_input, *args, **kwargs)
        # Swap the first two axes (intended as time, batch, channel layout) before the LSTM.
        time_major_emb = batch_major_emb.transpose(axes=(1, 0, 2))
        return self.lstm(time_major_emb)

View File

@@ -0,0 +1,61 @@
from typing import Union
from types import ModuleType
import mxnet as mx
from mxnet.gluon import nn
from rl_coach.architectures.middleware_parameters import MiddlewareParameters
from rl_coach.architectures.mxnet_components.layers import convert_layer
from rl_coach.base_parameters import MiddlewareScheme
# Type alias used to annotate hybrid_forward inputs/outputs: either an NDArray or a Symbol.
nd_sym_type = Union[mx.nd.NDArray, mx.sym.Symbol]
class Middleware(nn.HybridBlock):
    def __init__(self, params: MiddlewareParameters):
        """
        Middleware is the middle part of the network. It takes the embeddings from the input embedders,
        after they were aggregated in some method (for example, concatenation) and passes it through a neural network
        which can be customizable but shared between the heads of the network.

        The network is assembled as a ``HybridSequential``: every layer of the selected scheme is added,
        each optionally followed by batch normalisation, an activation and dropout.

        :param params: parameters object containing scheme, batchnorm, activation_function and dropout
            properties.
        """
        super().__init__()
        self.scheme = params.scheme

        with self.name_scope():
            self.net = nn.HybridSequential()

            if isinstance(self.scheme, MiddlewareScheme):
                # Pre-defined scheme: look it up in the table supplied by the subclass.
                blocks = self.schemes[self.scheme]
            else:
                # if scheme is specified directly, convert to MX layer if it's not a callable object
                # NOTE: if layer object is callable, it must return a gluon block when invoked
                blocks = [convert_layer(layer) for layer in self.scheme]

            for block in blocks:
                # Each entry is a factory: invoking it yields the gluon block to add.
                self.net.add(block())
                if params.batchnorm:
                    self.net.add(nn.BatchNorm())
                if params.activation_function:
                    self.net.add(nn.Activation(params.activation_function))
                if params.dropout:
                    self.net.add(nn.Dropout(rate=params.dropout))

    @property
    def schemes(self) -> dict:
        """
        Schemes are the pre-defined network architectures of various depths and complexities that can be used for the
        Middleware. Should be implemented in child classes, and are used to create the network when Middleware is
        initialised.

        :return: dictionary of schemes, with key of type MiddlewareScheme enum and value being a list of
            callables, each of which returns a gluon block when invoked.
        :raises NotImplementedError: always, in this base class.
        """
        raise NotImplementedError("Inheriting middleware must define schemes matching its allowed default "
                                  "configurations.")

    def hybrid_forward(self, F: ModuleType, x: nd_sym_type, *args, **kwargs) -> nd_sym_type:
        """
        Used for forward pass through middleware network.

        :param F: backend api, either `mxnet.nd` or `mxnet.sym` (if block has been hybridized).
        :param x: state embedding, of shape (batch_size, in_channels).
        :return: state middleware embedding, where shape is (batch_size, channels).
        """
        return self.net(x)