1
0
mirror of https://github.com/gryf/coach.git synced 2026-03-31 00:53:32 +02:00

Adding mxnet components to rl_coach/architectures (#60)

Adding mxnet components to rl_coach architectures.

- Supports PPO and DQN
- Tested with CartPole_PPO and CartPole_DQN
- Normalizing filters don't work right now (see #49) and are disabled in CartPole_PPO preset
- Checkpointing is disabled for MXNet
This commit is contained in:
Sina Afrooze
2018-11-07 07:07:15 -08:00
committed by Itai Caspi
parent e7a91b4dc3
commit 5fadb9c18e
39 changed files with 3864 additions and 44 deletions

View File

@@ -0,0 +1,4 @@
from .image_embedder import ImageEmbedder
from .vector_embedder import VectorEmbedder
__all__ = ['ImageEmbedder', 'VectorEmbedder']

View File

@@ -0,0 +1,71 @@
from typing import Union
from types import ModuleType
import mxnet as mx
from mxnet.gluon import nn
from rl_coach.architectures.embedder_parameters import InputEmbedderParameters
from rl_coach.architectures.mxnet_components.layers import convert_layer
from rl_coach.base_parameters import EmbedderScheme
nd_sym_type = Union[mx.nd.NDArray, mx.sym.Symbol]
class InputEmbedder(nn.HybridBlock):
    def __init__(self, params: InputEmbedderParameters):
        """
        An input embedder is the first part of the network, which takes the input from the state and produces a vector
        embedding by passing it through a neural network. The embedder will mostly be input type dependent, and there
        can be multiple embedders in a single network.

        :param params: parameters object containing input_clipping, input_rescaling, batchnorm, activation_function
            and dropout properties.
        """
        super(InputEmbedder, self).__init__()
        self.embedder_name = params.name
        # Optional (min, max) pair applied to the input in hybrid_forward; None disables clipping.
        self.input_clipping = params.input_clipping
        self.scheme = params.scheme
        with self.name_scope():
            self.net = nn.HybridSequential()
            if isinstance(self.scheme, EmbedderScheme):
                # pre-defined scheme: look up the block list from the inheriting embedder
                blocks = self.schemes[self.scheme]
            else:
                # if scheme is specified directly, convert to MX layer if it's not a callable object
                # NOTE: if layer object is callable, it must return a gluon block when invoked
                blocks = [convert_layer(layer) for layer in self.scheme]
            for block in blocks:
                self.net.add(block())
                # batchnorm/activation/dropout are appended after every scheme block
                if params.batchnorm:
                    self.net.add(nn.BatchNorm())
                if params.activation_function:
                    self.net.add(nn.Activation(params.activation_function))
                if params.dropout:
                    self.net.add(nn.Dropout(rate=params.dropout))

    @property
    def schemes(self) -> dict:
        """
        Schemes are the pre-defined network architectures of various depths and complexities that can be used for the
        InputEmbedder. Should be implemented in child classes, and are used to create Block when InputEmbedder is
        initialised.

        :return: dictionary of schemes, with key of type EmbedderScheme enum and value being list of mxnet.gluon.Block.
        """
        raise NotImplementedError("Inheriting embedder must define schemes matching its allowed default "
                                  "configurations.")

    def hybrid_forward(self, F: ModuleType, x: nd_sym_type, *args, **kwargs) -> nd_sym_type:
        """
        Used for forward pass through embedder network.

        :param F: backend api, either `mxnet.nd` or `mxnet.sym` (if block has been hybridized).
        :param x: environment state, where first dimension is batch_size, then dimensions are data type dependent.
        :return: embedding of environment state, where shape is (batch_size, channels).
        """
        # `input_rescaling` and `input_offset` set on inheriting embedder
        x = x / self.input_rescaling
        x = x - self.input_offset
        if self.input_clipping is not None:
            # BUG FIX: clip() returns a new array rather than mutating in place,
            # so the result must be re-assigned or clipping silently never happens.
            x = x.clip(a_min=self.input_clipping[0], a_max=self.input_clipping[1])
        x = self.net(x)
        return x.flatten()

View File

@@ -0,0 +1,76 @@
from typing import Union
from types import ModuleType
import mxnet as mx
from rl_coach.architectures.embedder_parameters import InputEmbedderParameters
from rl_coach.architectures.mxnet_components.embedders.embedder import InputEmbedder
from rl_coach.architectures.mxnet_components.layers import Conv2d
from rl_coach.base_parameters import EmbedderScheme
nd_sym_type = Union[mx.nd.NDArray, mx.sym.Symbol]
class ImageEmbedder(InputEmbedder):
    def __init__(self, params: InputEmbedderParameters):
        """
        An image embedder is an input embedder that takes an image input from the state and produces a vector
        embedding by passing it through a neural network.

        :param params: parameters object containing input_clipping, input_rescaling, batchnorm, activation_function
            and dropout properties.
        """
        super(ImageEmbedder, self).__init__(params)
        # 'image'-specific rescaling/offset applied by the parent's hybrid_forward
        self.input_rescaling = params.input_rescaling['image']
        self.input_offset = params.input_offset['image']

    @property
    def schemes(self) -> dict:
        """
        Schemes are the pre-defined network architectures of various depths and complexities that can be used. Are used
        to create Block when ImageEmbedder is initialised.

        :return: dictionary of schemes, with key of type EmbedderScheme enum and value being list of mxnet.gluon.Block.
        """
        return {
            EmbedderScheme.Empty:
                [],

            EmbedderScheme.Shallow:
                [
                    Conv2d(num_filters=32, kernel_size=8, strides=4)
                ],

            # Use for Atari DQN
            EmbedderScheme.Medium:
                [
                    Conv2d(num_filters=32, kernel_size=8, strides=4),
                    Conv2d(num_filters=64, kernel_size=4, strides=2),
                    Conv2d(num_filters=64, kernel_size=3, strides=1)
                ],

            # Use for Carla
            EmbedderScheme.Deep:
                [
                    Conv2d(num_filters=32, kernel_size=5, strides=2),
                    Conv2d(num_filters=32, kernel_size=3, strides=1),
                    Conv2d(num_filters=64, kernel_size=3, strides=2),
                    Conv2d(num_filters=64, kernel_size=3, strides=1),
                    Conv2d(num_filters=128, kernel_size=3, strides=2),
                    Conv2d(num_filters=128, kernel_size=3, strides=1),
                    Conv2d(num_filters=256, kernel_size=3, strides=2),
                    Conv2d(num_filters=256, kernel_size=3, strides=1)
                ]
        }

    def hybrid_forward(self, F: ModuleType, x: nd_sym_type, *args, **kwargs) -> nd_sym_type:
        """
        Used for forward pass through embedder network.

        :param F: backend api, either `mxnet.nd` or `mxnet.sym` (if block has been hybridized).
        :param x: image representing environment state, of shape (batch_size, in_channels, height, width).
        :return: embedding of environment state, of shape (batch_size, channels).
        """
        # BUG FIX: only validate the shape in imperative (NDArray) mode, consistent with
        # VectorEmbedder. When the block is hybridized, x is an mx.sym.Symbol which has no
        # concrete .shape, so the unguarded check broke symbolic execution.
        if isinstance(x, mx.nd.NDArray) and len(x.shape) != 4 and self.scheme != EmbedderScheme.Empty:
            raise ValueError("Image embedders expect the input size to have 4 dimensions. The given size is: {}"
                             .format(x.shape))
        return super(ImageEmbedder, self).hybrid_forward(F, x, *args, **kwargs)

View File

@@ -0,0 +1,71 @@
from typing import Union
from types import ModuleType
import mxnet as mx
from mxnet import nd, sym
from rl_coach.architectures.embedder_parameters import InputEmbedderParameters
from rl_coach.architectures.mxnet_components.embedders.embedder import InputEmbedder
from rl_coach.architectures.mxnet_components.layers import Dense
from rl_coach.base_parameters import EmbedderScheme
nd_sym_type = Union[mx.nd.NDArray, mx.sym.Symbol]
class VectorEmbedder(InputEmbedder):
    def __init__(self, params: InputEmbedderParameters):
        """
        A vector embedder is an input embedder that takes a vector input from the state and produces a vector
        embedding by passing it through a neural network.

        :param params: parameters object containing input_clipping, input_rescaling, batchnorm, activation_function
            and dropout properties.
        """
        super(VectorEmbedder, self).__init__(params)
        # pick the 'vector' entries for rescaling/offset used by the parent's forward pass
        self.input_rescaling = params.input_rescaling['vector']
        self.input_offset = params.input_offset['vector']

    @property
    def schemes(self):
        """
        Pre-defined network architectures of various depths and complexities that this embedder supports.
        Consulted when the VectorEmbedder is constructed with an EmbedderScheme enum value.

        :return: dictionary of schemes, with key of type EmbedderScheme enum and value being list of mxnet.gluon.Block.
        """
        shallow = [Dense(units=128)]
        medium = [Dense(units=256)]  # Use for DQN
        deep = [Dense(units=128) for _ in range(3)]  # Use for Carla
        return {
            EmbedderScheme.Empty: [],
            EmbedderScheme.Shallow: shallow,
            EmbedderScheme.Medium: medium,
            EmbedderScheme.Deep: deep,
        }

    def hybrid_forward(self, F: ModuleType, x: nd_sym_type, *args, **kwargs) -> nd_sym_type:
        """
        Used for forward pass through embedder network.

        :param F: backend api, either `nd` or `sym` (if block has been hybridized).
        :type F: nd or sym
        :param x: vector representing environment state, of shape (batch_size, in_channels).
        :return: embedding of environment state, of shape (batch_size, channels).
        """
        # Shape can only be validated in imperative mode (symbols carry no concrete shape),
        # and the Empty scheme accepts inputs of any rank.
        bad_rank = isinstance(x, nd.NDArray) and len(x.shape) != 2
        if bad_rank and self.scheme != EmbedderScheme.Empty:
            raise ValueError("Vector embedders expect the input size to have 2 dimensions. The given size is: {}"
                             .format(x.shape))
        return super(VectorEmbedder, self).hybrid_forward(F, x, *args, **kwargs)