mirror of
https://github.com/gryf/coach.git
synced 2025-12-17 19:20:19 +01:00
Adding mxnet components to rl_coach architectures. - Supports PPO and DQN - Tested with CartPole_PPO and CartPole_DQN - Normalizing filters don't work right now (see #49) and are disabled in CartPole_PPO preset - Checkpointing is disabled for MXNet
77 lines
3.3 KiB
Python
77 lines
3.3 KiB
Python
from typing import Union
|
|
from types import ModuleType
|
|
|
|
import mxnet as mx
|
|
from rl_coach.architectures.embedder_parameters import InputEmbedderParameters
|
|
from rl_coach.architectures.mxnet_components.embedders.embedder import InputEmbedder
|
|
from rl_coach.architectures.mxnet_components.layers import Conv2d
|
|
from rl_coach.base_parameters import EmbedderScheme
|
|
|
|
nd_sym_type = Union[mx.nd.NDArray, mx.sym.Symbol]
|
|
|
|
|
|
class ImageEmbedder(InputEmbedder):
    """
    Input embedder for image observations.

    Supplies the convolutional layer stacks (see `schemes`) and validates the rank of the input before
    delegating the forward pass to the InputEmbedder base class.
    """
    def __init__(self, params: InputEmbedderParameters):
        """
        An image embedder is an input embedder that takes an image input from the state and produces a vector
        embedding by passing it through a neural network.

        :param params: parameters object containing input_clipping, input_rescaling, batchnorm, activation_function
            and dropout properties. The 'image' entries of `input_rescaling` and `input_offset` are read here.
        """
        super(ImageEmbedder, self).__init__(params)
        # Both parameters are dictionaries keyed by input type; this embedder only uses the 'image' entry.
        self.input_rescaling = params.input_rescaling['image']
        self.input_offset = params.input_offset['image']

    @property
    def schemes(self) -> dict:
        """
        Schemes are the pre-defined network architectures of various depths and complexities that can be used. Are
        used to create Block when ImageEmbedder is initialised.

        Note that every access builds a new dictionary with newly constructed Conv2d layers.

        :return: dictionary of schemes, with key of type EmbedderScheme enum and value being list of
            mxnet.gluon.Block.
        """
        return {
            # No layers at all.
            EmbedderScheme.Empty:
                [],

            EmbedderScheme.Shallow:
                [
                    Conv2d(num_filters=32, kernel_size=8, strides=4)
                ],

            # Use for Atari DQN
            EmbedderScheme.Medium:
                [
                    Conv2d(num_filters=32, kernel_size=8, strides=4),
                    Conv2d(num_filters=64, kernel_size=4, strides=2),
                    Conv2d(num_filters=64, kernel_size=3, strides=1)
                ],

            # Use for Carla
            EmbedderScheme.Deep:
                [
                    Conv2d(num_filters=32, kernel_size=5, strides=2),
                    Conv2d(num_filters=32, kernel_size=3, strides=1),
                    Conv2d(num_filters=64, kernel_size=3, strides=2),
                    Conv2d(num_filters=64, kernel_size=3, strides=1),
                    Conv2d(num_filters=128, kernel_size=3, strides=2),
                    Conv2d(num_filters=128, kernel_size=3, strides=1),
                    Conv2d(num_filters=256, kernel_size=3, strides=2),
                    Conv2d(num_filters=256, kernel_size=3, strides=1)
                ]
        }

    def hybrid_forward(self, F: ModuleType, x: nd_sym_type, *args, **kwargs) -> nd_sym_type:
        """
        Used for forward pass through embedder network.

        :param F: backend api, either `mxnet.nd` or `mxnet.sym` (if block has been hybridized).
        :param x: image representing environment state, of shape (batch_size, in_channels, height, width).
        :return: embedding of environment state, of shape (batch_size, channels).
        :raises ValueError: if the input exposes a shape that does not have 4 dimensions and the scheme is not
            EmbedderScheme.Empty.
        """
        # The rank check is only possible when the input carries a concrete shape. On the symbolic path
        # (hybridized block) `x` is a Symbol, which does not expose `.shape`, so the check is skipped there
        # rather than failing with an AttributeError.
        shape = getattr(x, 'shape', None)
        if shape is not None and len(shape) != 4 and self.scheme != EmbedderScheme.Empty:
            raise ValueError("Image embedders expect the input size to have 4 dimensions. The given size is: {}"
                             .format(shape))
        return super(ImageEmbedder, self).hybrid_forward(F, x, *args, **kwargs)
|