From 3a0a1159e978925f03215e5794f0780d6e56671d Mon Sep 17 00:00:00 2001
From: Itai Caspi <30383381+itaicaspi-intel@users.noreply.github.com>
Date: Thu, 8 Nov 2018 16:53:47 +0200
Subject: [PATCH] fixing the dropout rate code (#72)

addresses issue #53
---
 rl_coach/architectures/embedder_parameters.py       |  6 +++---
 rl_coach/architectures/middleware_parameters.py     | 12 ++++++------
 .../mxnet_components/embedders/embedder.py          |  4 ++--
 .../mxnet_components/middlewares/middleware.py      |  4 ++--
 .../tensorflow_components/embedders/embedder.py     |  7 +++----
 .../embedders/image_embedder.py                     |  4 ++--
 .../embedders/vector_embedder.py                    |  6 +++---
 .../architectures/tensorflow_components/layers.py   |  6 +++---
 .../middlewares/fc_middleware.py                    |  5 +++--
 .../middlewares/lstm_middleware.py                  |  5 +++--
 .../tensorflow_components/middlewares/middleware.py |  7 +++----
 11 files changed, 33 insertions(+), 33 deletions(-)

diff --git a/rl_coach/architectures/embedder_parameters.py b/rl_coach/architectures/embedder_parameters.py
index 2731a52..2973a3a 100644
--- a/rl_coach/architectures/embedder_parameters.py
+++ b/rl_coach/architectures/embedder_parameters.py
@@ -21,13 +21,13 @@ from rl_coach.base_parameters import EmbedderScheme, NetworkComponentParameters
 
 class InputEmbedderParameters(NetworkComponentParameters):
     def __init__(self, activation_function: str='relu', scheme: Union[List, EmbedderScheme]=EmbedderScheme.Medium,
-                 batchnorm: bool=False, dropout=False, name: str='embedder', input_rescaling=None, input_offset=None,
-                 input_clipping=None, dense_layer=None, is_training=False):
+                 batchnorm: bool=False, dropout_rate: float=0.0, name: str='embedder', input_rescaling=None,
+                 input_offset=None, input_clipping=None, dense_layer=None, is_training=False):
         super().__init__(dense_layer=dense_layer)
         self.activation_function = activation_function
         self.scheme = scheme
         self.batchnorm = batchnorm
-        self.dropout = dropout
+        self.dropout_rate = dropout_rate
 
         if input_rescaling is None:
             input_rescaling = {'image': 255.0, 'vector': 1.0}
diff --git a/rl_coach/architectures/middleware_parameters.py b/rl_coach/architectures/middleware_parameters.py
index 711ec06..40533cd 100644
--- a/rl_coach/architectures/middleware_parameters.py
+++ b/rl_coach/architectures/middleware_parameters.py
@@ -22,12 +22,12 @@ from rl_coach.base_parameters import MiddlewareScheme, NetworkComponentParameter
 class MiddlewareParameters(NetworkComponentParameters):
     def __init__(self, parameterized_class_name: str,
                  activation_function: str='relu', scheme: Union[List, MiddlewareScheme]=MiddlewareScheme.Medium,
-                 batchnorm: bool=False, dropout: bool=False, name='middleware', dense_layer=None, is_training=False):
+                 batchnorm: bool=False, dropout_rate: float=0.0, name='middleware', dense_layer=None, is_training=False):
         super().__init__(dense_layer=dense_layer)
         self.activation_function = activation_function
         self.scheme = scheme
         self.batchnorm = batchnorm
-        self.dropout = dropout
+        self.dropout_rate = dropout_rate
         self.name = name
         self.is_training = is_training
         self.parameterized_class_name = parameterized_class_name
@@ -36,19 +36,19 @@ class MiddlewareParameters(NetworkComponentParameters):
 
 class FCMiddlewareParameters(MiddlewareParameters):
     def __init__(self, activation_function='relu', scheme: Union[List, MiddlewareScheme] = MiddlewareScheme.Medium,
-                 batchnorm: bool = False, dropout: bool = False,
+                 batchnorm: bool = False, dropout_rate: float = 0.0,
                  name="middleware_fc_embedder", dense_layer=None, is_training=False):
         super().__init__(parameterized_class_name="FCMiddleware", activation_function=activation_function,
-                         scheme=scheme, batchnorm=batchnorm, dropout=dropout, name=name, dense_layer=dense_layer,
+                         scheme=scheme, batchnorm=batchnorm, dropout_rate=dropout_rate, name=name, dense_layer=dense_layer,
                          is_training=is_training)
 
 
 class LSTMMiddlewareParameters(MiddlewareParameters):
     def __init__(self, activation_function='relu', number_of_lstm_cells=256,
                  scheme: MiddlewareScheme = MiddlewareScheme.Medium,
-                 batchnorm: bool = False, dropout: bool = False,
+                 batchnorm: bool = False, dropout_rate: float = 0.0,
                  name="middleware_lstm_embedder", dense_layer=None, is_training=False):
         super().__init__(parameterized_class_name="LSTMMiddleware", activation_function=activation_function,
-                         scheme=scheme, batchnorm=batchnorm, dropout=dropout, name=name, dense_layer=dense_layer,
+                         scheme=scheme, batchnorm=batchnorm, dropout_rate=dropout_rate, name=name, dense_layer=dense_layer,
                          is_training=is_training)
         self.number_of_lstm_cells = number_of_lstm_cells
\ No newline at end of file
diff --git a/rl_coach/architectures/mxnet_components/embedders/embedder.py b/rl_coach/architectures/mxnet_components/embedders/embedder.py
index c2b6340..7a92855 100644
--- a/rl_coach/architectures/mxnet_components/embedders/embedder.py
+++ b/rl_coach/architectures/mxnet_components/embedders/embedder.py
@@ -39,8 +39,8 @@ class InputEmbedder(nn.HybridBlock):
                     self.net.add(nn.BatchNorm())
                 if params.activation_function:
                     self.net.add(nn.Activation(params.activation_function))
-                if params.dropout:
-                    self.net.add(nn.Dropout(rate=params.dropout))
+                if params.dropout_rate:
+                    self.net.add(nn.Dropout(rate=params.dropout_rate))
 
     @property
     def schemes(self) -> dict:
diff --git a/rl_coach/architectures/mxnet_components/middlewares/middleware.py b/rl_coach/architectures/mxnet_components/middlewares/middleware.py
index 8b9db01..dd31b38 100644
--- a/rl_coach/architectures/mxnet_components/middlewares/middleware.py
+++ b/rl_coach/architectures/mxnet_components/middlewares/middleware.py
@@ -36,8 +36,8 @@ class Middleware(nn.HybridBlock):
                     self.net.add(nn.BatchNorm())
                 if params.activation_function:
                     self.net.add(nn.Activation(params.activation_function))
-                if params.dropout:
-                    self.net.add(nn.Dropout(rate=params.dropout))
+                if params.dropout_rate:
+                    self.net.add(nn.Dropout(rate=params.dropout_rate))
 
     @property
     def schemes(self) -> dict:
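With the parameter classes above, presets opt into dropout by passing a non-zero rate rather than a boolean flag; the rate defaults to 0.0 (off). A minimal usage sketch of the new call sites: the imports match the files changed in this patch, while the variable names and the 0.1/0.2 rates are illustrative, not values introduced by the patch:

    from rl_coach.architectures.embedder_parameters import InputEmbedderParameters
    from rl_coach.architectures.middleware_parameters import FCMiddlewareParameters

    # the old dropout=True/False keyword no longer exists; a positive rate switches dropout on
    embedder_params = InputEmbedderParameters(dropout_rate=0.1)
    middleware_params = FCMiddlewareParameters(dropout_rate=0.2)
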
diff --git a/rl_coach/architectures/tensorflow_components/embedders/embedder.py b/rl_coach/architectures/tensorflow_components/embedders/embedder.py
index 967b1ba..8a7e7ff 100644
--- a/rl_coach/architectures/tensorflow_components/embedders/embedder.py
+++ b/rl_coach/architectures/tensorflow_components/embedders/embedder.py
@@ -34,15 +34,14 @@ class InputEmbedder(object):
     can be multiple embedders in a single network
     """
     def __init__(self, input_size: List[int], activation_function=tf.nn.relu,
-                 scheme: EmbedderScheme=None, batchnorm: bool=False, dropout: bool=False,
+                 scheme: EmbedderScheme=None, batchnorm: bool=False, dropout_rate: float=0.0,
                  name: str= "embedder", input_rescaling=1.0, input_offset=0.0, input_clipping=None,
                  dense_layer=Dense, is_training=False):
         self.name = name
         self.input_size = input_size
         self.activation_function = activation_function
         self.batchnorm = batchnorm
-        self.dropout = dropout
-        self.dropout_rate = 0
+        self.dropout_rate = dropout_rate
         self.input = None
         self.output = None
         self.scheme = scheme
@@ -68,7 +67,7 @@ class InputEmbedder(object):
         # we allow adding batchnorm, dropout or activation functions after each layer.
         # The motivation is to simplify the transition between a network with batchnorm and a network without
         # batchnorm to a single flag (the same applies to activation function and dropout)
-        if self.batchnorm or self.activation_function or self.dropout:
+        if self.batchnorm or self.activation_function or self.dropout_rate > 0:
             for layer_idx in reversed(range(len(self.layers_params))):
                 self.layers_params.insert(layer_idx+1,
                                           BatchnormActivationDropout(batchnorm=self.batchnorm,
diff --git a/rl_coach/architectures/tensorflow_components/embedders/image_embedder.py b/rl_coach/architectures/tensorflow_components/embedders/image_embedder.py
index b28d7f9..b05ec8e 100644
--- a/rl_coach/architectures/tensorflow_components/embedders/image_embedder.py
+++ b/rl_coach/architectures/tensorflow_components/embedders/image_embedder.py
@@ -32,10 +32,10 @@ class ImageEmbedder(InputEmbedder):
     """
 
     def __init__(self, input_size: List[int], activation_function=tf.nn.relu,
-                 scheme: EmbedderScheme=EmbedderScheme.Medium, batchnorm: bool=False, dropout: bool=False,
+                 scheme: EmbedderScheme=EmbedderScheme.Medium, batchnorm: bool=False, dropout_rate: float=0.0,
                  name: str= "embedder", input_rescaling: float=255.0, input_offset: float=0.0, input_clipping=None,
                  dense_layer=Dense, is_training=False):
-        super().__init__(input_size, activation_function, scheme, batchnorm, dropout, name, input_rescaling,
+        super().__init__(input_size, activation_function, scheme, batchnorm, dropout_rate, name, input_rescaling,
                          input_offset, input_clipping, dense_layer=dense_layer, is_training=is_training)
         self.return_type = InputImageEmbedding
         if len(input_size) != 3 and scheme != EmbedderScheme.Empty:
diff --git a/rl_coach/architectures/tensorflow_components/embedders/vector_embedder.py b/rl_coach/architectures/tensorflow_components/embedders/vector_embedder.py
index 625aab7..60b728d 100644
--- a/rl_coach/architectures/tensorflow_components/embedders/vector_embedder.py
+++ b/rl_coach/architectures/tensorflow_components/embedders/vector_embedder.py
@@ -31,10 +31,10 @@ class VectorEmbedder(InputEmbedder):
     """
 
     def __init__(self, input_size: List[int], activation_function=tf.nn.relu,
-                 scheme: EmbedderScheme=EmbedderScheme.Medium, batchnorm: bool=False, dropout: bool=False,
-                 name: str= "embedder", input_rescaling: float=1.0, input_offset:float=0.0, input_clipping=None,
+                 scheme: EmbedderScheme=EmbedderScheme.Medium, batchnorm: bool=False, dropout_rate: float=0.0,
+                 name: str= "embedder", input_rescaling: float=1.0, input_offset: float=0.0, input_clipping=None,
                  dense_layer=Dense, is_training=False):
-        super().__init__(input_size, activation_function, scheme, batchnorm, dropout, name,
+        super().__init__(input_size, activation_function, scheme, batchnorm, dropout_rate, name,
                          input_rescaling, input_offset, input_clipping, dense_layer=dense_layer,
                          is_training=is_training)
 
diff --git a/rl_coach/architectures/tensorflow_components/layers.py b/rl_coach/architectures/tensorflow_components/layers.py
index f9dc356..1156937 100644
--- a/rl_coach/architectures/tensorflow_components/layers.py
+++ b/rl_coach/architectures/tensorflow_components/layers.py
@@ -8,7 +8,7 @@ from rl_coach.architectures import layers
 from rl_coach.architectures.tensorflow_components import utils
 
 
-def batchnorm_activation_dropout(input_layer, batchnorm, activation_function, dropout, dropout_rate, is_training, name):
+def batchnorm_activation_dropout(input_layer, batchnorm, activation_function, dropout_rate, is_training, name):
     layers = [input_layer]
 
     # batchnorm
@@ -26,7 +26,7 @@ def batchnorm_activation_dropout(input_layer, batchnorm, activation_function, dr
         )
 
     # dropout
-    if dropout:
+    if dropout_rate > 0:
         layers.append(
             tf.layers.dropout(layers[-1], dropout_rate, name="{}_dropout".format(name), training=is_training)
         )
@@ -100,7 +100,7 @@ class BatchnormActivationDropout(layers.BatchnormActivationDropout):
         """
         return batchnorm_activation_dropout(input_layer, batchnorm=self.batchnorm,
                                             activation_function=self.activation_function,
-                                            dropout=self.dropout_rate > 0, dropout_rate=self.dropout_rate,
+                                            dropout_rate=self.dropout_rate,
                                             is_training=is_training, name=name)
 
     @staticmethod
diff --git a/rl_coach/architectures/tensorflow_components/middlewares/fc_middleware.py b/rl_coach/architectures/tensorflow_components/middlewares/fc_middleware.py
index f85db82..816674a 100644
--- a/rl_coach/architectures/tensorflow_components/middlewares/fc_middleware.py
+++ b/rl_coach/architectures/tensorflow_components/middlewares/fc_middleware.py
@@ -27,10 +27,11 @@ from rl_coach.utils import force_list
 class FCMiddleware(Middleware):
     def __init__(self, activation_function=tf.nn.relu,
                  scheme: MiddlewareScheme = MiddlewareScheme.Medium,
-                 batchnorm: bool = False, dropout: bool = False,
+                 batchnorm: bool = False, dropout_rate: float = 0.0,
                  name="middleware_fc_embedder", dense_layer=Dense, is_training=False):
         super().__init__(activation_function=activation_function, batchnorm=batchnorm,
-                         dropout=dropout, scheme=scheme, name=name, dense_layer=dense_layer, is_training=is_training)
+                         dropout_rate=dropout_rate, scheme=scheme, name=name, dense_layer=dense_layer,
+                         is_training=is_training)
         self.return_type = Middleware_FC_Embedding
         self.layers = []
 
diff --git a/rl_coach/architectures/tensorflow_components/middlewares/lstm_middleware.py b/rl_coach/architectures/tensorflow_components/middlewares/lstm_middleware.py
index 7c4a1b0..6b7f97d 100644
--- a/rl_coach/architectures/tensorflow_components/middlewares/lstm_middleware.py
+++ b/rl_coach/architectures/tensorflow_components/middlewares/lstm_middleware.py
@@ -28,10 +28,11 @@ from rl_coach.utils import force_list
 class LSTMMiddleware(Middleware):
     def __init__(self, activation_function=tf.nn.relu, number_of_lstm_cells: int=256,
                  scheme: MiddlewareScheme = MiddlewareScheme.Medium,
-                 batchnorm: bool = False, dropout: bool = False,
+                 batchnorm: bool = False, dropout_rate: float = 0.0,
                  name="middleware_lstm_embedder", dense_layer=Dense, is_training=False):
         super().__init__(activation_function=activation_function, batchnorm=batchnorm,
-                         dropout=dropout, scheme=scheme, name=name, dense_layer=dense_layer, is_training=is_training)
+                         dropout_rate=dropout_rate, scheme=scheme, name=name, dense_layer=dense_layer,
+                         is_training=is_training)
         self.return_type = Middleware_LSTM_Embedding
         self.number_of_lstm_cells = number_of_lstm_cells
         self.layers = []
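Carrying the rate through also makes the old boolean gate redundant: a dropout layer built with rate 0.0 keeps every unit, so the rate itself can serve as the on/off switch. A short sketch of the gate now used in layers.py (the surrounding layers, name and is_training variables come from batchnorm_activation_dropout in the hunk above):

    # dropout is only appended when the configured rate can actually drop something
    if dropout_rate > 0:
        layers.append(
            tf.layers.dropout(layers[-1], dropout_rate, name="{}_dropout".format(name), training=is_training)
        )
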
diff --git a/rl_coach/architectures/tensorflow_components/middlewares/middleware.py b/rl_coach/architectures/tensorflow_components/middlewares/middleware.py
index bb10ea9..17c6a2f 100644
--- a/rl_coach/architectures/tensorflow_components/middlewares/middleware.py
+++ b/rl_coach/architectures/tensorflow_components/middlewares/middleware.py
@@ -31,15 +31,14 @@ class Middleware(object):
     """
     def __init__(self, activation_function=tf.nn.relu,
                  scheme: MiddlewareScheme = MiddlewareScheme.Medium,
-                 batchnorm: bool = False, dropout: bool = False, name="middleware_embedder", dense_layer=Dense,
+                 batchnorm: bool = False, dropout_rate: float = 0.0, name="middleware_embedder", dense_layer=Dense,
                  is_training=False):
         self.name = name
         self.input = None
         self.output = None
         self.activation_function = activation_function
         self.batchnorm = batchnorm
-        self.dropout = dropout
-        self.dropout_rate = 0
+        self.dropout_rate = dropout_rate
         self.scheme = scheme
         self.return_type = MiddlewareEmbedding
         self.dense_layer = dense_layer
@@ -58,7 +57,7 @@ class Middleware(object):
         # we allow adding batchnorm, dropout or activation functions after each layer.
         # The motivation is to simplify the transition between a network with batchnorm and a network without
         # batchnorm to a single flag (the same applies to activation function and dropout)
-        if self.batchnorm or self.activation_function or self.dropout:
+        if self.batchnorm or self.activation_function or self.dropout_rate > 0:
             for layer_idx in reversed(range(len(self.layers_params))):
                 self.layers_params.insert(layer_idx+1,
                                           BatchnormActivationDropout(batchnorm=self.batchnorm,
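Beyond the renaming, the behavioral fix visible in the diff is in the TensorFlow InputEmbedder and Middleware constructors: the boolean flag was stored, but the rate the dropout layers actually read was hardcoded to 0, so enabling dropout had no effect. In rough outline:

    # before: the flag is remembered, but the layers are always built with rate 0
    self.dropout = dropout
    self.dropout_rate = 0

    # after: the caller-supplied rate is stored and reaches tf.layers.dropout / nn.Dropout
    self.dropout_rate = dropout_rate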