1
0
mirror of https://github.com/gryf/coach.git synced 2025-12-18 19:50:17 +01:00

parameter noise exploration - using Noisy Nets

This commit is contained in:
Gal Leibovich
2018-08-27 18:19:01 +03:00
parent 658b437079
commit 1aa2ab0590
49 changed files with 536 additions and 433 deletions

View File

@@ -27,42 +27,18 @@ class FCMiddlewareParameters(MiddlewareParameters):
def __init__(self, activation_function='relu',
scheme: Union[List, MiddlewareScheme] = MiddlewareScheme.Medium,
batchnorm: bool = False, dropout: bool = False,
name="middleware_fc_embedder"):
name="middleware_fc_embedder", dense_layer=Dense):
super().__init__(parameterized_class=FCMiddleware, activation_function=activation_function,
scheme=scheme, batchnorm=batchnorm, dropout=dropout, name=name)
scheme=scheme, batchnorm=batchnorm, dropout=dropout, name=name, dense_layer=dense_layer)
class FCMiddleware(Middleware):
schemes = {
MiddlewareScheme.Empty:
[],
# ppo
MiddlewareScheme.Shallow:
[
Dense([64])
],
# dqn
MiddlewareScheme.Medium:
[
Dense([512])
],
MiddlewareScheme.Deep: \
[
Dense([128]),
Dense([128]),
Dense([128])
]
}
def __init__(self, activation_function=tf.nn.relu,
scheme: MiddlewareScheme = MiddlewareScheme.Medium,
batchnorm: bool = False, dropout: bool = False,
name="middleware_fc_embedder"):
name="middleware_fc_embedder", dense_layer=Dense):
super().__init__(activation_function=activation_function, batchnorm=batchnorm,
dropout=dropout, scheme=scheme, name=name)
dropout=dropout, scheme=scheme, name=name, dense_layer=dense_layer)
self.return_type = Middleware_FC_Embedding
self.layers = []
@@ -70,7 +46,7 @@ class FCMiddleware(Middleware):
self.layers.append(self.input)
if isinstance(self.scheme, MiddlewareScheme):
layers_params = FCMiddleware.schemes[self.scheme]
layers_params = self.schemes[self.scheme]
else:
layers_params = self.scheme
for idx, layer_params in enumerate(layers_params):
@@ -84,3 +60,29 @@ class FCMiddleware(Middleware):
self.output = self.layers[-1]
@property
def schemes(self):
    """
    Preset layer configurations for the fully connected middleware.

    A new dictionary, holding newly constructed ``self.dense_layer`` objects,
    is built on every access.

    :return: a dict mapping each MiddlewareScheme preset to its list of layers
    """
    make_layer = self.dense_layer

    presets = dict()
    presets[MiddlewareScheme.Empty] = []
    # ppo
    presets[MiddlewareScheme.Shallow] = [make_layer([64])]
    # dqn
    presets[MiddlewareScheme.Medium] = [make_layer([512])]
    # three stacked layers of equal width
    presets[MiddlewareScheme.Deep] = [make_layer([128]) for _ in range(3)]
    return presets

View File

@@ -18,7 +18,7 @@
import numpy as np
import tensorflow as tf
from rl_coach.architectures.tensorflow_components.architecture import batchnorm_activation_dropout
from rl_coach.architectures.tensorflow_components.architecture import batchnorm_activation_dropout, Dense
from rl_coach.architectures.tensorflow_components.middlewares.middleware import Middleware, MiddlewareParameters
from rl_coach.base_parameters import MiddlewareScheme
from rl_coach.core_types import Middleware_LSTM_Embedding
@@ -28,43 +28,19 @@ class LSTMMiddlewareParameters(MiddlewareParameters):
def __init__(self, activation_function='relu', number_of_lstm_cells=256,
scheme: MiddlewareScheme = MiddlewareScheme.Medium,
batchnorm: bool = False, dropout: bool = False,
name="middleware_lstm_embedder"):
name="middleware_lstm_embedder", dense_layer=Dense):
super().__init__(parameterized_class=LSTMMiddleware, activation_function=activation_function,
scheme=scheme, batchnorm=batchnorm, dropout=dropout, name=name)
scheme=scheme, batchnorm=batchnorm, dropout=dropout, name=name, dense_layer=dense_layer)
self.number_of_lstm_cells = number_of_lstm_cells
class LSTMMiddleware(Middleware):
schemes = {
MiddlewareScheme.Empty:
[],
# ppo
MiddlewareScheme.Shallow:
[
[64]
],
# dqn
MiddlewareScheme.Medium:
[
[512]
],
MiddlewareScheme.Deep: \
[
[128],
[128],
[128]
]
}
def __init__(self, activation_function=tf.nn.relu, number_of_lstm_cells: int=256,
scheme: MiddlewareScheme = MiddlewareScheme.Medium,
batchnorm: bool = False, dropout: bool = False,
name="middleware_lstm_embedder"):
name="middleware_lstm_embedder", dense_layer=Dense):
super().__init__(activation_function=activation_function, batchnorm=batchnorm,
dropout=dropout, scheme=scheme, name=name)
dropout=dropout, scheme=scheme, name=name, dense_layer=dense_layer)
self.return_type = Middleware_LSTM_Embedding
self.number_of_lstm_cells = number_of_lstm_cells
self.layers = []
@@ -83,7 +59,7 @@ class LSTMMiddleware(Middleware):
# optionally insert some dense layers before the LSTM
if isinstance(self.scheme, MiddlewareScheme):
layers_params = LSTMMiddleware.schemes[self.scheme]
layers_params = self.schemes[self.scheme]
else:
layers_params = self.scheme
for idx, layer_params in enumerate(layers_params):
@@ -111,3 +87,30 @@ class LSTMMiddleware(Middleware):
lstm_c, lstm_h = lstm_state
self.state_out = (lstm_c[:1, :], lstm_h[:1, :])
self.output = tf.reshape(lstm_outputs, [-1, self.number_of_lstm_cells])
@property
def schemes(self):
    """
    Preset layer configurations for the LSTM middleware.

    Each preset is a list of single-element lists. A new dictionary is built
    on every access.

    :return: a dict mapping each MiddlewareScheme preset to its list of layer parameters
    """
    presets = dict()
    presets[MiddlewareScheme.Empty] = []
    # ppo
    presets[MiddlewareScheme.Shallow] = [[64]]
    # dqn
    presets[MiddlewareScheme.Medium] = [[512]]
    # three entries of equal size
    presets[MiddlewareScheme.Deep] = [[128] for _ in range(3)]
    return presets

View File

@@ -17,16 +17,16 @@ from typing import Type, Union, List
import tensorflow as tf
from rl_coach.base_parameters import MiddlewareScheme, Parameters
from rl_coach.architectures.tensorflow_components.architecture import Dense
from rl_coach.base_parameters import MiddlewareScheme, Parameters, NetworkComponentParameters
from rl_coach.core_types import MiddlewareEmbedding
class MiddlewareParameters(Parameters):
class MiddlewareParameters(NetworkComponentParameters):
def __init__(self, parameterized_class: Type['Middleware'],
activation_function: str='relu', scheme: Union[List, MiddlewareScheme]=MiddlewareScheme.Medium,
batchnorm: bool=False, dropout: bool=False,
name='middleware'):
super().__init__()
batchnorm: bool=False, dropout: bool=False, name='middleware', dense_layer=Dense):
super().__init__(dense_layer=dense_layer)
self.activation_function = activation_function
self.scheme = scheme
self.batchnorm = batchnorm
@@ -43,7 +43,7 @@ class Middleware(object):
"""
def __init__(self, activation_function=tf.nn.relu,
scheme: MiddlewareScheme = MiddlewareScheme.Medium,
batchnorm: bool = False, dropout: bool = False, name="middleware_embedder"):
batchnorm: bool = False, dropout: bool = False, name="middleware_embedder", dense_layer=Dense):
self.name = name
self.input = None
self.output = None
@@ -53,6 +53,7 @@ class Middleware(object):
self.dropout_rate = 0
self.scheme = scheme
self.return_type = MiddlewareEmbedding
self.dense_layer = dense_layer
def __call__(self, input_layer):
with tf.variable_scope(self.get_name()):
@@ -66,3 +67,8 @@ class Middleware(object):
def get_name(self):
    """
    Get the name stored on this middleware.

    :return: the value of the ``name`` attribute
    """
    middleware_name = self.name
    return middleware_name
@property
def schemes(self):
    """
    Placeholder for the preset configurations of a middleware.

    This base implementation always raises; inheriting middlewares are
    expected to provide their own ``schemes``.

    :raises NotImplementedError: on every access
    """
    message = ("Inheriting middleware must define schemes matching its allowed default "
               "configurations.")
    raise NotImplementedError(message)