Mirror of https://github.com/gryf/coach.git (synced 2026-02-19 07:55:49 +01:00)

Commit: merge of the 'network_imporvements' branch
@@ -13,9 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
import math
import time
from typing import List, Union

import numpy as np
import tensorflow as tf
@@ -27,135 +25,6 @@ from rl_coach.spaces import SpacesDefinition
from rl_coach.utils import force_list, squeeze_list


def batchnorm_activation_dropout(input_layer, batchnorm, activation_function, dropout, dropout_rate, layer_idx):
    layers = [input_layer]

    # batchnorm
    if batchnorm:
        layers.append(
            tf.layers.batch_normalization(layers[-1], name="batchnorm{}".format(layer_idx))
        )

    # activation
    if activation_function:
        layers.append(
            activation_function(layers[-1], name="activation{}".format(layer_idx))
        )

    # dropout
    if dropout:
        layers.append(
            tf.layers.dropout(layers[-1], dropout_rate, name="dropout{}".format(layer_idx))
        )

    # remove the input layer from the layers list
    del layers[0]

    return layers


class Conv2d(object):
    def __init__(self, params: List):
        """
        :param params: list of [num_filters, kernel_size, strides]
        """
        self.params = params

    def __call__(self, input_layer, name: str=None):
        """
        returns a tensorflow conv2d layer
        :param input_layer: previous layer
        :param name: layer name
        :return: conv2d layer
        """
        return tf.layers.conv2d(input_layer, filters=self.params[0], kernel_size=self.params[1], strides=self.params[2],
                                data_format='channels_last', name=name)


class Dense(object):
    def __init__(self, params: Union[List, int]):
        """
        :param params: list of [num_output_neurons]
        """
        self.params = force_list(params)

    def __call__(self, input_layer, name: str=None, kernel_initializer=None, activation=None):
        """
        returns a tensorflow dense layer
        :param input_layer: previous layer
        :param name: layer name
        :return: dense layer
        """
        return tf.layers.dense(input_layer, self.params[0], name=name, kernel_initializer=kernel_initializer,
                               activation=activation)


class NoisyNetDense(object):
    """
    A factorized Noisy Net layer

    https://arxiv.org/abs/1706.10295.
    """

    def __init__(self, params: List):
        """
        :param params: list of [num_output_neurons]
        """
        self.params = force_list(params)
        self.sigma0 = 0.5

    def __call__(self, input_layer, name: str, kernel_initializer=None, activation=None):
        """
        returns a NoisyNet dense layer
        :param input_layer: previous layer
        :param name: layer name
        :param kernel_initializer: initializer for kernels. Default is to use Gaussian noise that preserves stddev.
        :param activation: the activation function
        :return: dense layer
        """
        # TODO: noise sampling should be externally controlled. DQN is fine with sampling noise for every
        # forward (either act or train, both for online and target networks).
        # A3C, on the other hand, should sample noise only when policy changes (i.e. after every t_max steps)

        num_inputs = input_layer.get_shape()[-1].value
        num_outputs = self.params[0]

        stddev = 1 / math.sqrt(num_inputs)
        activation = activation if activation is not None else (lambda x: x)

        if kernel_initializer is None:
            kernel_mean_initializer = tf.random_uniform_initializer(-stddev, stddev)
            kernel_stddev_initializer = tf.random_uniform_initializer(-stddev * self.sigma0, stddev * self.sigma0)
        else:
            kernel_mean_initializer = kernel_stddev_initializer = kernel_initializer
        with tf.variable_scope(None, default_name=name):
            weight_mean = tf.get_variable('weight_mean', shape=(num_inputs, num_outputs),
                                          initializer=kernel_mean_initializer)
            bias_mean = tf.get_variable('bias_mean', shape=(num_outputs,), initializer=tf.zeros_initializer())

            weight_stddev = tf.get_variable('weight_stddev', shape=(num_inputs, num_outputs),
                                            initializer=kernel_stddev_initializer)
            bias_stddev = tf.get_variable('bias_stddev', shape=(num_outputs,),
                                          initializer=kernel_stddev_initializer)
            bias_noise = self.f(tf.random_normal((num_outputs,)))
            weight_noise = self.factorized_noise(num_inputs, num_outputs)

        bias = bias_mean + bias_stddev * bias_noise
        weight = weight_mean + weight_stddev * weight_noise
        return activation(tf.matmul(input_layer, weight) + bias)

    def factorized_noise(self, inputs, outputs):
        # TODO: use factorized noise only for compute-intensive algos (e.g. DQN);
        # lighter algos (e.g. A3C) should not use it
        noise1 = self.f(tf.random_normal((inputs, 1)))
        noise2 = self.f(tf.random_normal((1, outputs)))
        return tf.matmul(noise1, noise2)

    @staticmethod
    def f(values):
        return tf.sqrt(tf.abs(values)) * tf.sign(values)
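
As a side note on the factorized-noise math above, a minimal NumPy sketch (illustrative only, not part of this commit): each weight-noise entry is the product of per-input and per-output Gaussian samples passed through f(x) = sign(x) * sqrt(|x|), so an m x n perturbation needs only m + n samples instead of m * n.

import numpy as np

def f(values):
    # same transform as NoisyNetDense.f: sign(x) * sqrt(|x|)
    return np.sqrt(np.abs(values)) * np.sign(values)

def factorized_noise(num_inputs, num_outputs):
    # outer product of transformed row and column noise -> (num_inputs, num_outputs)
    noise_in = f(np.random.standard_normal((num_inputs, 1)))
    noise_out = f(np.random.standard_normal((1, num_outputs)))
    return noise_in @ noise_out

eps_w = factorized_noise(4, 3)
print(eps_w.shape)  # (4, 3), built from 4 + 3 Gaussian samples rather than 12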


def variable_summaries(var):
    """Attach a lot of summaries to a Tensor (for TensorBoard visualization)."""
    with tf.name_scope('summaries'):
@@ -720,6 +589,14 @@ class TensorFlowArchitecture(Architecture):
        """
        self.sess.run(assign_op, feed_dict={placeholder: value})

    def set_is_training(self, state: bool):
        """
        Set the phase of the network between training and testing
        :param state: The current state (True = Training, False = Testing)
        :return: None
        """
        self.set_variable_value(self.assign_is_training, state, self.is_training_placeholder)

    def reset_internal_memory(self):
        """
        Reset any internal memory used by the network. For example, an LSTM internal state
@@ -728,4 +605,4 @@ class TensorFlowArchitecture(Architecture):
        # initialize LSTM hidden states
        if self.middleware.__class__.__name__ == 'LSTMMiddleware':
            self.curr_rnn_c_in = self.middleware.c_init
            self.curr_rnn_h_in = self.middleware.h_init

@@ -15,20 +15,23 @@
#

from typing import List, Union
import copy

import numpy as np
import tensorflow as tf

from rl_coach.architectures.tensorflow_components.architecture import batchnorm_activation_dropout, Dense
from rl_coach.architectures.tensorflow_components.layers import batchnorm_activation_dropout, Dense, \
    BatchnormActivationDropout
from rl_coach.base_parameters import EmbedderScheme, NetworkComponentParameters

from rl_coach.core_types import InputEmbedding
from rl_coach.utils import force_list


class InputEmbedderParameters(NetworkComponentParameters):
    def __init__(self, activation_function: str='relu', scheme: Union[List, EmbedderScheme]=EmbedderScheme.Medium,
                 batchnorm: bool=False, dropout=False, name: str='embedder', input_rescaling=None, input_offset=None,
                 input_clipping=None, dense_layer=Dense):
                 input_clipping=None, dense_layer=Dense, is_training=False):
        super().__init__(dense_layer=dense_layer)
        self.activation_function = activation_function
        self.scheme = scheme
@@ -44,6 +47,7 @@ class InputEmbedderParameters(NetworkComponentParameters):
        self.input_offset = input_offset
        self.input_clipping = input_clipping
        self.name = name
        self.is_training = is_training

    @property
    def path(self):
@@ -61,7 +65,8 @@ class InputEmbedder(object):
    """
    def __init__(self, input_size: List[int], activation_function=tf.nn.relu,
                 scheme: EmbedderScheme=None, batchnorm: bool=False, dropout: bool=False,
                 name: str= "embedder", input_rescaling=1.0, input_offset=0.0, input_clipping=None, dense_layer=Dense):
                 name: str= "embedder", input_rescaling=1.0, input_offset=0.0, input_clipping=None, dense_layer=Dense,
                 is_training=False):
        self.name = name
        self.input_size = input_size
        self.activation_function = activation_function
@@ -72,11 +77,29 @@ class InputEmbedder(object):
        self.output = None
        self.scheme = scheme
        self.return_type = InputEmbedding
        self.layers_params = []
        self.layers = []
        self.input_rescaling = input_rescaling
        self.input_offset = input_offset
        self.input_clipping = input_clipping
        self.dense_layer = dense_layer
        self.is_training = is_training

        # layers order is conv -> batchnorm -> activation -> dropout
        if isinstance(self.scheme, EmbedderScheme):
            self.layers_params = copy.copy(self.schemes[self.scheme])
        else:
            self.layers_params = copy.copy(self.scheme)

        # we allow adding batchnorm, dropout or activation functions after each layer.
        # The motivation is to simplify the transition between a network with batchnorm and a network without
        # batchnorm to a single flag (the same applies to activation function and dropout)
        if self.batchnorm or self.activation_function or self.dropout:
            for layer_idx in reversed(range(len(self.layers_params))):
                self.layers_params.insert(layer_idx+1,
                                          BatchnormActivationDropout(batchnorm=self.batchnorm,
                                                                     activation_function=self.activation_function,
                                                                     dropout_rate=self.dropout_rate))
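
To see what the reversed-insert loop above produces, a small illustrative sketch (stand-in strings instead of real layer parameter objects): walking the indices backwards keeps earlier insertion points valid, and the result interleaves a BatchnormActivationDropout entry after every original layer.

layers_params = ['conv_0', 'conv_1']
for layer_idx in reversed(range(len(layers_params))):
    # reverse order so positions computed from the original list stay correct
    layers_params.insert(layer_idx + 1, 'batchnorm_activation_dropout')
print(layers_params)
# ['conv_0', 'batchnorm_activation_dropout', 'conv_1', 'batchnorm_activation_dropout']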

    def __call__(self, prev_input_placeholder=None):
        with tf.variable_scope(self.get_name()):
@@ -102,19 +125,11 @@ class InputEmbedder(object):

            self.layers.append(input_layer)

            # layers order is conv -> batchnorm -> activation -> dropout
            if isinstance(self.scheme, EmbedderScheme):
                layers_params = self.schemes[self.scheme]
            else:
                layers_params = self.scheme
            for idx, layer_params in enumerate(layers_params):
                self.layers.append(
                    layer_params(input_layer=self.layers[-1], name='{}_{}'.format(layer_params.__class__.__name__, idx))
                )

                self.layers.extend(batchnorm_activation_dropout(self.layers[-1], self.batchnorm,
                                                                self.activation_function, self.dropout,
                                                                self.dropout_rate, idx))
            for idx, layer_params in enumerate(self.layers_params):
                self.layers.extend(force_list(
                    layer_params(input_layer=self.layers[-1], name='{}_{}'.format(layer_params.__class__.__name__, idx),
                                 is_training=self.is_training)
                ))

            self.output = tf.contrib.layers.flatten(self.layers[-1])

@@ -140,4 +155,14 @@ class InputEmbedder(object):
                             "configurations.")

    def get_name(self):
        return self.name

    def __str__(self):
        result = []
        if self.input_rescaling != 1.0 or self.input_offset != 0.0:
            result.append('Input Normalization (scale = {}, offset = {})'.format(self.input_rescaling, self.input_offset))
        result.extend([str(l) for l in self.layers_params])
        if self.layers_params:
            return '\n'.join(result)
        else:
            return 'No layers'

@@ -18,7 +18,7 @@ from typing import List

import tensorflow as tf

from rl_coach.architectures.tensorflow_components.architecture import Conv2d, Dense
from rl_coach.architectures.tensorflow_components.layers import Conv2d, Dense
from rl_coach.architectures.tensorflow_components.embedders.embedder import InputEmbedder
from rl_coach.base_parameters import EmbedderScheme
from rl_coach.core_types import InputImageEmbedding
@@ -34,9 +34,9 @@ class ImageEmbedder(InputEmbedder):
    def __init__(self, input_size: List[int], activation_function=tf.nn.relu,
                 scheme: EmbedderScheme=EmbedderScheme.Medium, batchnorm: bool=False, dropout: bool=False,
                 name: str= "embedder", input_rescaling: float=255.0, input_offset: float=0.0, input_clipping=None,
                 dense_layer=Dense):
                 dense_layer=Dense, is_training=False):
        super().__init__(input_size, activation_function, scheme, batchnorm, dropout, name, input_rescaling,
                         input_offset, input_clipping, dense_layer=dense_layer)
                         input_offset, input_clipping, dense_layer=dense_layer, is_training=is_training)
        self.return_type = InputImageEmbedding
        if len(input_size) != 3 and scheme != EmbedderScheme.Empty:
            raise ValueError("Image embedders expect the input size to have 3 dimensions. The given size is: {}"
@@ -50,28 +50,28 @@ class ImageEmbedder(InputEmbedder):

        EmbedderScheme.Shallow:
            [
                Conv2d([32, 3, 1])
                Conv2d(32, 3, 1)
            ],

        # atari dqn
        EmbedderScheme.Medium:
            [
                Conv2d([32, 8, 4]),
                Conv2d([64, 4, 2]),
                Conv2d([64, 3, 1])
                Conv2d(32, 8, 4),
                Conv2d(64, 4, 2),
                Conv2d(64, 3, 1)
            ],

        # carla
        EmbedderScheme.Deep: \
            [
                Conv2d([32, 5, 2]),
                Conv2d([32, 3, 1]),
                Conv2d([64, 3, 2]),
                Conv2d([64, 3, 1]),
                Conv2d([128, 3, 2]),
                Conv2d([128, 3, 1]),
                Conv2d([256, 3, 2]),
                Conv2d([256, 3, 1])
                Conv2d(32, 5, 2),
                Conv2d(32, 3, 1),
                Conv2d(64, 3, 2),
                Conv2d(64, 3, 1),
                Conv2d(128, 3, 2),
                Conv2d(128, 3, 1),
                Conv2d(256, 3, 2),
                Conv2d(256, 3, 1)
            ]
    }
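
For reference, a usage sketch (not part of the commit) of the Medium scheme above, which is the classic Atari DQN convolution stack; with the new positional signature each entry reads as (num_filters, kernel_size, strides):

import tensorflow as tf
from rl_coach.architectures.tensorflow_components.layers import Conv2d

frames = tf.placeholder(tf.float32, shape=(None, 84, 84, 4))  # NHWC Atari input

x = frames
for conv in [Conv2d(32, 8, 4), Conv2d(64, 4, 2), Conv2d(64, 3, 1)]:
    x = conv(x)  # each call adds a tf.layers.conv2d op
print(x.shape)  # (?, 7, 7, 64) for 84x84 inputs with default 'valid' padding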

@@ -18,7 +18,7 @@ from typing import List

import tensorflow as tf

from rl_coach.architectures.tensorflow_components.architecture import Dense
from rl_coach.architectures.tensorflow_components.layers import Dense
from rl_coach.architectures.tensorflow_components.embedders.embedder import InputEmbedder
from rl_coach.base_parameters import EmbedderScheme
from rl_coach.core_types import InputVectorEmbedding
@@ -33,9 +33,10 @@ class VectorEmbedder(InputEmbedder):
    def __init__(self, input_size: List[int], activation_function=tf.nn.relu,
                 scheme: EmbedderScheme=EmbedderScheme.Medium, batchnorm: bool=False, dropout: bool=False,
                 name: str= "embedder", input_rescaling: float=1.0, input_offset: float=0.0, input_clipping=None,
                 dense_layer=Dense):
                 dense_layer=Dense, is_training=False):
        super().__init__(input_size, activation_function, scheme, batchnorm, dropout, name,
                         input_rescaling, input_offset, input_clipping, dense_layer=dense_layer)
                         input_rescaling, input_offset, input_clipping, dense_layer=dense_layer,
                         is_training=is_training)

        self.return_type = InputVectorEmbedding
        if len(self.input_size) != 1 and scheme != EmbedderScheme.Empty:
@@ -49,20 +50,20 @@ class VectorEmbedder(InputEmbedder):

        EmbedderScheme.Shallow:
            [
                self.dense_layer([128])
                self.dense_layer(128)
            ],

        # dqn
        EmbedderScheme.Medium:
            [
                self.dense_layer([256])
                self.dense_layer(256)
            ],

        # carla
        EmbedderScheme.Deep: \
            [
                self.dense_layer([128]),
                self.dense_layer([128]),
                self.dense_layer([128])
                self.dense_layer(128),
                self.dense_layer(128),
                self.dense_layer(128)
            ]
    }

@@ -27,7 +27,7 @@ from rl_coach.architectures.tensorflow_components.middlewares.middleware import
from rl_coach.base_parameters import AgentParameters, EmbeddingMergerType
from rl_coach.core_types import PredictionType
from rl_coach.spaces import SpacesDefinition, PlanarMapsObservationSpace
from rl_coach.utils import get_all_subclasses, dynamic_import_and_instantiate_module_from_params
from rl_coach.utils import get_all_subclasses, dynamic_import_and_instantiate_module_from_params, indent_string


class GeneralTensorFlowNetwork(TensorFlowArchitecture):
@@ -80,6 +80,7 @@ class GeneralTensorFlowNetwork(TensorFlowArchitecture):
            return ret_dict

        self.available_return_types = fill_return_types()
        self.is_training = None

    def predict_with_prediction_type(self, states: Dict[str, np.ndarray],
                                     prediction_type: PredictionType) -> Dict[str, np.ndarray]:
@@ -161,7 +162,7 @@ class GeneralTensorFlowNetwork(TensorFlowArchitecture):
        module = dynamic_import_and_instantiate_module_from_params(middleware_params_copy)
        return module

    def get_output_head(self, head_params: HeadParameters, head_idx: int, loss_weight: float=1.):
    def get_output_head(self, head_params: HeadParameters, head_idx: int):
        """
        Given a head type, creates the head and returns it
        :param head_params: the parameters of the head to create
@@ -176,7 +177,7 @@ class GeneralTensorFlowNetwork(TensorFlowArchitecture):
        head_params_copy.activation_function = self.get_activation_function(head_params_copy.activation_function)
        return dynamic_import_and_instantiate_module_from_params(head_params_copy, extra_kwargs={
            'agent_parameters': self.ap, 'spaces': self.spaces, 'network_name': self.network_wrapper_name,
            'head_idx': head_idx, 'loss_weight': loss_weight, 'is_local': self.network_is_local})
            'head_idx': head_idx, 'is_local': self.network_is_local})

    def get_model(self):
        # validate the configuration
@@ -189,11 +190,10 @@ class GeneralTensorFlowNetwork(TensorFlowArchitecture):
        if self.network_parameters.middleware_parameters is None:
            raise ValueError("Exactly one middleware type should be defined")

        if len(self.network_parameters.loss_weights) == 0:
            raise ValueError("At least one loss weight should be defined")

        if len(self.network_parameters.heads_parameters) != len(self.network_parameters.loss_weights):
            raise ValueError("Number of loss weights should match the number of output types")
        # ops for defining the training / testing phase
        self.is_training = tf.Variable(False, trainable=False, collections=[tf.GraphKeys.LOCAL_VARIABLES])
        self.is_training_placeholder = tf.placeholder("bool")
        self.assign_is_training = tf.assign(self.is_training, self.is_training_placeholder)
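
The three ops above are the usual TF1 idiom for a runtime-togglable phase flag: a non-trainable variable holds the phase, and set_is_training (added to TensorFlowArchitecture earlier in this diff) runs the assign op. A self-contained sketch of the same pattern:

import tensorflow as tf

is_training = tf.Variable(False, trainable=False)   # graph-side phase flag
is_training_placeholder = tf.placeholder(tf.bool)   # feed point for the new value
assign_is_training = tf.assign(is_training, is_training_placeholder)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(assign_is_training, feed_dict={is_training_placeholder: True})
    print(sess.run(is_training))  # True -> batchnorm/dropout switch to training behavior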

        for network_idx in range(self.num_networks):
            with tf.variable_scope('network_{}'.format(network_idx)):
@@ -245,28 +245,27 @@ class GeneralTensorFlowNetwork(TensorFlowArchitecture):

                head_count = 0
                for head_idx in range(self.num_heads_per_network):
                    for head_copy_idx in range(self.network_parameters.num_output_head_copies):
                        if self.network_parameters.use_separate_networks_per_head:
                            # if we use separate networks per head, then the head type corresponds top the network idx
                            head_type_idx = network_idx
                            head_count = network_idx
                        else:
                            # if we use a single network with multiple embedders, then the head type is the current head idx
                            head_type_idx = head_idx

                    if self.network_parameters.use_separate_networks_per_head:
                        # if we use separate networks per head, then the head type corresponds to the network idx
                        head_type_idx = network_idx
                        head_count = network_idx
                    else:
                        # if we use a single network with multiple embedders, then the head type is the current head idx
                        head_type_idx = head_idx
                    head_params = self.network_parameters.heads_parameters[head_type_idx]

                    for head_copy_idx in range(head_params.num_output_head_copies):
                        # create output head and add it to the output heads list
                        self.output_heads.append(
                            self.get_output_head(self.network_parameters.heads_parameters[head_type_idx],
                                                 head_idx*self.network_parameters.num_output_head_copies + head_copy_idx,
                                                 self.network_parameters.loss_weights[head_type_idx])
                            self.get_output_head(head_params,
                                                 head_idx*head_params.num_output_head_copies + head_copy_idx)
                        )

                        # rescale the gradients from the head
                        self.gradients_from_head_rescalers.append(
                            tf.get_variable('gradients_from_head_{}-{}_rescalers'.format(head_idx, head_copy_idx),
                                            initializer=float(
                                                self.network_parameters.rescale_gradient_from_head_by_factor[head_count]
                                            ),
                                            initializer=float(head_params.rescale_gradient_from_head_by_factor),
                                            dtype=tf.float32))

                        self.gradients_from_head_rescalers_placeholders.append(
@@ -344,4 +343,46 @@ class GeneralTensorFlowNetwork(TensorFlowArchitecture):
        else:
            raise Exception("{} is not a valid optimizer type".format(self.network_parameters.optimizer_type))

    def __str__(self):
        result = []

        for network in range(self.num_networks):
            network_structure = []

            # embedder
            for embedder in self.input_embedders:
                network_structure.append("Input Embedder: {}".format(embedder.name))
                network_structure.append(indent_string(str(embedder)))

            if len(self.input_embedders) > 1:
                network_structure.append("{} ({})".format(self.network_parameters.embedding_merger_type.name,
                                                          ", ".join(["{} embedding".format(e.name) for e in self.input_embedders])))

            # middleware
            network_structure.append("Middleware:")
            network_structure.append(indent_string(str(self.middleware)))

            # head
            if self.network_parameters.use_separate_networks_per_head:
                heads = range(network, network+1)
            else:
                heads = range(0, len(self.output_heads))

            for head_idx in heads:
                head = self.output_heads[head_idx]
                head_params = self.network_parameters.heads_parameters[head_idx]
                if head_params.num_output_head_copies > 1:
                    network_structure.append("Output Head: {} (num copies = {})".format(head.name, head_params.num_output_head_copies))
                else:
                    network_structure.append("Output Head: {}".format(head.name))
                network_structure.append(indent_string(str(head)))

            # finalize network
            if self.num_networks > 1:
                result.append("Sub-network for head: {}".format(self.output_heads[network].name))
                result.append(indent_string('\n'.join(network_structure)))
            else:
                result.append('\n'.join(network_structure))

        result = '\n'.join(result)
        return result

@@ -16,7 +16,7 @@

import tensorflow as tf

from rl_coach.architectures.tensorflow_components.architecture import Dense
from rl_coach.architectures.tensorflow_components.layers import Dense

from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters
from rl_coach.base_parameters import AgentParameters
@@ -25,9 +25,13 @@ from rl_coach.spaces import SpacesDefinition


class CategoricalQHeadParameters(HeadParameters):
    def __init__(self, activation_function: str ='relu', name: str='categorical_q_head_params', dense_layer=Dense):
    def __init__(self, activation_function: str ='relu', name: str='categorical_q_head_params',
                 num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
                 loss_weight: float = 1.0, dense_layer=Dense):
        super().__init__(parameterized_class=CategoricalQHead, activation_function=activation_function, name=name,
                         dense_layer=dense_layer)
                         dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
                         rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
                         loss_weight=loss_weight)


class CategoricalQHead(Head):
@@ -54,3 +58,12 @@ class CategoricalQHead(Head):
        self.target = self.distributions
        self.loss = tf.nn.softmax_cross_entropy_with_logits(labels=self.target, logits=values_distribution)
        tf.losses.add_loss(self.loss)

    def __str__(self):
        result = [
            "Dense (num outputs = {})".format(self.num_actions * self.num_atoms),
            "Reshape (output size = {} x {})".format(self.num_actions, self.num_atoms),
            "Softmax"
        ]
        return '\n'.join(result)

@@ -16,27 +16,34 @@

import tensorflow as tf

from rl_coach.architectures.tensorflow_components.architecture import Dense
from rl_coach.architectures.tensorflow_components.layers import Dense, batchnorm_activation_dropout

from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters
from rl_coach.base_parameters import AgentParameters
from rl_coach.core_types import QActionStateValue
from rl_coach.spaces import SpacesDefinition, BoxActionSpace, DiscreteActionSpace
from rl_coach.utils import force_list


class RegressionHeadParameters(HeadParameters):
    def __init__(self, activation_function: str ='relu', name: str='q_head_params', dense_layer=Dense):
    def __init__(self, activation_function: str ='relu', name: str='q_head_params',
                 num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
                 loss_weight: float = 1.0, dense_layer=Dense, scheme=[Dense(256), Dense(256)]):
        super().__init__(parameterized_class=RegressionHead, activation_function=activation_function, name=name,
                         dense_layer=dense_layer)
                         dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
                         rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
                         loss_weight=loss_weight)


class RegressionHead(Head):
    def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, network_name: str,
                 head_idx: int = 0, loss_weight: float = 1., is_local: bool = True, activation_function: str='relu',
                 dense_layer=Dense):
                 dense_layer=Dense, scheme=[Dense(256), Dense(256)]):
        super().__init__(agent_parameters, spaces, network_name, head_idx, loss_weight, is_local, activation_function,
                         dense_layer=dense_layer)
        self.name = 'regression_head'
        self.scheme = scheme
        self.layers = []
        if isinstance(self.spaces.action, BoxActionSpace):
            self.num_actions = self.spaces.action.shape[0]
        elif isinstance(self.spaces.action, DiscreteActionSpace):
@@ -48,9 +55,18 @@ class RegressionHead(Head):
        self.loss_type = tf.losses.mean_squared_error

    def _build_module(self, input_layer):
        self.fc1 = self.dense_layer(256)(input_layer)
        self.fc2 = self.dense_layer(256)(self.fc1)
        self.output = self.dense_layer(self.num_actions)(self.fc2, name='output')
        self.layers.append(input_layer)
        for idx, layer_params in enumerate(self.scheme):
            self.layers.extend(force_list(
                layer_params(input_layer=self.layers[-1], name='{}_{}'.format(layer_params.__class__.__name__, idx))
            ))

        self.layers.append(self.dense_layer(self.num_actions)(self.layers[-1], name='output'))
        self.output = self.layers[-1]

    def __str__(self):
        result = []
        for layer in self.layers:
            result.append(str(layer))
        return '\n'.join(result)
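
The scheme-driven _build_module above replaces the hard-coded fc1/fc2 pair: any list of layer callables defines the head body, and the final Dense is sized to the action space. A hypothetical usage sketch (the wide_scheme name is illustrative, not from the commit):

from rl_coach.architectures.tensorflow_components.layers import Dense

default_scheme = [Dense(256), Dense(256)]  # what RegressionHeadParameters uses by default
wide_scheme = [Dense(1024)]                # a wider, shallower variant

head_params = RegressionHeadParameters(scheme=wide_scheme)

One caveat about the signature as written: scheme=[Dense(256), Dense(256)] is a mutable default argument, evaluated once at import time, so every head built with the default shares the same two Dense instances (harmless here, since Dense only stores its unit count).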

@@ -16,7 +16,7 @@

import tensorflow as tf

from rl_coach.architectures.tensorflow_components.architecture import batchnorm_activation_dropout, Dense
from rl_coach.architectures.tensorflow_components.layers import batchnorm_activation_dropout, Dense
from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters
from rl_coach.base_parameters import AgentParameters
from rl_coach.core_types import ActionProbabilities
@@ -25,9 +25,12 @@ from rl_coach.spaces import SpacesDefinition

class DDPGActorHeadParameters(HeadParameters):
    def __init__(self, activation_function: str ='tanh', name: str='policy_head_params', batchnorm: bool=True,
                 dense_layer=Dense):
                 num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
                 loss_weight: float = 1.0, dense_layer=Dense):
        super().__init__(parameterized_class=DDPGActor, activation_function=activation_function, name=name,
                         dense_layer=dense_layer)
                         dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
                         rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
                         loss_weight=loss_weight)
        self.batchnorm = batchnorm


@@ -56,7 +59,7 @@ class DDPGActor(Head):
        pre_activation_policy_values_mean = self.dense_layer(self.num_actions)(input_layer, name='fc_mean')
        policy_values_mean = batchnorm_activation_dropout(pre_activation_policy_values_mean, self.batchnorm,
                                                          self.activation_function,
                                                          False, 0, 0)[-1]
                                                          False, 0, is_training=False, name="BatchnormActivationDropout_0")[-1]
        self.policy_mean = tf.multiply(policy_values_mean, self.output_scale, name='output_mean')

        if self.is_local:
@@ -66,3 +69,9 @@ class DDPGActor(Head):
                [self.action_penalty * tf.reduce_mean(tf.square(pre_activation_policy_values_mean))]

        self.output = [self.policy_mean]

    def __str__(self):
        result = [
            'Dense (num outputs = {})'.format(self.num_actions[0])
        ]
        return '\n'.join(result)

@@ -15,7 +15,7 @@
#
import tensorflow as tf

from rl_coach.architectures.tensorflow_components.architecture import Dense
from rl_coach.architectures.tensorflow_components.layers import Dense
from rl_coach.architectures.tensorflow_components.heads.head import HeadParameters
from rl_coach.architectures.tensorflow_components.heads.q_head import QHead
from rl_coach.base_parameters import AgentParameters
@@ -24,9 +24,13 @@ from rl_coach.spaces import SpacesDefinition


class DNDQHeadParameters(HeadParameters):
    def __init__(self, activation_function: str ='relu', name: str='dnd_q_head_params', dense_layer=Dense):
    def __init__(self, activation_function: str ='relu', name: str='dnd_q_head_params',
                 num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
                 loss_weight: float = 1.0, dense_layer=Dense):
        super().__init__(parameterized_class=DNDQHead, activation_function=activation_function, name=name,
                         dense_layer=dense_layer)
                         dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
                         rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
                         loss_weight=loss_weight)


class DNDQHead(QHead):
@@ -89,3 +93,9 @@ class DNDQHead(QHead):
        # DND gradients
        self.dnd_embeddings_grad = tf.gradients(self.loss[0], self.dnd_embeddings)
        self.dnd_values_grad = tf.gradients(self.loss[0], self.dnd_values)

    def __str__(self):
        result = [
            "DND fetch (num outputs = {})".format(self.num_actions)
        ]
        return '\n'.join(result)

@@ -16,7 +16,7 @@

import tensorflow as tf

from rl_coach.architectures.tensorflow_components.architecture import Dense
from rl_coach.architectures.tensorflow_components.layers import Dense
from rl_coach.architectures.tensorflow_components.heads.head import HeadParameters
from rl_coach.architectures.tensorflow_components.heads.q_head import QHead
from rl_coach.base_parameters import AgentParameters
@@ -24,9 +24,13 @@ from rl_coach.spaces import SpacesDefinition


class DuelingQHeadParameters(HeadParameters):
    def __init__(self, activation_function: str ='relu', name: str='dueling_q_head_params', dense_layer=Dense):
        super().__init__(parameterized_class=DuelingQHead, activation_function=activation_function, name=name, dense_layer=dense_layer)

    def __init__(self, activation_function: str ='relu', name: str='dueling_q_head_params',
                 num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
                 loss_weight: float = 1.0, dense_layer=Dense):
        super().__init__(parameterized_class=DuelingQHead, activation_function=activation_function, name=name,
                         dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
                         rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
                         loss_weight=loss_weight)

class DuelingQHead(QHead):
    def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, network_name: str,
@@ -51,3 +55,16 @@ class DuelingQHead(QHead):

        # merge to state-action value function Q
        self.output = tf.add(self.state_value, self.action_advantage, name='output')

    def __str__(self):
        result = [
            "State Value Stream - V",
            "\tDense (num outputs = 512)",
            "\tDense (num outputs = 1)",
            "Action Advantage Stream - A",
            "\tDense (num outputs = 512)",
            "\tDense (num outputs = {})".format(self.num_actions),
            "\tSubtract(A, Mean(A))",
            "Add (V, A)"
        ]
        return '\n'.join(result)
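
For intuition, a NumPy sketch (illustrative, not from the commit) of the dueling merge that the __str__ above describes: centering the advantage stream by its mean before adding the state value pins down the otherwise unidentifiable V/A split.

import numpy as np

state_value = np.array([[1.5]])            # V(s), one row per batch item
advantages = np.array([[0.2, -0.1, 0.5]])  # A(s, a) for three actions

centered = advantages - advantages.mean(axis=1, keepdims=True)
q_values = state_value + centered          # Q(s, a) = V(s) + (A - mean(A))
print(q_values)                            # [[1.5 1.2 1.8]]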

@@ -18,7 +18,7 @@ from typing import Type
import numpy as np
import tensorflow as tf
from tensorflow.python.ops.losses.losses_impl import Reduction
from rl_coach.architectures.tensorflow_components.architecture import Dense
from rl_coach.architectures.tensorflow_components.layers import Dense
from rl_coach.base_parameters import AgentParameters, Parameters, NetworkComponentParameters
from rl_coach.spaces import SpacesDefinition
from rl_coach.utils import force_list
@@ -35,10 +35,14 @@ def normalized_columns_initializer(std=1.0):

class HeadParameters(NetworkComponentParameters):
    def __init__(self, parameterized_class: Type['Head'], activation_function: str = 'relu', name: str= 'head',
                 dense_layer=Dense):
                 num_output_head_copies: int=1, rescale_gradient_from_head_by_factor: float=1.0,
                 loss_weight: float=1.0, dense_layer=Dense):
        super().__init__(dense_layer=dense_layer)
        self.activation_function = activation_function
        self.name = name
        self.num_output_head_copies = num_output_head_copies
        self.rescale_gradient_from_head_by_factor = rescale_gradient_from_head_by_factor
        self.loss_weight = loss_weight
        self.parameterized_class_name = parameterized_class.__name__


@@ -16,7 +16,7 @@

import tensorflow as tf

from rl_coach.architectures.tensorflow_components.architecture import Dense
from rl_coach.architectures.tensorflow_components.layers import Dense

from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters
from rl_coach.base_parameters import AgentParameters
@@ -26,9 +26,12 @@ from rl_coach.spaces import SpacesDefinition

class MeasurementsPredictionHeadParameters(HeadParameters):
    def __init__(self, activation_function: str ='relu', name: str='measurements_prediction_head_params',
                 dense_layer=Dense):
        super().__init__(parameterized_class=MeasurementsPredictionHead,
                         activation_function=activation_function, name=name, dense_layer=dense_layer)
                 num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
                 loss_weight: float = 1.0, dense_layer=Dense):
        super().__init__(parameterized_class=MeasurementsPredictionHead, activation_function=activation_function, name=name,
                         dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
                         rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
                         loss_weight=loss_weight)


class MeasurementsPredictionHead(Head):
@@ -68,3 +71,17 @@ class MeasurementsPredictionHead(Head):
        targets_nonan = tf.where(tf.is_nan(self.target), self.output, self.target)
        self.loss = tf.reduce_sum(tf.reduce_mean(tf.square(targets_nonan - self.output), reduction_indices=0))
        tf.losses.add_loss(self.loss_weight[0] * self.loss)

    def __str__(self):
        result = [
            "State Value Stream - V",
            "\tDense (num outputs = 256)",
            "\tDense (num outputs = {})".format(self.multi_step_measurements_size),
            "Action Advantage Stream - A",
            "\tDense (num outputs = 256)",
            "\tDense (num outputs = {})".format(self.num_actions * self.multi_step_measurements_size),
            "\tReshape (new size = {} x {})".format(self.num_actions, self.multi_step_measurements_size),
            "\tSubtract(A, Mean(A))",
            "Add (V, A)"
        ]
        return '\n'.join(result)

@@ -16,7 +16,7 @@

import tensorflow as tf

from rl_coach.architectures.tensorflow_components.architecture import Dense
from rl_coach.architectures.tensorflow_components.layers import Dense
from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters
from rl_coach.base_parameters import AgentParameters
from rl_coach.core_types import QActionStateValue
@@ -25,9 +25,13 @@ from rl_coach.spaces import SpacesDefinition


class NAFHeadParameters(HeadParameters):
    def __init__(self, activation_function: str ='tanh', name: str='naf_head_params', dense_layer=Dense):
    def __init__(self, activation_function: str ='tanh', name: str='naf_head_params',
                 num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
                 loss_weight: float = 1.0, dense_layer=Dense):
        super().__init__(parameterized_class=NAFHead, activation_function=activation_function, name=name,
                         dense_layer=dense_layer)
                         dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
                         rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
                         loss_weight=loss_weight)


class NAFHead(Head):
@@ -90,3 +94,21 @@ class NAFHead(Head):
        self.Q = tf.add(self.V, self.A, name='Q')

        self.output = self.Q

    def __str__(self):
        result = [
            "State Value Stream - V",
            "\tDense (num outputs = 1)",
            "Action Advantage Stream - A",
            "\tDense (num outputs = {})".format((self.num_actions * (self.num_actions + 1)) / 2),
            "\tReshape to lower triangular matrix L (new size = {} x {})".format(self.num_actions, self.num_actions),
            "\tP = L*L^T",
            "\tA = -1/2 * (u - mu)^T * P * (u - mu)",
            "Action Stream - mu",
            "\tDense (num outputs = {})".format(self.num_actions),
            "\tActivation (type = {})".format(self.activation_function.__name__),
            "\tMultiply (factor = {})".format(self.output_scale),
            "State-Action Value Stream - Q",
            "\tAdd (V, A)"
        ]
        return '\n'.join(result)
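
To make the NAF advantage term above concrete, a NumPy sketch (illustrative, not from the commit) of A(u) = -1/2 * (u - mu)^T * P * (u - mu) with P = L*L^T: building P from a lower-triangular L keeps it positive semi-definite, so the advantage peaks exactly at u = mu.

import numpy as np

L = np.array([[0.8, 0.0],
              [0.3, 1.1]])          # lower-triangular, as in the head's reshape step
P = L @ L.T                          # positive semi-definite by construction

mu = np.array([0.5, -0.2])           # policy mean: the advantage-maximizing action
u = np.array([0.7, 0.1])             # candidate action

diff = u - mu
advantage = -0.5 * diff @ P @ diff   # <= 0 everywhere, exactly 0 at u == mu
print(advantage)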

@@ -17,20 +17,25 @@
import numpy as np
import tensorflow as tf

from rl_coach.architectures.tensorflow_components.architecture import Dense
from rl_coach.architectures.tensorflow_components.layers import Dense
from rl_coach.architectures.tensorflow_components.heads.head import Head, normalized_columns_initializer, HeadParameters
from rl_coach.base_parameters import AgentParameters
from rl_coach.core_types import ActionProbabilities
from rl_coach.exploration_policies.continuous_entropy import ContinuousEntropyParameters
from rl_coach.spaces import DiscreteActionSpace, BoxActionSpace, CompoundActionSpace
from rl_coach.spaces import SpacesDefinition
from rl_coach.utils import eps
from rl_coach.utils import eps, indent_string


class PolicyHeadParameters(HeadParameters):
    def __init__(self, activation_function: str ='tanh', name: str='policy_head_params', dense_layer=Dense):
    def __init__(self, activation_function: str ='tanh', name: str='policy_head_params',
                 num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
                 loss_weight: float = 1.0, dense_layer=Dense):
        super().__init__(parameterized_class=PolicyHead, activation_function=activation_function, name=name,
                         dense_layer=dense_layer)
                         dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
                         rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
                         loss_weight=loss_weight)


class PolicyHead(Head):
@@ -112,7 +117,7 @@ class PolicyHead(Head):
            self.actions.append(tf.placeholder(tf.float32, [None, num_actions], name="actions"))

            # output activation function
            if np.all(self.spaces.action.max_abs_range < np.inf):
            if np.all(action_space.max_abs_range < np.inf):
                # bounded actions
                self.output_scale = action_space.max_abs_range
                self.continuous_output_activation = self.activation_function
@@ -158,3 +163,45 @@ class PolicyHead(Head):
        if self.action_penalty and self.action_penalty != 0:
            self.regularizations += [
                self.action_penalty * tf.reduce_mean(tf.square(pre_activation_policy_values_mean))]

    def __str__(self):
        action_spaces = [self.spaces.action]
        if isinstance(self.spaces.action, CompoundActionSpace):
            action_spaces = self.spaces.action.sub_action_spaces

        result = []
        for action_space_idx, action_space in enumerate(action_spaces):
            action_head_mean_result = []
            if isinstance(action_space, DiscreteActionSpace):
                # create a discrete action network (softmax probabilities output)
                action_head_mean_result.append("Dense (num outputs = {})".format(len(action_space.actions)))
                action_head_mean_result.append("Softmax")
            elif isinstance(action_space, BoxActionSpace):
                # create a continuous action network (bounded mean and stdev outputs)
                action_head_mean_result.append("Dense (num outputs = {})".format(action_space.shape))
                if np.all(action_space.max_abs_range < np.inf):
                    # bounded actions
                    action_head_mean_result.append("Activation (type = {})".format(self.activation_function.__name__))
                    action_head_mean_result.append("Multiply (factor = {})".format(action_space.max_abs_range))

            action_head_stdev_result = []
            if isinstance(self.exploration_policy, ContinuousEntropyParameters):
                action_head_stdev_result.append("Dense (num outputs = {})".format(action_space.shape))
                action_head_stdev_result.append("Softplus")

            action_head_result = []
            if action_head_stdev_result:
                action_head_result.append("Mean Stream")
                action_head_result.append(indent_string('\n'.join(action_head_mean_result)))
                action_head_result.append("Stdev Stream")
                action_head_result.append(indent_string('\n'.join(action_head_stdev_result)))
            else:
                action_head_result.append('\n'.join(action_head_mean_result))

            if len(action_spaces) > 1:
                result.append("Action head {}".format(action_space_idx))
                result.append(indent_string('\n'.join(action_head_result)))
            else:
                result.append('\n'.join(action_head_result))

        return '\n'.join(result)

@@ -17,7 +17,7 @@
import numpy as np
import tensorflow as tf

from rl_coach.architectures.tensorflow_components.architecture import Dense
from rl_coach.architectures.tensorflow_components.layers import Dense
from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters, normalized_columns_initializer
from rl_coach.base_parameters import AgentParameters
from rl_coach.core_types import ActionProbabilities
@@ -27,9 +27,13 @@ from rl_coach.utils import eps


class PPOHeadParameters(HeadParameters):
    def __init__(self, activation_function: str ='tanh', name: str='ppo_head_params', dense_layer=Dense):
    def __init__(self, activation_function: str ='tanh', name: str='ppo_head_params',
                 num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
                 loss_weight: float = 1.0, dense_layer=Dense):
        super().__init__(parameterized_class=PPOHead, activation_function=activation_function, name=name,
                         dense_layer=dense_layer)
                         dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
                         rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
                         loss_weight=loss_weight)


class PPOHead(Head):
@@ -146,3 +150,15 @@ class PPOHead(Head):
        self.old_policy_distribution = tf.contrib.distributions.MultivariateNormalDiag(self.old_policy_mean, self.old_policy_std + eps)

        self.output = [self.policy_mean, self.policy_std]

    def __str__(self):
        action_head_mean_result = []
        if isinstance(self.spaces.action, DiscreteActionSpace):
            # create a discrete action network (softmax probabilities output)
            action_head_mean_result.append("Dense (num outputs = {})".format(len(self.spaces.action.actions)))
            action_head_mean_result.append("Softmax")
        elif isinstance(self.spaces.action, BoxActionSpace):
            # create a continuous action network (bounded mean and stdev outputs)
            action_head_mean_result.append("Dense (num outputs = {})".format(self.spaces.action.shape))

        return '\n'.join(action_head_mean_result)

@@ -16,7 +16,7 @@

import tensorflow as tf

from rl_coach.architectures.tensorflow_components.architecture import Dense
from rl_coach.architectures.tensorflow_components.layers import Dense

from rl_coach.architectures.tensorflow_components.heads.head import Head, normalized_columns_initializer, HeadParameters
from rl_coach.base_parameters import AgentParameters
@@ -25,9 +25,13 @@ from rl_coach.spaces import SpacesDefinition


class PPOVHeadParameters(HeadParameters):
    def __init__(self, activation_function: str ='relu', name: str='ppo_v_head_params', dense_layer=Dense):
    def __init__(self, activation_function: str ='relu', name: str='ppo_v_head_params',
                 num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
                 loss_weight: float = 1.0, dense_layer=Dense):
        super().__init__(parameterized_class=PPOVHead, activation_function=activation_function, name=name,
                         dense_layer=dense_layer)
                         dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
                         rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
                         loss_weight=loss_weight)


class PPOVHead(Head):
@@ -55,3 +59,9 @@ class PPOVHead(Head):
        self.vf_loss = tf.reduce_mean(tf.maximum(value_loss_1, value_loss_2))
        self.loss = self.vf_loss
        tf.losses.add_loss(self.loss)

    def __str__(self):
        result = [
            "Dense (num outputs = 1)"
        ]
        return '\n'.join(result)

@@ -16,7 +16,7 @@

import tensorflow as tf

from rl_coach.architectures.tensorflow_components.architecture import Dense
from rl_coach.architectures.tensorflow_components.layers import Dense

from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters
from rl_coach.base_parameters import AgentParameters
@@ -25,9 +25,13 @@ from rl_coach.spaces import SpacesDefinition, BoxActionSpace, DiscreteActionSpac


class QHeadParameters(HeadParameters):
    def __init__(self, activation_function: str ='relu', name: str='q_head_params', dense_layer=Dense):
    def __init__(self, activation_function: str ='relu', name: str='q_head_params',
                 num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
                 loss_weight: float = 1.0, dense_layer=Dense):
        super().__init__(parameterized_class=QHead, activation_function=activation_function, name=name,
                         dense_layer=dense_layer)
                         dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
                         rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
                         loss_weight=loss_weight)


class QHead(Head):
@@ -51,5 +55,10 @@ class QHead(Head):
        # Standard Q Network
        self.output = self.dense_layer(self.num_actions)(input_layer, name='output')

    def __str__(self):
        result = [
            "Dense (num outputs = {})".format(self.num_actions)
        ]
        return '\n'.join(result)


@@ -16,7 +16,7 @@

import tensorflow as tf

from rl_coach.architectures.tensorflow_components.architecture import Dense
from rl_coach.architectures.tensorflow_components.layers import Dense

from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters
from rl_coach.base_parameters import AgentParameters
@@ -26,9 +26,12 @@ from rl_coach.spaces import SpacesDefinition

class QuantileRegressionQHeadParameters(HeadParameters):
    def __init__(self, activation_function: str ='relu', name: str='quantile_regression_q_head_params',
                 dense_layer=Dense):
        super().__init__(parameterized_class=QuantileRegressionQHead, activation_function=activation_function,
                         name=name, dense_layer=dense_layer)
                 num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
                 loss_weight: float = 1.0, dense_layer=Dense):
        super().__init__(parameterized_class=QuantileRegressionQHead, activation_function=activation_function, name=name,
                         dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
                         rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
                         loss_weight=loss_weight)


class QuantileRegressionQHead(Head):
@@ -79,3 +82,11 @@ class QuantileRegressionQHead(Head):
        quantile_regression_loss = tf.reduce_sum(quantile_huber_loss) / float(self.num_atoms)
        self.loss = quantile_regression_loss
        tf.losses.add_loss(self.loss)

    def __str__(self):
        result = [
            "Dense (num outputs = {})".format(self.num_actions * self.num_atoms),
            "Reshape (new size = {} x {})".format(self.num_actions, self.num_atoms)
        ]
        return '\n'.join(result)

@@ -16,7 +16,7 @@

import tensorflow as tf

from rl_coach.architectures.tensorflow_components.architecture import Dense
from rl_coach.architectures.tensorflow_components.layers import Dense
from rl_coach.architectures.tensorflow_components.heads.head import HeadParameters, Head
from rl_coach.base_parameters import AgentParameters
from rl_coach.core_types import QActionStateValue
@@ -24,9 +24,13 @@ from rl_coach.spaces import SpacesDefinition


class RainbowQHeadParameters(HeadParameters):
    def __init__(self, activation_function: str ='relu', name: str='rainbow_q_head_params', dense_layer=Dense):
    def __init__(self, activation_function: str ='relu', name: str='rainbow_q_head_params',
                 num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
                 loss_weight: float = 1.0, dense_layer=Dense):
        super().__init__(parameterized_class=RainbowQHead, activation_function=activation_function, name=name,
                         dense_layer=dense_layer)
                         dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
                         rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
                         loss_weight=loss_weight)


class RainbowQHead(Head):
@@ -69,3 +73,17 @@ class RainbowQHead(Head):
        self.loss = tf.nn.softmax_cross_entropy_with_logits(labels=self.target, logits=values_distribution)
        tf.losses.add_loss(self.loss)

    def __str__(self):
        result = [
            "State Value Stream - V",
            "\tDense (num outputs = 512)",
            "\tDense (num outputs = {})".format(self.num_atoms),
            "Action Advantage Stream - A",
            "\tDense (num outputs = 512)",
            "\tDense (num outputs = {})".format(self.num_actions * self.num_atoms),
            "\tReshape (new size = {} x {})".format(self.num_actions, self.num_atoms),
            "\tSubtract(A, Mean(A))",
            "Add (V, A)",
            "Softmax"
        ]
        return '\n'.join(result)

@@ -16,7 +16,7 @@

import tensorflow as tf

from rl_coach.architectures.tensorflow_components.architecture import Dense
from rl_coach.architectures.tensorflow_components.layers import Dense

from rl_coach.architectures.tensorflow_components.heads.head import Head, normalized_columns_initializer, HeadParameters
from rl_coach.base_parameters import AgentParameters
@@ -25,9 +25,13 @@ from rl_coach.spaces import SpacesDefinition


class VHeadParameters(HeadParameters):
    def __init__(self, activation_function: str ='relu', name: str='v_head_params', dense_layer=Dense):
    def __init__(self, activation_function: str ='relu', name: str='v_head_params',
                 num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
                 loss_weight: float = 1.0, dense_layer=Dense):
        super().__init__(parameterized_class=VHead, activation_function=activation_function, name=name,
                         dense_layer=dense_layer)
                         dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
                         rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
                         loss_weight=loss_weight)


class VHead(Head):
@@ -48,3 +52,9 @@ class VHead(Head):
        # Standard V Network
        self.output = self.dense_layer(1)(input_layer, name='output',
                                          kernel_initializer=normalized_columns_initializer(1.0))

    def __str__(self):
        result = [
            "Dense (num outputs = 1)"
        ]
        return '\n'.join(result)

rl_coach/architectures/tensorflow_components/layers.py (new file, 167 lines)
@@ -0,0 +1,167 @@
import math
from typing import List, Union

import tensorflow as tf

from rl_coach.utils import force_list


def batchnorm_activation_dropout(input_layer, batchnorm, activation_function, dropout, dropout_rate, is_training, name):
    layers = [input_layer]

    # batchnorm
    if batchnorm:
        layers.append(
            tf.layers.batch_normalization(layers[-1], name="{}_batchnorm".format(name), training=is_training)
        )

    # activation
    if activation_function:
        layers.append(
            activation_function(layers[-1], name="{}_activation".format(name))
        )

    # dropout
    if dropout:
        layers.append(
            tf.layers.dropout(layers[-1], dropout_rate, name="{}_dropout".format(name), training=is_training)
        )

    # remove the input layer from the layers list
    del layers[0]

    return layers
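
A short usage sketch (illustrative) of the rewritten helper: unlike the old architecture.py version, both batch normalization and dropout now receive the training flag, so one graph behaves correctly in both phases.

import tensorflow as tf

x = tf.placeholder(tf.float32, shape=(None, 64))
phase = tf.placeholder(tf.bool)  # typically wired to the network's is_training variable

# batchnorm -> relu -> dropout on top of x; the helper returns only the new layers
out = batchnorm_activation_dropout(x, batchnorm=True, activation_function=tf.nn.relu,
                                   dropout=True, dropout_rate=0.1,
                                   is_training=phase, name="embed_0")[-1]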


class Conv2d(object):
    def __init__(self, num_filters: int, kernel_size: int, strides: int):
        self.num_filters = num_filters
        self.kernel_size = kernel_size
        self.strides = strides

    def __call__(self, input_layer, name: str=None, is_training=None):
        """
        returns a tensorflow conv2d layer
        :param input_layer: previous layer
        :param name: layer name
        :return: conv2d layer
        """
        return tf.layers.conv2d(input_layer, filters=self.num_filters, kernel_size=self.kernel_size,
                                strides=self.strides, data_format='channels_last', name=name)

    def __str__(self):
        return "Convolution (num filters = {}, kernel size = {}, stride = {})"\
            .format(self.num_filters, self.kernel_size, self.strides)


class BatchnormActivationDropout(object):
    def __init__(self, batchnorm: bool=False, activation_function=None, dropout_rate: float=0):
        self.batchnorm = batchnorm
        self.activation_function = activation_function
        self.dropout_rate = dropout_rate

    def __call__(self, input_layer, name: str=None, is_training=None):
        """
        returns a list of tensorflow batchnorm, activation and dropout layers
        :param input_layer: previous layer
        :param name: layer name
        :return: batchnorm, activation and dropout layers
        """
        return batchnorm_activation_dropout(input_layer, batchnorm=self.batchnorm,
                                            activation_function=self.activation_function,
                                            dropout=self.dropout_rate > 0, dropout_rate=self.dropout_rate,
                                            is_training=is_training, name=name)

    def __str__(self):
        result = []
        if self.batchnorm:
            result += ["Batch Normalization"]
        if self.activation_function:
            result += ["Activation (type = {})".format(self.activation_function.__name__)]
        if self.dropout_rate > 0:
            result += ["Dropout (rate = {})".format(self.dropout_rate)]
        return "\n".join(result)


class Dense(object):
    def __init__(self, units: int):
        self.units = units

    def __call__(self, input_layer, name: str=None, kernel_initializer=None, activation=None, is_training=None):
        """
        returns a tensorflow dense layer
        :param input_layer: previous layer
        :param name: layer name
        :return: dense layer
        """
        return tf.layers.dense(input_layer, self.units, name=name, kernel_initializer=kernel_initializer,
                               activation=activation)

    def __str__(self):
        return "Dense (num outputs = {})".format(self.units)


class NoisyNetDense(object):
    """
    A factorized Noisy Net layer

    https://arxiv.org/abs/1706.10295.
    """

    def __init__(self, units: int):
        self.units = units
        self.sigma0 = 0.5

    def __call__(self, input_layer, name: str, kernel_initializer=None, activation=None, is_training=None):
        """
        returns a NoisyNet dense layer
        :param input_layer: previous layer
        :param name: layer name
        :param kernel_initializer: initializer for kernels. Default is to use Gaussian noise that preserves stddev.
        :param activation: the activation function
        :return: dense layer
        """
        # TODO: noise sampling should be externally controlled. DQN is fine with sampling noise for every
        # forward (either act or train, both for online and target networks).
        # A3C, on the other hand, should sample noise only when policy changes (i.e. after every t_max steps)

        num_inputs = input_layer.get_shape()[-1].value
        num_outputs = self.units

        stddev = 1 / math.sqrt(num_inputs)
        activation = activation if activation is not None else (lambda x: x)

        if kernel_initializer is None:
            kernel_mean_initializer = tf.random_uniform_initializer(-stddev, stddev)
            kernel_stddev_initializer = tf.random_uniform_initializer(-stddev * self.sigma0, stddev * self.sigma0)
        else:
            kernel_mean_initializer = kernel_stddev_initializer = kernel_initializer
        with tf.variable_scope(None, default_name=name):
            weight_mean = tf.get_variable('weight_mean', shape=(num_inputs, num_outputs),
                                          initializer=kernel_mean_initializer)
|
||||
bias_mean = tf.get_variable('bias_mean', shape=(num_outputs,), initializer=tf.zeros_initializer())
|
||||
|
||||
weight_stddev = tf.get_variable('weight_stddev', shape=(num_inputs, num_outputs),
|
||||
initializer=kernel_stddev_initializer)
|
||||
bias_stddev = tf.get_variable('bias_stddev', shape=(num_outputs,),
|
||||
initializer=kernel_stddev_initializer)
|
||||
bias_noise = self.f(tf.random_normal((num_outputs,)))
|
||||
weight_noise = self.factorized_noise(num_inputs, num_outputs)
|
||||
|
||||
bias = bias_mean + bias_stddev * bias_noise
|
||||
weight = weight_mean + weight_stddev * weight_noise
|
||||
return activation(tf.matmul(input_layer, weight) + bias)
|
||||
|
||||
def factorized_noise(self, inputs, outputs):
        # TODO: use factorized noise only for compute intensive algos (e.g. DQN);
        # lighter algos (e.g. A3C) should not use it
        noise1 = self.f(tf.random_normal((inputs, 1)))
        noise2 = self.f(tf.random_normal((1, outputs)))
        return tf.matmul(noise1, noise2)

    @staticmethod
    def f(values):
        return tf.sqrt(tf.abs(values)) * tf.sign(values)

    def __str__(self):
        return "Noisy Dense (num outputs = {})".format(self.units)

@@ -17,46 +17,41 @@ from typing import Union, List

import tensorflow as tf

from rl_coach.architectures.tensorflow_components.architecture import batchnorm_activation_dropout, Dense
from rl_coach.architectures.tensorflow_components.layers import batchnorm_activation_dropout, Dense
from rl_coach.architectures.tensorflow_components.middlewares.middleware import Middleware, MiddlewareParameters
from rl_coach.base_parameters import MiddlewareScheme
from rl_coach.core_types import Middleware_FC_Embedding
from rl_coach.utils import force_list


class FCMiddlewareParameters(MiddlewareParameters):
    def __init__(self, activation_function='relu',
                 scheme: Union[List, MiddlewareScheme] = MiddlewareScheme.Medium,
                 batchnorm: bool = False, dropout: bool = False,
                 name="middleware_fc_embedder", dense_layer=Dense):
                 name="middleware_fc_embedder", dense_layer=Dense, is_training=False):
        super().__init__(parameterized_class=FCMiddleware, activation_function=activation_function,
                         scheme=scheme, batchnorm=batchnorm, dropout=dropout, name=name, dense_layer=dense_layer)
                         scheme=scheme, batchnorm=batchnorm, dropout=dropout, name=name, dense_layer=dense_layer,
                         is_training=is_training)


class FCMiddleware(Middleware):
    def __init__(self, activation_function=tf.nn.relu,
                 scheme: MiddlewareScheme = MiddlewareScheme.Medium,
                 batchnorm: bool = False, dropout: bool = False,
                 name="middleware_fc_embedder", dense_layer=Dense):
                 name="middleware_fc_embedder", dense_layer=Dense, is_training=False):
        super().__init__(activation_function=activation_function, batchnorm=batchnorm,
                         dropout=dropout, scheme=scheme, name=name, dense_layer=dense_layer)
                         dropout=dropout, scheme=scheme, name=name, dense_layer=dense_layer, is_training=is_training)
        self.return_type = Middleware_FC_Embedding
        self.layers = []

    def _build_module(self):
        self.layers.append(self.input)

        if isinstance(self.scheme, MiddlewareScheme):
            layers_params = self.schemes[self.scheme]
        else:
            layers_params = self.scheme
        for idx, layer_params in enumerate(layers_params):
            self.layers.append(
                layer_params(self.layers[-1], name='{}_{}'.format(layer_params.__class__.__name__, idx))
            )

            self.layers.extend(batchnorm_activation_dropout(self.layers[-1], self.batchnorm,
                                                            self.activation_function, self.dropout,
                                                            self.dropout_rate, idx))
        for idx, layer_params in enumerate(self.layers_params):
            self.layers.extend(force_list(
                layer_params(self.layers[-1], name='{}_{}'.format(layer_params.__class__.__name__, idx),
                             is_training=self.is_training)
            ))

        self.output = self.layers[-1]

@@ -69,20 +64,20 @@ class FCMiddleware(Middleware):
            # ppo
            MiddlewareScheme.Shallow:
                [
                    self.dense_layer([64])
                    self.dense_layer(64)
                ],

            # dqn
            MiddlewareScheme.Medium:
                [
                    self.dense_layer([512])
                    self.dense_layer(512)
                ],

            MiddlewareScheme.Deep: \
                [
                    self.dense_layer([128]),
                    self.dense_layer([128]),
                    self.dense_layer([128])
                    self.dense_layer(128),
                    self.dense_layer(128),
                    self.dense_layer(128)
                ]
        }
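
Because a scheme is now a list of layer objects rather than bare [num_units] lists, a custom scheme is passed as instances too. A minimal sketch (illustrative values, not from the commit):

    from rl_coach.architectures.tensorflow_components.layers import Dense
    from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters

    # Two Dense layers with a BatchnormActivationDropout block interleaved after each one.
    middleware_params = FCMiddlewareParameters(scheme=[Dense(256), Dense(256)],
                                               batchnorm=True, is_training=False)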


@@ -18,19 +18,21 @@
import numpy as np
import tensorflow as tf

from rl_coach.architectures.tensorflow_components.architecture import batchnorm_activation_dropout, Dense
from rl_coach.architectures.tensorflow_components.layers import batchnorm_activation_dropout, Dense
from rl_coach.architectures.tensorflow_components.middlewares.middleware import Middleware, MiddlewareParameters
from rl_coach.base_parameters import MiddlewareScheme
from rl_coach.core_types import Middleware_LSTM_Embedding
from rl_coach.utils import force_list


class LSTMMiddlewareParameters(MiddlewareParameters):
    def __init__(self, activation_function='relu', number_of_lstm_cells=256,
                 scheme: MiddlewareScheme = MiddlewareScheme.Medium,
                 batchnorm: bool = False, dropout: bool = False,
                 name="middleware_lstm_embedder", dense_layer=Dense):
                 name="middleware_lstm_embedder", dense_layer=Dense, is_training=False):
        super().__init__(parameterized_class=LSTMMiddleware, activation_function=activation_function,
                         scheme=scheme, batchnorm=batchnorm, dropout=dropout, name=name, dense_layer=dense_layer)
                         scheme=scheme, batchnorm=batchnorm, dropout=dropout, name=name, dense_layer=dense_layer,
                         is_training=is_training)
        self.number_of_lstm_cells = number_of_lstm_cells


@@ -38,9 +40,9 @@ class LSTMMiddleware(Middleware):
    def __init__(self, activation_function=tf.nn.relu, number_of_lstm_cells: int=256,
                 scheme: MiddlewareScheme = MiddlewareScheme.Medium,
                 batchnorm: bool = False, dropout: bool = False,
                 name="middleware_lstm_embedder", dense_layer=Dense):
                 name="middleware_lstm_embedder", dense_layer=Dense, is_training=False):
        super().__init__(activation_function=activation_function, batchnorm=batchnorm,
                         dropout=dropout, scheme=scheme, name=name, dense_layer=dense_layer)
                         dropout=dropout, scheme=scheme, name=name, dense_layer=dense_layer, is_training=is_training)
        self.return_type = Middleware_LSTM_Embedding
        self.number_of_lstm_cells = number_of_lstm_cells
        self.layers = []
@@ -57,19 +59,12 @@ class LSTMMiddleware(Middleware):

        self.layers.append(self.input)

        # optionally insert some dense layers before the LSTM
        if isinstance(self.scheme, MiddlewareScheme):
            layers_params = self.schemes[self.scheme]
        else:
            layers_params = self.scheme
        for idx, layer_params in enumerate(layers_params):
            self.layers.append(
                tf.layers.dense(self.layers[-1], layer_params[0], name='fc{}'.format(idx))
            )

            self.layers.extend(batchnorm_activation_dropout(self.layers[-1], self.batchnorm,
                                                            self.activation_function, self.dropout,
                                                            self.dropout_rate, idx))
        # optionally insert some layers before the LSTM
        for idx, layer_params in enumerate(self.layers_params):
            self.layers.extend(force_list(
                layer_params(self.layers[-1], name='fc{}'.format(idx),
                             is_training=self.is_training)
            ))

        # add the LSTM layer
        lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(self.number_of_lstm_cells, state_is_tuple=True)
@@ -97,20 +92,20 @@ class LSTMMiddleware(Middleware):
            # ppo
            MiddlewareScheme.Shallow:
                [
                    [64]
                    self.dense_layer(64)
                ],

            # dqn
            MiddlewareScheme.Medium:
                [
                    [512]
                    self.dense_layer(512)
                ],

            MiddlewareScheme.Deep: \
                [
                    [128],
                    [128],
                    [128]
                    self.dense_layer(128),
                    self.dense_layer(128),
                    self.dense_layer(128)
                ]
        }
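
The LSTM middleware follows the same convention for the dense stack that precedes the LSTM cell; a minimal sketch (illustrative values, not from the commit):

    from rl_coach.architectures.tensorflow_components.layers import Dense
    from rl_coach.architectures.tensorflow_components.middlewares.lstm_middleware import LSTMMiddlewareParameters

    # One Dense(128) built before a 256-cell LSTM; the scheme list feeds the pre-LSTM stack.
    middleware_params = LSTMMiddlewareParameters(number_of_lstm_cells=256, scheme=[Dense(128)])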


@@ -13,25 +13,27 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
import copy
from typing import Type, Union, List

import tensorflow as tf

from rl_coach.architectures.tensorflow_components.architecture import Dense
from rl_coach.base_parameters import MiddlewareScheme, Parameters, NetworkComponentParameters
from rl_coach.architectures.tensorflow_components.layers import Dense, BatchnormActivationDropout
from rl_coach.base_parameters import MiddlewareScheme, NetworkComponentParameters
from rl_coach.core_types import MiddlewareEmbedding


class MiddlewareParameters(NetworkComponentParameters):
    def __init__(self, parameterized_class: Type['Middleware'],
                 activation_function: str='relu', scheme: Union[List, MiddlewareScheme]=MiddlewareScheme.Medium,
                 batchnorm: bool=False, dropout: bool=False, name='middleware', dense_layer=Dense):
                 batchnorm: bool=False, dropout: bool=False, name='middleware', dense_layer=Dense, is_training=False):
        super().__init__(dense_layer=dense_layer)
        self.activation_function = activation_function
        self.scheme = scheme
        self.batchnorm = batchnorm
        self.dropout = dropout
        self.name = name
        self.is_training = is_training
        self.parameterized_class_name = parameterized_class.__name__


@@ -43,7 +45,8 @@ class Middleware(object):
    """
    def __init__(self, activation_function=tf.nn.relu,
                 scheme: MiddlewareScheme = MiddlewareScheme.Medium,
                 batchnorm: bool = False, dropout: bool = False, name="middleware_embedder", dense_layer=Dense):
                 batchnorm: bool = False, dropout: bool = False, name="middleware_embedder", dense_layer=Dense,
                 is_training=False):
        self.name = name
        self.input = None
        self.output = None
@@ -54,6 +57,23 @@ class Middleware(object):
        self.scheme = scheme
        self.return_type = MiddlewareEmbedding
        self.dense_layer = dense_layer
        self.is_training = is_training

        # layers order is conv -> batchnorm -> activation -> dropout
        if isinstance(self.scheme, MiddlewareScheme):
            self.layers_params = copy.copy(self.schemes[self.scheme])
        else:
            self.layers_params = copy.copy(self.scheme)

        # we allow adding batchnorm, dropout or activation functions after each layer.
        # The motivation is to simplify the transition between a network with batchnorm and a network without
        # batchnorm to a single flag (the same applies to activation function and dropout)
        if self.batchnorm or self.activation_function or self.dropout:
            for layer_idx in reversed(range(len(self.layers_params))):
                self.layers_params.insert(layer_idx+1,
                                          BatchnormActivationDropout(batchnorm=self.batchnorm,
                                                                     activation_function=self.activation_function,
                                                                     dropout_rate=self.dropout_rate))

    def __call__(self, input_layer):
        with tf.variable_scope(self.get_name()):
@@ -72,3 +92,10 @@ class Middleware(object):
    def schemes(self):
        raise NotImplementedError("Inheriting middleware must define schemes matching its allowed default "
                                  "configurations.")

    def __str__(self):
        result = [str(l) for l in self.layers_params]
        if self.layers_params:
            return '\n'.join(result)
        else:
            return 'No layers'
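
To see what the insertion loop in Middleware.__init__ produces: with a two-layer scheme and batchnorm enabled, every scheme layer ends up followed by a BatchnormActivationDropout block. An illustrative reconstruction (not from the commit):

    from rl_coach.architectures.tensorflow_components.layers import BatchnormActivationDropout, Dense

    layers_params = [Dense(64), Dense(64)]
    for layer_idx in reversed(range(len(layers_params))):
        layers_params.insert(layer_idx + 1, BatchnormActivationDropout(batchnorm=True))

    # layers_params is now [Dense, BatchnormActivationDropout, Dense, BatchnormActivationDropout],
    # which is exactly the flat list that _build_module later iterates over with force_list().
    print('\n'.join(str(l) for l in layers_params))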