1
0
mirror of https://github.com/gryf/coach.git synced 2025-12-18 11:40:18 +01:00

Move embedder, middleware, and head parameters to framework agnostic modules. (#45)

Part of #28
This commit is contained in:
Sina Afrooze
2018-10-29 14:46:40 -07:00
committed by Scott Leishman
parent 16b3e99f37
commit a888226641
60 changed files with 410 additions and 330 deletions

View File

@@ -0,0 +1,4 @@
from .image_embedder import ImageEmbedder
from .vector_embedder import VectorEmbedder
__all__ = ['ImageEmbedder', 'VectorEmbedder']

View File

@@ -28,35 +28,6 @@ from rl_coach.core_types import InputEmbedding
from rl_coach.utils import force_list
class InputEmbedderParameters(NetworkComponentParameters):
    """
    Holds the configuration values for an input embedder.

    Every constructor argument is kept as an attribute of the same name. ``dense_layer`` is the only one
    handed to the parent constructor.
    """
    def __init__(self, activation_function: str='relu', scheme: Union[List, EmbedderScheme]=EmbedderScheme.Medium,
                 batchnorm: bool=False, dropout=False, name: str='embedder', input_rescaling=None, input_offset=None,
                 input_clipping=None, dense_layer=Dense, is_training=False):
        """
        :param activation_function: stored as-is on the instance
        :param scheme: stored as-is on the instance
        :param batchnorm: stored as-is on the instance
        :param dropout: stored as-is on the instance
        :param name: stored as-is on the instance
        :param input_rescaling: dictionary keyed by 'image' / 'vector'. when None, {'image': 255.0, 'vector': 1.0}
                                is used
        :param input_offset: dictionary keyed by 'image' / 'vector'. when None, {'image': 0.0, 'vector': 0.0}
                             is used
        :param input_clipping: stored as-is on the instance
        :param dense_layer: forwarded to the parent constructor
        :param is_training: stored as-is on the instance
        """
        super().__init__(dense_layer=dense_layer)
        self.name = name
        self.activation_function = activation_function
        self.scheme = scheme
        self.batchnorm = batchnorm
        self.dropout = dropout
        self.is_training = is_training
        # fresh dictionaries are created per instance when the caller did not supply any
        self.input_rescaling = {'image': 255.0, 'vector': 1.0} if input_rescaling is None else input_rescaling
        self.input_offset = {'image': 0.0, 'vector': 0.0} if input_offset is None else input_offset
        self.input_clipping = input_clipping

    @property
    def path(self):
        """
        :return: a dictionary from the embedder type ('image' or 'vector') to a '<module>:<class>' string
        """
        return dict(image='image_embedder:ImageEmbedder',
                    vector='vector_embedder:VectorEmbedder')
class InputEmbedder(object):
"""
An input embedder is the first part of the network, which takes the input from the state and produces a vector
@@ -83,6 +54,8 @@ class InputEmbedder(object):
self.input_offset = input_offset
self.input_clipping = input_clipping
self.dense_layer = dense_layer
if self.dense_layer is None:
self.dense_layer = Dense
self.is_training = is_training
# layers order is conv -> batchnorm -> activation -> dropout

View File

@@ -20,10 +20,10 @@ from typing import Dict
import numpy as np
import tensorflow as tf
from rl_coach.architectures.tensorflow_components.embedders.embedder import InputEmbedderParameters
from rl_coach.architectures.embedder_parameters import InputEmbedderParameters
from rl_coach.architectures.head_parameters import HeadParameters
from rl_coach.architectures.middleware_parameters import MiddlewareParameters
from rl_coach.architectures.tensorflow_components.architecture import TensorFlowArchitecture
from rl_coach.architectures.tensorflow_components.heads.head import HeadParameters
from rl_coach.architectures.tensorflow_components.middlewares.middleware import MiddlewareParameters
from rl_coach.base_parameters import AgentParameters, EmbeddingMergerType
from rl_coach.core_types import PredictionType
from rl_coach.spaces import SpacesDefinition, PlanarMapsObservationSpace
@@ -136,15 +136,17 @@ class GeneralTensorFlowNetwork(TensorFlowArchitecture):
raise ValueError("The key for the input embedder ({}) must match one of the following keys: {}"
.format(input_name, allowed_inputs.keys()))
type = "vector"
if isinstance(allowed_inputs[input_name], PlanarMapsObservationSpace):
type = "image"
mod_names = {'image': 'ImageEmbedder', 'vector': 'VectorEmbedder'}
embedder_path = 'rl_coach.architectures.tensorflow_components.embedders.' + embedder_params.path[type]
emb_type = "vector"
if isinstance(allowed_inputs[input_name], PlanarMapsObservationSpace):
emb_type = "image"
embedder_path = 'rl_coach.architectures.tensorflow_components.embedders:' + mod_names[emb_type]
embedder_params_copy = copy.copy(embedder_params)
embedder_params_copy.activation_function = self.get_activation_function(embedder_params.activation_function)
embedder_params_copy.input_rescaling = embedder_params_copy.input_rescaling[type]
embedder_params_copy.input_offset = embedder_params_copy.input_offset[type]
embedder_params_copy.input_rescaling = embedder_params_copy.input_rescaling[emb_type]
embedder_params_copy.input_offset = embedder_params_copy.input_offset[emb_type]
embedder_params_copy.name = input_name
module = dynamic_import_and_instantiate_module_from_params(embedder_params_copy,
path=embedder_path,
@@ -157,25 +159,25 @@ class GeneralTensorFlowNetwork(TensorFlowArchitecture):
:param middleware_params: the parameters of the middleware class
:return: the middleware instance
"""
mod_name = middleware_params.parameterized_class_name
middleware_path = 'rl_coach.architectures.tensorflow_components.middlewares:' + mod_name
middleware_params_copy = copy.copy(middleware_params)
middleware_params_copy.activation_function = self.get_activation_function(middleware_params.activation_function)
module = dynamic_import_and_instantiate_module_from_params(middleware_params_copy)
module = dynamic_import_and_instantiate_module_from_params(middleware_params_copy, path=middleware_path)
return module
def get_output_head(self, head_params: HeadParameters, head_idx: int):
    """
    Given the parameters of a head, creates the head and returns it
    :param head_params: the parameters of the head to create. its parameterized_class_name selects the class
                        to instantiate from the tensorflow heads package
    :param head_idx: the head index
    :return: the head
    """
    # the head class is addressed as <module_path>:<class_name>
    mod_name = head_params.parameterized_class_name
    head_path = 'rl_coach.architectures.tensorflow_components.heads:' + mod_name

    # work on a shallow copy so that the activation function set below does not modify the caller's parameters
    head_params_copy = copy.copy(head_params)
    head_params_copy.activation_function = self.get_activation_function(head_params_copy.activation_function)

    return dynamic_import_and_instantiate_module_from_params(head_params_copy, path=head_path, extra_kwargs={
        'agent_parameters': self.ap, 'spaces': self.spaces, 'network_name': self.network_wrapper_name,
        'head_idx': head_idx, 'is_local': self.network_is_local})

View File

@@ -0,0 +1,29 @@
from .categorical_q_head import CategoricalQHead
from .ddpg_actor_head import DDPGActor
from .dnd_q_head import DNDQHead
from .dueling_q_head import DuelingQHead
from .measurements_prediction_head import MeasurementsPredictionHead
from .naf_head import NAFHead
from .policy_head import PolicyHead
from .ppo_head import PPOHead
from .ppo_v_head import PPOVHead
from .q_head import QHead
from .quantile_regression_q_head import QuantileRegressionQHead
from .rainbow_q_head import RainbowQHead
from .v_head import VHead
__all__ = [
'CategoricalQHead',
'DDPGActor',
'DNDQHead',
'DuelingQHead',
'MeasurementsPredictionHead',
'NAFHead',
'PolicyHead',
'PPOHead',
'PPOVHead',
'QHead',
'QuantileRegressionQHead',
'RainbowQHead',
'VHead'
]

View File

@@ -18,22 +18,12 @@ import tensorflow as tf
from rl_coach.architectures.tensorflow_components.layers import Dense
from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters
from rl_coach.architectures.tensorflow_components.heads.head import Head
from rl_coach.base_parameters import AgentParameters
from rl_coach.core_types import QActionStateValue
from rl_coach.spaces import SpacesDefinition
class CategoricalQHeadParameters(HeadParameters):
    """
    Parameters for a CategoricalQHead. All constructor arguments are forwarded unchanged to HeadParameters.
    """
    def __init__(self, activation_function: str ='relu', name: str='categorical_q_head_params',
                 num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
                 loss_weight: float = 1.0, dense_layer=Dense):
        super().__init__(
            parameterized_class=CategoricalQHead,
            name=name,
            activation_function=activation_function,
            num_output_head_copies=num_output_head_copies,
            rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
            loss_weight=loss_weight,
            dense_layer=dense_layer,
        )
class CategoricalQHead(Head):
def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, network_name: str,
head_idx: int = 0, loss_weight: float = 1., is_local: bool = True, activation_function: str ='relu',

View File

@@ -16,25 +16,14 @@
import tensorflow as tf
from rl_coach.architectures.tensorflow_components.layers import Dense, batchnorm_activation_dropout
from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters
from rl_coach.architectures.tensorflow_components.layers import Dense
from rl_coach.architectures.tensorflow_components.heads.head import Head
from rl_coach.base_parameters import AgentParameters
from rl_coach.core_types import QActionStateValue
from rl_coach.spaces import SpacesDefinition, BoxActionSpace, DiscreteActionSpace
from rl_coach.utils import force_list
class RegressionHeadParameters(HeadParameters):
    """
    Parameters for a RegressionHead. Every argument except scheme is forwarded unchanged to HeadParameters.
    """
    # NOTE(review): `scheme` is accepted but is neither stored on the instance nor passed to the parent
    # constructor here - confirm whether dropping it is intentional. Its default list is also built once,
    # when the function is defined.
    def __init__(self, activation_function: str ='relu', name: str='q_head_params',
                 num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
                 loss_weight: float = 1.0, dense_layer=Dense, scheme=[Dense(256), Dense(256)]):
        super().__init__(
            parameterized_class=RegressionHead,
            name=name,
            activation_function=activation_function,
            num_output_head_copies=num_output_head_copies,
            rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
            loss_weight=loss_weight,
            dense_layer=dense_layer,
        )
class RegressionHead(Head):
def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, network_name: str,
head_idx: int = 0, loss_weight: float = 1., is_local: bool = True, activation_function: str='relu',

View File

@@ -17,23 +17,12 @@
import tensorflow as tf
from rl_coach.architectures.tensorflow_components.layers import batchnorm_activation_dropout, Dense
from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters
from rl_coach.architectures.tensorflow_components.heads.head import Head
from rl_coach.base_parameters import AgentParameters
from rl_coach.core_types import ActionProbabilities
from rl_coach.spaces import SpacesDefinition
class DDPGActorHeadParameters(HeadParameters):
    """
    Parameters for a DDPGActor head. All arguments except batchnorm are forwarded unchanged to
    HeadParameters; batchnorm is kept as an attribute of this object.
    """
    def __init__(self, activation_function: str ='tanh', name: str='policy_head_params', batchnorm: bool=True,
                 num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
                 loss_weight: float = 1.0, dense_layer=Dense):
        super().__init__(
            parameterized_class=DDPGActor,
            name=name,
            activation_function=activation_function,
            num_output_head_copies=num_output_head_copies,
            rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
            loss_weight=loss_weight,
            dense_layer=dense_layer,
        )
        # set after the parent constructor has run, as the only head-specific field
        self.batchnorm = batchnorm
class DDPGActor(Head):
def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, network_name: str,
head_idx: int = 0, loss_weight: float = 1., is_local: bool = True, activation_function: str='tanh',

View File

@@ -16,23 +16,12 @@
import tensorflow as tf
from rl_coach.architectures.tensorflow_components.layers import Dense
from rl_coach.architectures.tensorflow_components.heads.head import HeadParameters
from rl_coach.architectures.tensorflow_components.heads.q_head import QHead
from rl_coach.base_parameters import AgentParameters
from rl_coach.memories.non_episodic import differentiable_neural_dictionary
from rl_coach.spaces import SpacesDefinition
class DNDQHeadParameters(HeadParameters):
    """
    Parameters for a DNDQHead. All constructor arguments are forwarded unchanged to HeadParameters.
    """
    def __init__(self, activation_function: str ='relu', name: str='dnd_q_head_params',
                 num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
                 loss_weight: float = 1.0, dense_layer=Dense):
        super().__init__(
            parameterized_class=DNDQHead,
            name=name,
            activation_function=activation_function,
            num_output_head_copies=num_output_head_copies,
            rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
            loss_weight=loss_weight,
            dense_layer=dense_layer,
        )
class DNDQHead(QHead):
def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, network_name: str,
head_idx: int = 0, loss_weight: float = 1., is_local: bool = True, activation_function: str='relu',

View File

@@ -17,21 +17,11 @@
import tensorflow as tf
from rl_coach.architectures.tensorflow_components.layers import Dense
from rl_coach.architectures.tensorflow_components.heads.head import HeadParameters
from rl_coach.architectures.tensorflow_components.heads.q_head import QHead
from rl_coach.base_parameters import AgentParameters
from rl_coach.spaces import SpacesDefinition
class DuelingQHeadParameters(HeadParameters):
    """
    Parameters for a DuelingQHead. All constructor arguments are forwarded unchanged to HeadParameters.
    """
    def __init__(self, activation_function: str ='relu', name: str='dueling_q_head_params',
                 num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
                 loss_weight: float = 1.0, dense_layer=Dense):
        super().__init__(
            parameterized_class=DuelingQHead,
            name=name,
            activation_function=activation_function,
            num_output_head_copies=num_output_head_copies,
            rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
            loss_weight=loss_weight,
            dense_layer=dense_layer,
        )
class DuelingQHead(QHead):
def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, network_name: str,
head_idx: int = 0, loss_weight: float = 1., is_local: bool = True, activation_function: str='relu',

View File

@@ -33,19 +33,6 @@ def normalized_columns_initializer(std=1.0):
return _initializer
class HeadParameters(NetworkComponentParameters):
    """
    Base class for the parameters of a network head.

    The head class itself is not kept; only its name is recorded in parameterized_class_name.
    """
    def __init__(self, parameterized_class: Type['Head'], activation_function: str = 'relu', name: str= 'head',
                 num_output_head_copies: int=1, rescale_gradient_from_head_by_factor: float=1.0,
                 loss_weight: float=1.0, dense_layer=Dense):
        """
        :param parameterized_class: the head class these parameters describe. only its __name__ is stored
        :param activation_function: stored as-is on the instance
        :param name: stored as-is on the instance
        :param num_output_head_copies: stored as-is on the instance
        :param rescale_gradient_from_head_by_factor: stored as-is on the instance
        :param loss_weight: stored as-is on the instance
        :param dense_layer: forwarded to the parent constructor
        """
        super().__init__(dense_layer=dense_layer)
        self.name = name
        self.activation_function = activation_function
        self.loss_weight = loss_weight
        self.num_output_head_copies = num_output_head_copies
        self.rescale_gradient_from_head_by_factor = rescale_gradient_from_head_by_factor
        self.parameterized_class_name = parameterized_class.__name__
class Head(object):
"""
A head is the final part of the network. It takes the embedding from the middleware embedder and passes it through
@@ -74,6 +61,8 @@ class Head(object):
self.return_type = None
self.activation_function = activation_function
self.dense_layer = dense_layer
if self.dense_layer is None:
self.dense_layer = Dense
def __call__(self, input_layer):
"""

View File

@@ -17,23 +17,12 @@
import tensorflow as tf
from rl_coach.architectures.tensorflow_components.layers import Dense
from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters
from rl_coach.architectures.tensorflow_components.heads.head import Head
from rl_coach.base_parameters import AgentParameters
from rl_coach.core_types import Measurements
from rl_coach.spaces import SpacesDefinition
class MeasurementsPredictionHeadParameters(HeadParameters):
    """
    Parameters for a MeasurementsPredictionHead. All constructor arguments are forwarded unchanged to
    HeadParameters.
    """
    def __init__(self, activation_function: str ='relu', name: str='measurements_prediction_head_params',
                 num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
                 loss_weight: float = 1.0, dense_layer=Dense):
        super().__init__(
            parameterized_class=MeasurementsPredictionHead,
            name=name,
            activation_function=activation_function,
            num_output_head_copies=num_output_head_copies,
            rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
            loss_weight=loss_weight,
            dense_layer=dense_layer,
        )
class MeasurementsPredictionHead(Head):
def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, network_name: str,
head_idx: int = 0, loss_weight: float = 1., is_local: bool = True, activation_function: str='relu',

View File

@@ -17,23 +17,13 @@
import tensorflow as tf
from rl_coach.architectures.tensorflow_components.layers import Dense
from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters
from rl_coach.architectures.tensorflow_components.heads.head import Head
from rl_coach.base_parameters import AgentParameters
from rl_coach.core_types import QActionStateValue
from rl_coach.spaces import BoxActionSpace
from rl_coach.spaces import SpacesDefinition
class NAFHeadParameters(HeadParameters):
    """
    Parameters for a NAFHead. All constructor arguments are forwarded unchanged to HeadParameters.
    """
    def __init__(self, activation_function: str ='tanh', name: str='naf_head_params',
                 num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
                 loss_weight: float = 1.0, dense_layer=Dense):
        super().__init__(
            parameterized_class=NAFHead,
            name=name,
            activation_function=activation_function,
            num_output_head_copies=num_output_head_copies,
            rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
            loss_weight=loss_weight,
            dense_layer=dense_layer,
        )
class NAFHead(Head):
def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, network_name: str,
head_idx: int = 0, loss_weight: float = 1., is_local: bool = True,activation_function: str='relu',

View File

@@ -18,7 +18,7 @@ import numpy as np
import tensorflow as tf
from rl_coach.architectures.tensorflow_components.layers import Dense
from rl_coach.architectures.tensorflow_components.heads.head import Head, normalized_columns_initializer, HeadParameters
from rl_coach.architectures.tensorflow_components.heads.head import Head, normalized_columns_initializer
from rl_coach.base_parameters import AgentParameters
from rl_coach.core_types import ActionProbabilities
from rl_coach.exploration_policies.continuous_entropy import ContinuousEntropyParameters
@@ -27,17 +27,6 @@ from rl_coach.spaces import SpacesDefinition
from rl_coach.utils import eps, indent_string
class PolicyHeadParameters(HeadParameters):
    """
    Parameters for a PolicyHead. All constructor arguments are forwarded unchanged to HeadParameters.
    """
    def __init__(self, activation_function: str ='tanh', name: str='policy_head_params',
                 num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
                 loss_weight: float = 1.0, dense_layer=Dense):
        super().__init__(
            parameterized_class=PolicyHead,
            name=name,
            activation_function=activation_function,
            num_output_head_copies=num_output_head_copies,
            rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
            loss_weight=loss_weight,
            dense_layer=dense_layer,
        )
class PolicyHead(Head):
def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, network_name: str,
head_idx: int = 0, loss_weight: float = 1., is_local: bool = True, activation_function: str='tanh',

View File

@@ -18,7 +18,7 @@ import numpy as np
import tensorflow as tf
from rl_coach.architectures.tensorflow_components.layers import Dense
from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters, normalized_columns_initializer
from rl_coach.architectures.tensorflow_components.heads.head import Head, normalized_columns_initializer
from rl_coach.base_parameters import AgentParameters
from rl_coach.core_types import ActionProbabilities
from rl_coach.spaces import BoxActionSpace, DiscreteActionSpace
@@ -26,16 +26,6 @@ from rl_coach.spaces import SpacesDefinition
from rl_coach.utils import eps
class PPOHeadParameters(HeadParameters):
    """
    Parameters for a PPOHead. All constructor arguments are forwarded unchanged to HeadParameters.
    """
    def __init__(self, activation_function: str ='tanh', name: str='ppo_head_params',
                 num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
                 loss_weight: float = 1.0, dense_layer=Dense):
        super().__init__(
            parameterized_class=PPOHead,
            name=name,
            activation_function=activation_function,
            num_output_head_copies=num_output_head_copies,
            rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
            loss_weight=loss_weight,
            dense_layer=dense_layer,
        )
class PPOHead(Head):
def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, network_name: str,
head_idx: int = 0, loss_weight: float = 1., is_local: bool = True, activation_function: str='tanh',

View File

@@ -17,23 +17,12 @@
import tensorflow as tf
from rl_coach.architectures.tensorflow_components.layers import Dense
from rl_coach.architectures.tensorflow_components.heads.head import Head, normalized_columns_initializer, HeadParameters
from rl_coach.architectures.tensorflow_components.heads.head import Head, normalized_columns_initializer
from rl_coach.base_parameters import AgentParameters
from rl_coach.core_types import ActionProbabilities
from rl_coach.spaces import SpacesDefinition
class PPOVHeadParameters(HeadParameters):
    """
    Parameters for a PPOVHead. All constructor arguments are forwarded unchanged to HeadParameters.
    """
    def __init__(self, activation_function: str ='relu', name: str='ppo_v_head_params',
                 num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
                 loss_weight: float = 1.0, dense_layer=Dense):
        super().__init__(
            parameterized_class=PPOVHead,
            name=name,
            activation_function=activation_function,
            num_output_head_copies=num_output_head_copies,
            rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
            loss_weight=loss_weight,
            dense_layer=dense_layer,
        )
class PPOVHead(Head):
def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, network_name: str,
head_idx: int = 0, loss_weight: float = 1., is_local: bool = True, activation_function: str='relu',

View File

@@ -17,23 +17,12 @@
import tensorflow as tf
from rl_coach.architectures.tensorflow_components.layers import Dense
from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters
from rl_coach.architectures.tensorflow_components.heads.head import Head
from rl_coach.base_parameters import AgentParameters
from rl_coach.core_types import QActionStateValue
from rl_coach.spaces import SpacesDefinition, BoxActionSpace, DiscreteActionSpace
class QHeadParameters(HeadParameters):
    """
    Parameters for a QHead. All constructor arguments are forwarded unchanged to HeadParameters.
    """
    def __init__(self, activation_function: str ='relu', name: str='q_head_params',
                 num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
                 loss_weight: float = 1.0, dense_layer=Dense):
        super().__init__(
            parameterized_class=QHead,
            name=name,
            activation_function=activation_function,
            num_output_head_copies=num_output_head_copies,
            rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
            loss_weight=loss_weight,
            dense_layer=dense_layer,
        )
class QHead(Head):
def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, network_name: str,
head_idx: int = 0, loss_weight: float = 1., is_local: bool = True, activation_function: str='relu',

View File

@@ -17,23 +17,12 @@
import tensorflow as tf
from rl_coach.architectures.tensorflow_components.layers import Dense
from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters
from rl_coach.architectures.tensorflow_components.heads.head import Head
from rl_coach.base_parameters import AgentParameters
from rl_coach.core_types import QActionStateValue
from rl_coach.spaces import SpacesDefinition
class QuantileRegressionQHeadParameters(HeadParameters):
    """
    Parameters for a QuantileRegressionQHead. All constructor arguments are forwarded unchanged to
    HeadParameters.
    """
    def __init__(self, activation_function: str ='relu', name: str='quantile_regression_q_head_params',
                 num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
                 loss_weight: float = 1.0, dense_layer=Dense):
        super().__init__(
            parameterized_class=QuantileRegressionQHead,
            name=name,
            activation_function=activation_function,
            num_output_head_copies=num_output_head_copies,
            rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
            loss_weight=loss_weight,
            dense_layer=dense_layer,
        )
class QuantileRegressionQHead(Head):
def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, network_name: str,
head_idx: int = 0, loss_weight: float = 1., is_local: bool = True, activation_function: str='relu',

View File

@@ -17,22 +17,12 @@
import tensorflow as tf
from rl_coach.architectures.tensorflow_components.layers import Dense
from rl_coach.architectures.tensorflow_components.heads.head import HeadParameters, Head
from rl_coach.architectures.tensorflow_components.heads.head import Head
from rl_coach.base_parameters import AgentParameters
from rl_coach.core_types import QActionStateValue
from rl_coach.spaces import SpacesDefinition
class RainbowQHeadParameters(HeadParameters):
    """
    Parameters for a RainbowQHead. All constructor arguments are forwarded unchanged to HeadParameters.
    """
    def __init__(self, activation_function: str ='relu', name: str='rainbow_q_head_params',
                 num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
                 loss_weight: float = 1.0, dense_layer=Dense):
        super().__init__(
            parameterized_class=RainbowQHead,
            name=name,
            activation_function=activation_function,
            num_output_head_copies=num_output_head_copies,
            rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
            loss_weight=loss_weight,
            dense_layer=dense_layer,
        )
class RainbowQHead(Head):
def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, network_name: str,
head_idx: int = 0, loss_weight: float = 1., is_local: bool = True, activation_function: str='relu',

View File

@@ -17,23 +17,12 @@
import tensorflow as tf
from rl_coach.architectures.tensorflow_components.layers import Dense
from rl_coach.architectures.tensorflow_components.heads.head import Head, normalized_columns_initializer, HeadParameters
from rl_coach.architectures.tensorflow_components.heads.head import Head, normalized_columns_initializer
from rl_coach.base_parameters import AgentParameters
from rl_coach.core_types import VStateValue
from rl_coach.spaces import SpacesDefinition
class VHeadParameters(HeadParameters):
    """
    Parameters for a VHead. All constructor arguments are forwarded unchanged to HeadParameters.
    """
    def __init__(self, activation_function: str ='relu', name: str='v_head_params',
                 num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
                 loss_weight: float = 1.0, dense_layer=Dense):
        super().__init__(
            parameterized_class=VHead,
            name=name,
            activation_function=activation_function,
            num_output_head_copies=num_output_head_copies,
            rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
            loss_weight=loss_weight,
            dense_layer=dense_layer,
        )
class VHead(Head):
def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, network_name: str,
head_idx: int = 0, loss_weight: float = 1., is_local: bool = True, activation_function: str='relu',

View File

@@ -0,0 +1,4 @@
from .fc_middleware import FCMiddleware
from .lstm_middleware import LSTMMiddleware
__all__ = ["FCMiddleware", "LSTMMiddleware"]

View File

@@ -18,22 +18,12 @@ from typing import Union, List
import tensorflow as tf
from rl_coach.architectures.tensorflow_components.layers import batchnorm_activation_dropout, Dense
from rl_coach.architectures.tensorflow_components.middlewares.middleware import Middleware, MiddlewareParameters
from rl_coach.architectures.tensorflow_components.middlewares.middleware import Middleware
from rl_coach.base_parameters import MiddlewareScheme
from rl_coach.core_types import Middleware_FC_Embedding
from rl_coach.utils import force_list
class FCMiddlewareParameters(MiddlewareParameters):
    """
    Parameters for an FCMiddleware. All constructor arguments are forwarded unchanged to
    MiddlewareParameters.
    """
    def __init__(self, activation_function='relu',
                 scheme: Union[List, MiddlewareScheme] = MiddlewareScheme.Medium,
                 batchnorm: bool = False, dropout: bool = False,
                 name="middleware_fc_embedder", dense_layer=Dense, is_training=False):
        super().__init__(
            parameterized_class=FCMiddleware,
            name=name,
            activation_function=activation_function,
            scheme=scheme,
            batchnorm=batchnorm,
            dropout=dropout,
            dense_layer=dense_layer,
            is_training=is_training,
        )
class FCMiddleware(Middleware):
def __init__(self, activation_function=tf.nn.relu,
scheme: MiddlewareScheme = MiddlewareScheme.Medium,

View File

@@ -19,23 +19,12 @@ import numpy as np
import tensorflow as tf
from rl_coach.architectures.tensorflow_components.layers import batchnorm_activation_dropout, Dense
from rl_coach.architectures.tensorflow_components.middlewares.middleware import Middleware, MiddlewareParameters
from rl_coach.architectures.tensorflow_components.middlewares.middleware import Middleware
from rl_coach.base_parameters import MiddlewareScheme
from rl_coach.core_types import Middleware_LSTM_Embedding
from rl_coach.utils import force_list
class LSTMMiddlewareParameters(MiddlewareParameters):
    """
    Parameters for an LSTMMiddleware. All arguments except number_of_lstm_cells are forwarded unchanged to
    MiddlewareParameters; number_of_lstm_cells is kept as an attribute of this object.
    """
    def __init__(self, activation_function='relu', number_of_lstm_cells=256,
                 scheme: MiddlewareScheme = MiddlewareScheme.Medium,
                 batchnorm: bool = False, dropout: bool = False,
                 name="middleware_lstm_embedder", dense_layer=Dense, is_training=False):
        super().__init__(
            parameterized_class=LSTMMiddleware,
            name=name,
            activation_function=activation_function,
            scheme=scheme,
            batchnorm=batchnorm,
            dropout=dropout,
            dense_layer=dense_layer,
            is_training=is_training,
        )
        # set after the parent constructor has run, as the only LSTM-specific field
        self.number_of_lstm_cells = number_of_lstm_cells
class LSTMMiddleware(Middleware):
def __init__(self, activation_function=tf.nn.relu, number_of_lstm_cells: int=256,
scheme: MiddlewareScheme = MiddlewareScheme.Medium,

View File

@@ -14,7 +14,6 @@
# limitations under the License.
#
import copy
from typing import Type, Union, List
import tensorflow as tf
@@ -23,20 +22,6 @@ from rl_coach.base_parameters import MiddlewareScheme, NetworkComponentParameter
from rl_coach.core_types import MiddlewareEmbedding
class MiddlewareParameters(NetworkComponentParameters):
    """
    Base class for the parameters of a middleware.

    The middleware class itself is not kept; only its name is recorded in parameterized_class_name.
    """
    def __init__(self, parameterized_class: Type['Middleware'],
                 activation_function: str='relu', scheme: Union[List, MiddlewareScheme]=MiddlewareScheme.Medium,
                 batchnorm: bool=False, dropout: bool=False, name='middleware', dense_layer=Dense, is_training=False):
        """
        :param parameterized_class: the middleware class these parameters describe. only its __name__ is stored
        :param activation_function: stored as-is on the instance
        :param scheme: stored as-is on the instance
        :param batchnorm: stored as-is on the instance
        :param dropout: stored as-is on the instance
        :param name: stored as-is on the instance
        :param dense_layer: forwarded to the parent constructor
        :param is_training: stored as-is on the instance
        """
        super().__init__(dense_layer=dense_layer)
        self.name = name
        self.scheme = scheme
        self.activation_function = activation_function
        self.batchnorm = batchnorm
        self.dropout = dropout
        self.is_training = is_training
        self.parameterized_class_name = parameterized_class.__name__
class Middleware(object):
"""
A middleware embedder is the middle part of the network. It takes the embeddings from the input embedders,
@@ -57,6 +42,8 @@ class Middleware(object):
self.scheme = scheme
self.return_type = MiddlewareEmbedding
self.dense_layer = dense_layer
if self.dense_layer is None:
self.dense_layer = Dense
self.is_training = is_training
# layers order is conv -> batchnorm -> activation -> dropout