mirror of
https://github.com/gryf/coach.git
synced 2026-03-14 05:35:55 +01:00
network_imporvements branch merge
This commit is contained in:
@@ -16,7 +16,7 @@
|
||||
|
||||
import tensorflow as tf
|
||||
|
||||
from rl_coach.architectures.tensorflow_components.architecture import Dense
|
||||
from rl_coach.architectures.tensorflow_components.layers import Dense
|
||||
|
||||
from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters
|
||||
from rl_coach.base_parameters import AgentParameters
|
||||
@@ -25,9 +25,13 @@ from rl_coach.spaces import SpacesDefinition
|
||||
|
||||
|
||||
class CategoricalQHeadParameters(HeadParameters):
|
||||
def __init__(self, activation_function: str ='relu', name: str='categorical_q_head_params', dense_layer=Dense):
|
||||
def __init__(self, activation_function: str ='relu', name: str='categorical_q_head_params',
|
||||
num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
|
||||
loss_weight: float = 1.0, dense_layer=Dense):
|
||||
super().__init__(parameterized_class=CategoricalQHead, activation_function=activation_function, name=name,
|
||||
dense_layer=dense_layer)
|
||||
dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
|
||||
rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
|
||||
loss_weight=loss_weight)
|
||||
|
||||
|
||||
class CategoricalQHead(Head):
|
||||
@@ -54,3 +58,12 @@ class CategoricalQHead(Head):
|
||||
self.target = self.distributions
|
||||
self.loss = tf.nn.softmax_cross_entropy_with_logits(labels=self.target, logits=values_distribution)
|
||||
tf.losses.add_loss(self.loss)
|
||||
|
||||
def __str__(self):
|
||||
result = [
|
||||
"Dense (num outputs = {})".format(self.num_actions * self.num_atoms),
|
||||
"Reshape (output size = {} x {})".format(self.num_actions, self.num_atoms),
|
||||
"Softmax"
|
||||
]
|
||||
return '\n'.join(result)
|
||||
|
||||
|
||||
@@ -16,27 +16,34 @@
|
||||
|
||||
import tensorflow as tf
|
||||
|
||||
from rl_coach.architectures.tensorflow_components.architecture import Dense
|
||||
from rl_coach.architectures.tensorflow_components.layers import Dense, batchnorm_activation_dropout
|
||||
|
||||
from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters
|
||||
from rl_coach.base_parameters import AgentParameters
|
||||
from rl_coach.core_types import QActionStateValue
|
||||
from rl_coach.spaces import SpacesDefinition, BoxActionSpace, DiscreteActionSpace
|
||||
from rl_coach.utils import force_list
|
||||
|
||||
|
||||
class RegressionHeadParameters(HeadParameters):
|
||||
def __init__(self, activation_function: str ='relu', name: str='q_head_params', dense_layer=Dense):
|
||||
def __init__(self, activation_function: str ='relu', name: str='q_head_params',
|
||||
num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
|
||||
loss_weight: float = 1.0, dense_layer=Dense, scheme=[Dense(256), Dense(256)]):
|
||||
super().__init__(parameterized_class=RegressionHead, activation_function=activation_function, name=name,
|
||||
dense_layer=dense_layer)
|
||||
dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
|
||||
rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
|
||||
loss_weight=loss_weight)
|
||||
|
||||
|
||||
class RegressionHead(Head):
|
||||
def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, network_name: str,
|
||||
head_idx: int = 0, loss_weight: float = 1., is_local: bool = True, activation_function: str='relu',
|
||||
dense_layer=Dense):
|
||||
dense_layer=Dense, scheme=[Dense(256), Dense(256)]):
|
||||
super().__init__(agent_parameters, spaces, network_name, head_idx, loss_weight, is_local, activation_function,
|
||||
dense_layer=dense_layer)
|
||||
self.name = 'regression_head'
|
||||
self.scheme = scheme
|
||||
self.layers = []
|
||||
if isinstance(self.spaces.action, BoxActionSpace):
|
||||
self.num_actions = self.spaces.action.shape[0]
|
||||
elif isinstance(self.spaces.action, DiscreteActionSpace):
|
||||
@@ -48,9 +55,18 @@ class RegressionHead(Head):
|
||||
self.loss_type = tf.losses.mean_squared_error
|
||||
|
||||
def _build_module(self, input_layer):
|
||||
self.fc1 = self.dense_layer(256)(input_layer)
|
||||
self.fc2 = self.dense_layer(256)(self.fc1)
|
||||
self.output = self.dense_layer(self.num_actions)(self.fc2, name='output')
|
||||
self.layers.append(input_layer)
|
||||
for idx, layer_params in enumerate(self.scheme):
|
||||
self.layers.extend(force_list(
|
||||
layer_params(input_layer=self.layers[-1], name='{}_{}'.format(layer_params.__class__.__name__, idx))
|
||||
))
|
||||
|
||||
self.layers.append(self.dense_layer(self.num_actions)(self.layers[-1], name='output'))
|
||||
self.output = self.layers[-1]
|
||||
|
||||
def __str__(self):
|
||||
result = []
|
||||
for layer in self.layers:
|
||||
result.append(str(layer))
|
||||
return '\n'.join(result)
|
||||
|
||||
|
||||
@@ -16,7 +16,7 @@
|
||||
|
||||
import tensorflow as tf
|
||||
|
||||
from rl_coach.architectures.tensorflow_components.architecture import batchnorm_activation_dropout, Dense
|
||||
from rl_coach.architectures.tensorflow_components.layers import batchnorm_activation_dropout, Dense
|
||||
from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters
|
||||
from rl_coach.base_parameters import AgentParameters
|
||||
from rl_coach.core_types import ActionProbabilities
|
||||
@@ -25,9 +25,12 @@ from rl_coach.spaces import SpacesDefinition
|
||||
|
||||
class DDPGActorHeadParameters(HeadParameters):
|
||||
def __init__(self, activation_function: str ='tanh', name: str='policy_head_params', batchnorm: bool=True,
|
||||
dense_layer=Dense):
|
||||
num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
|
||||
loss_weight: float = 1.0, dense_layer=Dense):
|
||||
super().__init__(parameterized_class=DDPGActor, activation_function=activation_function, name=name,
|
||||
dense_layer=dense_layer)
|
||||
dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
|
||||
rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
|
||||
loss_weight=loss_weight)
|
||||
self.batchnorm = batchnorm
|
||||
|
||||
|
||||
@@ -56,7 +59,7 @@ class DDPGActor(Head):
|
||||
pre_activation_policy_values_mean = self.dense_layer(self.num_actions)(input_layer, name='fc_mean')
|
||||
policy_values_mean = batchnorm_activation_dropout(pre_activation_policy_values_mean, self.batchnorm,
|
||||
self.activation_function,
|
||||
False, 0, 0)[-1]
|
||||
False, 0, is_training=False, name="BatchnormActivationDropout_0")[-1]
|
||||
self.policy_mean = tf.multiply(policy_values_mean, self.output_scale, name='output_mean')
|
||||
|
||||
if self.is_local:
|
||||
@@ -66,3 +69,9 @@ class DDPGActor(Head):
|
||||
[self.action_penalty * tf.reduce_mean(tf.square(pre_activation_policy_values_mean))]
|
||||
|
||||
self.output = [self.policy_mean]
|
||||
|
||||
def __str__(self):
|
||||
result = [
|
||||
'Dense (num outputs = {})'.format(self.num_actions[0])
|
||||
]
|
||||
return '\n'.join(result)
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
#
|
||||
import tensorflow as tf
|
||||
|
||||
from rl_coach.architectures.tensorflow_components.architecture import Dense
|
||||
from rl_coach.architectures.tensorflow_components.layers import Dense
|
||||
from rl_coach.architectures.tensorflow_components.heads.head import HeadParameters
|
||||
from rl_coach.architectures.tensorflow_components.heads.q_head import QHead
|
||||
from rl_coach.base_parameters import AgentParameters
|
||||
@@ -24,9 +24,13 @@ from rl_coach.spaces import SpacesDefinition
|
||||
|
||||
|
||||
class DNDQHeadParameters(HeadParameters):
|
||||
def __init__(self, activation_function: str ='relu', name: str='dnd_q_head_params', dense_layer=Dense):
|
||||
def __init__(self, activation_function: str ='relu', name: str='dnd_q_head_params',
|
||||
num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
|
||||
loss_weight: float = 1.0, dense_layer=Dense):
|
||||
super().__init__(parameterized_class=DNDQHead, activation_function=activation_function, name=name,
|
||||
dense_layer=dense_layer)
|
||||
dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
|
||||
rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
|
||||
loss_weight=loss_weight)
|
||||
|
||||
|
||||
class DNDQHead(QHead):
|
||||
@@ -89,3 +93,9 @@ class DNDQHead(QHead):
|
||||
# DND gradients
|
||||
self.dnd_embeddings_grad = tf.gradients(self.loss[0], self.dnd_embeddings)
|
||||
self.dnd_values_grad = tf.gradients(self.loss[0], self.dnd_values)
|
||||
|
||||
def __str__(self):
|
||||
result = [
|
||||
"DND fetch (num outputs = {})".format(self.num_actions)
|
||||
]
|
||||
return '\n'.join(result)
|
||||
|
||||
@@ -16,7 +16,7 @@
|
||||
|
||||
import tensorflow as tf
|
||||
|
||||
from rl_coach.architectures.tensorflow_components.architecture import Dense
|
||||
from rl_coach.architectures.tensorflow_components.layers import Dense
|
||||
from rl_coach.architectures.tensorflow_components.heads.head import HeadParameters
|
||||
from rl_coach.architectures.tensorflow_components.heads.q_head import QHead
|
||||
from rl_coach.base_parameters import AgentParameters
|
||||
@@ -24,9 +24,13 @@ from rl_coach.spaces import SpacesDefinition
|
||||
|
||||
|
||||
class DuelingQHeadParameters(HeadParameters):
|
||||
def __init__(self, activation_function: str ='relu', name: str='dueling_q_head_params', dense_layer=Dense):
|
||||
super().__init__(parameterized_class=DuelingQHead, activation_function=activation_function, name=name, dense_layer=dense_layer)
|
||||
|
||||
def __init__(self, activation_function: str ='relu', name: str='dueling_q_head_params',
|
||||
num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
|
||||
loss_weight: float = 1.0, dense_layer=Dense):
|
||||
super().__init__(parameterized_class=DuelingQHead, activation_function=activation_function, name=name,
|
||||
dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
|
||||
rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
|
||||
loss_weight=loss_weight)
|
||||
|
||||
class DuelingQHead(QHead):
|
||||
def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, network_name: str,
|
||||
@@ -51,3 +55,16 @@ class DuelingQHead(QHead):
|
||||
|
||||
# merge to state-action value function Q
|
||||
self.output = tf.add(self.state_value, self.action_advantage, name='output')
|
||||
|
||||
def __str__(self):
|
||||
result = [
|
||||
"State Value Stream - V",
|
||||
"\tDense (num outputs = 512)",
|
||||
"\tDense (num outputs = 1)",
|
||||
"Action Advantage Stream - A",
|
||||
"\tDense (num outputs = 512)",
|
||||
"\tDense (num outputs = {})".format(self.num_actions),
|
||||
"\tSubtract(A, Mean(A))".format(self.num_actions),
|
||||
"Add (V, A)"
|
||||
]
|
||||
return '\n'.join(result)
|
||||
|
||||
@@ -18,7 +18,7 @@ from typing import Type
|
||||
import numpy as np
|
||||
import tensorflow as tf
|
||||
from tensorflow.python.ops.losses.losses_impl import Reduction
|
||||
from rl_coach.architectures.tensorflow_components.architecture import Dense
|
||||
from rl_coach.architectures.tensorflow_components.layers import Dense
|
||||
from rl_coach.base_parameters import AgentParameters, Parameters, NetworkComponentParameters
|
||||
from rl_coach.spaces import SpacesDefinition
|
||||
from rl_coach.utils import force_list
|
||||
@@ -35,10 +35,14 @@ def normalized_columns_initializer(std=1.0):
|
||||
|
||||
class HeadParameters(NetworkComponentParameters):
|
||||
def __init__(self, parameterized_class: Type['Head'], activation_function: str = 'relu', name: str= 'head',
|
||||
dense_layer=Dense):
|
||||
num_output_head_copies: int=1, rescale_gradient_from_head_by_factor: float=1.0,
|
||||
loss_weight: float=1.0, dense_layer=Dense):
|
||||
super().__init__(dense_layer=dense_layer)
|
||||
self.activation_function = activation_function
|
||||
self.name = name
|
||||
self.num_output_head_copies = num_output_head_copies
|
||||
self.rescale_gradient_from_head_by_factor = rescale_gradient_from_head_by_factor
|
||||
self.loss_weight = loss_weight
|
||||
self.parameterized_class_name = parameterized_class.__name__
|
||||
|
||||
|
||||
|
||||
@@ -16,7 +16,7 @@
|
||||
|
||||
import tensorflow as tf
|
||||
|
||||
from rl_coach.architectures.tensorflow_components.architecture import Dense
|
||||
from rl_coach.architectures.tensorflow_components.layers import Dense
|
||||
|
||||
from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters
|
||||
from rl_coach.base_parameters import AgentParameters
|
||||
@@ -26,9 +26,12 @@ from rl_coach.spaces import SpacesDefinition
|
||||
|
||||
class MeasurementsPredictionHeadParameters(HeadParameters):
|
||||
def __init__(self, activation_function: str ='relu', name: str='measurements_prediction_head_params',
|
||||
dense_layer=Dense):
|
||||
super().__init__(parameterized_class=MeasurementsPredictionHead,
|
||||
activation_function=activation_function, name=name, dense_layer=dense_layer)
|
||||
num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
|
||||
loss_weight: float = 1.0, dense_layer=Dense):
|
||||
super().__init__(parameterized_class=MeasurementsPredictionHead, activation_function=activation_function, name=name,
|
||||
dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
|
||||
rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
|
||||
loss_weight=loss_weight)
|
||||
|
||||
|
||||
class MeasurementsPredictionHead(Head):
|
||||
@@ -68,3 +71,17 @@ class MeasurementsPredictionHead(Head):
|
||||
targets_nonan = tf.where(tf.is_nan(self.target), self.output, self.target)
|
||||
self.loss = tf.reduce_sum(tf.reduce_mean(tf.square(targets_nonan - self.output), reduction_indices=0))
|
||||
tf.losses.add_loss(self.loss_weight[0] * self.loss)
|
||||
|
||||
def __str__(self):
|
||||
result = [
|
||||
"State Value Stream - V",
|
||||
"\tDense (num outputs = 256)",
|
||||
"\tDense (num outputs = {})".format(self.multi_step_measurements_size),
|
||||
"Action Advantage Stream - A",
|
||||
"\tDense (num outputs = 256)",
|
||||
"\tDense (num outputs = {})".format(self.num_actions * self.multi_step_measurements_size),
|
||||
"\tReshape (new size = {} x {})".format(self.num_actions, self.multi_step_measurements_size),
|
||||
"\tSubtract(A, Mean(A))".format(self.num_actions),
|
||||
"Add (V, A)"
|
||||
]
|
||||
return '\n'.join(result)
|
||||
|
||||
@@ -16,7 +16,7 @@
|
||||
|
||||
import tensorflow as tf
|
||||
|
||||
from rl_coach.architectures.tensorflow_components.architecture import Dense
|
||||
from rl_coach.architectures.tensorflow_components.layers import Dense
|
||||
from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters
|
||||
from rl_coach.base_parameters import AgentParameters
|
||||
from rl_coach.core_types import QActionStateValue
|
||||
@@ -25,9 +25,13 @@ from rl_coach.spaces import SpacesDefinition
|
||||
|
||||
|
||||
class NAFHeadParameters(HeadParameters):
|
||||
def __init__(self, activation_function: str ='tanh', name: str='naf_head_params', dense_layer=Dense):
|
||||
def __init__(self, activation_function: str ='tanh', name: str='naf_head_params',
|
||||
num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
|
||||
loss_weight: float = 1.0, dense_layer=Dense):
|
||||
super().__init__(parameterized_class=NAFHead, activation_function=activation_function, name=name,
|
||||
dense_layer=dense_layer)
|
||||
dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
|
||||
rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
|
||||
loss_weight=loss_weight)
|
||||
|
||||
|
||||
class NAFHead(Head):
|
||||
@@ -90,3 +94,21 @@ class NAFHead(Head):
|
||||
self.Q = tf.add(self.V, self.A, name='Q')
|
||||
|
||||
self.output = self.Q
|
||||
|
||||
def __str__(self):
|
||||
result = [
|
||||
"State Value Stream - V",
|
||||
"\tDense (num outputs = 1)",
|
||||
"Action Advantage Stream - A",
|
||||
"\tDense (num outputs = {})".format((self.num_actions * (self.num_actions + 1)) / 2),
|
||||
"\tReshape to lower triangular matrix L (new size = {} x {})".format(self.num_actions, self.num_actions),
|
||||
"\tP = L*L^T",
|
||||
"\tA = -1/2 * (u - mu)^T * P * (u - mu)",
|
||||
"Action Stream - mu",
|
||||
"\tDense (num outputs = {})".format(self.num_actions),
|
||||
"\tActivation (type = {})".format(self.activation_function.__name__),
|
||||
"\tMultiply (factor = {})".format(self.output_scale),
|
||||
"State-Action Value Stream - Q",
|
||||
"\tAdd (V, A)"
|
||||
]
|
||||
return '\n'.join(result)
|
||||
|
||||
@@ -17,20 +17,25 @@
|
||||
import numpy as np
|
||||
import tensorflow as tf
|
||||
|
||||
from rl_coach.architectures.tensorflow_components.architecture import Dense
|
||||
from rl_coach.architectures.tensorflow_components.layers import Dense
|
||||
from rl_coach.architectures.tensorflow_components.heads.head import Head, normalized_columns_initializer, HeadParameters
|
||||
from rl_coach.base_parameters import AgentParameters
|
||||
from rl_coach.core_types import ActionProbabilities
|
||||
from rl_coach.exploration_policies.continuous_entropy import ContinuousEntropyParameters
|
||||
from rl_coach.spaces import DiscreteActionSpace, BoxActionSpace, CompoundActionSpace
|
||||
from rl_coach.spaces import SpacesDefinition
|
||||
from rl_coach.utils import eps
|
||||
from rl_coach.utils import eps, indent_string
|
||||
|
||||
|
||||
class PolicyHeadParameters(HeadParameters):
|
||||
def __init__(self, activation_function: str ='tanh', name: str='policy_head_params', dense_layer=Dense):
|
||||
def __init__(self, activation_function: str ='tanh', name: str='policy_head_params',
|
||||
num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
|
||||
loss_weight: float = 1.0, dense_layer=Dense):
|
||||
super().__init__(parameterized_class=PolicyHead, activation_function=activation_function, name=name,
|
||||
dense_layer=dense_layer)
|
||||
dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
|
||||
rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
|
||||
loss_weight=loss_weight)
|
||||
|
||||
|
||||
|
||||
class PolicyHead(Head):
|
||||
@@ -112,7 +117,7 @@ class PolicyHead(Head):
|
||||
self.actions.append(tf.placeholder(tf.float32, [None, num_actions], name="actions"))
|
||||
|
||||
# output activation function
|
||||
if np.all(self.spaces.action.max_abs_range < np.inf):
|
||||
if np.all(action_space.max_abs_range < np.inf):
|
||||
# bounded actions
|
||||
self.output_scale = action_space.max_abs_range
|
||||
self.continuous_output_activation = self.activation_function
|
||||
@@ -158,3 +163,45 @@ class PolicyHead(Head):
|
||||
if self.action_penalty and self.action_penalty != 0:
|
||||
self.regularizations += [
|
||||
self.action_penalty * tf.reduce_mean(tf.square(pre_activation_policy_values_mean))]
|
||||
|
||||
def __str__(self):
    """Return a human-readable outline of the policy head, one line per layer.

    When ``self.spaces.action`` is a ``CompoundActionSpace`` each of its
    ``sub_action_spaces`` is described under its own "Action head <idx>"
    title; otherwise the single action space is described directly.
    """
    if isinstance(self.spaces.action, CompoundActionSpace):
        action_spaces = self.spaces.action.sub_action_spaces
    else:
        action_spaces = [self.spaces.action]

    result = []
    for action_space_idx, action_space in enumerate(action_spaces):
        action_head_mean_result = []
        # Start from an empty stdev description for every action space, so a space that has
        # no stdev stream can neither hit an unbound name nor reuse a previous space's lines.
        action_head_stdev_result = []

        if isinstance(action_space, DiscreteActionSpace):
            # discrete actions: dense layer sized by the number of actions, then softmax
            action_head_mean_result.append("Dense (num outputs = {})".format(len(action_space.actions)))
            action_head_mean_result.append("Softmax")
        elif isinstance(action_space, BoxActionSpace):
            # continuous actions: mean stream, plus an optional stdev stream
            action_head_mean_result.append("Dense (num outputs = {})".format(action_space.shape))
            if np.all(action_space.max_abs_range < np.inf):
                # bounded actions: activation followed by scaling to the action range
                action_head_mean_result.append("Activation (type = {})".format(self.activation_function.__name__))
                action_head_mean_result.append("Multiply (factor = {})".format(action_space.max_abs_range))

            if isinstance(self.exploration_policy, ContinuousEntropyParameters):
                action_head_stdev_result.append("Dense (num outputs = {})".format(action_space.shape))
                action_head_stdev_result.append("Softplus")

        action_head_result = []
        if action_head_stdev_result:
            # two streams: title each one and indent its layers beneath it
            action_head_result.append("Mean Stream")
            action_head_result.append(indent_string('\n'.join(action_head_mean_result)))
            action_head_result.append("Stdev Stream")
            action_head_result.append(indent_string('\n'.join(action_head_stdev_result)))
        else:
            action_head_result.append('\n'.join(action_head_mean_result))

        if len(action_spaces) > 1:
            result.append("Action head {}".format(action_space_idx))
            result.append(indent_string('\n'.join(action_head_result)))
        else:
            result.append('\n'.join(action_head_result))

    return '\n'.join(result)
|
||||
|
||||
@@ -17,7 +17,7 @@
|
||||
import numpy as np
|
||||
import tensorflow as tf
|
||||
|
||||
from rl_coach.architectures.tensorflow_components.architecture import Dense
|
||||
from rl_coach.architectures.tensorflow_components.layers import Dense
|
||||
from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters, normalized_columns_initializer
|
||||
from rl_coach.base_parameters import AgentParameters
|
||||
from rl_coach.core_types import ActionProbabilities
|
||||
@@ -27,9 +27,13 @@ from rl_coach.utils import eps
|
||||
|
||||
|
||||
class PPOHeadParameters(HeadParameters):
|
||||
def __init__(self, activation_function: str ='tanh', name: str='ppo_head_params', dense_layer=Dense):
|
||||
def __init__(self, activation_function: str ='tanh', name: str='ppo_head_params',
|
||||
num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
|
||||
loss_weight: float = 1.0, dense_layer=Dense):
|
||||
super().__init__(parameterized_class=PPOHead, activation_function=activation_function, name=name,
|
||||
dense_layer=dense_layer)
|
||||
dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
|
||||
rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
|
||||
loss_weight=loss_weight)
|
||||
|
||||
|
||||
class PPOHead(Head):
|
||||
@@ -146,3 +150,15 @@ class PPOHead(Head):
|
||||
self.old_policy_distribution = tf.contrib.distributions.MultivariateNormalDiag(self.old_policy_mean, self.old_policy_std + eps)
|
||||
|
||||
self.output = [self.policy_mean, self.policy_std]
|
||||
|
||||
def __str__(self):
    """Return a human-readable outline of the head's mean stream.

    A discrete action space yields a dense layer line followed by "Softmax",
    a box action space yields a single dense layer line, and any other action
    space yields an empty string.
    """
    if isinstance(self.spaces.action, DiscreteActionSpace):
        # discrete actions: dense layer sized by the number of actions, then softmax
        dense_line = "Dense (num outputs = {})".format(len(self.spaces.action.actions))
        return '\n'.join([dense_line, "Softmax"])

    if isinstance(self.spaces.action, BoxActionSpace):
        # continuous actions: dense layer described by the action space shape
        return "Dense (num outputs = {})".format(self.spaces.action.shape)

    return ''
|
||||
|
||||
@@ -16,7 +16,7 @@
|
||||
|
||||
import tensorflow as tf
|
||||
|
||||
from rl_coach.architectures.tensorflow_components.architecture import Dense
|
||||
from rl_coach.architectures.tensorflow_components.layers import Dense
|
||||
|
||||
from rl_coach.architectures.tensorflow_components.heads.head import Head, normalized_columns_initializer, HeadParameters
|
||||
from rl_coach.base_parameters import AgentParameters
|
||||
@@ -25,9 +25,13 @@ from rl_coach.spaces import SpacesDefinition
|
||||
|
||||
|
||||
class PPOVHeadParameters(HeadParameters):
|
||||
def __init__(self, activation_function: str ='relu', name: str='ppo_v_head_params', dense_layer=Dense):
|
||||
def __init__(self, activation_function: str ='relu', name: str='ppo_v_head_params',
|
||||
num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
|
||||
loss_weight: float = 1.0, dense_layer=Dense):
|
||||
super().__init__(parameterized_class=PPOVHead, activation_function=activation_function, name=name,
|
||||
dense_layer=dense_layer)
|
||||
dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
|
||||
rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
|
||||
loss_weight=loss_weight)
|
||||
|
||||
|
||||
class PPOVHead(Head):
|
||||
@@ -55,3 +59,9 @@ class PPOVHead(Head):
|
||||
self.vf_loss = tf.reduce_mean(tf.maximum(value_loss_1, value_loss_2))
|
||||
self.loss = self.vf_loss
|
||||
tf.losses.add_loss(self.loss)
|
||||
|
||||
def __str__(self):
|
||||
result = [
|
||||
"Dense (num outputs = 1)"
|
||||
]
|
||||
return '\n'.join(result)
|
||||
|
||||
@@ -16,7 +16,7 @@
|
||||
|
||||
import tensorflow as tf
|
||||
|
||||
from rl_coach.architectures.tensorflow_components.architecture import Dense
|
||||
from rl_coach.architectures.tensorflow_components.layers import Dense
|
||||
|
||||
from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters
|
||||
from rl_coach.base_parameters import AgentParameters
|
||||
@@ -25,9 +25,13 @@ from rl_coach.spaces import SpacesDefinition, BoxActionSpace, DiscreteActionSpac
|
||||
|
||||
|
||||
class QHeadParameters(HeadParameters):
|
||||
def __init__(self, activation_function: str ='relu', name: str='q_head_params', dense_layer=Dense):
|
||||
def __init__(self, activation_function: str ='relu', name: str='q_head_params',
|
||||
num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
|
||||
loss_weight: float = 1.0, dense_layer=Dense):
|
||||
super().__init__(parameterized_class=QHead, activation_function=activation_function, name=name,
|
||||
dense_layer=dense_layer)
|
||||
dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
|
||||
rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
|
||||
loss_weight=loss_weight)
|
||||
|
||||
|
||||
class QHead(Head):
|
||||
@@ -51,5 +55,10 @@ class QHead(Head):
|
||||
# Standard Q Network
|
||||
self.output = self.dense_layer(self.num_actions)(input_layer, name='output')
|
||||
|
||||
def __str__(self):
|
||||
result = [
|
||||
"Dense (num outputs = {})".format(self.num_actions)
|
||||
]
|
||||
return '\n'.join(result)
|
||||
|
||||
|
||||
|
||||
@@ -16,7 +16,7 @@
|
||||
|
||||
import tensorflow as tf
|
||||
|
||||
from rl_coach.architectures.tensorflow_components.architecture import Dense
|
||||
from rl_coach.architectures.tensorflow_components.layers import Dense
|
||||
|
||||
from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters
|
||||
from rl_coach.base_parameters import AgentParameters
|
||||
@@ -26,9 +26,12 @@ from rl_coach.spaces import SpacesDefinition
|
||||
|
||||
class QuantileRegressionQHeadParameters(HeadParameters):
|
||||
def __init__(self, activation_function: str ='relu', name: str='quantile_regression_q_head_params',
|
||||
dense_layer=Dense):
|
||||
super().__init__(parameterized_class=QuantileRegressionQHead, activation_function=activation_function,
|
||||
name=name, dense_layer=dense_layer)
|
||||
num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
|
||||
loss_weight: float = 1.0, dense_layer=Dense):
|
||||
super().__init__(parameterized_class=QuantileRegressionQHead, activation_function=activation_function, name=name,
|
||||
dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
|
||||
rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
|
||||
loss_weight=loss_weight)
|
||||
|
||||
|
||||
class QuantileRegressionQHead(Head):
|
||||
@@ -79,3 +82,11 @@ class QuantileRegressionQHead(Head):
|
||||
quantile_regression_loss = tf.reduce_sum(quantile_huber_loss) / float(self.num_atoms)
|
||||
self.loss = quantile_regression_loss
|
||||
tf.losses.add_loss(self.loss)
|
||||
|
||||
def __str__(self):
|
||||
result = [
|
||||
"Dense (num outputs = {})".format(self.num_actions * self.num_atoms),
|
||||
"Reshape (new size = {} x {})".format(self.num_actions, self.num_atoms)
|
||||
]
|
||||
return '\n'.join(result)
|
||||
|
||||
|
||||
@@ -16,7 +16,7 @@
|
||||
|
||||
import tensorflow as tf
|
||||
|
||||
from rl_coach.architectures.tensorflow_components.architecture import Dense
|
||||
from rl_coach.architectures.tensorflow_components.layers import Dense
|
||||
from rl_coach.architectures.tensorflow_components.heads.head import HeadParameters, Head
|
||||
from rl_coach.base_parameters import AgentParameters
|
||||
from rl_coach.core_types import QActionStateValue
|
||||
@@ -24,9 +24,13 @@ from rl_coach.spaces import SpacesDefinition
|
||||
|
||||
|
||||
class RainbowQHeadParameters(HeadParameters):
|
||||
def __init__(self, activation_function: str ='relu', name: str='rainbow_q_head_params', dense_layer=Dense):
|
||||
def __init__(self, activation_function: str ='relu', name: str='rainbow_q_head_params',
|
||||
num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
|
||||
loss_weight: float = 1.0, dense_layer=Dense):
|
||||
super().__init__(parameterized_class=RainbowQHead, activation_function=activation_function, name=name,
|
||||
dense_layer=dense_layer)
|
||||
dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
|
||||
rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
|
||||
loss_weight=loss_weight)
|
||||
|
||||
|
||||
class RainbowQHead(Head):
|
||||
@@ -69,3 +73,17 @@ class RainbowQHead(Head):
|
||||
self.loss = tf.nn.softmax_cross_entropy_with_logits(labels=self.target, logits=values_distribution)
|
||||
tf.losses.add_loss(self.loss)
|
||||
|
||||
def __str__(self):
|
||||
result = [
|
||||
"State Value Stream - V",
|
||||
"\tDense (num outputs = 512)",
|
||||
"\tDense (num outputs = {})".format(self.num_atoms),
|
||||
"Action Advantage Stream - A",
|
||||
"\tDense (num outputs = 512)",
|
||||
"\tDense (num outputs = {})".format(self.num_actions * self.num_atoms),
|
||||
"\tReshape (new size = {} x {})".format(self.num_actions, self.num_atoms),
|
||||
"\tSubtract(A, Mean(A))".format(self.num_actions),
|
||||
"Add (V, A)",
|
||||
"Softmax"
|
||||
]
|
||||
return '\n'.join(result)
|
||||
|
||||
@@ -16,7 +16,7 @@
|
||||
|
||||
import tensorflow as tf
|
||||
|
||||
from rl_coach.architectures.tensorflow_components.architecture import Dense
|
||||
from rl_coach.architectures.tensorflow_components.layers import Dense
|
||||
|
||||
from rl_coach.architectures.tensorflow_components.heads.head import Head, normalized_columns_initializer, HeadParameters
|
||||
from rl_coach.base_parameters import AgentParameters
|
||||
@@ -25,9 +25,13 @@ from rl_coach.spaces import SpacesDefinition
|
||||
|
||||
|
||||
class VHeadParameters(HeadParameters):
|
||||
def __init__(self, activation_function: str ='relu', name: str='v_head_params', dense_layer=Dense):
|
||||
def __init__(self, activation_function: str ='relu', name: str='v_head_params',
|
||||
num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
|
||||
loss_weight: float = 1.0, dense_layer=Dense):
|
||||
super().__init__(parameterized_class=VHead, activation_function=activation_function, name=name,
|
||||
dense_layer=dense_layer)
|
||||
dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
|
||||
rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
|
||||
loss_weight=loss_weight)
|
||||
|
||||
|
||||
class VHead(Head):
|
||||
@@ -48,3 +52,9 @@ class VHead(Head):
|
||||
# Standard V Network
|
||||
self.output = self.dense_layer(1)(input_layer, name='output',
|
||||
kernel_initializer=normalized_columns_initializer(1.0))
|
||||
|
||||
def __str__(self):
|
||||
result = [
|
||||
"Dense (num outputs = 1)"
|
||||
]
|
||||
return '\n'.join(result)
|
||||
|
||||
Reference in New Issue
Block a user