1
0
mirror of https://github.com/gryf/coach.git synced 2026-03-14 05:35:55 +01:00

network_imporvements branch merge (branch name sic; i.e. "network improvements")

This commit is contained in:
Shadi Endrawis
2018-10-02 13:41:46 +03:00
parent 72ea933384
commit 51726a5b80
110 changed files with 1639 additions and 1161 deletions

View File

@@ -16,7 +16,7 @@
import tensorflow as tf
from rl_coach.architectures.tensorflow_components.architecture import Dense
from rl_coach.architectures.tensorflow_components.layers import Dense
from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters
from rl_coach.base_parameters import AgentParameters
@@ -25,9 +25,13 @@ from rl_coach.spaces import SpacesDefinition
class CategoricalQHeadParameters(HeadParameters):
def __init__(self, activation_function: str ='relu', name: str='categorical_q_head_params', dense_layer=Dense):
def __init__(self, activation_function: str ='relu', name: str='categorical_q_head_params',
num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
loss_weight: float = 1.0, dense_layer=Dense):
super().__init__(parameterized_class=CategoricalQHead, activation_function=activation_function, name=name,
dense_layer=dense_layer)
dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
loss_weight=loss_weight)
class CategoricalQHead(Head):
@@ -54,3 +58,12 @@ class CategoricalQHead(Head):
self.target = self.distributions
self.loss = tf.nn.softmax_cross_entropy_with_logits(labels=self.target, logits=values_distribution)
tf.losses.add_loss(self.loss)
def __str__(self):
result = [
"Dense (num outputs = {})".format(self.num_actions * self.num_atoms),
"Reshape (output size = {} x {})".format(self.num_actions, self.num_atoms),
"Softmax"
]
return '\n'.join(result)

View File

@@ -16,27 +16,34 @@
import tensorflow as tf
from rl_coach.architectures.tensorflow_components.architecture import Dense
from rl_coach.architectures.tensorflow_components.layers import Dense, batchnorm_activation_dropout
from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters
from rl_coach.base_parameters import AgentParameters
from rl_coach.core_types import QActionStateValue
from rl_coach.spaces import SpacesDefinition, BoxActionSpace, DiscreteActionSpace
from rl_coach.utils import force_list
class RegressionHeadParameters(HeadParameters):
def __init__(self, activation_function: str ='relu', name: str='q_head_params', dense_layer=Dense):
def __init__(self, activation_function: str ='relu', name: str='q_head_params',
num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
loss_weight: float = 1.0, dense_layer=Dense, scheme=[Dense(256), Dense(256)]):
super().__init__(parameterized_class=RegressionHead, activation_function=activation_function, name=name,
dense_layer=dense_layer)
dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
loss_weight=loss_weight)
class RegressionHead(Head):
def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, network_name: str,
head_idx: int = 0, loss_weight: float = 1., is_local: bool = True, activation_function: str='relu',
dense_layer=Dense):
dense_layer=Dense, scheme=[Dense(256), Dense(256)]):
super().__init__(agent_parameters, spaces, network_name, head_idx, loss_weight, is_local, activation_function,
dense_layer=dense_layer)
self.name = 'regression_head'
self.scheme = scheme
self.layers = []
if isinstance(self.spaces.action, BoxActionSpace):
self.num_actions = self.spaces.action.shape[0]
elif isinstance(self.spaces.action, DiscreteActionSpace):
@@ -48,9 +55,18 @@ class RegressionHead(Head):
self.loss_type = tf.losses.mean_squared_error
def _build_module(self, input_layer):
self.fc1 = self.dense_layer(256)(input_layer)
self.fc2 = self.dense_layer(256)(self.fc1)
self.output = self.dense_layer(self.num_actions)(self.fc2, name='output')
self.layers.append(input_layer)
for idx, layer_params in enumerate(self.scheme):
self.layers.extend(force_list(
layer_params(input_layer=self.layers[-1], name='{}_{}'.format(layer_params.__class__.__name__, idx))
))
self.layers.append(self.dense_layer(self.num_actions)(self.layers[-1], name='output'))
self.output = self.layers[-1]
def __str__(self):
result = []
for layer in self.layers:
result.append(str(layer))
return '\n'.join(result)

View File

@@ -16,7 +16,7 @@
import tensorflow as tf
from rl_coach.architectures.tensorflow_components.architecture import batchnorm_activation_dropout, Dense
from rl_coach.architectures.tensorflow_components.layers import batchnorm_activation_dropout, Dense
from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters
from rl_coach.base_parameters import AgentParameters
from rl_coach.core_types import ActionProbabilities
@@ -25,9 +25,12 @@ from rl_coach.spaces import SpacesDefinition
class DDPGActorHeadParameters(HeadParameters):
def __init__(self, activation_function: str ='tanh', name: str='policy_head_params', batchnorm: bool=True,
dense_layer=Dense):
num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
loss_weight: float = 1.0, dense_layer=Dense):
super().__init__(parameterized_class=DDPGActor, activation_function=activation_function, name=name,
dense_layer=dense_layer)
dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
loss_weight=loss_weight)
self.batchnorm = batchnorm
@@ -56,7 +59,7 @@ class DDPGActor(Head):
pre_activation_policy_values_mean = self.dense_layer(self.num_actions)(input_layer, name='fc_mean')
policy_values_mean = batchnorm_activation_dropout(pre_activation_policy_values_mean, self.batchnorm,
self.activation_function,
False, 0, 0)[-1]
False, 0, is_training=False, name="BatchnormActivationDropout_0")[-1]
self.policy_mean = tf.multiply(policy_values_mean, self.output_scale, name='output_mean')
if self.is_local:
@@ -66,3 +69,9 @@ class DDPGActor(Head):
[self.action_penalty * tf.reduce_mean(tf.square(pre_activation_policy_values_mean))]
self.output = [self.policy_mean]
def __str__(self):
result = [
'Dense (num outputs = {})'.format(self.num_actions[0])
]
return '\n'.join(result)

View File

@@ -15,7 +15,7 @@
#
import tensorflow as tf
from rl_coach.architectures.tensorflow_components.architecture import Dense
from rl_coach.architectures.tensorflow_components.layers import Dense
from rl_coach.architectures.tensorflow_components.heads.head import HeadParameters
from rl_coach.architectures.tensorflow_components.heads.q_head import QHead
from rl_coach.base_parameters import AgentParameters
@@ -24,9 +24,13 @@ from rl_coach.spaces import SpacesDefinition
class DNDQHeadParameters(HeadParameters):
def __init__(self, activation_function: str ='relu', name: str='dnd_q_head_params', dense_layer=Dense):
def __init__(self, activation_function: str ='relu', name: str='dnd_q_head_params',
num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
loss_weight: float = 1.0, dense_layer=Dense):
super().__init__(parameterized_class=DNDQHead, activation_function=activation_function, name=name,
dense_layer=dense_layer)
dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
loss_weight=loss_weight)
class DNDQHead(QHead):
@@ -89,3 +93,9 @@ class DNDQHead(QHead):
# DND gradients
self.dnd_embeddings_grad = tf.gradients(self.loss[0], self.dnd_embeddings)
self.dnd_values_grad = tf.gradients(self.loss[0], self.dnd_values)
def __str__(self):
result = [
"DND fetch (num outputs = {})".format(self.num_actions)
]
return '\n'.join(result)

View File

@@ -16,7 +16,7 @@
import tensorflow as tf
from rl_coach.architectures.tensorflow_components.architecture import Dense
from rl_coach.architectures.tensorflow_components.layers import Dense
from rl_coach.architectures.tensorflow_components.heads.head import HeadParameters
from rl_coach.architectures.tensorflow_components.heads.q_head import QHead
from rl_coach.base_parameters import AgentParameters
@@ -24,9 +24,13 @@ from rl_coach.spaces import SpacesDefinition
class DuelingQHeadParameters(HeadParameters):
def __init__(self, activation_function: str ='relu', name: str='dueling_q_head_params', dense_layer=Dense):
super().__init__(parameterized_class=DuelingQHead, activation_function=activation_function, name=name, dense_layer=dense_layer)
def __init__(self, activation_function: str ='relu', name: str='dueling_q_head_params',
num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
loss_weight: float = 1.0, dense_layer=Dense):
super().__init__(parameterized_class=DuelingQHead, activation_function=activation_function, name=name,
dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
loss_weight=loss_weight)
class DuelingQHead(QHead):
def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, network_name: str,
@@ -51,3 +55,16 @@ class DuelingQHead(QHead):
# merge to state-action value function Q
self.output = tf.add(self.state_value, self.action_advantage, name='output')
def __str__(self):
result = [
"State Value Stream - V",
"\tDense (num outputs = 512)",
"\tDense (num outputs = 1)",
"Action Advantage Stream - A",
"\tDense (num outputs = 512)",
"\tDense (num outputs = {})".format(self.num_actions),
"\tSubtract(A, Mean(A))".format(self.num_actions),
"Add (V, A)"
]
return '\n'.join(result)

View File

@@ -18,7 +18,7 @@ from typing import Type
import numpy as np
import tensorflow as tf
from tensorflow.python.ops.losses.losses_impl import Reduction
from rl_coach.architectures.tensorflow_components.architecture import Dense
from rl_coach.architectures.tensorflow_components.layers import Dense
from rl_coach.base_parameters import AgentParameters, Parameters, NetworkComponentParameters
from rl_coach.spaces import SpacesDefinition
from rl_coach.utils import force_list
@@ -35,10 +35,14 @@ def normalized_columns_initializer(std=1.0):
class HeadParameters(NetworkComponentParameters):
def __init__(self, parameterized_class: Type['Head'], activation_function: str = 'relu', name: str= 'head',
dense_layer=Dense):
num_output_head_copies: int=1, rescale_gradient_from_head_by_factor: float=1.0,
loss_weight: float=1.0, dense_layer=Dense):
super().__init__(dense_layer=dense_layer)
self.activation_function = activation_function
self.name = name
self.num_output_head_copies = num_output_head_copies
self.rescale_gradient_from_head_by_factor = rescale_gradient_from_head_by_factor
self.loss_weight = loss_weight
self.parameterized_class_name = parameterized_class.__name__

View File

@@ -16,7 +16,7 @@
import tensorflow as tf
from rl_coach.architectures.tensorflow_components.architecture import Dense
from rl_coach.architectures.tensorflow_components.layers import Dense
from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters
from rl_coach.base_parameters import AgentParameters
@@ -26,9 +26,12 @@ from rl_coach.spaces import SpacesDefinition
class MeasurementsPredictionHeadParameters(HeadParameters):
def __init__(self, activation_function: str ='relu', name: str='measurements_prediction_head_params',
dense_layer=Dense):
super().__init__(parameterized_class=MeasurementsPredictionHead,
activation_function=activation_function, name=name, dense_layer=dense_layer)
num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
loss_weight: float = 1.0, dense_layer=Dense):
super().__init__(parameterized_class=MeasurementsPredictionHead, activation_function=activation_function, name=name,
dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
loss_weight=loss_weight)
class MeasurementsPredictionHead(Head):
@@ -68,3 +71,17 @@ class MeasurementsPredictionHead(Head):
targets_nonan = tf.where(tf.is_nan(self.target), self.output, self.target)
self.loss = tf.reduce_sum(tf.reduce_mean(tf.square(targets_nonan - self.output), reduction_indices=0))
tf.losses.add_loss(self.loss_weight[0] * self.loss)
def __str__(self):
result = [
"State Value Stream - V",
"\tDense (num outputs = 256)",
"\tDense (num outputs = {})".format(self.multi_step_measurements_size),
"Action Advantage Stream - A",
"\tDense (num outputs = 256)",
"\tDense (num outputs = {})".format(self.num_actions * self.multi_step_measurements_size),
"\tReshape (new size = {} x {})".format(self.num_actions, self.multi_step_measurements_size),
"\tSubtract(A, Mean(A))".format(self.num_actions),
"Add (V, A)"
]
return '\n'.join(result)

View File

@@ -16,7 +16,7 @@
import tensorflow as tf
from rl_coach.architectures.tensorflow_components.architecture import Dense
from rl_coach.architectures.tensorflow_components.layers import Dense
from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters
from rl_coach.base_parameters import AgentParameters
from rl_coach.core_types import QActionStateValue
@@ -25,9 +25,13 @@ from rl_coach.spaces import SpacesDefinition
class NAFHeadParameters(HeadParameters):
def __init__(self, activation_function: str ='tanh', name: str='naf_head_params', dense_layer=Dense):
def __init__(self, activation_function: str ='tanh', name: str='naf_head_params',
num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
loss_weight: float = 1.0, dense_layer=Dense):
super().__init__(parameterized_class=NAFHead, activation_function=activation_function, name=name,
dense_layer=dense_layer)
dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
loss_weight=loss_weight)
class NAFHead(Head):
@@ -90,3 +94,21 @@ class NAFHead(Head):
self.Q = tf.add(self.V, self.A, name='Q')
self.output = self.Q
def __str__(self):
result = [
"State Value Stream - V",
"\tDense (num outputs = 1)",
"Action Advantage Stream - A",
"\tDense (num outputs = {})".format((self.num_actions * (self.num_actions + 1)) / 2),
"\tReshape to lower triangular matrix L (new size = {} x {})".format(self.num_actions, self.num_actions),
"\tP = L*L^T",
"\tA = -1/2 * (u - mu)^T * P * (u - mu)",
"Action Stream - mu",
"\tDense (num outputs = {})".format(self.num_actions),
"\tActivation (type = {})".format(self.activation_function.__name__),
"\tMultiply (factor = {})".format(self.output_scale),
"State-Action Value Stream - Q",
"\tAdd (V, A)"
]
return '\n'.join(result)

View File

@@ -17,20 +17,25 @@
import numpy as np
import tensorflow as tf
from rl_coach.architectures.tensorflow_components.architecture import Dense
from rl_coach.architectures.tensorflow_components.layers import Dense
from rl_coach.architectures.tensorflow_components.heads.head import Head, normalized_columns_initializer, HeadParameters
from rl_coach.base_parameters import AgentParameters
from rl_coach.core_types import ActionProbabilities
from rl_coach.exploration_policies.continuous_entropy import ContinuousEntropyParameters
from rl_coach.spaces import DiscreteActionSpace, BoxActionSpace, CompoundActionSpace
from rl_coach.spaces import SpacesDefinition
from rl_coach.utils import eps
from rl_coach.utils import eps, indent_string
class PolicyHeadParameters(HeadParameters):
def __init__(self, activation_function: str ='tanh', name: str='policy_head_params', dense_layer=Dense):
def __init__(self, activation_function: str ='tanh', name: str='policy_head_params',
num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
loss_weight: float = 1.0, dense_layer=Dense):
super().__init__(parameterized_class=PolicyHead, activation_function=activation_function, name=name,
dense_layer=dense_layer)
dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
loss_weight=loss_weight)
class PolicyHead(Head):
@@ -112,7 +117,7 @@ class PolicyHead(Head):
self.actions.append(tf.placeholder(tf.float32, [None, num_actions], name="actions"))
# output activation function
if np.all(self.spaces.action.max_abs_range < np.inf):
if np.all(action_space.max_abs_range < np.inf):
# bounded actions
self.output_scale = action_space.max_abs_range
self.continuous_output_activation = self.activation_function
@@ -158,3 +163,45 @@ class PolicyHead(Head):
if self.action_penalty and self.action_penalty != 0:
self.regularizations += [
self.action_penalty * tf.reduce_mean(tf.square(pre_activation_policy_values_mean))]
def __str__(self):
action_spaces = [self.spaces.action]
if isinstance(self.spaces.action, CompoundActionSpace):
action_spaces = self.spaces.action.sub_action_spaces
result = []
for action_space_idx, action_space in enumerate(action_spaces):
action_head_mean_result = []
if isinstance(action_space, DiscreteActionSpace):
# create a discrete action network (softmax probabilities output)
action_head_mean_result.append("Dense (num outputs = {})".format(len(action_space.actions)))
action_head_mean_result.append("Softmax")
elif isinstance(action_space, BoxActionSpace):
# create a continuous action network (bounded mean and stdev outputs)
action_head_mean_result.append("Dense (num outputs = {})".format(action_space.shape))
if np.all(action_space.max_abs_range < np.inf):
# bounded actions
action_head_mean_result.append("Activation (type = {})".format(self.activation_function.__name__))
action_head_mean_result.append("Multiply (factor = {})".format(action_space.max_abs_range))
action_head_stdev_result = []
if isinstance(self.exploration_policy, ContinuousEntropyParameters):
action_head_stdev_result.append("Dense (num outputs = {})".format(action_space.shape))
action_head_stdev_result.append("Softplus")
action_head_result = []
if action_head_stdev_result:
action_head_result.append("Mean Stream")
action_head_result.append(indent_string('\n'.join(action_head_mean_result)))
action_head_result.append("Stdev Stream")
action_head_result.append(indent_string('\n'.join(action_head_stdev_result)))
else:
action_head_result.append('\n'.join(action_head_mean_result))
if len(action_spaces) > 1:
result.append("Action head {}".format(action_space_idx))
result.append(indent_string('\n'.join(action_head_result)))
else:
result.append('\n'.join(action_head_result))
return '\n'.join(result)

View File

@@ -17,7 +17,7 @@
import numpy as np
import tensorflow as tf
from rl_coach.architectures.tensorflow_components.architecture import Dense
from rl_coach.architectures.tensorflow_components.layers import Dense
from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters, normalized_columns_initializer
from rl_coach.base_parameters import AgentParameters
from rl_coach.core_types import ActionProbabilities
@@ -27,9 +27,13 @@ from rl_coach.utils import eps
class PPOHeadParameters(HeadParameters):
def __init__(self, activation_function: str ='tanh', name: str='ppo_head_params', dense_layer=Dense):
def __init__(self, activation_function: str ='tanh', name: str='ppo_head_params',
num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
loss_weight: float = 1.0, dense_layer=Dense):
super().__init__(parameterized_class=PPOHead, activation_function=activation_function, name=name,
dense_layer=dense_layer)
dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
loss_weight=loss_weight)
class PPOHead(Head):
@@ -146,3 +150,15 @@ class PPOHead(Head):
self.old_policy_distribution = tf.contrib.distributions.MultivariateNormalDiag(self.old_policy_mean, self.old_policy_std + eps)
self.output = [self.policy_mean, self.policy_std]
def __str__(self):
action_head_mean_result = []
if isinstance(self.spaces.action, DiscreteActionSpace):
# create a discrete action network (softmax probabilities output)
action_head_mean_result.append("Dense (num outputs = {})".format(len(self.spaces.action.actions)))
action_head_mean_result.append("Softmax")
elif isinstance(self.spaces.action, BoxActionSpace):
# create a continuous action network (bounded mean and stdev outputs)
action_head_mean_result.append("Dense (num outputs = {})".format(self.spaces.action.shape))
return '\n'.join(action_head_mean_result)

View File

@@ -16,7 +16,7 @@
import tensorflow as tf
from rl_coach.architectures.tensorflow_components.architecture import Dense
from rl_coach.architectures.tensorflow_components.layers import Dense
from rl_coach.architectures.tensorflow_components.heads.head import Head, normalized_columns_initializer, HeadParameters
from rl_coach.base_parameters import AgentParameters
@@ -25,9 +25,13 @@ from rl_coach.spaces import SpacesDefinition
class PPOVHeadParameters(HeadParameters):
def __init__(self, activation_function: str ='relu', name: str='ppo_v_head_params', dense_layer=Dense):
def __init__(self, activation_function: str ='relu', name: str='ppo_v_head_params',
num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
loss_weight: float = 1.0, dense_layer=Dense):
super().__init__(parameterized_class=PPOVHead, activation_function=activation_function, name=name,
dense_layer=dense_layer)
dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
loss_weight=loss_weight)
class PPOVHead(Head):
@@ -55,3 +59,9 @@ class PPOVHead(Head):
self.vf_loss = tf.reduce_mean(tf.maximum(value_loss_1, value_loss_2))
self.loss = self.vf_loss
tf.losses.add_loss(self.loss)
def __str__(self):
result = [
"Dense (num outputs = 1)"
]
return '\n'.join(result)

View File

@@ -16,7 +16,7 @@
import tensorflow as tf
from rl_coach.architectures.tensorflow_components.architecture import Dense
from rl_coach.architectures.tensorflow_components.layers import Dense
from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters
from rl_coach.base_parameters import AgentParameters
@@ -25,9 +25,13 @@ from rl_coach.spaces import SpacesDefinition, BoxActionSpace, DiscreteActionSpac
class QHeadParameters(HeadParameters):
def __init__(self, activation_function: str ='relu', name: str='q_head_params', dense_layer=Dense):
def __init__(self, activation_function: str ='relu', name: str='q_head_params',
num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
loss_weight: float = 1.0, dense_layer=Dense):
super().__init__(parameterized_class=QHead, activation_function=activation_function, name=name,
dense_layer=dense_layer)
dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
loss_weight=loss_weight)
class QHead(Head):
@@ -51,5 +55,10 @@ class QHead(Head):
# Standard Q Network
self.output = self.dense_layer(self.num_actions)(input_layer, name='output')
def __str__(self):
result = [
"Dense (num outputs = {})".format(self.num_actions)
]
return '\n'.join(result)

View File

@@ -16,7 +16,7 @@
import tensorflow as tf
from rl_coach.architectures.tensorflow_components.architecture import Dense
from rl_coach.architectures.tensorflow_components.layers import Dense
from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters
from rl_coach.base_parameters import AgentParameters
@@ -26,9 +26,12 @@ from rl_coach.spaces import SpacesDefinition
class QuantileRegressionQHeadParameters(HeadParameters):
def __init__(self, activation_function: str ='relu', name: str='quantile_regression_q_head_params',
dense_layer=Dense):
super().__init__(parameterized_class=QuantileRegressionQHead, activation_function=activation_function,
name=name, dense_layer=dense_layer)
num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
loss_weight: float = 1.0, dense_layer=Dense):
super().__init__(parameterized_class=QuantileRegressionQHead, activation_function=activation_function, name=name,
dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
loss_weight=loss_weight)
class QuantileRegressionQHead(Head):
@@ -79,3 +82,11 @@ class QuantileRegressionQHead(Head):
quantile_regression_loss = tf.reduce_sum(quantile_huber_loss) / float(self.num_atoms)
self.loss = quantile_regression_loss
tf.losses.add_loss(self.loss)
def __str__(self):
result = [
"Dense (num outputs = {})".format(self.num_actions * self.num_atoms),
"Reshape (new size = {} x {})".format(self.num_actions, self.num_atoms)
]
return '\n'.join(result)

View File

@@ -16,7 +16,7 @@
import tensorflow as tf
from rl_coach.architectures.tensorflow_components.architecture import Dense
from rl_coach.architectures.tensorflow_components.layers import Dense
from rl_coach.architectures.tensorflow_components.heads.head import HeadParameters, Head
from rl_coach.base_parameters import AgentParameters
from rl_coach.core_types import QActionStateValue
@@ -24,9 +24,13 @@ from rl_coach.spaces import SpacesDefinition
class RainbowQHeadParameters(HeadParameters):
def __init__(self, activation_function: str ='relu', name: str='rainbow_q_head_params', dense_layer=Dense):
def __init__(self, activation_function: str ='relu', name: str='rainbow_q_head_params',
num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
loss_weight: float = 1.0, dense_layer=Dense):
super().__init__(parameterized_class=RainbowQHead, activation_function=activation_function, name=name,
dense_layer=dense_layer)
dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
loss_weight=loss_weight)
class RainbowQHead(Head):
@@ -69,3 +73,17 @@ class RainbowQHead(Head):
self.loss = tf.nn.softmax_cross_entropy_with_logits(labels=self.target, logits=values_distribution)
tf.losses.add_loss(self.loss)
def __str__(self):
result = [
"State Value Stream - V",
"\tDense (num outputs = 512)",
"\tDense (num outputs = {})".format(self.num_atoms),
"Action Advantage Stream - A",
"\tDense (num outputs = 512)",
"\tDense (num outputs = {})".format(self.num_actions * self.num_atoms),
"\tReshape (new size = {} x {})".format(self.num_actions, self.num_atoms),
"\tSubtract(A, Mean(A))".format(self.num_actions),
"Add (V, A)",
"Softmax"
]
return '\n'.join(result)

View File

@@ -16,7 +16,7 @@
import tensorflow as tf
from rl_coach.architectures.tensorflow_components.architecture import Dense
from rl_coach.architectures.tensorflow_components.layers import Dense
from rl_coach.architectures.tensorflow_components.heads.head import Head, normalized_columns_initializer, HeadParameters
from rl_coach.base_parameters import AgentParameters
@@ -25,9 +25,13 @@ from rl_coach.spaces import SpacesDefinition
class VHeadParameters(HeadParameters):
def __init__(self, activation_function: str ='relu', name: str='v_head_params', dense_layer=Dense):
def __init__(self, activation_function: str ='relu', name: str='v_head_params',
num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
loss_weight: float = 1.0, dense_layer=Dense):
super().__init__(parameterized_class=VHead, activation_function=activation_function, name=name,
dense_layer=dense_layer)
dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
loss_weight=loss_weight)
class VHead(Head):
@@ -48,3 +52,9 @@ class VHead(Head):
# Standard V Network
self.output = self.dense_layer(1)(input_layer, name='output',
kernel_initializer=normalized_columns_initializer(1.0))
def __str__(self):
result = [
"Dense (num outputs = 1)"
]
return '\n'.join(result)