From 2046358ab0633f5b2aac71159d16b43354f5002c Mon Sep 17 00:00:00 2001 From: Sina Afrooze Date: Tue, 30 Oct 2018 02:02:37 -0700 Subject: [PATCH] Add docstring for architecture (#47) - Removed get_model() from architecture because it is only implementation detail of architecture. --- rl_coach/architectures/architecture.py | 153 ++++++++++++++++-- .../tensorflow_components/architecture.py | 8 + 2 files changed, 146 insertions(+), 15 deletions(-) diff --git a/rl_coach/architectures/architecture.py b/rl_coach/architectures/architecture.py index 2d0377a..1ae2d47 100644 --- a/rl_coach/architectures/architecture.py +++ b/rl_coach/architectures/architecture.py @@ -14,6 +14,10 @@ # limitations under the License. # +from typing import Any, Dict, List, Tuple + +import numpy as np + from rl_coach.base_parameters import AgentParameters from rl_coach.spaces import SpacesDefinition @@ -21,15 +25,15 @@ from rl_coach.spaces import SpacesDefinition class Architecture(object): def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, name: str= ""): """ + Creates a neural network 'architecture', that can be trained and used for inference. + :param agent_parameters: the agent parameters :param spaces: the spaces (observation, action, etc.) definition of the agent :param name: the name of the network """ - # spaces self.spaces = spaces - self.name = name - self.network_wrapper_name = self.name.split('/')[0] # the name can be main/online and the network_wrapper_name will be main + self.network_wrapper_name = self.name.split('/')[0] # e.g. 
'main/online' --> 'main' self.full_name = "{}/{}".format(agent_parameters.full_name_id, name) self.network_parameters = agent_parameters.network_wrappers[self.network_wrapper_name] self.batch_size = self.network_parameters.batch_size @@ -37,35 +41,154 @@ class Architecture(object): self.optimizer = None self.ap = agent_parameters - def get_model(self): + def predict(self, inputs: Dict[str, np.ndarray]) -> List[np.ndarray]: + """ + Given input observations, use the model to make predictions (e.g. action or value). + + :param inputs: current state (i.e. observations, measurements, goals, etc.) + (e.g. `{'observation': numpy.ndarray}` of shape (batch_size, observation_space_size)) + :return: predictions of action or value (e.g. of shape (batch_size, action_space_size) for action predictions) + """ pass - def predict(self, inputs): + def train_on_batch(self, + inputs: Dict[str, np.ndarray], + targets: List[np.ndarray], + scaler: float=1., + additional_fetches: list=None, + importance_weights: np.ndarray=None) -> tuple: + """ + Given a batch of inputs (e.g. states) and targets (e.g. discounted rewards), takes a training step: i.e. runs a + forward pass and backward pass of the network, accumulates the gradients and applies an optimization step to + update the weights. + Calls `accumulate_gradients` followed by `apply_and_reset_gradients`. + Note: Currently an unused method. + + :param inputs: typically the environment states (but can also contain other data necessary for loss). + (e.g. `{'observation': numpy.ndarray}` with `observation` of shape (batch_size, observation_space_size) or + (batch_size, observation_space_size, stack_size) or + `{'observation': numpy.ndarray, 'output_0_0': numpy.ndarray}` with `output_0_0` of shape (batch_size,)) + :param targets: target values of shape (batch_size, ). For example discounted rewards for value network + for calculating the value-network loss would be a target. 
Length of list and order of arrays in + the list matches that of network losses which are defined by network parameters + :param scaler: value to scale gradients by before optimizing network weights + :param additional_fetches: list of additional values to fetch and return. The type of each list + element is framework dependent. + :param importance_weights: ndarray of shape (batch_size,) to multiply with batch loss. + :return: tuple of total_loss, losses, norm_unclipped_grads, fetched_tensors + total_loss (float): sum of all head losses + losses (list of float): list of all losses. The order is list of target losses followed by list + of regularization losses. The specifics of losses is dependent on the network parameters + (number of heads, etc.) + norm_unclipped_grads (float): global norm of all gradients before any gradient clipping is applied + fetched_tensors: all values for additional_fetches + """ pass - def train_on_batch(self, inputs, targets): + def get_weights(self) -> List[np.ndarray]: + """ + Gets model weights as a list of ndarrays. It is used for synchronizing weights between two identical networks. + + :return: list of weights as ndarrays + """ pass - def get_weights(self): + def set_weights(self, weights: List[np.ndarray], rate: float=1.0) -> None: + """ + Sets model weights for provided layer parameters. + + :param weights: list of model weights in the same order as received in get_weights + :param rate: controls the mixture of given weight values versus old weight values. + i.e. new_weight = rate * given_weight + (1 - rate) * old_weight + :return: None + """ pass - def set_weights(self, weights, rate=1.0): + def reset_accumulated_gradients(self) -> None: + """ + Sets gradient of all parameters to 0. + + Once gradients are reset, they must be accessible by `accumulated_gradients` property of this class, + which must return a list of numpy ndarrays. Child class must ensure that `accumulated_gradients` is set. 
+ """ pass - def reset_accumulated_gradients(self): + def accumulate_gradients(self, + inputs: Dict[str, np.ndarray], + targets: List[np.ndarray], + additional_fetches: list=None, + importance_weights: np.ndarray=None, + no_accumulation: bool=False) ->\ + Tuple[float, List[float], float, list]: + """ + Given a batch of inputs (i.e. states) and targets (e.g. discounted rewards), computes and accumulates the + gradients for model parameters. Will run forward and backward pass to compute gradients, clip the gradient + values if required and then accumulate gradients from all learners. It does not update the model weights, + that's performed in `apply_and_reset_gradients` method. + + Once gradients are accumulated, they are accessed by `accumulated_gradients` property of this class. + + :param inputs: typically the environment states (but can also contain other data for loss) + (e.g. `{'observation': numpy.ndarray}` with `observation` of shape (batch_size, observation_space_size) or + (batch_size, observation_space_size, stack_size) or + `{'observation': numpy.ndarray, 'output_0_0': numpy.ndarray}` with `output_0_0` of shape (batch_size,)) + :param targets: targets for calculating loss. For example discounted rewards for value network + for calculating the value-network loss would be a target. Length of list and order of arrays in + the list matches that of network losses which are defined by network parameters + :param additional_fetches: list of additional values to fetch and return. The type of each list + element is framework dependent. + :param importance_weights: ndarray of shape (batch_size,) to multiply with batch loss. + :param no_accumulation: if True, set gradient values to the new gradients, otherwise sum with previously + calculated gradients + :return: tuple of total_loss, losses, norm_unclipped_grads, fetched_tensors + total_loss (float): sum of all head losses + losses (list of float): list of all losses. 
The order is list of target losses followed by list of regularization losses. + The specifics of losses is dependent on the network parameters (number of heads, etc.) + norm_unclipped_grads (float): global norm of all gradients before any gradient clipping is applied + fetched_tensors: all values for additional_fetches + """ pass - def accumulate_gradients(self, inputs, targets): + def apply_and_reset_gradients(self, gradients: List[np.ndarray]) -> None: + """ + Applies the given gradients to the network weights and resets the gradient accumulations. + Has the same impact as calling `apply_gradients`, then `reset_accumulated_gradients`. + + :param gradients: gradients for the parameter weights, taken from `accumulated_gradients` property + of an identical network (either self or another identical network) + """ pass - def apply_and_reset_gradients(self, gradients): + def apply_gradients(self, gradients: List[np.ndarray]) -> None: + """ + Applies the given gradients to the network weights. + Will be performed sync or async depending on `network_parameters.async_training` + + :param gradients: gradients for the parameter weights, taken from `accumulated_gradients` property + of an identical network (either self or another identical network) + """ pass - def apply_gradients(self, gradients): + def get_variable_value(self, variable: Any) -> np.ndarray: + """ + Gets value of a specified variable. Type of variable is dependent on the framework. + Example of a variable is head.kl_coefficient, which could be a symbol for evaluation + or could be a string representing the value. + + :param variable: variable of interest + :return: value of the specified variable + """ pass - def get_variable_value(self, variable): - pass + def set_variable_value(self, assign_op: Any, value: np.ndarray, placeholder: Any): + """ + Updates the value of a specified variable. Type of assign_op is dependent on the framework + and is a unique identifier for assigning value to a variable. 
For example an agent may use + head.assign_kl_coefficient. There is a one to one mapping between assign_op and placeholder + (in the example above, placeholder would be head.kl_coefficient_ph). - def set_variable_value(self, assign_op, value, placeholder=None): + :param assign_op: a parameter representing the operation for assigning value to a specific variable + :param value: value of the specified variable used for update + :param placeholder: a placeholder for binding the value to assign_op. + """ pass diff --git a/rl_coach/architectures/tensorflow_components/architecture.py b/rl_coach/architectures/tensorflow_components/architecture.py index e731920..7c5c248 100644 --- a/rl_coach/architectures/tensorflow_components/architecture.py +++ b/rl_coach/architectures/tensorflow_components/architecture.py @@ -146,6 +146,14 @@ class TensorFlowArchitecture(Architecture): # set the fetches for training self._set_initial_fetch_list() + def get_model(self) -> None: + """ + Constructs the model using `network_parameters` and sets `input_embedders`, `middleware`, + `output_heads`, `outputs`, `losses`, `total_loss`, `adaptive_learning_rate_scheme`, + `current_learning_rate`, and `optimizer` + """ + raise NotImplementedError + def _set_initial_fetch_list(self): """ Create an initial list of tensors to fetch in each training iteration