
Add docstring for architecture (#47)

- Removed get_model() from architecture because it is only an implementation detail of the architecture.
Sina Afrooze
2018-10-30 02:02:37 -07:00
committed by Gal Leibovich
parent 324c67d614
commit 2046358ab0
2 changed files with 146 additions and 15 deletions


@@ -14,6 +14,10 @@
# limitations under the License.
#
from typing import Any, Dict, List, Tuple
import numpy as np
from rl_coach.base_parameters import AgentParameters
from rl_coach.spaces import SpacesDefinition
@@ -21,15 +25,15 @@ from rl_coach.spaces import SpacesDefinition
class Architecture(object):
def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, name: str= ""):
"""
Creates a neural network 'architecture', that can be trained and used for inference.
:param agent_parameters: the agent parameters
:param spaces: the spaces (observation, action, etc.) definition of the agent
:param name: the name of the network
"""
# spaces
self.spaces = spaces
self.name = name
self.network_wrapper_name = self.name.split('/')[0] # the name can be main/online and the network_wrapper_name will be main
self.network_wrapper_name = self.name.split('/')[0] # e.g. 'main/online' --> 'main'
self.full_name = "{}/{}".format(agent_parameters.full_name_id, name)
self.network_parameters = agent_parameters.network_wrappers[self.network_wrapper_name]
self.batch_size = self.network_parameters.batch_size
@@ -37,35 +41,154 @@ class Architecture(object):
self.optimizer = None
self.ap = agent_parameters
def get_model(self):
def predict(self, inputs: Dict[str, np.ndarray]) -> List[np.ndarray]:
"""
Given input observations, use the model to make predictions (e.g. action or value).
:param inputs: current state (i.e. observations, measurements, goals, etc.)
(e.g. `{'observation': numpy.ndarray}` of shape (batch_size, observation_space_size))
:return: predictions of action or value (e.g. of shape (batch_size, action_space_size) for action predictions)
"""
pass
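
To illustrate the call pattern the new docstring describes, a minimal sketch (here `network` stands for any concrete Architecture subclass, and the observation size of 10 is made up):

import numpy as np

# Hypothetical batch of 32 observations, each of size 10.
states = {'observation': np.random.rand(32, 10).astype(np.float32)}
predictions = network.predict(states)  # list of ndarrays, e.g. one array of shape (32, num_actions)
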
def predict(self, inputs):
def train_on_batch(self,
inputs: Dict[str, np.ndarray],
targets: List[np.ndarray],
scaler: float=1.,
additional_fetches: list=None,
importance_weights: np.ndarray=None) -> tuple:
"""
Given a batch of inputs (e.g. states) and targets (e.g. discounted rewards), takes a training step: i.e. runs a
forward pass and backward pass of the network, accumulates the gradients and applies an optimization step to
update the weights.
Calls `accumulate_gradients` followed by `apply_and_reset_gradients`.
Note: Currently an unused method.
:param inputs: typically the environment states (but can also contain other data necessary for loss).
(e.g. `{'observation': numpy.ndarray}` with `observation` of shape (batch_size, observation_space_size) or
(batch_size, observation_space_size, stack_size) or
`{'observation': numpy.ndarray, 'output_0_0': numpy.ndarray}` with `output_0_0` of shape (batch_size,))
:param targets: target values of shape (batch_size, ). For example, the discounted rewards used to compute
the value-network loss would be one target. The length of the list and the order of its arrays
match the network losses, which are defined by the network parameters
:param scaler: value to scale gradients by before optimizing network weights
:param additional_fetches: list of additional values to fetch and return. The type of each list
element is framework dependent.
:param importance_weights: ndarray of shape (batch_size,) to multiply with batch loss.
:return: tuple of total_loss, losses, norm_unclipped_grads, fetched_tensors
total_loss (float): sum of all head losses
losses (list of float): list of all losses. The order is the list of target losses followed by the list
of regularization losses. The specifics of the losses depend on the network parameters
(number of heads, etc.)
norm_unclipped_grads (float): global norm of all gradients before any gradient clipping is applied
fetched_tensors: all values for additional_fetches
"""
pass
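
A hedged sketch of the training call described above, assuming `network` is a concrete subclass with a single value head (shapes follow the docstring; all names are illustrative):

import numpy as np

states = {'observation': np.random.rand(32, 10).astype(np.float32)}
discounted_returns = [np.random.rand(32).astype(np.float32)]  # one target array per network loss
total_loss, losses, grad_norm, fetched = network.train_on_batch(
    inputs=states,
    targets=discounted_returns,
    scaler=1.0,
    importance_weights=np.ones(32, dtype=np.float32))
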
def train_on_batch(self, inputs, targets):
def get_weights(self) -> List[np.ndarray]:
"""
Gets model weights as a list of ndarrays. It is used for synchronizing weights between two identical networks.
:return: list of model weights as ndarrays
"""
pass
def get_weights(self):
def set_weights(self, weights: List[np.ndarray], rate: float=1.0) -> None:
"""
Sets model weights for provided layer parameters.
:param weights: list of model weights in the same order as received in get_weights
:param rate: controls the mixture of given weight values versus old weight values.
i.e. new_weight = rate * given_weight + (1 - rate) * old_weight
:return: None
"""
pass
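
For example, a soft target-network update can be expressed with the two methods above (a sketch, assuming `online_network` and `target_network` are two identical Architecture instances):

# new_target_weight = 0.01 * online_weight + 0.99 * old_target_weight
target_network.set_weights(online_network.get_weights(), rate=0.01)
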
def set_weights(self, weights, rate=1.0):
def reset_accumulated_gradients(self) -> None:
"""
Sets gradient of all parameters to 0.
Once gradients are reset, they must be accessible by `accumulated_gradients` property of this class,
which must return a list of numpy ndarrays. Child class must ensure that `accumulated_gradients` is set.
"""
pass
def reset_accumulated_gradients(self):
def accumulate_gradients(self,
inputs: Dict[str, np.ndarray],
targets: List[np.ndarray],
additional_fetches: list=None,
importance_weights: np.ndarray=None,
no_accumulation: bool=False) ->\
Tuple[float, List[float], float, list]:
"""
Given a batch of inputs (i.e. states) and targets (e.g. discounted rewards), computes and accumulates the
gradients for model parameters. Will run forward and backward pass to compute gradients, clip the gradient
values if required and then accumulate gradients from all learners. It does not update the model weights;
that is done in the `apply_and_reset_gradients` method.
Once gradients are accumulated, they are accessed through the `accumulated_gradients` property of this class.
:param inputs: typically the environment states (but can also contain other data for loss)
(e.g. `{'observation': numpy.ndarray}` with `observation` of shape (batch_size, observation_space_size) or
(batch_size, observation_space_size, stack_size) or
`{'observation': numpy.ndarray, 'output_0_0': numpy.ndarray}` with `output_0_0` of shape (batch_size,))
:param targets: targets for calculating loss. For example, the discounted rewards used to compute
the value-network loss would be one target. The length of the list and the order of its arrays
match the network losses, which are defined by the network parameters
:param additional_fetches: list of additional values to fetch and return. The type of each list
element is framework dependent.
:param importance_weights: ndarray of shape (batch_size,) to multiply with batch loss.
:param no_accumulation: if True, set gradient values to the new gradients, otherwise sum with previously
calculated gradients
:return: tuple of total_loss, losses, norm_unclipped_grads, fetched_tensors
total_loss (float): sum of all head losses
losses (list of float): list of all losses. The order is the list of target losses followed by the list of regularization losses.
The specifics of the losses depend on the network parameters (number of heads, etc.)
norm_unclipped_grads (float): global norm of all gradients before any gradient clipping is applied
fetched_tensors: all values for additional_fetches
"""
pass
def accumulate_gradients(self, inputs, targets):
def apply_and_reset_gradients(self, gradients: List[np.ndarray]) -> None:
"""
Applies the given gradients to the network weights and resets the gradient accumulations.
Has the same impact as calling `apply_gradients`, then `reset_accumulated_gradients`.
:param gradients: gradients for the parameter weights, taken from `accumulated_gradients` property
of an identical network (either self or another identical network)
"""
pass
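
A sketch of the accumulate-then-apply flow that `train_on_batch` wraps (assuming `network` is a concrete instance and reusing the illustrative `states` and `discounted_returns` from the earlier sketches):

network.reset_accumulated_gradients()
total_loss, losses, grad_norm, fetched = network.accumulate_gradients(
    inputs=states, targets=discounted_returns)
# The gradients could also come from the accumulated_gradients property of another identical network.
network.apply_and_reset_gradients(network.accumulated_gradients)
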
def apply_and_reset_gradients(self, gradients):
def apply_gradients(self, gradients: List[np.ndarray]) -> None:
"""
Applies the given gradients to the network weights.
Will be performed synchronously or asynchronously depending on `network_parameters.async_training`
:param gradients: gradients for the parameter weights, taken from `accumulated_gradients` property
of an identical network (either self or another identical network)
"""
pass
def apply_gradients(self, gradients):
def get_variable_value(self, variable: Any) -> np.ndarray:
"""
Gets value of a specified variable. Type of variable is dependent on the framework.
Example of a variable is head.kl_coefficient, which could be a symbol for evaluation
or could be a string representing the value.
:param variable: variable of interest
:return: value of the specified variable
"""
pass
def get_variable_value(self, variable):
pass
def set_variable_value(self, assign_op, value, placeholder=None):
def set_variable_value(self, assign_op: Any, value: np.ndarray, placeholder: Any):
"""
Updates the value of a specified variable. Type of assign_op is dependent on the framework
and is a unique identifier for assigning value to a variable. For example an agent may use
head.assign_kl_coefficient. There is a one-to-one mapping between assign_op and placeholder
(in the example above, placeholder would be head.kl_coefficient_ph).
:param assign_op: a parameter representing the operation for assigning value to a specific variable
:param value: value of the specified variable used for update
:param placeholder: a placeholder for binding the value to assign_op.
"""
pass
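
To illustrate the two variable accessors with the KL-coefficient example from their docstrings (a sketch; the `head` attributes shown are those named in the docstrings and are framework dependent):

# Read the current value, then assign a new one through the matching assign op / placeholder pair.
current_kl = network.get_variable_value(head.kl_coefficient)
network.set_variable_value(head.assign_kl_coefficient, current_kl * 2.0, head.kl_coefficient_ph)
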


@@ -146,6 +146,14 @@ class TensorFlowArchitecture(Architecture):
# set the fetches for training
self._set_initial_fetch_list()
def get_model(self) -> None:
"""
Constructs the model using `network_parameters` and sets `input_embedders`, `middleware`,
`output_heads`, `outputs`, `losses`, `total_loss`, `adaptive_learning_rate_scheme`,
`current_learning_rate`, and `optimizer`
"""
raise NotImplementedError
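
A hedged sketch of what an override could look like (purely illustrative: the two dense layers below are not the library's real embedder/middleware/head stack, and only some of the listed attributes are populated):

import tensorflow as tf

class MyTensorFlowNetwork(TensorFlowArchitecture):
    def get_model(self) -> None:
        # Illustrative graph: one observation input and one value output (TF1-style API).
        observation = tf.placeholder(tf.float32, [None, 10], name='observation')
        target = tf.placeholder(tf.float32, [None, 1], name='target')
        hidden = tf.layers.dense(observation, 64, activation=tf.nn.relu)
        value = tf.layers.dense(hidden, 1)
        self.outputs = [value]
        self.losses = [tf.losses.mean_squared_error(target, value)]
        self.total_loss = tf.reduce_sum(self.losses)
        self.current_learning_rate = self.network_parameters.learning_rate
        self.optimizer = tf.train.AdamOptimizer(learning_rate=self.current_learning_rate)
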
def _set_initial_fetch_list(self):
"""
Create an initial list of tensors to fetch in each training iteration