Add docstring for architecture (#47)
- Removed get_model() from Architecture because it is only an implementation detail of the architecture.
committed by Gal Leibovich
parent 324c67d614
commit 2046358ab0
@@ -14,6 +14,10 @@
 # limitations under the License.
 #

+from typing import Any, Dict, List, Tuple
+
+import numpy as np
+
 from rl_coach.base_parameters import AgentParameters
 from rl_coach.spaces import SpacesDefinition

@@ -21,15 +25,15 @@ from rl_coach.spaces import SpacesDefinition
 class Architecture(object):
     def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, name: str=""):
         """
         Creates a neural network 'architecture' that can be trained and used for inference.

         :param agent_parameters: the agent parameters
         :param spaces: the spaces (observation, action, etc.) definition of the agent
         :param name: the name of the network
         """
         # spaces
         self.spaces = spaces
         self.name = name
-        self.network_wrapper_name = self.name.split('/')[0]  # the name can be main/online and the network_wrapper_name will be main
+        self.network_wrapper_name = self.name.split('/')[0]  # e.g. 'main/online' --> 'main'
         self.full_name = "{}/{}".format(agent_parameters.full_name_id, name)
         self.network_parameters = agent_parameters.network_wrappers[self.network_wrapper_name]
         self.batch_size = self.network_parameters.batch_size
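As a quick illustration of the naming fields set up above (the agent id 'agent_0' is a made-up example, not part of this commit):

    name = 'main/online'
    network_wrapper_name = name.split('/')[0]       # 'main'
    full_name = '{}/{}'.format('agent_0', name)     # 'agent_0/main/online'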
@@ -37,35 +41,154 @@ class Architecture(object):
         self.optimizer = None
         self.ap = agent_parameters

-    def get_model(self):
+    def predict(self, inputs: Dict[str, np.ndarray]) -> List[np.ndarray]:
+        """
+        Given input observations, use the model to make predictions (e.g. action or value).
+
+        :param inputs: current state (i.e. observations, measurements, goals, etc.)
+            (e.g. `{'observation': numpy.ndarray}` of shape (batch_size, observation_space_size))
+        :return: predictions of action or value, e.g. of shape (batch_size, action_space_size) for action predictions
+        """
         pass

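A hedged usage sketch of `predict` (the `network` instance and the shapes are hypothetical, not part of this commit):

    import numpy as np

    # a batch of 32 observations, each with 4 features (shapes are illustrative)
    inputs = {'observation': np.random.randn(32, 4)}
    predictions = network.predict(inputs)  # e.g. [ndarray of shape (32, num_actions)]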
-    def predict(self, inputs):
+    def train_on_batch(self,
+                       inputs: Dict[str, np.ndarray],
+                       targets: List[np.ndarray],
+                       scaler: float=1.,
+                       additional_fetches: list=None,
+                       importance_weights: np.ndarray=None) -> tuple:
+        """
+        Given a batch of inputs (e.g. states) and targets (e.g. discounted rewards), takes a training step: i.e. runs a
+        forward pass and a backward pass of the network, accumulates the gradients and applies an optimization step to
+        update the weights.
+        Calls `accumulate_gradients` followed by `apply_and_reset_gradients`.
+        Note: this method is currently unused.
+
+        :param inputs: typically the environment states (but can also contain other data necessary for the loss)
+            (e.g. `{'observation': numpy.ndarray}` with `observation` of shape (batch_size, observation_space_size) or
+            (batch_size, observation_space_size, stack_size) or
+            `{'observation': numpy.ndarray, 'output_0_0': numpy.ndarray}` with `output_0_0` of shape (batch_size,))
+        :param targets: target values of shape (batch_size,). For example, for a value network, discounted rewards
+            would be the target for calculating the value loss. The length of the list and the order of the arrays
+            match those of the network losses, which are defined by the network parameters.
+        :param scaler: value to scale gradients by before optimizing the network weights
+        :param additional_fetches: list of additional values to fetch and return. The type of each list
+            element is framework dependent.
+        :param importance_weights: ndarray of shape (batch_size,) to multiply with the batch loss
+        :return: tuple of (total_loss, losses, norm_unclipped_grads, fetched_tensors)
+            total_loss (float): sum of all head losses
+            losses (list of float): list of all losses. The order is the list of target losses followed by the list
+                of regularization losses. The specifics of the losses depend on the network parameters
+                (number of heads, etc.)
+            norm_unclipped_grads (float): global norm of all gradients before any gradient clipping is applied
+            fetched_tensors: all values for additional_fetches
+        """
         pass

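Unpacking the documented return tuple; a minimal sketch under the same hypothetical setup as above:

    total_loss, losses, norm_unclipped_grads, fetched = network.train_on_batch(
        inputs={'observation': np.random.randn(32, 4)},
        targets=[np.random.randn(32)],  # one target array per defined network loss
        scaler=1.0)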
-    def train_on_batch(self, inputs, targets):
+    def get_weights(self) -> List[np.ndarray]:
+        """
+        Gets the model weights as a list of ndarrays. It is used for synchronizing weights between two identical
+        networks.
+
+        :return: list of weights as ndarrays
+        """
         pass

-    def get_weights(self):
+    def set_weights(self, weights: List[np.ndarray], rate: float=1.0) -> None:
+        """
+        Sets the model weights for the provided layer parameters.
+
+        :param weights: list of model weights in the same order as received in get_weights
+        :param rate: controls the mixture of given weight values versus old weight values,
+            i.e. new_weight = rate * given_weight + (1 - rate) * old_weight
+        :return: None
+        """
         pass

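Because of the `rate` mixing formula above, `set_weights` covers both hard copies and soft target-network updates. A minimal sketch, assuming `online` and `target` are two identical Architecture instances (illustrative names):

    # hard copy: target <- online
    target.set_weights(online.get_weights(), rate=1.0)

    # soft update: target <- 0.01 * online + 0.99 * target
    target.set_weights(online.get_weights(), rate=0.01)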
-    def set_weights(self, weights, rate=1.0):
+    def reset_accumulated_gradients(self) -> None:
+        """
+        Sets the gradients of all parameters to 0.
+
+        Once gradients are reset, they must be accessible via the `accumulated_gradients` property of this class,
+        which must return a list of numpy ndarrays. A child class must ensure that `accumulated_gradients` is set.
+        """
         pass

-    def reset_accumulated_gradients(self):
+    def accumulate_gradients(self,
+                             inputs: Dict[str, np.ndarray],
+                             targets: List[np.ndarray],
+                             additional_fetches: list=None,
+                             importance_weights: np.ndarray=None,
+                             no_accumulation: bool=False) -> \
+            Tuple[float, List[float], float, list]:
+        """
+        Given a batch of inputs (i.e. states) and targets (e.g. discounted rewards), computes and accumulates the
+        gradients for the model parameters. Runs a forward and a backward pass to compute the gradients, clips the
+        gradient values if required, and then accumulates the gradients from all learners. It does not update the
+        model weights; that is performed in the `apply_and_reset_gradients` method.
+
+        Once gradients are accumulated, they are accessible via the `accumulated_gradients` property of this class.
+
+        :param inputs: typically the environment states (but can also contain other data for the loss)
+            (e.g. `{'observation': numpy.ndarray}` with `observation` of shape (batch_size, observation_space_size) or
+            (batch_size, observation_space_size, stack_size) or
+            `{'observation': numpy.ndarray, 'output_0_0': numpy.ndarray}` with `output_0_0` of shape (batch_size,))
+        :param targets: targets for calculating the loss. For example, for a value network, discounted rewards
+            would be the target for calculating the value loss. The length of the list and the order of the arrays
+            match those of the network losses, which are defined by the network parameters.
+        :param additional_fetches: list of additional values to fetch and return. The type of each list
+            element is framework dependent.
+        :param importance_weights: ndarray of shape (batch_size,) to multiply with the batch loss
+        :param no_accumulation: if True, set the gradient values to the new gradients; otherwise sum them with the
+            previously calculated gradients
+        :return: tuple of (total_loss, losses, norm_unclipped_grads, fetched_tensors)
+            total_loss (float): sum of all head losses
+            losses (list of float): list of all losses. The order is the list of target losses followed by the list
+                of regularization losses. The specifics of the losses depend on the network parameters
+                (number of heads, etc.)
+            norm_unclipped_grads (float): global norm of all gradients before any gradient clipping is applied
+            fetched_tensors: all values for additional_fetches
+        """
         pass

-    def accumulate_gradients(self, inputs, targets):
+    def apply_and_reset_gradients(self, gradients: List[np.ndarray]) -> None:
+        """
+        Applies the given gradients to the network weights and resets the gradient accumulations.
+        Has the same effect as calling `apply_gradients` followed by `reset_accumulated_gradients`.
+
+        :param gradients: gradients for the parameter weights, taken from the `accumulated_gradients` property
+            of an identical network (either self or another identical network)
+        """
         pass

-    def apply_and_reset_gradients(self, gradients):
+    def apply_gradients(self, gradients: List[np.ndarray]) -> None:
+        """
+        Applies the given gradients to the network weights.
+        Will be performed synchronously or asynchronously depending on `network_parameters.async_training`.
+
+        :param gradients: gradients for the parameter weights, taken from the `accumulated_gradients` property
+            of an identical network (either self or another identical network)
+        """
         pass

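Taken together, the gradient methods describe a shared-optimizer flow: accumulate locally, apply to an identical network, then synchronize. A hedged sketch with illustrative names (`local`, `global_net` are not defined in this commit):

    # compute and accumulate gradients on the local copy; its weights stay unchanged
    local.accumulate_gradients(inputs, targets)

    # apply them to the shared network and clear the local accumulation
    global_net.apply_and_reset_gradients(local.accumulated_gradients)

    # pull the updated weights back into the local copy
    local.set_weights(global_net.get_weights())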
-    def apply_gradients(self, gradients):
+    def get_variable_value(self, variable: Any) -> np.ndarray:
+        """
+        Gets the value of a specified variable. The type of the variable is framework dependent.
+        An example of a variable is head.kl_coefficient, which could be a symbol for evaluation
+        or could be a string representing the value.
+
+        :param variable: the variable of interest
+        :return: the value of the specified variable
+        """
         pass

-    def get_variable_value(self, variable):
-        pass
-
-    def set_variable_value(self, assign_op, value, placeholder=None):
+    def set_variable_value(self, assign_op: Any, value: np.ndarray, placeholder: Any):
+        """
+        Updates the value of a specified variable. The type of assign_op is framework dependent
+        and is a unique identifier for assigning a value to a variable. For example, an agent may use
+        head.assign_kl_coefficient. There is a one-to-one mapping between assign_op and placeholder
+        (in the example above, the placeholder would be head.kl_coefficient_ph).
+
+        :param assign_op: a parameter representing the operation for assigning a value to a specific variable
+        :param value: the value of the specified variable used for the update
+        :param placeholder: a placeholder for binding the value to assign_op
+        """
         pass
@@ -146,6 +146,14 @@ class TensorFlowArchitecture(Architecture):
         # set the fetches for training
         self._set_initial_fetch_list()

+    def get_model(self) -> None:
+        """
+        Constructs the model using `network_parameters` and sets `input_embedders`, `middleware`,
+        `output_heads`, `outputs`, `losses`, `total_loss`, `adaptive_learning_rate_scheme`,
+        `current_learning_rate`, and `optimizer`.
+        """
+        raise NotImplementedError
+
     def _set_initial_fetch_list(self):
         """
         Create an initial list of tensors to fetch in each training iteration
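Concrete TensorFlow architectures are expected to override `get_model` and set the attributes its docstring lists; a minimal sketch (the subclass and its body are hypothetical, not part of this commit):

    class MyTensorFlowArchitecture(TensorFlowArchitecture):
        def get_model(self) -> None:
            # construct embedders, middleware and heads from self.network_parameters,
            # then expose the attributes documented by the base class
            self.input_embedders = []
            self.middleware = None
            self.output_heads = []
            self.outputs = []
            self.losses = []
            self.total_loss = None
            self.adaptive_learning_rate_scheme = None
            self.current_learning_rate = None
            self.optimizer = None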