Mirror of https://github.com/gryf/coach.git (synced 2025-12-17 19:20:19 +01:00)
Add docstring for architecture (#47)

- Removed get_model() from Architecture because it is only an implementation detail of the architecture.

committed by Gal Leibovich
parent 324c67d614
commit 2046358ab0
@@ -14,6 +14,10 @@
 # limitations under the License.
 #

+from typing import Any, Dict, List, Tuple
+
+import numpy as np
+
 from rl_coach.base_parameters import AgentParameters
 from rl_coach.spaces import SpacesDefinition

@@ -21,15 +25,15 @@ from rl_coach.spaces import SpacesDefinition
 class Architecture(object):
     def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, name: str= ""):
         """
+        Creates a neural network 'architecture' that can be trained and used for inference.
+
         :param agent_parameters: the agent parameters
         :param spaces: the spaces (observation, action, etc.) definition of the agent
         :param name: the name of the network
         """
-        # spaces
         self.spaces = spaces

         self.name = name
-        self.network_wrapper_name = self.name.split('/')[0]  # the name can be main/online and the network_wrapper_name will be main
+        self.network_wrapper_name = self.name.split('/')[0]  # e.g. 'main/online' --> 'main'
         self.full_name = "{}/{}".format(agent_parameters.full_name_id, name)
         self.network_parameters = agent_parameters.network_wrappers[self.network_wrapper_name]
         self.batch_size = self.network_parameters.batch_size
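
The name handling above is just a string split plus a dictionary lookup; a minimal sketch of the same resolution, with a plain dict standing in for the real `network_wrappers` field of `AgentParameters`:

# Stand-in for agent_parameters.network_wrappers, which in Coach maps a network
# wrapper name (e.g. 'main') to its NetworkParameters object.
network_wrappers = {'main': {'batch_size': 32}}

name = 'main/online'                       # full network name: <wrapper>/<copy>
network_wrapper_name = name.split('/')[0]  # -> 'main'
network_parameters = network_wrappers[network_wrapper_name]
print(network_parameters['batch_size'])    # 32
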
@@ -37,35 +41,154 @@ class Architecture(object):
         self.optimizer = None
         self.ap = agent_parameters

-    def get_model(self):
+    def predict(self, inputs: Dict[str, np.ndarray]) -> List[np.ndarray]:
+        """
+        Given input observations, use the model to make predictions (e.g. action or value).
+
+        :param inputs: current state (i.e. observations, measurements, goals, etc.)
+            (e.g. `{'observation': numpy.ndarray}` of shape (batch_size, observation_space_size))
+        :return: predictions of action or value, of shape (batch_size, action_space_size) for action predictions
+        """
         pass
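
A usage sketch for `predict`, with a toy stand-in network (not Coach code) so the input/output shapes from the docstring are concrete:

import numpy as np
from typing import Dict, List

class ToyNet:
    """Toy stand-in for a concrete Architecture subclass: one linear head."""
    def __init__(self, obs_size: int, action_size: int):
        self.w = np.ones((obs_size, action_size), dtype=np.float32)

    def predict(self, inputs: Dict[str, np.ndarray]) -> List[np.ndarray]:
        # One prediction per output head, shape (batch_size, action_space_size)
        return [inputs['observation'] @ self.w]

net = ToyNet(obs_size=8, action_size=2)
batch = {'observation': np.zeros((4, 8), dtype=np.float32)}
print(net.predict(batch)[0].shape)  # (4, 2)
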
-    def predict(self, inputs):
+    def train_on_batch(self,
+                       inputs: Dict[str, np.ndarray],
+                       targets: List[np.ndarray],
+                       scaler: float=1.,
+                       additional_fetches: list=None,
+                       importance_weights: np.ndarray=None) -> tuple:
+        """
+        Given a batch of inputs (e.g. states) and targets (e.g. discounted rewards), takes a training step:
+        i.e. runs a forward pass and a backward pass of the network, accumulates the gradients and applies an
+        optimization step to update the weights.
+        Calls `accumulate_gradients` followed by `apply_and_reset_gradients`.
+        Note: Currently an unused method.
+
+        :param inputs: typically the environment states (but can also contain other data necessary for loss)
+            (e.g. `{'observation': numpy.ndarray}` with `observation` of shape (batch_size, observation_space_size) or
+            (batch_size, observation_space_size, stack_size), or
+            `{'observation': numpy.ndarray, 'output_0_0': numpy.ndarray}` with `output_0_0` of shape (batch_size,))
+        :param targets: target values of shape (batch_size,). For example, discounted rewards used for calculating
+            the value-network loss would be a target. The length of the list and the order of the arrays in it
+            match those of the network losses, which are defined by the network parameters
+        :param scaler: value to scale gradients by before optimizing the network weights
+        :param additional_fetches: list of additional values to fetch and return. The type of each list
+            element is framework dependent.
+        :param importance_weights: ndarray of shape (batch_size,) to multiply with the batch loss
+        :return: tuple of (total_loss, losses, norm_unclipped_grads, fetched_tensors)
+            total_loss (float): sum of all head losses
+            losses (list of float): list of all losses. The order is the list of target losses followed by the
+                list of regularization losses. The specifics of the losses depend on the network parameters
+                (number of heads, etc.)
+            norm_unclipped_grads (float): global norm of all gradients before any gradient clipping is applied
+            fetched_tensors: all values for additional_fetches
+        """
         pass
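
The four-element return contract is the fixed part of `train_on_batch`; a sketch of the call and unpacking, with a dummy network (hypothetical, loss values made up) that models only that contract:

import numpy as np

class DummyTrainable:
    """Hypothetical network modeling only the train_on_batch return contract."""
    def train_on_batch(self, inputs, targets, scaler=1., additional_fetches=None,
                       importance_weights=None):
        losses = [0.5, 0.01]  # e.g. one head loss followed by one regularization loss
        return sum(losses), losses, 1.3, additional_fetches or []

net = DummyTrainable()
inputs = {'observation': np.zeros((32, 8), dtype=np.float32)}
targets = [np.zeros((32,), dtype=np.float32)]  # one target array per network loss
total_loss, losses, norm_unclipped_grads, fetched = net.train_on_batch(
    inputs, targets, scaler=1., importance_weights=np.ones((32,), dtype=np.float32))
print(total_loss, losses, norm_unclipped_grads)  # 0.51 [0.5, 0.01] 1.3
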
-    def train_on_batch(self, inputs, targets):
+    def get_weights(self) -> List[np.ndarray]:
+        """
+        Gets model weights as a list of ndarrays. It is used for synchronizing weights between two identical networks.
+
+        :return: list of weights as ndarrays
+        """
         pass
-    def get_weights(self):
+    def set_weights(self, weights: List[np.ndarray], rate: float=1.0) -> None:
+        """
+        Sets model weights for the provided layer parameters.
+
+        :param weights: list of model weights in the same order as received from get_weights
+        :param rate: controls the mixture of given weight values versus old weight values,
+            i.e. new_weight = rate * given_weight + (1 - rate) * old_weight
+        :return: None
+        """
         pass
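
The `rate` argument gives the usual soft (Polyak-style) update; a minimal numpy sketch of the mixing rule stated in the docstring:

import numpy as np

def mix_weights(old_weights, given_weights, rate=1.0):
    # new_weight = rate * given_weight + (1 - rate) * old_weight
    return [rate * given + (1.0 - rate) * old
            for old, given in zip(old_weights, given_weights)]

old = [np.zeros((2, 2)), np.zeros(2)]
given = [np.ones((2, 2)), np.ones(2)]
print(mix_weights(old, given, rate=0.1)[0])  # all entries 0.1

rate=1.0 is a hard copy (e.g. syncing a target network to the online network); a small rate yields a slowly tracking copy.
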
-    def set_weights(self, weights, rate=1.0):
+    def reset_accumulated_gradients(self) -> None:
+        """
+        Sets the gradients of all parameters to 0.
+
+        Once gradients are reset, they must be accessible through the `accumulated_gradients` property of this
+        class, which must return a list of numpy ndarrays. A child class must ensure that `accumulated_gradients`
+        is set.
+        """
         pass
-    def reset_accumulated_gradients(self):
+    def accumulate_gradients(self,
+                             inputs: Dict[str, np.ndarray],
+                             targets: List[np.ndarray],
+                             additional_fetches: list=None,
+                             importance_weights: np.ndarray=None,
+                             no_accumulation: bool=False) ->\
+            Tuple[float, List[float], float, list]:
+        """
+        Given a batch of inputs (i.e. states) and targets (e.g. discounted rewards), computes and accumulates the
+        gradients for the model parameters. Runs a forward and backward pass to compute the gradients, clips the
+        gradient values if required, and then accumulates gradients from all learners. It does not update the
+        model weights; that is performed in the `apply_and_reset_gradients` method.
+
+        Once gradients are accumulated, they are accessed through the `accumulated_gradients` property of this class.
+
+        :param inputs: typically the environment states (but can also contain other data for loss)
+            (e.g. `{'observation': numpy.ndarray}` with `observation` of shape (batch_size, observation_space_size) or
+            (batch_size, observation_space_size, stack_size), or
+            `{'observation': numpy.ndarray, 'output_0_0': numpy.ndarray}` with `output_0_0` of shape (batch_size,))
+        :param targets: targets for calculating the loss. For example, discounted rewards used for calculating
+            the value-network loss would be a target. The length of the list and the order of the arrays in it
+            match those of the network losses, which are defined by the network parameters
+        :param additional_fetches: list of additional values to fetch and return. The type of each list
+            element is framework dependent.
+        :param importance_weights: ndarray of shape (batch_size,) to multiply with the batch loss
+        :param no_accumulation: if True, set the gradient values to the new gradients; otherwise, sum with the
+            previously calculated gradients
+        :return: tuple of (total_loss, losses, norm_unclipped_grads, fetched_tensors)
+            total_loss (float): sum of all head losses
+            losses (list of float): list of all losses. The order is the list of target losses followed by the
+                list of regularization losses. The specifics of the losses depend on the network parameters
+                (number of heads, etc.)
+            norm_unclipped_grads (float): global norm of all gradients before any gradient clipping is applied
+            fetched_tensors: all values for additional_fetches
+        """
         pass
-    def accumulate_gradients(self, inputs, targets):
+    def apply_and_reset_gradients(self, gradients: List[np.ndarray]) -> None:
+        """
+        Applies the given gradients to the network weights and resets the gradient accumulations.
+        Has the same effect as calling `apply_gradients` followed by `reset_accumulated_gradients`.
+
+        :param gradients: gradients for the parameter weights, taken from the `accumulated_gradients` property
+            of an identical network (either self or another identical network)
+        """
         pass
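
Together with `accumulate_gradients`, this supports the pattern where a learner computes gradients locally and applies them to an identical shared network; a sketch using only the methods documented above (`worker` and `shared` are assumed to be identical Architecture instances):

def shared_training_step(worker, shared, inputs, targets):
    # Forward + backward pass; gradients land in worker.accumulated_gradients.
    total_loss, losses, norm_unclipped_grads, _ = worker.accumulate_gradients(
        inputs, targets, no_accumulation=True)
    # Apply the locally computed gradients to the shared weights, then reset.
    shared.apply_and_reset_gradients(worker.accumulated_gradients)
    # Pull the updated shared weights back into the worker (hard copy).
    worker.set_weights(shared.get_weights(), rate=1.0)
    return total_loss
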
-    def apply_and_reset_gradients(self, gradients):
+    def apply_gradients(self, gradients: List[np.ndarray]) -> None:
+        """
+        Applies the given gradients to the network weights.
+        Will be performed synchronously or asynchronously depending on `network_parameters.async_training`.
+
+        :param gradients: gradients for the parameter weights, taken from the `accumulated_gradients` property
+            of an identical network (either self or another identical network)
+        """
         pass
-    def apply_gradients(self, gradients):
+    def get_variable_value(self, variable: Any) -> np.ndarray:
+        """
+        Gets the value of a specified variable. The type of the variable is dependent on the framework.
+        An example of a variable is head.kl_coefficient, which could be a symbol for evaluation
+        or could be a string representing the value.
+
+        :param variable: the variable of interest
+        :return: the value of the specified variable
+        """
         pass
-    def get_variable_value(self, variable):
-        pass
-
-    def set_variable_value(self, assign_op, value, placeholder=None):
+    def set_variable_value(self, assign_op: Any, value: np.ndarray, placeholder: Any):
+        """
+        Updates the value of a specified variable. The type of assign_op is dependent on the framework
+        and is a unique identifier for assigning a value to a variable. For example, an agent may use
+        head.assign_kl_coefficient. There is a one-to-one mapping between assign_op and placeholder
+        (in the example above, placeholder would be head.kl_coefficient_ph).
+
+        :param assign_op: a parameter representing the operation for assigning a value to a specific variable
+        :param value: the value of the specified variable used for the update
+        :param placeholder: a placeholder for binding the value to assign_op
+        """
         pass
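
A toy sketch (not Coach or TensorFlow code) of the assign_op/placeholder contract described above: the op identifies which variable to write, and the placeholder carries the value. Here the op is simply the variable's name:

import numpy as np
from typing import Any

class ToyVariableStore:
    """Toy model of get_variable_value/set_variable_value over a dict of values."""
    def __init__(self):
        self.values = {'kl_coefficient': np.float32(0.2)}

    def get_variable_value(self, variable: Any) -> np.ndarray:
        return self.values[variable]

    def set_variable_value(self, assign_op: Any, value: np.ndarray, placeholder: Any):
        # In a TensorFlow backend this would run assign_op while feeding
        # {placeholder: value}; here assign_op is just the variable's key.
        self.values[assign_op] = value

net = ToyVariableStore()
net.set_variable_value('kl_coefficient', np.float32(0.01), placeholder=None)
print(net.get_variable_value('kl_coefficient'))  # 0.01
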
@@ -146,6 +146,14 @@ class TensorFlowArchitecture(Architecture):
         # set the fetches for training
         self._set_initial_fetch_list()

+    def get_model(self) -> None:
+        """
+        Constructs the model using `network_parameters` and sets `input_embedders`, `middleware`,
+        `output_heads`, `outputs`, `losses`, `total_loss`, `adaptive_learning_rate_scheme`,
+        `current_learning_rate`, and `optimizer`
+        """
+        raise NotImplementedError
+
     def _set_initial_fetch_list(self):
         """
         Create an initial list of tensors to fetch in each training iteration
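
A shape-only sketch of what a concrete `get_model` override is expected to leave behind, per the attribute list in the docstring (all values here are dummies, not real Coach objects):

class ToySubclass:
    """Illustrative only: sets the attributes that get_model's docstring names."""
    def get_model(self) -> None:
        self.input_embedders = []                  # one embedder per input
        self.middleware = None                     # joins embedder outputs
        self.output_heads = []                     # one head per output/loss
        self.outputs = []                          # prediction tensors
        self.losses = []                           # head losses (+ regularization)
        self.total_loss = None                     # sum of all losses
        self.adaptive_learning_rate_scheme = None  # optional LR schedule
        self.current_learning_rate = 1e-4          # dummy value
        self.optimizer = None                      # set by the framework backend
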