Mirror of https://github.com/gryf/coach.git (synced 2025-12-17 19:20:19 +01:00)
Add docstring for architecture (#47)

- Removed get_model() from Architecture because it is only an implementation detail of the architecture.

committed by Gal Leibovich
parent 324c67d614
commit 2046358ab0
@@ -14,6 +14,10 @@
 # limitations under the License.
 #

+from typing import Any, Dict, List, Tuple
+
+import numpy as np
+
 from rl_coach.base_parameters import AgentParameters
 from rl_coach.spaces import SpacesDefinition

@@ -21,15 +25,15 @@ from rl_coach.spaces import SpacesDefinition
 class Architecture(object):
     def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, name: str= ""):
         """
+        Creates a neural network 'architecture' that can be trained and used for inference.
+
         :param agent_parameters: the agent parameters
         :param spaces: the spaces (observation, action, etc.) definition of the agent
         :param name: the name of the network
         """
-        # spaces
         self.spaces = spaces

         self.name = name
-        self.network_wrapper_name = self.name.split('/')[0]  # the name can be main/online and the network_wrapper_name will be main
+        self.network_wrapper_name = self.name.split('/')[0]  # e.g. 'main/online' --> 'main'
         self.full_name = "{}/{}".format(agent_parameters.full_name_id, name)
         self.network_parameters = agent_parameters.network_wrappers[self.network_wrapper_name]
         self.batch_size = self.network_parameters.batch_size
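
The name handling above is just a string split plus a dictionary lookup; a minimal sketch of the same resolution, with a plain dict standing in for the real `network_wrappers` field of `AgentParameters`:

# Stand-in for agent_parameters.network_wrappers, which in Coach maps a network
# wrapper name (e.g. 'main') to its NetworkParameters object.
network_wrappers = {'main': {'batch_size': 32}}

name = 'main/online'                       # full network name: <wrapper>/<copy>
network_wrapper_name = name.split('/')[0]  # -> 'main'
network_parameters = network_wrappers[network_wrapper_name]
print(network_parameters['batch_size'])    # 32
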
@@ -37,35 +41,154 @@ class Architecture(object):
         self.optimizer = None
         self.ap = agent_parameters

-    def get_model(self):
+    def predict(self, inputs: Dict[str, np.ndarray]) -> List[np.ndarray]:
+        """
+        Given input observations, use the model to make predictions (e.g. action or value).
+
+        :param inputs: current state (i.e. observations, measurements, goals, etc.)
+            (e.g. `{'observation': numpy.ndarray}` of shape (batch_size, observation_space_size))
+        :return: predictions of action or value, of shape (batch_size, action_space_size) for action predictions
+        """
         pass
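
A usage sketch for `predict`, with a toy stand-in network (not Coach code) so the input/output shapes from the docstring are concrete:

import numpy as np
from typing import Dict, List

class ToyNet:
    """Toy stand-in for a concrete Architecture subclass: one linear head."""
    def __init__(self, obs_size: int, action_size: int):
        self.w = np.ones((obs_size, action_size), dtype=np.float32)

    def predict(self, inputs: Dict[str, np.ndarray]) -> List[np.ndarray]:
        # One prediction per output head, shape (batch_size, action_space_size)
        return [inputs['observation'] @ self.w]

net = ToyNet(obs_size=8, action_size=2)
batch = {'observation': np.zeros((4, 8), dtype=np.float32)}
print(net.predict(batch)[0].shape)  # (4, 2)
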
-    def predict(self, inputs):
+    def train_on_batch(self,
+                       inputs: Dict[str, np.ndarray],
+                       targets: List[np.ndarray],
+                       scaler: float=1.,
+                       additional_fetches: list=None,
+                       importance_weights: np.ndarray=None) -> tuple:
+        """
+        Given a batch of inputs (e.g. states) and targets (e.g. discounted rewards), takes a training step:
+        i.e. runs a forward pass and a backward pass of the network, accumulates the gradients and applies an
+        optimization step to update the weights.
+        Calls `accumulate_gradients` followed by `apply_and_reset_gradients`.
+        Note: Currently an unused method.
+
+        :param inputs: typically the environment states (but can also contain other data necessary for loss)
+            (e.g. `{'observation': numpy.ndarray}` with `observation` of shape (batch_size, observation_space_size) or
+            (batch_size, observation_space_size, stack_size), or
+            `{'observation': numpy.ndarray, 'output_0_0': numpy.ndarray}` with `output_0_0` of shape (batch_size,))
+        :param targets: target values of shape (batch_size,). For example, discounted rewards used for calculating
+            the value-network loss would be a target. The length of the list and the order of the arrays in it
+            match those of the network losses, which are defined by the network parameters
+        :param scaler: value to scale gradients by before optimizing the network weights
+        :param additional_fetches: list of additional values to fetch and return. The type of each list
+            element is framework dependent.
+        :param importance_weights: ndarray of shape (batch_size,) to multiply with the batch loss
+        :return: tuple of (total_loss, losses, norm_unclipped_grads, fetched_tensors)
+            total_loss (float): sum of all head losses
+            losses (list of float): list of all losses. The order is the list of target losses followed by the
+                list of regularization losses. The specifics of the losses depend on the network parameters
+                (number of heads, etc.)
+            norm_unclipped_grads (float): global norm of all gradients before any gradient clipping is applied
+            fetched_tensors: all values for additional_fetches
+        """
         pass
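
The four-element return contract is the fixed part of `train_on_batch`; a sketch of the call and unpacking, with a dummy network (hypothetical, loss values made up) that models only that contract:

import numpy as np

class DummyTrainable:
    """Hypothetical network modeling only the train_on_batch return contract."""
    def train_on_batch(self, inputs, targets, scaler=1., additional_fetches=None,
                       importance_weights=None):
        losses = [0.5, 0.01]  # e.g. one head loss followed by one regularization loss
        return sum(losses), losses, 1.3, additional_fetches or []

net = DummyTrainable()
inputs = {'observation': np.zeros((32, 8), dtype=np.float32)}
targets = [np.zeros((32,), dtype=np.float32)]  # one target array per network loss
total_loss, losses, norm_unclipped_grads, fetched = net.train_on_batch(
    inputs, targets, scaler=1., importance_weights=np.ones((32,), dtype=np.float32))
print(total_loss, losses, norm_unclipped_grads)  # 0.51 [0.5, 0.01] 1.3
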
-    def train_on_batch(self, inputs, targets):
+    def get_weights(self) -> List[np.ndarray]:
+        """
+        Gets model weights as a list of ndarrays. It is used for synchronizing weights between two identical networks.
+
+        :return: list of weights as ndarrays
+        """
         pass
-    def get_weights(self):
+    def set_weights(self, weights: List[np.ndarray], rate: float=1.0) -> None:
+        """
+        Sets model weights for the provided layer parameters.
+
+        :param weights: list of model weights in the same order as received from get_weights
+        :param rate: controls the mixture of given weight values versus old weight values,
+            i.e. new_weight = rate * given_weight + (1 - rate) * old_weight
+        :return: None
+        """
         pass
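
The `rate` argument gives the usual soft (Polyak-style) update; a minimal numpy sketch of the mixing rule stated in the docstring:

import numpy as np

def mix_weights(old_weights, given_weights, rate=1.0):
    # new_weight = rate * given_weight + (1 - rate) * old_weight
    return [rate * given + (1.0 - rate) * old
            for old, given in zip(old_weights, given_weights)]

old = [np.zeros((2, 2)), np.zeros(2)]
given = [np.ones((2, 2)), np.ones(2)]
print(mix_weights(old, given, rate=0.1)[0])  # all entries 0.1

rate=1.0 is a hard copy (e.g. syncing a target network to the online network); a small rate yields a slowly tracking copy.
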
-    def set_weights(self, weights, rate=1.0):
+    def reset_accumulated_gradients(self) -> None:
+        """
+        Sets the gradients of all parameters to 0.
+
+        Once gradients are reset, they must be accessible through the `accumulated_gradients` property of this
+        class, which must return a list of numpy ndarrays. A child class must ensure that `accumulated_gradients`
+        is set.
+        """
         pass
-    def reset_accumulated_gradients(self):
+    def accumulate_gradients(self,
+                             inputs: Dict[str, np.ndarray],
+                             targets: List[np.ndarray],
+                             additional_fetches: list=None,
+                             importance_weights: np.ndarray=None,
+                             no_accumulation: bool=False) ->\
+            Tuple[float, List[float], float, list]:
+        """
+        Given a batch of inputs (i.e. states) and targets (e.g. discounted rewards), computes and accumulates the
+        gradients for the model parameters. Runs a forward and backward pass to compute the gradients, clips the
+        gradient values if required, and then accumulates gradients from all learners. It does not update the
+        model weights; that is performed in the `apply_and_reset_gradients` method.
+
+        Once gradients are accumulated, they are accessed through the `accumulated_gradients` property of this class.
+
+        :param inputs: typically the environment states (but can also contain other data for loss)
+            (e.g. `{'observation': numpy.ndarray}` with `observation` of shape (batch_size, observation_space_size) or
+            (batch_size, observation_space_size, stack_size), or
+            `{'observation': numpy.ndarray, 'output_0_0': numpy.ndarray}` with `output_0_0` of shape (batch_size,))
+        :param targets: targets for calculating the loss. For example, discounted rewards used for calculating
+            the value-network loss would be a target. The length of the list and the order of the arrays in it
+            match those of the network losses, which are defined by the network parameters
+        :param additional_fetches: list of additional values to fetch and return. The type of each list
+            element is framework dependent.
+        :param importance_weights: ndarray of shape (batch_size,) to multiply with the batch loss
+        :param no_accumulation: if True, set the gradient values to the new gradients; otherwise, sum with the
+            previously calculated gradients
+        :return: tuple of (total_loss, losses, norm_unclipped_grads, fetched_tensors)
+            total_loss (float): sum of all head losses
+            losses (list of float): list of all losses. The order is the list of target losses followed by the
+                list of regularization losses. The specifics of the losses depend on the network parameters
+                (number of heads, etc.)
+            norm_unclipped_grads (float): global norm of all gradients before any gradient clipping is applied
+            fetched_tensors: all values for additional_fetches
+        """
         pass
-    def accumulate_gradients(self, inputs, targets):
+    def apply_and_reset_gradients(self, gradients: List[np.ndarray]) -> None:
+        """
+        Applies the given gradients to the network weights and resets the gradient accumulations.
+        Has the same effect as calling `apply_gradients` followed by `reset_accumulated_gradients`.
+
+        :param gradients: gradients for the parameter weights, taken from the `accumulated_gradients` property
+            of an identical network (either self or another identical network)
+        """
         pass
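
Together with `accumulate_gradients`, this supports the pattern where a learner computes gradients locally and applies them to an identical shared network; a sketch using only the methods documented above (`worker` and `shared` are assumed to be identical Architecture instances):

def shared_training_step(worker, shared, inputs, targets):
    # Forward + backward pass; gradients land in worker.accumulated_gradients.
    total_loss, losses, norm_unclipped_grads, _ = worker.accumulate_gradients(
        inputs, targets, no_accumulation=True)
    # Apply the locally computed gradients to the shared weights, then reset.
    shared.apply_and_reset_gradients(worker.accumulated_gradients)
    # Pull the updated shared weights back into the worker (hard copy).
    worker.set_weights(shared.get_weights(), rate=1.0)
    return total_loss
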
-    def apply_and_reset_gradients(self, gradients):
+    def apply_gradients(self, gradients: List[np.ndarray]) -> None:
+        """
+        Applies the given gradients to the network weights.
+        Will be performed synchronously or asynchronously depending on `network_parameters.async_training`.
+
+        :param gradients: gradients for the parameter weights, taken from the `accumulated_gradients` property
+            of an identical network (either self or another identical network)
+        """
         pass
-    def apply_gradients(self, gradients):
+    def get_variable_value(self, variable: Any) -> np.ndarray:
+        """
+        Gets the value of a specified variable. The type of the variable is dependent on the framework.
+        An example of a variable is head.kl_coefficient, which could be a symbol for evaluation
+        or could be a string representing the value.
+
+        :param variable: the variable of interest
+        :return: the value of the specified variable
+        """
         pass
-    def get_variable_value(self, variable):
-        pass
-
-    def set_variable_value(self, assign_op, value, placeholder=None):
+    def set_variable_value(self, assign_op: Any, value: np.ndarray, placeholder: Any):
+        """
+        Updates the value of a specified variable. The type of assign_op is dependent on the framework
+        and is a unique identifier for assigning a value to a variable. For example, an agent may use
+        head.assign_kl_coefficient. There is a one-to-one mapping between assign_op and placeholder
+        (in the example above, placeholder would be head.kl_coefficient_ph).
+
+        :param assign_op: a parameter representing the operation for assigning a value to a specific variable
+        :param value: the value of the specified variable used for the update
+        :param placeholder: a placeholder for binding the value to assign_op
+        """
         pass
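
A toy sketch (not Coach or TensorFlow code) of the assign_op/placeholder contract described above: the op identifies which variable to write, and the placeholder carries the value. Here the op is simply the variable's name:

import numpy as np
from typing import Any

class ToyVariableStore:
    """Toy model of get_variable_value/set_variable_value over a dict of values."""
    def __init__(self):
        self.values = {'kl_coefficient': np.float32(0.2)}

    def get_variable_value(self, variable: Any) -> np.ndarray:
        return self.values[variable]

    def set_variable_value(self, assign_op: Any, value: np.ndarray, placeholder: Any):
        # In a TensorFlow backend this would run assign_op while feeding
        # {placeholder: value}; here assign_op is just the variable's key.
        self.values[assign_op] = value

net = ToyVariableStore()
net.set_variable_value('kl_coefficient', np.float32(0.01), placeholder=None)
print(net.get_variable_value('kl_coefficient'))  # 0.01
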
@@ -146,6 +146,14 @@ class TensorFlowArchitecture(Architecture):
         # set the fetches for training
         self._set_initial_fetch_list()

+    def get_model(self) -> None:
+        """
+        Constructs the model using `network_parameters` and sets `input_embedders`, `middleware`,
+        `output_heads`, `outputs`, `losses`, `total_loss`, `adaptive_learning_rate_scheme`,
+        `current_learning_rate`, and `optimizer`
+        """
+        raise NotImplementedError
+
     def _set_initial_fetch_list(self):
         """
         Create an initial list of tensors to fetch in each training iteration
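
A shape-only sketch of what a concrete `get_model` override is expected to leave behind, per the attribute list in the docstring (all values here are dummies, not real Coach objects):

class ToySubclass:
    """Illustrative only: sets the attributes that get_model's docstring names."""
    def get_model(self) -> None:
        self.input_embedders = []                  # one embedder per input
        self.middleware = None                     # joins embedder outputs
        self.output_heads = []                     # one head per output/loss
        self.outputs = []                          # prediction tensors
        self.losses = []                           # head losses (+ regularization)
        self.total_loss = None                     # sum of all losses
        self.adaptive_learning_rate_scheme = None  # optional LR schedule
        self.current_learning_rate = 1e-4          # dummy value
        self.optimizer = None                      # set by the framework backend
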