#
# Copyright (c) 2017 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from typing import Any, Dict, List, Tuple

import numpy as np

from rl_coach.base_parameters import AgentParameters
from rl_coach.spaces import SpacesDefinition


class Architecture(object):
    def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, name: str = ""):
        """
        Creates a neural network 'architecture' that can be trained and used for inference.

        :param agent_parameters: the agent parameters
        :param spaces: the spaces (observation, action, etc.) definition of the agent
        :param name: the name of the network
        """
        self.spaces = spaces
        self.name = name
        self.network_wrapper_name = self.name.split('/')[0]  # e.g. 'main/online' --> 'main'
        self.full_name = "{}/{}".format(agent_parameters.full_name_id, name)
        self.network_parameters = agent_parameters.network_wrappers[self.network_wrapper_name]
        self.batch_size = self.network_parameters.batch_size
        self.learning_rate = self.network_parameters.learning_rate
        self.optimizer = None
        self.ap = agent_parameters

    def predict(self, inputs: Dict[str, np.ndarray]) -> List[np.ndarray]:
        """
        Given input observations, uses the model to make predictions (e.g. action or value).

        :param inputs: current state (i.e. observations, measurements, goals, etc.),
            e.g. `{'observation': numpy.ndarray}` of shape (batch_size, observation_space_size)
        :return: predictions of action or value, e.g. of shape (batch_size, action_space_size)
            for action predictions
        """
        pass
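    # A minimal usage sketch (illustrative only; `network` stands in for any concrete
    # framework-specific subclass that actually implements `predict`):
    #
    #     batch = {'observation': np.random.rand(32, 10).astype(np.float32)}
    #     action_values = network.predict(batch)  # e.g. a list of (32, num_actions) arrays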
    def train_on_batch(self, inputs: Dict[str, np.ndarray], targets: List[np.ndarray], scaler: float=1.,
                       additional_fetches: list=None, importance_weights: np.ndarray=None) -> tuple:
        """
        Given a batch of inputs (e.g. states) and targets (e.g. discounted rewards), takes a training step:
        i.e. runs a forward pass and a backward pass of the network, accumulates the gradients and applies
        an optimization step to update the weights. Calls `accumulate_gradients` followed by
        `apply_and_reset_gradients`.
        Note: currently an unused method.

        :param inputs: typically the environment states (but can also contain other data necessary for
            the loss), e.g. `{'observation': numpy.ndarray}` with `observation` of shape
            (batch_size, observation_space_size) or (batch_size, observation_space_size, stack_size), or
            `{'observation': numpy.ndarray, 'output_0_0': numpy.ndarray}` with `output_0_0` of shape
            (batch_size,)
        :param targets: target values of shape (batch_size,). For example, discounted rewards for a value
            network would be the target for calculating the value-network loss. The length and order of
            the list match those of the network losses, which are defined by the network parameters.
        :param scaler: value to scale gradients by before optimizing the network weights
        :param additional_fetches: list of additional values to fetch and return. The type of each list
            element is framework dependent.
        :param importance_weights: ndarray of shape (batch_size,) to multiply with the batch loss
        :return: tuple of (total_loss, losses, norm_unclipped_grads, fetched_tensors)
            total_loss (float): sum of all head losses
            losses (list of float): list of all losses. The order is the list of target losses, followed
                by the list of regularization losses. The specifics of the losses depend on the network
                parameters (number of heads, etc.)
            norm_unclipped_grads (float): global norm of all gradients, before any gradient clipping is
                applied
            fetched_tensors: all values for additional_fetches
        """
        pass

    def get_weights(self) -> List[np.ndarray]:
        """
        Gets the model weights as a list of ndarrays. Used for synchronizing the weights between two
        identical networks.

        :return: list of weights, each as an ndarray
        """
        pass

    def set_weights(self, weights: List[np.ndarray], rate: float=1.0) -> None:
        """
        Sets the model weights for the provided layer parameters.

        :param weights: list of model weights in the same order as returned by `get_weights`
        :param rate: controls the mixture of the given weight values versus the old weight values,
            i.e. new_weight = rate * given_weight + (1 - rate) * old_weight
        :return: None
        """
        pass

    def reset_accumulated_gradients(self) -> None:
        """
        Sets the gradients of all parameters to 0. Once the gradients are reset, they must be accessible
        through the `accumulated_gradients` property of this class, which must return a list of numpy
        ndarrays. A child class must ensure that `accumulated_gradients` is set.
        """
        pass
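    # The `rate` argument of `set_weights` implements a soft (Polyak-style) update. A numpy
    # sketch of the mixing rule, assuming `old` and `given` are weight lists obtained from
    # `get_weights` of two identical networks:
    #
    #     mixed = [rate * g + (1.0 - rate) * o for g, o in zip(given, old)]
    #
    # With rate=1.0 this reduces to a hard copy; smaller rates blend gradually toward `given`.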
    def accumulate_gradients(self, inputs: Dict[str, np.ndarray], targets: List[np.ndarray],
                             additional_fetches: list=None, importance_weights: np.ndarray=None,
                             no_accumulation: bool=False) -> Tuple[float, List[float], float, list]:
        """
        Given a batch of inputs (i.e. states) and targets (e.g. discounted rewards), computes and
        accumulates the gradients for the model parameters. Runs a forward and a backward pass to compute
        the gradients, clips the gradient values if required, and then accumulates the gradients from all
        learners. It does not update the model weights; that is performed in the
        `apply_and_reset_gradients` method. Once the gradients are accumulated, they are accessible
        through the `accumulated_gradients` property of this class.

        :param inputs: typically the environment states (but can also contain other data for the loss),
            e.g. `{'observation': numpy.ndarray}` with `observation` of shape
            (batch_size, observation_space_size) or (batch_size, observation_space_size, stack_size), or
            `{'observation': numpy.ndarray, 'output_0_0': numpy.ndarray}` with `output_0_0` of shape
            (batch_size,)
        :param targets: targets for calculating the loss. For example, discounted rewards for a value
            network would be the target for calculating the value-network loss. The length and order of
            the list match those of the network losses, which are defined by the network parameters.
        :param additional_fetches: list of additional values to fetch and return. The type of each list
            element is framework dependent.
        :param importance_weights: ndarray of shape (batch_size,) to multiply with the batch loss
        :param no_accumulation: if True, set the gradient values to the new gradients; otherwise, sum
            them with the previously calculated gradients
        :return: tuple of (total_loss, losses, norm_unclipped_grads, fetched_tensors)
            total_loss (float): sum of all head losses
            losses (list of float): list of all losses. The order is the list of target losses, followed
                by the list of regularization losses. The specifics of the losses depend on the network
                parameters (number of heads, etc.)
            norm_unclipped_grads (float): global norm of all gradients, before any gradient clipping is
                applied
            fetched_tensors: all values for additional_fetches
        """
        pass

    def apply_and_reset_gradients(self, gradients: List[np.ndarray]) -> None:
        """
        Applies the given gradients to the network weights and resets the gradient accumulations.
        Has the same effect as calling `apply_gradients` followed by `reset_accumulated_gradients`.

        :param gradients: gradients for the parameter weights, taken from the `accumulated_gradients`
            property of an identical network (either self or another identical network)
        """
        pass

    def apply_gradients(self, gradients: List[np.ndarray]) -> None:
        """
        Applies the given gradients to the network weights. Performed synchronously or asynchronously,
        depending on `network_parameters.async_training`.

        :param gradients: gradients for the parameter weights, taken from the `accumulated_gradients`
            property of an identical network (either self or another identical network)
        """
        pass

    def get_variable_value(self, variable: Any) -> np.ndarray:
        """
        Gets the value of a specified variable. The type of the variable is framework dependent.
        An example of a variable is head.kl_coefficient, which could be a symbol for evaluation
        or could be a string representing the value.

        :param variable: the variable of interest
        :return: the value of the specified variable
        """
        pass

    def set_variable_value(self, assign_op: Any, value: np.ndarray, placeholder: Any):
        """
        Updates the value of a specified variable. The type of assign_op is framework dependent and is a
        unique identifier for assigning a value to a variable. For example, an agent may use
        head.assign_kl_coefficient. There is a one-to-one mapping between assign_op and placeholder
        (in the example above, the placeholder would be head.kl_coefficient_ph).

        :param assign_op: a parameter representing the operation for assigning a value to a specific variable
        :param value: the value of the specified variable used for the update
        :param placeholder: a placeholder for binding the value to assign_op
        """
        pass
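
# ---------------------------------------------------------------------------
# Illustrative sketch only, not part of the Coach API: a minimal numpy-based
# subclass showing how the accumulate/apply/reset contract above composes for
# a linear model with a squared-error loss. The class name and its internals
# are hypothetical and exist purely to demonstrate the expected call sequence.
# ---------------------------------------------------------------------------
class _ToyLinearArchitecture(Architecture):
    def __init__(self, num_features: int):
        # deliberately skips Architecture.__init__, which expects full agent parameters
        self.weights = [np.zeros(num_features)]
        self.accumulated_gradients = [np.zeros(num_features)]
        self.learning_rate = 0.01

    def predict(self, inputs: Dict[str, np.ndarray]) -> List[np.ndarray]:
        # forward pass: a single linear head
        return [inputs['observation'] @ self.weights[0]]

    def reset_accumulated_gradients(self) -> None:
        self.accumulated_gradients = [np.zeros_like(w) for w in self.weights]

    def accumulate_gradients(self, inputs: Dict[str, np.ndarray], targets: List[np.ndarray],
                             additional_fetches: list=None, importance_weights: np.ndarray=None,
                             no_accumulation: bool=False) -> Tuple[float, List[float], float, list]:
        x, y = inputs['observation'], targets[0]
        errors = x @ self.weights[0] - y
        loss = float(np.mean(errors ** 2))
        # gradient of the mean squared error with respect to the weights
        grad = 2.0 * (x.T @ errors) / len(y)
        if no_accumulation:
            self.accumulated_gradients = [grad]
        else:
            self.accumulated_gradients[0] += grad
        return loss, [loss], float(np.linalg.norm(grad)), []

    def apply_gradients(self, gradients: List[np.ndarray]) -> None:
        # plain SGD step in place of a framework optimizer
        self.weights = [w - self.learning_rate * g for w, g in zip(self.weights, gradients)]

    def apply_and_reset_gradients(self, gradients: List[np.ndarray]) -> None:
        self.apply_gradients(gradients)
        self.reset_accumulated_gradients()

    def train_on_batch(self, inputs: Dict[str, np.ndarray], targets: List[np.ndarray], scaler: float=1.,
                       additional_fetches: list=None, importance_weights: np.ndarray=None) -> tuple:
        # the composition described in the docstring: accumulate, then apply-and-reset
        result = self.accumulate_gradients(inputs, targets)
        self.apply_and_reset_gradients([scaler * g for g in self.accumulated_gradients])
        return result

# Example call sequence (hypothetical):
#     net = _ToyLinearArchitecture(num_features=4)
#     batch = {'observation': np.random.rand(8, 4)}
#     total_loss, losses, grad_norm, _ = net.train_on_batch(batch, [np.zeros(8)])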