<!DOCTYPE html>
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Architectures &mdash; Reinforcement Learning Coach 0.12.0 documentation</title>
<script type="text/javascript" src="../../_static/js/modernizr.min.js"></script>
<script type="text/javascript" id="documentation_options" data-url_root="../../" src="../../_static/documentation_options.js"></script>
<script type="text/javascript" src="../../_static/jquery.js"></script>
<script type="text/javascript" src="../../_static/underscore.js"></script>
<script type="text/javascript" src="../../_static/doctools.js"></script>
<script type="text/javascript" src="../../_static/language_data.js"></script>
<script async="async" type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/latest.js?config=TeX-AMS-MML_HTMLorMML"></script>
<script type="text/javascript" src="../../_static/js/theme.js"></script>
<link rel="stylesheet" href="../../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../../_static/css/custom.css" type="text/css" />
<link rel="index" title="Index" href="../../genindex.html" />
<link rel="search" title="Search" href="../../search.html" />
<link rel="next" title="Data Stores" href="../data_stores/index.html" />
<link rel="prev" title="Quantile Regression DQN" href="../agents/value_optimization/qr_dqn.html" />
<link href="../../_static/css/custom.css" rel="stylesheet" type="text/css">
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../../index.html" class="icon icon-home"> Reinforcement Learning Coach
<img src="../../_static/dark_logo.png" class="logo" alt="Logo"/>
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div>
<div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../dashboard.html">Coach Dashboard</a></li>
</ul>
<p class="caption"><span class="caption-text">Design</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../contributing/add_agent.html">Adding a New Agent</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../contributing/add_env.html">Adding a New Environment</a></li>
</ul>
<p class="caption"><span class="caption-text">Components</span></p>
<ul class="current">
<li class="toctree-l1"><a class="reference internal" href="../agents/index.html">Agents</a></li>
<li class="toctree-l1 current"><a class="current reference internal" href="#">Architectures</a><ul>
<li class="toctree-l2"><a class="reference internal" href="#architecture">Architecture</a></li>
<li class="toctree-l2"><a class="reference internal" href="#networkwrapper">NetworkWrapper</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../additional_parameters.html">Additional Parameters</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
<nav class="wy-nav-top" aria-label="top navigation">
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../../index.html">Reinforcement Learning Coach</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="breadcrumbs navigation">
<ul class="wy-breadcrumbs">
<li><a href="../../index.html">Docs</a> &raquo;</li>
<li>Architectures</li>
<li class="wy-breadcrumbs-aside">
<a href="../../_sources/components/architectures/index.rst.txt" rel="nofollow"> View page source</a>
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<div class="section" id="architectures">
<h1>Architectures<a class="headerlink" href="#architectures" title="Permalink to this headline"></a></h1>
<p>Architectures contain all the classes that implement the neural network functionality for the agent.
Since Coach is intended to work with multiple neural network frameworks, each framework implements its
own components under a dedicated directory. For example, the tensorflow directory contains all the neural network
parts that are implemented using TensorFlow.</p>
<dl class="class">
<dt id="rl_coach.base_parameters.NetworkParameters">
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.base_parameters.</code><code class="sig-name descname">NetworkParameters</code><span class="sig-paren">(</span><em class="sig-param">force_cpu=False</em>, <em class="sig-param">async_training=False</em>, <em class="sig-param">shared_optimizer=True</em>, <em class="sig-param">scale_down_gradients_by_number_of_workers_for_sync_training=True</em>, <em class="sig-param">clip_gradients=None</em>, <em class="sig-param">gradients_clipping_method=&lt;GradientClippingMethod.ClipByGlobalNorm: 0&gt;</em>, <em class="sig-param">l2_regularization=0</em>, <em class="sig-param">learning_rate=0.00025</em>, <em class="sig-param">learning_rate_decay_rate=0</em>, <em class="sig-param">learning_rate_decay_steps=0</em>, <em class="sig-param">input_embedders_parameters={}</em>, <em class="sig-param">embedding_merger_type=&lt;EmbeddingMergerType.Concat: 0&gt;</em>, <em class="sig-param">middleware_parameters=None</em>, <em class="sig-param">heads_parameters=[]</em>, <em class="sig-param">use_separate_networks_per_head=False</em>, <em class="sig-param">optimizer_type='Adam'</em>, <em class="sig-param">optimizer_epsilon=0.0001</em>, <em class="sig-param">adam_optimizer_beta1=0.9</em>, <em class="sig-param">adam_optimizer_beta2=0.99</em>, <em class="sig-param">rms_prop_optimizer_decay=0.9</em>, <em class="sig-param">batch_size=32</em>, <em class="sig-param">replace_mse_with_huber_loss=False</em>, <em class="sig-param">create_target_network=False</em>, <em class="sig-param">tensorflow_support=True</em>, <em class="sig-param">softmax_temperature=1</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/base_parameters.html#NetworkParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.base_parameters.NetworkParameters" title="Permalink to this definition"></a></dt>
<dd><dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>force_cpu</strong> Force the neural networks to run on the CPU even if a GPU is available</p></li>
<li><p><strong>async_training</strong> If set to True, asynchronous training will be used, meaning that each worker will progress at its own
pace, without waiting for the rest of the workers to calculate their gradients.</p></li>
<li><p><strong>shared_optimizer</strong> If set to True, a central optimizer, shared by all the workers, will be used for applying
gradients to the network. Otherwise, each worker will have its own optimizer with its own internal
parameters that will only be affected by the gradients calculated by that worker</p></li>
<li><p><strong>scale_down_gradients_by_number_of_workers_for_sync_training</strong> If set to True, in synchronous training, the gradients of each worker will be scaled down by the
number of workers. This essentially means that the gradients applied to the network are the average
of the gradients over all the workers.</p></li>
<li><p><strong>clip_gradients</strong> A value that will be used for clipping the gradients of the network. If set to None, no gradient clipping
will be applied. Otherwise, the gradients will be clipped according to the gradients_clipping_method.</p></li>
<li><p><strong>gradients_clipping_method</strong> A gradient clipping method, defined by a GradientClippingMethod enum, and that will be used to clip the
gradients of the network. This will only be used if the clip_gradients value is defined as a value other
than None.</p></li>
<li><p><strong>l2_regularization</strong> An L2 regularization weight that will be applied to the network weights while calculating the loss function</p></li>
<li><p><strong>learning_rate</strong> The learning rate for the network</p></li>
<li><p><strong>learning_rate_decay_rate</strong> If this value is larger than 0, an exponential decay will be applied to the network learning rate.
The rate of the decay is defined by this parameter, and the number of training steps the decay will be
applied is defined by learning_rate_decay_steps. Notice that both parameters should be defined in order
for this to work correctly.</p></li>
<li><p><strong>learning_rate_decay_steps</strong> If the learning_rate_decay_rate of the network is larger than 0, an exponential decay will be applied to
the network learning rate. The number of steps the decay will be applied is defined by this parameter.
Notice that both this parameter, as well as learning_rate_decay_rate should be defined in order for the
learning rate decay to work correctly.</p></li>
<li><p><strong>input_embedders_parameters</strong> A dictionary mapping between input names and input embedders (InputEmbedderParameters) to use for the
network. Each of the keys is an input name as returned from the environment in the state.
For example, if the environment returns a state containing observation and measurements, then
the keys for the input embedders dictionary can be either observation to use the observation as input,
measurements to use the measurements as input, or both.
The embedder type will be automatically selected according to the input type. Vector inputs will
produce a fully connected embedder, and image inputs will produce a convolutional embedder.</p></li>
<li><p><strong>embedding_merger_type</strong> The type of embedding merging to use, given by one of the EmbeddingMergerType enum values.
This will be used to merge the outputs of all the input embedders into a single embedding.</p></li>
<li><p><strong>middleware_parameters</strong> The parameters of the middleware to use, given by a MiddlewareParameters object.
Each network will have only a single middleware embedder which will take the merged embeddings from the
input embedders and pass them through more neural network layers.</p></li>
<li><p><strong>heads_parameters</strong> A list of heads for the network given by their corresponding HeadParameters.
Each network can have one or multiple network heads, where each one will take the output of the middleware
and perform some additional computation on top of it. Additionally, each head calculates a weighted loss value,
and the loss values from all the heads will be summed later on.</p></li>
<li><p><strong>use_separate_networks_per_head</strong> A flag that allows using different copies of the input embedders and middleware for each one of the heads.
Normally, the heads will have a shared input, but when use_separate_networks_per_head is set
to True, each one of the heads will get a different input.</p></li>
<li><p><strong>optimizer_type</strong> A string specifying the optimizer type to use for updating the network. The available optimizers are
Adam, RMSProp and LBFGS.</p></li>
<li><p><strong>optimizer_epsilon</strong> An internal optimizer parameter used for Adam and RMSProp.</p></li>
<li><p><strong>adam_optimizer_beta1</strong> The beta1 internal optimizer parameter used for Adam. It will be used only if Adam was selected as the
optimizer for the network.</p></li>
<li><p><strong>adam_optimizer_beta2</strong> The beta2 internal optimizer parameter used for Adam. It will be used only if Adam was selected as the
optimizer for the network.</p></li>
<li><p><strong>rms_prop_optimizer_decay</strong> The decay value for the RMSProp optimizer, which will be used only in case the RMSProp optimizer was
selected for this network.</p></li>
<li><p><strong>batch_size</strong> The batch size to use when updating the network.</p></li>
<li><p><strong>replace_mse_with_huber_loss</strong> If set to True, a Huber loss will be used instead of the mean squared error loss.</p></li>
<li><p><strong>create_target_network</strong> If this flag is set to True, an additional copy of the network will be created and initialized with the
same weights as the online network. It can then be queried, and its weights can be synced from the
online network at will.</p></li>
<li><p><strong>tensorflow_support</strong> A flag which specifies if the network is supported by the TensorFlow framework.</p></li>
<li><p><strong>softmax_temperature</strong> If a softmax is present in the network head output, use this temperature</p></li>
</ul>
</dd>
</dl>
</dd></dl>
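<p>For illustration, a value-based agent might configure its network roughly as in the following sketch. The import paths follow the Coach 0.12 module layout, and the exact embedder, middleware and head classes are assumptions that depend on the agent and the Coach version.</p>
<div class="highlight-default notranslate"><div class="highlight"><pre>from rl_coach.base_parameters import NetworkParameters
from rl_coach.architectures.embedder_parameters import InputEmbedderParameters
from rl_coach.architectures.middleware_parameters import FCMiddlewareParameters
from rl_coach.architectures.head_parameters import QHeadParameters

class MyDQNNetworkParameters(NetworkParameters):
    def __init__(self):
        super().__init__()
        # one input embedder per state input; 'observation' matches the key
        # returned by the environment in the state
        self.input_embedders_parameters = {'observation': InputEmbedderParameters()}
        # a single middleware takes the merged embeddings
        self.middleware_parameters = FCMiddlewareParameters()
        # a Q-value head on top of the middleware output
        self.heads_parameters = [QHeadParameters()]
        self.optimizer_type = 'Adam'
        self.batch_size = 32
        self.create_target_network = True
</pre></div></div>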
<div class="section" id="architecture">
<h2>Architecture<a class="headerlink" href="#architecture" title="Permalink to this headline"></a></h2>
<dl class="class">
<dt id="rl_coach.architectures.architecture.Architecture">
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.architectures.architecture.</code><code class="sig-name descname">Architecture</code><span class="sig-paren">(</span><em class="sig-param">agent_parameters: rl_coach.base_parameters.AgentParameters</em>, <em class="sig-param">spaces: rl_coach.spaces.SpacesDefinition</em>, <em class="sig-param">name: str = ''</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/architectures/architecture.html#Architecture"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.architecture.Architecture" title="Permalink to this definition"></a></dt>
<dd><p>Creates a neural network architecture that can be trained and used for inference.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>agent_parameters</strong> the agent parameters</p></li>
<li><p><strong>spaces</strong> the spaces (observation, action, etc.) definition of the agent</p></li>
<li><p><strong>name</strong> the name of the network</p></li>
</ul>
</dd>
</dl>
<dl class="method">
<dt id="rl_coach.architectures.architecture.Architecture.accumulate_gradients">
<code class="sig-name descname">accumulate_gradients</code><span class="sig-paren">(</span><em class="sig-param">inputs: Dict[str, numpy.ndarray], targets: List[numpy.ndarray], additional_fetches: list = None, importance_weights: numpy.ndarray = None, no_accumulation: bool = False</em><span class="sig-paren">)</span> &#x2192; Tuple[float, List[float], float, list]<a class="reference internal" href="../../_modules/rl_coach/architectures/architecture.html#Architecture.accumulate_gradients"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.architecture.Architecture.accumulate_gradients" title="Permalink to this definition"></a></dt>
<dd><p>Given a batch of inputs (i.e. states) and targets (e.g. discounted rewards), computes and accumulates the
gradients for the model parameters. Will run a forward and backward pass to compute the gradients, clip the gradient
values if required, and then accumulate the gradients from all learners. It does not update the model weights;
that is performed by the <cite>apply_and_reset_gradients</cite> method.</p>
<p>Once gradients are accumulated, they are accessible through the <cite>accumulated_gradients</cite> property of this class.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>inputs</strong> typically the environment states (but can also contain other data for the loss),
e.g. <cite>{observation: numpy.ndarray}</cite> with <cite>observation</cite> of shape (batch_size, observation_space_size) or
(batch_size, observation_space_size, stack_size), or
<cite>{observation: numpy.ndarray, output_0_0: numpy.ndarray}</cite> with <cite>output_0_0</cite> of shape (batch_size,)</p></li>
<li><p><strong>targets</strong> targets for calculating the loss. For example, discounted rewards would be the target
for calculating the value-network loss. The length of the list and the order of the arrays
match those of the network losses, which are defined by the network parameters.</p></li>
<li><p><strong>additional_fetches</strong> list of additional values to fetch and return. The type of each list
element is framework dependent.</p></li>
<li><p><strong>importance_weights</strong> ndarray of shape (batch_size,) to multiply with batch loss.</p></li>
<li><p><strong>no_accumulation</strong> if True, set gradient values to the new gradients, otherwise sum with previously
calculated gradients</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p><p>tuple of total_loss, losses, norm_unclipped_grads, fetched_tensors
total_loss (float): sum of all head losses
losses (list of float): list of all losses. The order is list of target losses followed by list of</p>
<blockquote>
<div><p>regularization losses. The specifics of losses is dependant on the network parameters
(number of heads, etc.)</p>
</div></blockquote>
<p>norm_unclippsed_grads (float): global norm of all gradients before any gradient clipping is applied
fetched_tensors: all values for additional_fetches</p>
</p>
</dd>
</dl>
</dd></dl>
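<p>To make the accumulate/apply division of labor concrete, a single update step might look like the sketch below, where <cite>network</cite> stands for any concrete Architecture subclass instance and the input and target shapes are hypothetical.</p>
<div class="highlight-default notranslate"><div class="highlight"><pre>import numpy as np

# hypothetical batch: 32 states of size 4, one target array per network loss
inputs = {'observation': np.random.rand(32, 4)}
targets = [np.random.rand(32, 1)]

# forward + backward pass; gradients are accumulated, weights are NOT updated yet
total_loss, losses, grad_norm, fetches = network.accumulate_gradients(inputs, targets)

# apply the accumulated gradients to the weights and reset the accumulators
network.apply_and_reset_gradients(network.accumulated_gradients)
</pre></div></div>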
<dl class="method">
<dt id="rl_coach.architectures.architecture.Architecture.apply_and_reset_gradients">
<code class="sig-name descname">apply_and_reset_gradients</code><span class="sig-paren">(</span><em class="sig-param">gradients: List[numpy.ndarray], scaler: float = 1.0</em><span class="sig-paren">)</span> &#x2192; None<a class="reference internal" href="../../_modules/rl_coach/architectures/architecture.html#Architecture.apply_and_reset_gradients"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.architecture.Architecture.apply_and_reset_gradients" title="Permalink to this definition"></a></dt>
<dd><p>Applies the given gradients to the network weights and resets the gradient accumulations.
Has the same impact as calling <cite>apply_gradients</cite>, then <cite>reset_accumulated_gradients</cite>.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>gradients</strong> gradients for the parameter weights, taken from <cite>accumulated_gradients</cite> property
of an identical network (either self or another identical network)</p></li>
<li><p><strong>scaler</strong> A scaling factor that allows rescaling the gradients before applying them</p></li>
</ul>
</dd>
</dl>
</dd></dl>
<dl class="method">
<dt id="rl_coach.architectures.architecture.Architecture.apply_gradients">
<code class="sig-name descname">apply_gradients</code><span class="sig-paren">(</span><em class="sig-param">gradients: List[numpy.ndarray], scaler: float = 1.0</em><span class="sig-paren">)</span> &#x2192; None<a class="reference internal" href="../../_modules/rl_coach/architectures/architecture.html#Architecture.apply_gradients"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.architecture.Architecture.apply_gradients" title="Permalink to this definition"></a></dt>
<dd><p>Applies the given gradients to the network weights.
This will be performed synchronously or asynchronously, depending on <cite>network_parameters.async_training</cite>.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>gradients</strong> gradients for the parameter weights, taken from <cite>accumulated_gradients</cite> property
of an identical network (either self or another identical network)</p></li>
<li><p><strong>scaler</strong> A scaling factor that allows rescaling the gradients before applying them</p></li>
</ul>
</dd>
</dl>
</dd></dl>
<dl class="method">
<dt id="rl_coach.architectures.architecture.Architecture.collect_savers">
<code class="sig-name descname">collect_savers</code><span class="sig-paren">(</span><em class="sig-param">parent_path_suffix: str</em><span class="sig-paren">)</span> &#x2192; rl_coach.saver.SaverCollection<a class="reference internal" href="../../_modules/rl_coach/architectures/architecture.html#Architecture.collect_savers"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.architecture.Architecture.collect_savers" title="Permalink to this definition"></a></dt>
<dd><p>Collection of all savers for the network (typically only one saver for the network and one for ONNX export).</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><p><strong>parent_path_suffix</strong> path suffix of the parent of the network
(e.g. could be the name of the level manager plus the name of the agent)</p>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>saver collection for the network</p>
</dd>
</dl>
</dd></dl>
<dl class="method">
<dt id="rl_coach.architectures.architecture.Architecture.construct">
<em class="property">static </em><code class="sig-name descname">construct</code><span class="sig-paren">(</span><em class="sig-param">variable_scope: str, devices: List[str], *args, **kwargs</em><span class="sig-paren">)</span> &#x2192; rl_coach.architectures.architecture.Architecture<a class="reference internal" href="../../_modules/rl_coach/architectures/architecture.html#Architecture.construct"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.architecture.Architecture.construct" title="Permalink to this definition"></a></dt>
<dd><p>Construct a network class using the provided variable scope and on the requested devices.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>variable_scope</strong> string specifying the variable scope under which to create the network variables</p></li>
<li><p><strong>devices</strong> list of devices (can be a list of Device objects, or strings for TF distributed)</p></li>
<li><p><strong>args</strong> all other arguments for the class initializer</p></li>
<li><p><strong>kwargs</strong> all other keyword arguments for the class initializer</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>an object which is a child of Architecture</p>
</dd>
</dl>
</dd></dl>
<dl class="method">
<dt id="rl_coach.architectures.architecture.Architecture.get_variable_value">
<code class="sig-name descname">get_variable_value</code><span class="sig-paren">(</span><em class="sig-param">variable: Any</em><span class="sig-paren">)</span> &#x2192; numpy.ndarray<a class="reference internal" href="../../_modules/rl_coach/architectures/architecture.html#Architecture.get_variable_value"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.architecture.Architecture.get_variable_value" title="Permalink to this definition"></a></dt>
<dd><p>Gets the value of a specified variable. The type of the variable depends on the framework.
An example of a variable is head.kl_coefficient, which could be a symbol for evaluation
or could be a string representing the value.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><p><strong>variable</strong> variable of interest</p>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>value of the specified variable</p>
</dd>
</dl>
</dd></dl>
<dl class="method">
<dt id="rl_coach.architectures.architecture.Architecture.get_weights">
<code class="sig-name descname">get_weights</code><span class="sig-paren">(</span><span class="sig-paren">)</span> &#x2192; List[numpy.ndarray]<a class="reference internal" href="../../_modules/rl_coach/architectures/architecture.html#Architecture.get_weights"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.architecture.Architecture.get_weights" title="Permalink to this definition"></a></dt>
<dd><p>Gets the model weights as a list of ndarrays. It is used for synchronizing the weights between two identical networks.</p>
<dl class="field-list simple">
<dt class="field-odd">Returns</dt>
<dd class="field-odd"><p>list weights as ndarray</p>
</dd>
</dl>
</dd></dl>
<dl class="method">
<dt id="rl_coach.architectures.architecture.Architecture.parallel_predict">
<em class="property">static </em><code class="sig-name descname">parallel_predict</code><span class="sig-paren">(</span><em class="sig-param">sess: Any, network_input_tuples: List[Tuple[Architecture, Dict[str, numpy.ndarray]]]</em><span class="sig-paren">)</span> &#x2192; Tuple[numpy.ndarray, ...]<a class="reference internal" href="../../_modules/rl_coach/architectures/architecture.html#Architecture.parallel_predict"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.architecture.Architecture.parallel_predict" title="Permalink to this definition"></a></dt>
<dd><dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>sess</strong> active session to use for prediction</p></li>
<li><p><strong>network_input_tuples</strong> a list of (network, inputs) tuples, pairing each network with its corresponding input</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>list or tuple of outputs from all networks</p>
</dd>
</dl>
</dd></dl>
<dl class="method">
<dt id="rl_coach.architectures.architecture.Architecture.predict">
<code class="sig-name descname">predict</code><span class="sig-paren">(</span><em class="sig-param">inputs: Dict[str, numpy.ndarray], outputs: List[Any] = None, squeeze_output: bool = True, initial_feed_dict: Dict[Any, numpy.ndarray] = None</em><span class="sig-paren">)</span> &#x2192; Tuple[numpy.ndarray, ...]<a class="reference internal" href="../../_modules/rl_coach/architectures/architecture.html#Architecture.predict"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.architecture.Architecture.predict" title="Permalink to this definition"></a></dt>
<dd><p>Given input observations, use the model to make predictions (e.g. action or value).</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>inputs</strong> current state (i.e. observations, measurements, goals, etc.)
(e.g. <cite>{observation: numpy.ndarray}</cite> of shape (batch_size, observation_space_size))</p></li>
<li><p><strong>outputs</strong> list of outputs to return. Return all outputs if unspecified. Type of the list elements
depends on the framework backend.</p></li>
<li><p><strong>squeeze_output</strong> if True, call squeeze_list on the output before returning</p></li>
<li><p><strong>initial_feed_dict</strong> a dictionary of extra inputs for forward pass.</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>predictions of action or value of shape (batch_size, action_space_size) for action predictions)</p>
</dd>
</dl>
</dd></dl>
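<p>For instance, querying the network for a single state could look like the sketch below, where the input key and shapes are assumptions that depend on the agent's spaces definition.</p>
<div class="highlight-default notranslate"><div class="highlight"><pre>import numpy as np

state = np.random.rand(4)  # hypothetical observation vector

# add a batch dimension of 1; predict returns the head outputs
action_values = network.predict({'observation': np.expand_dims(state, 0)})
</pre></div></div>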
<dl class="method">
<dt id="rl_coach.architectures.architecture.Architecture.reset_accumulated_gradients">
<code class="sig-name descname">reset_accumulated_gradients</code><span class="sig-paren">(</span><span class="sig-paren">)</span> &#x2192; None<a class="reference internal" href="../../_modules/rl_coach/architectures/architecture.html#Architecture.reset_accumulated_gradients"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.architecture.Architecture.reset_accumulated_gradients" title="Permalink to this definition"></a></dt>
<dd><p>Sets the gradients of all parameters to 0.</p>
<p>Once gradients are reset, they must remain accessible through the <cite>accumulated_gradients</cite> property of this class,
which must return a list of numpy ndarrays. The child class must ensure that <cite>accumulated_gradients</cite> is set.</p>
</dd></dl>
<dl class="method">
<dt id="rl_coach.architectures.architecture.Architecture.set_variable_value">
<code class="sig-name descname">set_variable_value</code><span class="sig-paren">(</span><em class="sig-param">assign_op: Any</em>, <em class="sig-param">value: numpy.ndarray</em>, <em class="sig-param">placeholder: Any</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/architectures/architecture.html#Architecture.set_variable_value"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.architecture.Architecture.set_variable_value" title="Permalink to this definition"></a></dt>
<dd><p>Updates the value of a specified variable. The type of assign_op depends on the framework
and is a unique identifier for assigning a value to a variable. For example, an agent may use
head.assign_kl_coefficient. There is a one-to-one mapping between assign_op and placeholder
(in the example above, the placeholder would be head.kl_coefficient_ph).</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>assign_op</strong> a parameter representing the operation for assigning value to a specific variable</p></li>
<li><p><strong>value</strong> value of the specified variable used for update</p></li>
<li><p><strong>placeholder</strong> a placeholder for binding the value to assign_op.</p></li>
</ul>
</dd>
</dl>
</dd></dl>
<dl class="method">
<dt id="rl_coach.architectures.architecture.Architecture.set_weights">
<code class="sig-name descname">set_weights</code><span class="sig-paren">(</span><em class="sig-param">weights: List[numpy.ndarray], rate: float = 1.0</em><span class="sig-paren">)</span> &#x2192; None<a class="reference internal" href="../../_modules/rl_coach/architectures/architecture.html#Architecture.set_weights"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.architecture.Architecture.set_weights" title="Permalink to this definition"></a></dt>
<dd><p>Sets the model weights from the provided list of layer parameters.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>weights</strong> list of model weights in the same order as received in get_weights</p></li>
<li><p><strong>rate</strong> controls the mixture of given weight values versus old weight values.
i.e. new_weight = rate * given_weight + (1 - rate) * old_weight</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>None</p>
</dd>
</dl>
</dd></dl>
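<p>Together with <cite>get_weights</cite>, this enables both hard and soft weight synchronization between two identical networks, as in the following sketch:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre>online_weights = online_network.get_weights()

# hard update: overwrite the target copy entirely
target_network.set_weights(online_weights, rate=1.0)

# soft update: mix 1% of the online weights into the target copy, i.e.
# new_weight = 0.01 * online_weight + 0.99 * old_weight
target_network.set_weights(online_weights, rate=0.01)
</pre></div></div>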
<dl class="method">
<dt id="rl_coach.architectures.architecture.Architecture.train_on_batch">
<code class="sig-name descname">train_on_batch</code><span class="sig-paren">(</span><em class="sig-param">inputs: Dict[str, numpy.ndarray], targets: List[numpy.ndarray], scaler: float = 1.0, additional_fetches: list = None, importance_weights: numpy.ndarray = None</em><span class="sig-paren">)</span> &#x2192; Tuple[float, List[float], float, list]<a class="reference internal" href="../../_modules/rl_coach/architectures/architecture.html#Architecture.train_on_batch"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.architecture.Architecture.train_on_batch" title="Permalink to this definition"></a></dt>
<dd><p>Given a batch of inputs (e.g. states) and targets (e.g. discounted rewards), takes a training step: i.e. runs a
forward pass and backward pass of the network, accumulates the gradients and applies an optimization step to
update the weights.
Calls <cite>accumulate_gradients</cite> followed by <cite>apply_and_reset_gradients</cite>.
Note: Currently an unused method.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>inputs</strong> typically the environment states (but can also contain other data necessary for loss).
(e.g. <cite>{observation: numpy.ndarray}</cite> with <cite>observation</cite> of shape (batch_size, observation_space_size) or
(batch_size, observation_space_size, stack_size) or
<cite>{observation: numpy.ndarray, output_0_0: numpy.ndarray}</cite> with <cite>output_0_0</cite> of shape (batch_size,))</p></li>
<li><p><strong>targets</strong> target values of shape (batch_size, ). For example, discounted rewards would be the target
for calculating the value-network loss. The length of the list and the order of the arrays
match those of the network losses, which are defined by the network parameters.</p></li>
<li><p><strong>scaler</strong> value to scale gradients by before optimizing network weights</p></li>
<li><p><strong>additional_fetches</strong> list of additional values to fetch and return. The type of each list
element is framework dependent.</p></li>
<li><p><strong>importance_weights</strong> ndarray of shape (batch_size,) to multiply with batch loss.</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p><p>tuple of total_loss, losses, norm_unclipped_grads, fetched_tensors
total_loss (float): sum of all head losses
losses (list of float): list of all losses. The order is list of target losses followed by list</p>
<blockquote>
<div><p>of regularization losses. The specifics of losses is dependant on the network parameters
(number of heads, etc.)</p>
</div></blockquote>
<p>norm_unclippsed_grads (float): global norm of all gradients before any gradient clipping is applied
fetched_tensors: all values for additional_fetches</p>
</p>
</dd>
</dl>
</dd></dl>
</dd></dl>
</div>
<div class="section" id="networkwrapper">
<h2>NetworkWrapper<a class="headerlink" href="#networkwrapper" title="Permalink to this headline"></a></h2>
<a class="reference internal image-reference" href="../../_images/distributed.png"><img alt="../../_images/distributed.png" class="align-center" src="../../_images/distributed.png" style="width: 600px;" /></a>
<dl class="class">
<dt id="rl_coach.architectures.network_wrapper.NetworkWrapper">
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.architectures.network_wrapper.</code><code class="sig-name descname">NetworkWrapper</code><span class="sig-paren">(</span><em class="sig-param">agent_parameters: rl_coach.base_parameters.AgentParameters</em>, <em class="sig-param">has_target: bool</em>, <em class="sig-param">has_global: bool</em>, <em class="sig-param">name: str</em>, <em class="sig-param">spaces: rl_coach.spaces.SpacesDefinition</em>, <em class="sig-param">replicated_device=None</em>, <em class="sig-param">worker_device=None</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/architectures/network_wrapper.html#NetworkWrapper"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.network_wrapper.NetworkWrapper" title="Permalink to this definition"></a></dt>
<dd><p>The network wrapper contains multiple copies of the same network, each with a different set of weights that is
updated on a different time scale. The network wrapper will always contain an online network.
It will contain an additional slowly-updating target network if one was requested by the user,
and it will contain a global network shared between the different workers if Coach is run in single-node
multi-process distributed mode. The network wrapper contains functionality for managing these networks and syncing
between them.</p>
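<p>In practice, the wrapper is typically reached through the agent's networks dictionary. A training step then looks roughly like the sketch below, where 'main' is the conventional network name and <cite>inputs</cite> and <cite>targets</cite> are assumed to have been prepared by the agent.</p>
<div class="highlight-default notranslate"><div class="highlight"><pre># NetworkWrapper for the agent's main network
main_network = agent.networks['main']

# run a training step on the online network (and the global one, if present),
# then sync the networks as needed
loss = main_network.train_and_sync_networks(inputs, targets)
</pre></div></div>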
<dl class="method">
<dt id="rl_coach.architectures.network_wrapper.NetworkWrapper.apply_gradients_and_sync_networks">
<code class="sig-name descname">apply_gradients_and_sync_networks</code><span class="sig-paren">(</span><em class="sig-param">reset_gradients=True</em>, <em class="sig-param">additional_inputs=None</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/architectures/network_wrapper.html#NetworkWrapper.apply_gradients_and_sync_networks"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.network_wrapper.NetworkWrapper.apply_gradients_and_sync_networks" title="Permalink to this definition"></a></dt>
<dd><p>Applies the gradients accumulated in the online network to the global network or to itself and syncs the
networks if necessary</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>reset_gradients</strong> If set to False, the accumulated gradients won't be reset to 0 after applying them to
the network. This is useful when the accumulated gradients are overwritten, instead
of accumulated, by the accumulate_gradients function; it allows reducing the time
complexity of this function by around 10%.</p></li>
<li><p><strong>additional_inputs</strong> optional additional inputs required when applying the gradients (e.g. batch norm
update ops also require the inputs)</p></li>
</ul>
</dd>
</dl>
</dd></dl>
<dl class="method">
<dt id="rl_coach.architectures.network_wrapper.NetworkWrapper.apply_gradients_to_global_network">
<code class="sig-name descname">apply_gradients_to_global_network</code><span class="sig-paren">(</span><em class="sig-param">gradients=None</em>, <em class="sig-param">additional_inputs=None</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/architectures/network_wrapper.html#NetworkWrapper.apply_gradients_to_global_network"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.network_wrapper.NetworkWrapper.apply_gradients_to_global_network" title="Permalink to this definition"></a></dt>
<dd><p>Apply gradients from the online network to the global network</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>gradients</strong> optional gradients that will be used instead of the accumulated gradients</p></li>
<li><p><strong>additional_inputs</strong> optional additional inputs required when applying the gradients (e.g. batch norm
update ops also require the inputs)</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p></p>
</dd>
</dl>
</dd></dl>
<dl class="method">
<dt id="rl_coach.architectures.network_wrapper.NetworkWrapper.apply_gradients_to_online_network">
<code class="sig-name descname">apply_gradients_to_online_network</code><span class="sig-paren">(</span><em class="sig-param">gradients=None</em>, <em class="sig-param">additional_inputs=None</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/architectures/network_wrapper.html#NetworkWrapper.apply_gradients_to_online_network"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.network_wrapper.NetworkWrapper.apply_gradients_to_online_network" title="Permalink to this definition"></a></dt>
<dd><p>Apply gradients from the online network to itself.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>gradients</strong> optional gradients that will be used instead of the accumulated gradients</p></li>
<li><p><strong>additional_inputs</strong> optional additional inputs required when applying the gradients (e.g. batch norm
update ops also require the inputs)</p></li>
</ul>
</dd>
</dl>
</dd></dl>
<dl class="method">
<dt id="rl_coach.architectures.network_wrapper.NetworkWrapper.collect_savers">
<code class="sig-name descname">collect_savers</code><span class="sig-paren">(</span><em class="sig-param">parent_path_suffix: str</em><span class="sig-paren">)</span> &#x2192; rl_coach.saver.SaverCollection<a class="reference internal" href="../../_modules/rl_coach/architectures/network_wrapper.html#NetworkWrapper.collect_savers"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.network_wrapper.NetworkWrapper.collect_savers" title="Permalink to this definition"></a></dt>
<dd><p>Collect all of the network's savers for the global or online network.</p>
<p>Note: the global, online, and target networks are all copies of the same network, with parameters that are
updated at different rates. So we only need to save one of the networks; the one that holds the most
recent parameters. The target network is created for some agents and used for stabilizing training by
updating its parameters from the online network at a slower rate. As a result, the target network never contains
the most recent set of parameters. In single-worker training, no global network is created and the online
network contains the most recent parameters. In vertical distributed training with more than one worker,
the global network is updated by all workers and contains the most recent parameters.
Therefore preference is given to the global network if it exists; otherwise the online network is used
for saving.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><p><strong>parent_path_suffix</strong> path suffix of the parent of the network wrapper
(e.g. could be name of level manager plus name of agent)</p>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>collection of all checkpoint objects</p>
</dd>
</dl>
</dd></dl>
<dl class="method">
<dt id="rl_coach.architectures.network_wrapper.NetworkWrapper.parallel_prediction">
<code class="sig-name descname">parallel_prediction</code><span class="sig-paren">(</span><em class="sig-param">network_input_tuples: List[Tuple]</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/architectures/network_wrapper.html#NetworkWrapper.parallel_prediction"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.network_wrapper.NetworkWrapper.parallel_prediction" title="Permalink to this definition"></a></dt>
<dd><p>Run several network predictions in parallel. Currently this only supports running each of the networks once.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><p><strong>network_input_tuples</strong> a list of tuples where the first element is the network (online_network,
target_network or global_network) and the second element is the inputs</p>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>the outputs of all the networks in the same order as the inputs were given</p>
</dd>
</dl>
</dd></dl>
<dl class="method">
<dt id="rl_coach.architectures.network_wrapper.NetworkWrapper.set_is_training">
<code class="sig-name descname">set_is_training</code><span class="sig-paren">(</span><em class="sig-param">state: bool</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/architectures/network_wrapper.html#NetworkWrapper.set_is_training"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.network_wrapper.NetworkWrapper.set_is_training" title="Permalink to this definition"></a></dt>
<dd><p>Set the phase of the network to either training or testing</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><p><strong>state</strong> The current state (True = Training, False = Testing)</p>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>None</p>
</dd>
</dl>
</dd></dl>
<dl class="method">
<dt id="rl_coach.architectures.network_wrapper.NetworkWrapper.sync">
<code class="sig-name descname">sync</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/architectures/network_wrapper.html#NetworkWrapper.sync"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.network_wrapper.NetworkWrapper.sync" title="Permalink to this definition"></a></dt>
<dd><p>Initializes the weights of the networks to match each other</p>
<dl class="field-list simple">
<dt class="field-odd">Returns</dt>
<dd class="field-odd"><p></p>
</dd>
</dl>
</dd></dl>
<dl class="method">
<dt id="rl_coach.architectures.network_wrapper.NetworkWrapper.train_and_sync_networks">
<code class="sig-name descname">train_and_sync_networks</code><span class="sig-paren">(</span><em class="sig-param">inputs</em>, <em class="sig-param">targets</em>, <em class="sig-param">additional_fetches=[]</em>, <em class="sig-param">importance_weights=None</em>, <em class="sig-param">use_inputs_for_apply_gradients=False</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/architectures/network_wrapper.html#NetworkWrapper.train_and_sync_networks"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.network_wrapper.NetworkWrapper.train_and_sync_networks" title="Permalink to this definition"></a></dt>
<dd><p>A generic training function that enables multi-threading training using a global network if necessary.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>inputs</strong> The inputs for the network.</p></li>
<li><p><strong>targets</strong> The targets corresponding to the given inputs</p></li>
<li><p><strong>additional_fetches</strong> Any additional tensor the user wants to fetch</p></li>
<li><p><strong>importance_weights</strong> A coefficient for each sample in the batch, which will be used to rescale the loss
error of this sample. If it is not given, the sample losses won't be scaled</p></li>
<li><p><strong>use_inputs_for_apply_gradients</strong> Add the inputs also for when applying gradients
(e.g. for incorporating batchnorm update ops)</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>The loss of the training iteration</p>
</dd>
</dl>
</dd></dl>
<dl class="method">
<dt id="rl_coach.architectures.network_wrapper.NetworkWrapper.update_online_network">
<code class="sig-name descname">update_online_network</code><span class="sig-paren">(</span><em class="sig-param">rate=1.0</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/architectures/network_wrapper.html#NetworkWrapper.update_online_network"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.network_wrapper.NetworkWrapper.update_online_network" title="Permalink to this definition"></a></dt>
<dd><p>Copy weights: global network &gt;&gt;&gt; online network</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><p><strong>rate</strong> the rate of copying the weights - 1 for copying exactly</p>
</dd>
</dl>
</dd></dl>
<dl class="method">
<dt id="rl_coach.architectures.network_wrapper.NetworkWrapper.update_target_network">
<code class="sig-name descname">update_target_network</code><span class="sig-paren">(</span><em class="sig-param">rate=1.0</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/architectures/network_wrapper.html#NetworkWrapper.update_target_network"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.network_wrapper.NetworkWrapper.update_target_network" title="Permalink to this definition"></a></dt>
<dd><p>Copy weights: online network &gt;&gt;&gt; target network</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><p><strong>rate</strong> the rate of copying the weights - 1 for copying exactly</p>
</dd>
</dl>
</dd></dl>
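<p>These two methods support both the hard, periodic target updates used by DQN-style agents and the soft, per-step updates used by DDPG-style agents, as in this sketch:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre># DQN-style: copy the online weights into the target network every N steps
network_wrapper.update_target_network(rate=1.0)

# DDPG-style: nudge the target network toward the online network at every step
network_wrapper.update_target_network(rate=0.001)
</pre></div></div>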
</dd></dl>
</div>
</div>
</div>
</div>
<footer>
<div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
<a href="../data_stores/index.html" class="btn btn-neutral float-right" title="Data Stores" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right"></span></a>
<a href="../agents/value_optimization/qr_dqn.html" class="btn btn-neutral float-left" title="Quantile Regression DQN" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left"></span> Previous</a>
</div>
<hr/>
<div role="contentinfo">
<p>
&copy; Copyright 2018-2019, Intel AI Lab
</p>
</div>
Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script type="text/javascript">
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>