coach/docs/design/network.html



<!DOCTYPE html>
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
<head>
  <meta charset="utf-8">

  <meta name="viewport" content="width=device-width, initial-scale=1.0">

  <title>Network Design &mdash; Reinforcement Learning Coach 0.12.0 documentation</title>


  <script type="text/javascript" src="../_static/js/modernizr.min.js"></script>


      <script type="text/javascript" id="documentation_options" data-url_root="../" src="../_static/documentation_options.js"></script>
        <script type="text/javascript" src="../_static/jquery.js"></script>
        <script type="text/javascript" src="../_static/underscore.js"></script>
        <script type="text/javascript" src="../_static/doctools.js"></script>
        <script type="text/javascript" src="../_static/language_data.js"></script>
        <script async="async" type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/latest.js?config=TeX-AMS-MML_HTMLorMML"></script>

    <script type="text/javascript" src="../_static/js/theme.js"></script>


  <link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
  <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
  <link rel="stylesheet" href="../_static/css/custom.css" type="text/css" />
    <link rel="index" title="Index" href="../genindex.html" />
    <link rel="search" title="Search" href="../search.html" />
    <link rel="next" title="Distributed Coach - Horizontal Scale-Out" href="horizontal_scaling.html" />
    <link rel="prev" title="Control Flow" href="control_flow.html" />
    <link href="../_static/css/custom.css" rel="stylesheet" type="text/css">

</head>

<body class="wy-body-for-nav">


  <div class="wy-grid-for-nav">

    <nav data-toggle="wy-nav-shift" class="wy-nav-side">
      <div class="wy-side-scroll">
        <div class="wy-side-nav-search" >


            <a href="../index.html" class="icon icon-home"> Reinforcement Learning Coach


            <img src="../_static/dark_logo.png" class="logo" alt="Logo"/>

          </a>


<div role="search">
  <form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
    <input type="text" name="q" placeholder="Search docs" />
    <input type="hidden" name="check_keywords" value="yes" />
    <input type="hidden" name="area" value="default" />
  </form>
</div>


        </div>

        <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">


              <p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../dashboard.html">Coach Dashboard</a></li>
</ul>
<p class="caption"><span class="caption-text">Design</span></p>
<ul class="current">
<li class="toctree-l1"><a class="reference internal" href="control_flow.html">Control Flow</a></li>
<li class="toctree-l1 current"><a class="current reference internal" href="#">Network Design</a><ul>
<li class="toctree-l2"><a class="reference internal" href="#keeping-network-copies-in-sync">Keeping Network Copies in Sync</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../contributing/add_agent.html">Adding a New Agent</a></li>
<li class="toctree-l1"><a class="reference internal" href="../contributing/add_env.html">Adding a New Environment</a></li>
</ul>
<p class="caption"><span class="caption-text">Components</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../components/additional_parameters.html">Additional Parameters</a></li>
</ul>


        </div>
      </div>
    </nav>

    <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">


      <nav class="wy-nav-top" aria-label="top navigation">

          <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
          <a href="../index.html">Reinforcement Learning Coach</a>

      </nav>


      <div class="wy-nav-content">

        <div class="rst-content">


<div role="navigation" aria-label="breadcrumbs navigation">

  <ul class="wy-breadcrumbs">

      <li><a href="../index.html">Docs</a> &raquo;</li>

      <li>Network Design</li>


      <li class="wy-breadcrumbs-aside">


            <a href="../_sources/design/network.rst.txt" rel="nofollow"> View page source</a>


      </li>

  </ul>


  <hr/>
</div>
          <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
           <div itemprop="articleBody">

  <div class="section" id="network-design">
<h1>Network Design<a class="headerlink" href="#network-design" title="Permalink to this headline">¶</a></h1>
<p>Each agent has at least one neural network, used as the function approximator, for choosing the actions.
The network is designed in a modular way to allow reusability in different agents.
It is separated into three main parts:</p>
<ul>
<li><p><strong>Input Embedders</strong> - This is the first stage of the network, meant to convert the input into a feature vector representation.
It is possible to combine several instances of any of the supported embedders, in order to allow varied combinations of inputs.</p>
<blockquote>
<div><p>There are two main types of input embedders:</p>
<ol class="arabic simple">
<li><p>Image embedder - Convolutional neural network.</p></li>
<li><p>Vector embedder - Multi-layer perceptron.</p></li>
</ol>
</div></blockquote>
</li>
<li><p><strong>Middlewares</strong> - The middleware gets the output of the input embedder, and processes it into a different representation domain,
before sending it through the output head. The goal of the middleware is to enable processing the combined outputs of
several input embedders, and pass them through some extra processing.
This, for instance, might include an LSTM or just a plain fully connected (FC) layer.</p></li>
<li><p><strong>Output Heads</strong> - The output head is used in order to predict the values required from the network.
These might include action-values, state-values or a policy. As with the input embedders,
it is possible to use several output heads in the same network. For example, the <em>Actor Critic</em> agent combines two
heads - a policy head and a state-value head.
In addition, the output heads define the loss function according to the head type.</p>
<p></p>
</li>
</ul>
<a class="reference internal image-reference" href="../_images/network.png"><img alt="Diagram of the modular network design: input embedders, middlewares, and output heads" class="align-center" src="../_images/network.png" style="width: 400px;" /></a>
<div class="section" id="keeping-network-copies-in-sync">
<h2>Keeping Network Copies in Sync<a class="headerlink" href="#keeping-network-copies-in-sync" title="Permalink to this headline">¶</a></h2>
<p>Most of the reinforcement learning agents include more than one copy of the neural network.
These copies serve as counterparts of the main network which are updated at different rates,
and are often synchronized either locally or between parallel workers. For easier synchronization of those copies,
a wrapper around these copies exposes a simplified API, which allows hiding these complexities from the agent.
In this wrapper, 3 types of networks can be defined:</p>
<ul class="simple">
<li><p><strong>online network</strong> - A mandatory network which is the main network the agent will use.</p></li>
<li><p><strong>global network</strong> - An optional network which is shared between workers in single-node multi-process distributed learning.
It is updated by all the workers directly, and holds the most up-to-date weights.</p></li>
<li><p><strong>target network</strong> - An optional network which is local for each worker. It can be used in order to keep a copy of
the weights stable for a long period of time. This is used in several agents, such as DQN, in order to
have stable targets for the online network while training it.</p></li>
</ul>
<a class="reference internal image-reference" href="../_images/distributed.png"><img alt="Diagram of the online, global, and target network copies and how they are kept in sync across workers" class="align-center" src="../_images/distributed.png" style="width: 600px;" /></a>
</div>
</div>


           </div>

          </div>
          <footer>

    <div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">

        <a href="horizontal_scaling.html" class="btn btn-neutral float-right" title="Distributed Coach - Horizontal Scale-Out" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right"></span></a>


        <a href="control_flow.html" class="btn btn-neutral float-left" title="Control Flow" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left"></span> Previous</a>

    </div>


  <hr/>

  <div role="contentinfo">
    <p>
        &copy; Copyright 2018-2019, Intel AI Lab

    </p>
  </div>
  Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.

</footer>

        </div>
      </div>

    </section>

  </div>


  <script type="text/javascript">
      jQuery(function () {
          SphinxRtdTheme.Navigation.enable(true);
      });
  </script>


</body>
</html>