pre-release 0.10.0
docs/404.html
Normal file
@@ -0,0 +1,244 @@
<!DOCTYPE html>
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
<head>
  <meta charset="utf-8">
  <meta http-equiv="X-UA-Compatible" content="IE=edge">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">

  <link rel="shortcut icon" href="/img/favicon.ico">
  <title>Reinforcement Learning Coach</title>
  <link href='https://fonts.googleapis.com/css?family=Lato:400,700|Roboto+Slab:400,700|Inconsolata:400,700' rel='stylesheet' type='text/css'>

  <link rel="stylesheet" href="/css/theme.css" type="text/css" />
  <link rel="stylesheet" href="/css/theme_extra.css" type="text/css" />
  <link rel="stylesheet" href="/css/highlight.css">
  <link href="/extra.css" rel="stylesheet">

  <script src="/js/jquery-2.1.1.min.js"></script>
  <script src="/js/modernizr-2.8.3.min.js"></script>
  <script type="text/javascript" src="/js/highlight.pack.js"></script>

</head>

<body class="wy-body-for-nav" role="document">

  <div class="wy-grid-for-nav">

    <nav data-toggle="wy-nav-shift" class="wy-nav-side stickynav">
      <div class="wy-side-nav-search">
        <a href="/" class="icon icon-home"> Reinforcement Learning Coach</a>
        <div role="search">
          <form id ="rtd-search-form" class="wy-form" action="/search.html" method="get">
            <input type="text" name="q" placeholder="Search docs" />
          </form>
        </div>
      </div>

      <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
        <ul class="current">
          <li class="toctree-l1">
            <a class="" href="/">Home</a>
          </li>
          <li class="toctree-l1">
            <a class="" href="/usage/">Usage</a>
          </li>
          <li class="toctree-l1">
            <span class="caption-text">Design</span>
            <ul class="subnav">
              <li class="">
                <a class="" href="/design/features/">Features</a>
              </li>
              <li class="">
                <a class="" href="/design/control_flow/">Control Flow</a>
              </li>
              <li class="">
                <a class="" href="/design/network/">Network</a>
              </li>
              <li class="">
                <a class="" href="/design/filters/">Filters</a>
              </li>
            </ul>
          </li>
          <li class="toctree-l1">
            <span class="caption-text">Algorithms</span>
            <ul class="subnav">
              <li class="">
                <a class="" href="/algorithms/value_optimization/dqn/">DQN</a>
              </li>
              <li class="">
                <a class="" href="/algorithms/value_optimization/double_dqn/">Double DQN</a>
              </li>
              <li class="">
                <a class="" href="/algorithms/value_optimization/dueling_dqn/">Dueling DQN</a>
              </li>
              <li class="">
                <a class="" href="/algorithms/value_optimization/categorical_dqn/">Categorical DQN</a>
              </li>
              <li class="">
                <a class="" href="/algorithms/value_optimization/mmc/">Mixed Monte Carlo</a>
              </li>
              <li class="">
                <a class="" href="/algorithms/value_optimization/pal/">Persistent Advantage Learning</a>
              </li>
              <li class="">
                <a class="" href="/algorithms/value_optimization/nec/">Neural Episodic Control</a>
              </li>
              <li class="">
                <a class="" href="/algorithms/value_optimization/bs_dqn/">Bootstrapped DQN</a>
              </li>
              <li class="">
                <a class="" href="/algorithms/value_optimization/n_step/">N-Step Q Learning</a>
              </li>
              <li class="">
                <a class="" href="/algorithms/value_optimization/naf/">Normalized Advantage Functions</a>
              </li>
              <li class="">
                <a class="" href="/algorithms/policy_optimization/pg/">Policy Gradient</a>
              </li>
              <li class="">
                <a class="" href="/algorithms/policy_optimization/ac/">Actor-Critic</a>
              </li>
              <li class="">
                <a class="" href="/algorithms/policy_optimization/ddpg/">Deep Determinstic Policy Gradients</a>
              </li>
              <li class="">
                <a class="" href="/algorithms/policy_optimization/ppo/">Proximal Policy Optimization</a>
              </li>
              <li class="">
                <a class="" href="/algorithms/policy_optimization/cppo/">Clipped Proximal Policy Optimization</a>
              </li>
              <li class="">
                <a class="" href="/algorithms/other/dfp/">Direct Future Prediction</a>
              </li>
              <li class="">
                <a class="" href="/algorithms/imitation/bc/">Behavioral Cloning</a>
              </li>
            </ul>
          </li>
          <li class="toctree-l1">
            <a class="" href="/dashboard/">Coach Dashboard</a>
          </li>
          <li class="toctree-l1">
            <span class="caption-text">Contributing</span>
            <ul class="subnav">
              <li class="">
                <a class="" href="/contributing/add_agent/">Adding a New Agent</a>
              </li>
              <li class="">
                <a class="" href="/contributing/add_env/">Adding a New Environment</a>
              </li>
            </ul>
          </li>
        </ul>
      </div>
    </nav>

    <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">

      <nav class="wy-nav-top" role="navigation" aria-label="top navigation">
        <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
        <a href="/">Reinforcement Learning Coach</a>
      </nav>

      <div class="wy-nav-content">
        <div class="rst-content">
          <div role="navigation" aria-label="breadcrumbs navigation">
            <ul class="wy-breadcrumbs">
              <li><a href="/">Docs</a> »</li>
              <li class="wy-breadcrumbs-aside">
              </li>
            </ul>
            <hr/>
          </div>
          <div role="main">
            <div class="section">
              <h1 id="404-page-not-found">404</h1>
              <p><strong>Page not found</strong></p>
            </div>
          </div>
          <footer>
            <hr/>
            <div role="contentinfo">
              <!-- Copyright etc -->
            </div>
            Built with <a href="http://www.mkdocs.org">MkDocs</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
          </footer>
        </div>
      </div>

    </section>

  </div>

  <div class="rst-versions" role="note" style="cursor: pointer">
    <span class="rst-current-version" data-toggle="rst-current-version">
    </span>
  </div>
  <script>var base_url = '';</script>
  <script src="/js/theme.js"></script>
  <script src="https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_HTML"></script>
  <script src="/search/require.js"></script>
  <script src="/search/search.js"></script>

</body>
</html>
docs/__init__.py
Normal file
@@ -3,33 +3,29 @@
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
<head>
  <meta charset="utf-8">
  <meta http-equiv="X-UA-Compatible" content="IE=edge">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">

  <title>Behavioral Cloning - Reinforcement Learning Coach Documentation</title>

  <link rel="shortcut icon" href="../../../img/favicon.ico">

  <title>Behavioral Cloning - Reinforcement Learning Coach</title>
  <link href='https://fonts.googleapis.com/css?family=Lato:400,700|Roboto+Slab:400,700|Inconsolata:400,700' rel='stylesheet' type='text/css'>

  <link rel="stylesheet" href="../../../css/theme.css" type="text/css" />
  <link rel="stylesheet" href="../../../css/theme_extra.css" type="text/css" />
  <link rel="stylesheet" href="../../../css/highlight.css">
  <link href="../../../extra.css" rel="stylesheet">

  <script>
    // Current page data
    var mkdocs_page_name = "Behavioral Cloning";
    var mkdocs_page_input_path = "algorithms/imitation/bc.md";
    var mkdocs_page_url = "/algorithms/imitation/bc/";
  </script>

  <script src="../../../js/jquery-2.1.1.min.js"></script>
  <script src="../../../js/modernizr-2.8.3.min.js"></script>
  <script type="text/javascript" src="../../../js/highlight.pack.js"></script>
  <script src="../../../js/theme.js"></script>
  <script src="https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_HTML"></script>

  <script type="text/javascript" src="../../../js/highlight.pack.js"></script>

</head>

@@ -40,7 +36,7 @@

    <nav data-toggle="wy-nav-shift" class="wy-nav-side stickynav">
      <div class="wy-side-nav-search">
        <a href="../../.." class="icon icon-home"> Reinforcement Learning Coach Documentation</a>
        <a href="../../.." class="icon icon-home"> Reinforcement Learning Coach</a>
        <div role="search">
          <form id ="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
            <input type="text" name="q" placeholder="Search docs" />
@@ -49,195 +45,150 @@
        </div>

      <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
        <ul class="current">
        <ul class="current">
          <li>
          <li class="toctree-l1 ">
            <a class="" href="../../..">Home</a>
          </li>
          <li>
          <li class="toctree-l1">
            <a class="" href="../../..">Home</a>
          </li>
          <li>
          <li class="toctree-l1 ">
            <a class="" href="../../../design/index.html">Design</a>
          </li>
          <li>
          <li class="toctree-l1">
            <a class="" href="../../../usage/">Usage</a>
          </li>
          <li>
          <li class="toctree-l1 ">
            <a class="" href="../../../usage/index.html">Usage</a>
          </li>
          <li>
          <li>
          <li class="toctree-l1">
            <span class="caption-text">Design</span>
            <ul class="subnav">
          <li><span>Algorithms</span></li>
          <li class="toctree-l1 ">
            <a class="" href="../../value_optimization/dqn/index.html">DQN</a>
          </li>
          <li class="toctree-l1 ">
            <a class="" href="../../value_optimization/double_dqn/index.html">Double DQN</a>
          </li>
          <li class="toctree-l1 ">
            <a class="" href="../../value_optimization/dueling_dqn/index.html">Dueling DQN</a>
          </li>
          <li class="toctree-l1 ">
            <a class="" href="../../value_optimization/categorical_dqn/index.html">Categorical DQN</a>
          </li>
          <li class="toctree-l1 ">
            <a class="" href="../../value_optimization/mmc/index.html">Mixed Monte Carlo</a>
          </li>
          <li class="toctree-l1 ">
            <a class="" href="../../value_optimization/pal/index.html">Persistent Advantage Learning</a>
          </li>
          <li class="toctree-l1 ">
            <a class="" href="../../value_optimization/nec/index.html">Neural Episodic Control</a>
          </li>
          <li class="toctree-l1 ">
            <a class="" href="../../value_optimization/bs_dqn/index.html">Bootstrapped DQN</a>
          </li>
          <li class="toctree-l1 ">
            <a class="" href="../../value_optimization/n_step/index.html">N-Step Q Learning</a>
          </li>
          <li class="toctree-l1 ">
            <a class="" href="../../value_optimization/naf/index.html">Normalized Advantage Functions</a>
          </li>
          <li class="toctree-l1 ">
            <a class="" href="../../policy_optimization/pg/index.html">Policy Gradient</a>
          </li>
          <li class="toctree-l1 ">
            <a class="" href="../../policy_optimization/ac/index.html">Actor-Critic</a>
          </li>
          <li class="toctree-l1 ">
            <a class="" href="../../policy_optimization/ddpg/index.html">Deep Determinstic Policy Gradients</a>
          </li>
          <li class="toctree-l1 ">
            <a class="" href="../../policy_optimization/ppo/index.html">Proximal Policy Optimization</a>
          </li>
          <li class="toctree-l1 ">
            <a class="" href="../../policy_optimization/cppo/index.html">Clipped Proximal Policy Optimization</a>
          </li>
          <li class="toctree-l1 ">
            <a class="" href="../../other/dfp/index.html">Direct Future Prediction</a>
          </li>
          <li class="toctree-l1 current">
            <a class="current" href="./index.html">Behavioral Cloning</a>
            <ul>
              <li class="toctree-l3"><a href="#behavioral-cloning">Behavioral Cloning</a></li>
              <li><a class="toctree-l4" href="#network-structure">Network Structure</a></li>
              <li><a class="toctree-l4" href="#algorithm-description">Algorithm Description</a></li>
            </ul>
          </li>
          <li class="">
            <a class="" href="../../../design/features/">Features</a>
          </li>
          <li class="">
            <a class="" href="../../../design/control_flow/">Control Flow</a>
          </li>
          <li class="">
            <a class="" href="../../../design/network/">Network</a>
          </li>
          <li class="">
            <a class="" href="../../../design/filters/">Filters</a>
          </li>
          </ul>
          <li>
          </li>
          <li>
          <li class="toctree-l1 ">
            <a class="" href="../../../dashboard/index.html">Coach Dashboard</a>
          </li>
          <li>
          <li>
          <li class="toctree-l1">
            <span class="caption-text">Algorithms</span>
            <ul class="subnav">
              <li class="">
                <a class="" href="../../value_optimization/dqn/">DQN</a>
              </li>
              <li class="">
                <a class="" href="../../value_optimization/double_dqn/">Double DQN</a>
              </li>
              <li class="">
                <a class="" href="../../value_optimization/dueling_dqn/">Dueling DQN</a>
              </li>
              <li class="">
                <a class="" href="../../value_optimization/categorical_dqn/">Categorical DQN</a>
              </li>
              <li class="">
                <a class="" href="../../value_optimization/mmc/">Mixed Monte Carlo</a>
              </li>
              <li class="">
                <a class="" href="../../value_optimization/pal/">Persistent Advantage Learning</a>
              </li>
              <li class="">
                <a class="" href="../../value_optimization/nec/">Neural Episodic Control</a>
              </li>
              <li class="">
                <a class="" href="../../value_optimization/bs_dqn/">Bootstrapped DQN</a>
              </li>
              <li class="">
                <a class="" href="../../value_optimization/n_step/">N-Step Q Learning</a>
              </li>
              <li class="">
                <a class="" href="../../value_optimization/naf/">Normalized Advantage Functions</a>
              </li>
              <li class="">
                <a class="" href="../../policy_optimization/pg/">Policy Gradient</a>
              </li>
              <li class="">
                <a class="" href="../../policy_optimization/ac/">Actor-Critic</a>
              </li>
              <li class="">
                <a class="" href="../../policy_optimization/ddpg/">Deep Determinstic Policy Gradients</a>
              </li>
              <li class="">
                <a class="" href="../../policy_optimization/ppo/">Proximal Policy Optimization</a>
              </li>
              <li class="">
                <a class="" href="../../policy_optimization/cppo/">Clipped Proximal Policy Optimization</a>
              </li>
              <li class="">
                <a class="" href="../../other/dfp/">Direct Future Prediction</a>
              </li>
              <li class=" current">
                <a class="current" href="./">Behavioral Cloning</a>
                <ul class="subnav">
                  <li><span>Contributing</span></li>
                  <li class="toctree-l1 ">
                    <a class="" href="../../../contributing/add_agent/index.html">Adding a New Agent</a>
                  <li class="toctree-l3"><a href="#behavioral-cloning">Behavioral Cloning</a></li>
                  <ul>
                  </li>
                  <li><a class="toctree-l4" href="#network-structure">Network Structure</a></li>
                  <li><a class="toctree-l4" href="#algorithm-description">Algorithm Description</a></li>
                  </ul>
                  <li class="toctree-l1 ">
                    <a class="" href="../../../contributing/add_env/index.html">Adding a New Environment</a>
                  </li>
                </ul>
                <li>
                </li>
              </ul>
            </li>
          <li class="toctree-l1">
            <a class="" href="../../../dashboard/">Coach Dashboard</a>
          </li>
          <li class="toctree-l1">
            <span class="caption-text">Contributing</span>
            <ul class="subnav">
              <li class="">
                <a class="" href="../../../contributing/add_agent/">Adding a New Agent</a>
              </li>
              <li class="">
                <a class="" href="../../../contributing/add_env/">Adding a New Environment</a>
              </li>
            </ul>
          </li>
        </ul>
      </div>
@@ -249,7 +200,7 @@
      <nav class="wy-nav-top" role="navigation" aria-label="top navigation">
        <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
        <a href="../../..">Reinforcement Learning Coach Documentation</a>
        <a href="../../..">Reinforcement Learning Coach</a>
      </nav>
@@ -301,10 +252,10 @@ The training goal is to reduce the difference between the actions predicted by t
      <div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
        <a href="../../../dashboard/index.html" class="btn btn-neutral float-right" title="Coach Dashboard"/>Next <span class="icon icon-circle-arrow-right"></span></a>
        <a href="../../../dashboard/" class="btn btn-neutral float-right" title="Coach Dashboard">Next <span class="icon icon-circle-arrow-right"></span></a>
        <a href="../../other/dfp/index.html" class="btn btn-neutral" title="Direct Future Prediction"><span class="icon icon-circle-arrow-left"></span> Previous</a>
        <a href="../../other/dfp/" class="btn btn-neutral" title="Direct Future Prediction"><span class="icon icon-circle-arrow-left"></span> Previous</a>
      </div>
@@ -318,7 +269,7 @@ The training goal is to reduce the difference between the actions predicted by t
          Built with <a href="http://www.mkdocs.org">MkDocs</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
        </footer>
      </div>
    </div>
@@ -326,17 +277,22 @@ The training goal is to reduce the difference between the actions predicted by t
  </div>
  <div class="rst-versions" role="note" style="cursor: pointer">
  <div class="rst-versions" role="note" style="cursor: pointer">
    <span class="rst-current-version" data-toggle="rst-current-version">
      <span><a href="../../other/dfp/index.html" style="color: #fcfcfc;">« Previous</a></span>
      <span><a href="../../other/dfp/" style="color: #fcfcfc;">« Previous</a></span>
      <span style="margin-left: 15px"><a href="../../../dashboard/index.html" style="color: #fcfcfc">Next »</a></span>
      <span style="margin-left: 15px"><a href="../../../dashboard/" style="color: #fcfcfc">Next »</a></span>
    </span>
  </div>
  <script>var base_url = '../../..';</script>
  <script src="../../../js/theme.js"></script>
  <script src="https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_HTML"></script>
  <script src="../../../search/require.js"></script>
  <script src="../../../search/search.js"></script>

</body>
</html>
@@ -3,33 +3,29 @@
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
<head>
  <meta charset="utf-8">
  <meta http-equiv="X-UA-Compatible" content="IE=edge">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">

  <title>Direct Future Prediction - Reinforcement Learning Coach Documentation</title>

  <link rel="shortcut icon" href="../../../img/favicon.ico">

  <title>Direct Future Prediction - Reinforcement Learning Coach</title>
  <link href='https://fonts.googleapis.com/css?family=Lato:400,700|Roboto+Slab:400,700|Inconsolata:400,700' rel='stylesheet' type='text/css'>

  <link rel="stylesheet" href="../../../css/theme.css" type="text/css" />
  <link rel="stylesheet" href="../../../css/theme_extra.css" type="text/css" />
  <link rel="stylesheet" href="../../../css/highlight.css">
  <link href="../../../extra.css" rel="stylesheet">

  <script>
    // Current page data
    var mkdocs_page_name = "Direct Future Prediction";
    var mkdocs_page_input_path = "algorithms/other/dfp.md";
    var mkdocs_page_url = "/algorithms/other/dfp/";
  </script>

  <script src="../../../js/jquery-2.1.1.min.js"></script>
  <script src="../../../js/modernizr-2.8.3.min.js"></script>
  <script type="text/javascript" src="../../../js/highlight.pack.js"></script>
  <script src="../../../js/theme.js"></script>
  <script src="https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_HTML"></script>

  <script type="text/javascript" src="../../../js/highlight.pack.js"></script>

</head>

@@ -40,7 +36,7 @@

    <nav data-toggle="wy-nav-shift" class="wy-nav-side stickynav">
      <div class="wy-side-nav-search">
        <a href="../../.." class="icon icon-home"> Reinforcement Learning Coach Documentation</a>
        <a href="../../.." class="icon icon-home"> Reinforcement Learning Coach</a>
        <div role="search">
          <form id ="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
            <input type="text" name="q" placeholder="Search docs" />
@@ -49,195 +45,150 @@
        </div>

      <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
        <ul class="current">
        <ul class="current">
          <li>
          <li class="toctree-l1 ">
            <a class="" href="../../..">Home</a>
          </li>
          <li>
          <li class="toctree-l1">
            <a class="" href="../../..">Home</a>
          </li>
          <li>
          <li class="toctree-l1 ">
            <a class="" href="../../../design/index.html">Design</a>
          </li>
          <li>
          <li class="toctree-l1">
            <a class="" href="../../../usage/">Usage</a>
          </li>
          <li>
          <li class="toctree-l1 ">
            <a class="" href="../../../usage/index.html">Usage</a>
          </li>
          <li>
          <li>
          <li class="toctree-l1">
            <span class="caption-text">Design</span>
            <ul class="subnav">
          <li><span>Algorithms</span></li>
          <li class="toctree-l1 ">
            <a class="" href="../../value_optimization/dqn/index.html">DQN</a>
          </li>
          <li class="toctree-l1 ">
            <a class="" href="../../value_optimization/double_dqn/index.html">Double DQN</a>
          </li>
          <li class="toctree-l1 ">
            <a class="" href="../../value_optimization/dueling_dqn/index.html">Dueling DQN</a>
          </li>
          <li class="toctree-l1 ">
            <a class="" href="../../value_optimization/categorical_dqn/index.html">Categorical DQN</a>
          </li>
          <li class="toctree-l1 ">
            <a class="" href="../../value_optimization/mmc/index.html">Mixed Monte Carlo</a>
          </li>
          <li class="toctree-l1 ">
            <a class="" href="../../value_optimization/pal/index.html">Persistent Advantage Learning</a>
          </li>
          <li class="toctree-l1 ">
            <a class="" href="../../value_optimization/nec/index.html">Neural Episodic Control</a>
          </li>
          <li class="toctree-l1 ">
            <a class="" href="../../value_optimization/bs_dqn/index.html">Bootstrapped DQN</a>
          </li>
          <li class="toctree-l1 ">
            <a class="" href="../../value_optimization/n_step/index.html">N-Step Q Learning</a>
          </li>
          <li class="toctree-l1 ">
            <a class="" href="../../value_optimization/naf/index.html">Normalized Advantage Functions</a>
          </li>
          <li class="toctree-l1 ">
            <a class="" href="../../policy_optimization/pg/index.html">Policy Gradient</a>
          </li>
          <li class="toctree-l1 ">
            <a class="" href="../../policy_optimization/ac/index.html">Actor-Critic</a>
          </li>
          <li class="toctree-l1 ">
            <a class="" href="../../policy_optimization/ddpg/index.html">Deep Determinstic Policy Gradients</a>
          </li>
          <li class="toctree-l1 ">
            <a class="" href="../../policy_optimization/ppo/index.html">Proximal Policy Optimization</a>
          </li>
          <li class="toctree-l1 ">
            <a class="" href="../../policy_optimization/cppo/index.html">Clipped Proximal Policy Optimization</a>
          </li>
          <li class="toctree-l1 current">
            <a class="current" href="./index.html">Direct Future Prediction</a>
            <ul>
              <li class="toctree-l3"><a href="#direct-future-prediction">Direct Future Prediction</a></li>
              <li><a class="toctree-l4" href="#network-structure">Network Structure</a></li>
              <li><a class="toctree-l4" href="#algorithm-description">Algorithm Description</a></li>
            </ul>
          </li>
          <li class="toctree-l1 ">
            <a class="" href="../../imitation/bc/index.html">Behavioral Cloning</a>
          </li>
          <li class="">
            <a class="" href="../../../design/features/">Features</a>
          </li>
          <li class="">
            <a class="" href="../../../design/control_flow/">Control Flow</a>
          </li>
          <li class="">
            <a class="" href="../../../design/network/">Network</a>
          </li>
          <li class="">
            <a class="" href="../../../design/filters/">Filters</a>
          </li>
          </ul>
          <li>
          </li>
          <li>
          <li class="toctree-l1 ">
            <a class="" href="../../../dashboard/index.html">Coach Dashboard</a>
          </li>
          <li>
          <li>
          <li class="toctree-l1">
            <span class="caption-text">Algorithms</span>
            <ul class="subnav">
              <li class="">
                <a class="" href="../../value_optimization/dqn/">DQN</a>
              </li>
              <li class="">
                <a class="" href="../../value_optimization/double_dqn/">Double DQN</a>
              </li>
              <li class="">
                <a class="" href="../../value_optimization/dueling_dqn/">Dueling DQN</a>
              </li>
              <li class="">
                <a class="" href="../../value_optimization/categorical_dqn/">Categorical DQN</a>
              </li>
              <li class="">
                <a class="" href="../../value_optimization/mmc/">Mixed Monte Carlo</a>
              </li>
              <li class="">
                <a class="" href="../../value_optimization/pal/">Persistent Advantage Learning</a>
              </li>
              <li class="">
                <a class="" href="../../value_optimization/nec/">Neural Episodic Control</a>
              </li>
              <li class="">
                <a class="" href="../../value_optimization/bs_dqn/">Bootstrapped DQN</a>
              </li>
              <li class="">
                <a class="" href="../../value_optimization/n_step/">N-Step Q Learning</a>
              </li>
              <li class="">
                <a class="" href="../../value_optimization/naf/">Normalized Advantage Functions</a>
              </li>
              <li class="">
                <a class="" href="../../policy_optimization/pg/">Policy Gradient</a>
              </li>
              <li class="">
                <a class="" href="../../policy_optimization/ac/">Actor-Critic</a>
              </li>
              <li class="">
                <a class="" href="../../policy_optimization/ddpg/">Deep Determinstic Policy Gradients</a>
              </li>
              <li class="">
                <a class="" href="../../policy_optimization/ppo/">Proximal Policy Optimization</a>
              </li>
              <li class="">
                <a class="" href="../../policy_optimization/cppo/">Clipped Proximal Policy Optimization</a>
              </li>
              <li class=" current">
                <a class="current" href="./">Direct Future Prediction</a>
                <ul class="subnav">
                  <li><span>Contributing</span></li>
                  <li class="toctree-l1 ">
                    <a class="" href="../../../contributing/add_agent/index.html">Adding a New Agent</a>
                  <li class="toctree-l3"><a href="#direct-future-prediction">Direct Future Prediction</a></li>
                  <ul>
                  </li>
                  <li><a class="toctree-l4" href="#network-structure">Network Structure</a></li>
                  <li><a class="toctree-l4" href="#algorithm-description">Algorithm Description</a></li>
                  </ul>
                  <li class="toctree-l1 ">
                    <a class="" href="../../../contributing/add_env/index.html">Adding a New Environment</a>
                  </li>
                </ul>
                <li>
                </li>
              <li class="">
                <a class="" href="../../imitation/bc/">Behavioral Cloning</a>
              </li>
            </ul>
          </li>
          <li class="toctree-l1">
            <a class="" href="../../../dashboard/">Coach Dashboard</a>
          </li>
          <li class="toctree-l1">
            <span class="caption-text">Contributing</span>
            <ul class="subnav">
              <li class="">
                <a class="" href="../../../contributing/add_agent/">Adding a New Agent</a>
              </li>
              <li class="">
                <a class="" href="../../../contributing/add_env/">Adding a New Environment</a>
              </li>
            </ul>
          </li>
        </ul>
      </div>
@@ -249,7 +200,7 @@
      <nav class="wy-nav-top" role="navigation" aria-label="top navigation">
        <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
        <a href="../../..">Reinforcement Learning Coach Documentation</a>
        <a href="../../..">Reinforcement Learning Coach</a>
      </nav>
@@ -302,10 +253,10 @@
      <div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
        <a href="../../imitation/bc/index.html" class="btn btn-neutral float-right" title="Behavioral Cloning"/>Next <span class="icon icon-circle-arrow-right"></span></a>
        <a href="../../imitation/bc/" class="btn btn-neutral float-right" title="Behavioral Cloning">Next <span class="icon icon-circle-arrow-right"></span></a>
        <a href="../../policy_optimization/cppo/index.html" class="btn btn-neutral" title="Clipped Proximal Policy Optimization"><span class="icon icon-circle-arrow-left"></span> Previous</a>
        <a href="../../policy_optimization/cppo/" class="btn btn-neutral" title="Clipped Proximal Policy Optimization"><span class="icon icon-circle-arrow-left"></span> Previous</a>
      </div>
@@ -319,7 +270,7 @@
          Built with <a href="http://www.mkdocs.org">MkDocs</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
        </footer>
      </div>
    </div>
@@ -327,17 +278,22 @@
  </div>
  <div class="rst-versions" role="note" style="cursor: pointer">
  <div class="rst-versions" role="note" style="cursor: pointer">
    <span class="rst-current-version" data-toggle="rst-current-version">
      <span><a href="../../policy_optimization/cppo/index.html" style="color: #fcfcfc;">« Previous</a></span>
      <span><a href="../../policy_optimization/cppo/" style="color: #fcfcfc;">« Previous</a></span>
      <span style="margin-left: 15px"><a href="../../imitation/bc/index.html" style="color: #fcfcfc">Next »</a></span>
      <span style="margin-left: 15px"><a href="../../imitation/bc/" style="color: #fcfcfc">Next »</a></span>
    </span>
  </div>
  <script>var base_url = '../../..';</script>
  <script src="../../../js/theme.js"></script>
  <script src="https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_HTML"></script>
  <script src="../../../search/require.js"></script>
  <script src="../../../search/search.js"></script>

</body>
</html>
@@ -3,33 +3,29 @@
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
<head>
  <meta charset="utf-8">
  <meta http-equiv="X-UA-Compatible" content="IE=edge">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">

  <title>Actor-Critic - Reinforcement Learning Coach Documentation</title>

  <link rel="shortcut icon" href="../../../img/favicon.ico">

  <title>Actor-Critic - Reinforcement Learning Coach</title>
  <link href='https://fonts.googleapis.com/css?family=Lato:400,700|Roboto+Slab:400,700|Inconsolata:400,700' rel='stylesheet' type='text/css'>

  <link rel="stylesheet" href="../../../css/theme.css" type="text/css" />
  <link rel="stylesheet" href="../../../css/theme_extra.css" type="text/css" />
  <link rel="stylesheet" href="../../../css/highlight.css">
  <link href="../../../extra.css" rel="stylesheet">

  <script>
    // Current page data
    var mkdocs_page_name = "Actor-Critic";
    var mkdocs_page_input_path = "algorithms/policy_optimization/ac.md";
    var mkdocs_page_url = "/algorithms/policy_optimization/ac/";
  </script>

  <script src="../../../js/jquery-2.1.1.min.js"></script>
  <script src="../../../js/modernizr-2.8.3.min.js"></script>
  <script type="text/javascript" src="../../../js/highlight.pack.js"></script>
  <script src="../../../js/theme.js"></script>
  <script src="https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_HTML"></script>

  <script type="text/javascript" src="../../../js/highlight.pack.js"></script>

</head>

@@ -40,7 +36,7 @@

    <nav data-toggle="wy-nav-shift" class="wy-nav-side stickynav">
      <div class="wy-side-nav-search">
        <a href="../../.." class="icon icon-home"> Reinforcement Learning Coach Documentation</a>
        <a href="../../.." class="icon icon-home"> Reinforcement Learning Coach</a>
        <div role="search">
          <form id ="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
            <input type="text" name="q" placeholder="Search docs" />
@@ -49,195 +45,150 @@
        </div>

      <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
        <ul class="current">
        <ul class="current">
          <li>
          <li class="toctree-l1 ">
            <a class="" href="../../..">Home</a>
          </li>
          <li>
          <li class="toctree-l1">
            <a class="" href="../../..">Home</a>
          </li>
          <li>
          <li class="toctree-l1 ">
            <a class="" href="../../../design/index.html">Design</a>
          </li>
          <li>
          <li class="toctree-l1">
            <a class="" href="../../../usage/">Usage</a>
          </li>
          <li>
          <li class="toctree-l1 ">
            <a class="" href="../../../usage/index.html">Usage</a>
          </li>
          <li>
          <li>
          <li class="toctree-l1">
            <span class="caption-text">Design</span>
            <ul class="subnav">
          <li><span>Algorithms</span></li>
          <li class="toctree-l1 ">
            <a class="" href="../../value_optimization/dqn/index.html">DQN</a>
          </li>
          <li class="toctree-l1 ">
            <a class="" href="../../value_optimization/double_dqn/index.html">Double DQN</a>
          </li>
          <li class="toctree-l1 ">
            <a class="" href="../../value_optimization/dueling_dqn/index.html">Dueling DQN</a>
          </li>
          <li class="toctree-l1 ">
            <a class="" href="../../value_optimization/categorical_dqn/index.html">Categorical DQN</a>
          </li>
          <li class="toctree-l1 ">
            <a class="" href="../../value_optimization/mmc/index.html">Mixed Monte Carlo</a>
          </li>
          <li class="toctree-l1 ">
            <a class="" href="../../value_optimization/pal/index.html">Persistent Advantage Learning</a>
          </li>
          <li class="toctree-l1 ">
            <a class="" href="../../value_optimization/nec/index.html">Neural Episodic Control</a>
          </li>
          <li class="toctree-l1 ">
            <a class="" href="../../value_optimization/bs_dqn/index.html">Bootstrapped DQN</a>
          </li>
          <li class="toctree-l1 ">
            <a class="" href="../../value_optimization/n_step/index.html">N-Step Q Learning</a>
          </li>
          <li class="toctree-l1 ">
            <a class="" href="../../value_optimization/naf/index.html">Normalized Advantage Functions</a>
          </li>
          <li class="toctree-l1 ">
            <a class="" href="../pg/index.html">Policy Gradient</a>
          </li>
          <li class="toctree-l1 current">
            <a class="current" href="./index.html">Actor-Critic</a>
            <ul>
              <li class="toctree-l3"><a href="#actor-critic">Actor-Critic</a></li>
              <li><a class="toctree-l4" href="#network-structure">Network Structure</a></li>
              <li><a class="toctree-l4" href="#algorithm-description">Algorithm Description</a></li>
            </ul>
          </li>
          <li class="toctree-l1 ">
            <a class="" href="../ddpg/index.html">Deep Determinstic Policy Gradients</a>
          </li>
          <li class="toctree-l1 ">
            <a class="" href="../ppo/index.html">Proximal Policy Optimization</a>
          </li>
          <li class="toctree-l1 ">
            <a class="" href="../cppo/index.html">Clipped Proximal Policy Optimization</a>
          </li>
          <li class="toctree-l1 ">
            <a class="" href="../../other/dfp/index.html">Direct Future Prediction</a>
          </li>
          <li class="toctree-l1 ">
            <a class="" href="../../imitation/bc/index.html">Behavioral Cloning</a>
          </li>
          <li class="">
            <a class="" href="../../../design/features/">Features</a>
          </li>
          <li class="">
            <a class="" href="../../../design/control_flow/">Control Flow</a>
          </li>
          <li class="">
            <a class="" href="../../../design/network/">Network</a>
          </li>
          <li class="">
            <a class="" href="../../../design/filters/">Filters</a>
          </li>
          </ul>
          <li>
          </li>
          <li>
          <li class="toctree-l1 ">
            <a class="" href="../../../dashboard/index.html">Coach Dashboard</a>
          </li>
          <li>
          <li>
          <li class="toctree-l1">
            <span class="caption-text">Algorithms</span>
            <ul class="subnav">
              <li class="">
                <a class="" href="../../value_optimization/dqn/">DQN</a>
              </li>
              <li class="">
                <a class="" href="../../value_optimization/double_dqn/">Double DQN</a>
              </li>
              <li class="">
                <a class="" href="../../value_optimization/dueling_dqn/">Dueling DQN</a>
              </li>
              <li class="">
                <a class="" href="../../value_optimization/categorical_dqn/">Categorical DQN</a>
              </li>
              <li class="">
                <a class="" href="../../value_optimization/mmc/">Mixed Monte Carlo</a>
              </li>
              <li class="">
                <a class="" href="../../value_optimization/pal/">Persistent Advantage Learning</a>
              </li>
              <li class="">
                <a class="" href="../../value_optimization/nec/">Neural Episodic Control</a>
              </li>
              <li class="">
                <a class="" href="../../value_optimization/bs_dqn/">Bootstrapped DQN</a>
              </li>
              <li class="">
                <a class="" href="../../value_optimization/n_step/">N-Step Q Learning</a>
              </li>
              <li class="">
                <a class="" href="../../value_optimization/naf/">Normalized Advantage Functions</a>
              </li>
              <li class="">
                <a class="" href="../pg/">Policy Gradient</a>
              </li>
              <li class=" current">
                <a class="current" href="./">Actor-Critic</a>
                <ul class="subnav">
                  <li><span>Contributing</span></li>
                  <li class="toctree-l1 ">
                    <a class="" href="../../../contributing/add_agent/index.html">Adding a New Agent</a>
                  <li class="toctree-l3"><a href="#actor-critic">Actor-Critic</a></li>
                  <ul>
                  </li>
                  <li><a class="toctree-l4" href="#network-structure">Network Structure</a></li>
                  <li><a class="toctree-l4" href="#algorithm-description">Algorithm Description</a></li>
                  </ul>
                  <li class="toctree-l1 ">
                    <a class="" href="../../../contributing/add_env/index.html">Adding a New Environment</a>
                  </li>
                </ul>
                <li>
                </li>
              <li class="">
                <a class="" href="../ddpg/">Deep Determinstic Policy Gradients</a>
              </li>
              <li class="">
                <a class="" href="../ppo/">Proximal Policy Optimization</a>
              </li>
              <li class="">
                <a class="" href="../cppo/">Clipped Proximal Policy Optimization</a>
              </li>
              <li class="">
                <a class="" href="../../other/dfp/">Direct Future Prediction</a>
              </li>
              <li class="">
                <a class="" href="../../imitation/bc/">Behavioral Cloning</a>
              </li>
            </ul>
          </li>
          <li class="toctree-l1">
            <a class="" href="../../../dashboard/">Coach Dashboard</a>
          </li>
          <li class="toctree-l1">
            <span class="caption-text">Contributing</span>
            <ul class="subnav">
              <li class="">
                <a class="" href="../../../contributing/add_agent/">Adding a New Agent</a>
              </li>
              <li class="">
                <a class="" href="../../../contributing/add_env/">Adding a New Environment</a>
              </li>
            </ul>
          </li>
        </ul>
      </div>
@@ -249,7 +200,7 @@
      <nav class="wy-nav-top" role="navigation" aria-label="top navigation">
        <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
        <a href="../../..">Reinforcement Learning Coach Documentation</a>
        <a href="../../..">Reinforcement Learning Coach</a>
      </nav>
@@ -302,10 +253,10 @@
      <div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
        <a href="../ddpg/index.html" class="btn btn-neutral float-right" title="Deep Determinstic Policy Gradients"/>Next <span class="icon icon-circle-arrow-right"></span></a>
        <a href="../ddpg/" class="btn btn-neutral float-right" title="Deep Determinstic Policy Gradients">Next <span class="icon icon-circle-arrow-right"></span></a>
        <a href="../pg/index.html" class="btn btn-neutral" title="Policy Gradient"><span class="icon icon-circle-arrow-left"></span> Previous</a>
        <a href="../pg/" class="btn btn-neutral" title="Policy Gradient"><span class="icon icon-circle-arrow-left"></span> Previous</a>
      </div>
@@ -319,7 +270,7 @@
          Built with <a href="http://www.mkdocs.org">MkDocs</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
        </footer>
      </div>
    </div>
@@ -327,17 +278,22 @@
  </div>
  <div class="rst-versions" role="note" style="cursor: pointer">
  <div class="rst-versions" role="note" style="cursor: pointer">
    <span class="rst-current-version" data-toggle="rst-current-version">
      <span><a href="../pg/index.html" style="color: #fcfcfc;">« Previous</a></span>
      <span><a href="../pg/" style="color: #fcfcfc;">« Previous</a></span>
      <span style="margin-left: 15px"><a href="../ddpg/index.html" style="color: #fcfcfc">Next »</a></span>
      <span style="margin-left: 15px"><a href="../ddpg/" style="color: #fcfcfc">Next »</a></span>
    </span>
  </div>
  <script>var base_url = '../../..';</script>
  <script src="../../../js/theme.js"></script>
  <script src="https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_HTML"></script>
  <script src="../../../search/require.js"></script>
  <script src="../../../search/search.js"></script>

</body>
</html>
@@ -3,33 +3,29 @@
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
<head>
  <meta charset="utf-8">
  <meta http-equiv="X-UA-Compatible" content="IE=edge">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">

  <title>Clipped Proximal Policy Optimization - Reinforcement Learning Coach Documentation</title>

  <link rel="shortcut icon" href="../../../img/favicon.ico">

  <title>Clipped Proximal Policy Optimization - Reinforcement Learning Coach</title>
  <link href='https://fonts.googleapis.com/css?family=Lato:400,700|Roboto+Slab:400,700|Inconsolata:400,700' rel='stylesheet' type='text/css'>

  <link rel="stylesheet" href="../../../css/theme.css" type="text/css" />
  <link rel="stylesheet" href="../../../css/theme_extra.css" type="text/css" />
  <link rel="stylesheet" href="../../../css/highlight.css">
  <link href="../../../extra.css" rel="stylesheet">

  <script>
    // Current page data
    var mkdocs_page_name = "Clipped Proximal Policy Optimization";
    var mkdocs_page_input_path = "algorithms/policy_optimization/cppo.md";
    var mkdocs_page_url = "/algorithms/policy_optimization/cppo/";
  </script>

  <script src="../../../js/jquery-2.1.1.min.js"></script>
  <script src="../../../js/modernizr-2.8.3.min.js"></script>
  <script type="text/javascript" src="../../../js/highlight.pack.js"></script>
  <script src="../../../js/theme.js"></script>
  <script src="https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_HTML"></script>

  <script type="text/javascript" src="../../../js/highlight.pack.js"></script>

</head>

@@ -40,7 +36,7 @@

    <nav data-toggle="wy-nav-shift" class="wy-nav-side stickynav">
      <div class="wy-side-nav-search">
        <a href="../../.." class="icon icon-home"> Reinforcement Learning Coach Documentation</a>
        <a href="../../.." class="icon icon-home"> Reinforcement Learning Coach</a>
        <div role="search">
          <form id ="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
            <input type="text" name="q" placeholder="Search docs" />
@@ -49,195 +45,150 @@
        </div>

      <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
        <ul class="current">
        <ul class="current">
          <li>
          <li class="toctree-l1 ">
            <a class="" href="../../..">Home</a>
          </li>
          <li>
          <li class="toctree-l1">
            <a class="" href="../../..">Home</a>
          </li>
          <li>
          <li class="toctree-l1 ">
            <a class="" href="../../../design/index.html">Design</a>
          </li>
          <li>
          <li class="toctree-l1">
            <a class="" href="../../../usage/">Usage</a>
          </li>
          <li>
          <li class="toctree-l1 ">
            <a class="" href="../../../usage/index.html">Usage</a>
          </li>
          <li>
          <li>
          <li class="toctree-l1">
            <span class="caption-text">Design</span>
            <ul class="subnav">
          <li><span>Algorithms</span></li>
          <li class="toctree-l1 ">
            <a class="" href="../../value_optimization/dqn/index.html">DQN</a>
          </li>
          <li class="toctree-l1 ">
            <a class="" href="../../value_optimization/double_dqn/index.html">Double DQN</a>
          </li>
          <li class="toctree-l1 ">
            <a class="" href="../../value_optimization/dueling_dqn/index.html">Dueling DQN</a>
          </li>
          <li class="toctree-l1 ">
            <a class="" href="../../value_optimization/categorical_dqn/index.html">Categorical DQN</a>
          </li>
          <li class="toctree-l1 ">
            <a class="" href="../../value_optimization/mmc/index.html">Mixed Monte Carlo</a>
          </li>
          <li class="toctree-l1 ">
            <a class="" href="../../value_optimization/pal/index.html">Persistent Advantage Learning</a>
          </li>
          <li class="toctree-l1 ">
            <a class="" href="../../value_optimization/nec/index.html">Neural Episodic Control</a>
          </li>
          <li class="toctree-l1 ">
            <a class="" href="../../value_optimization/bs_dqn/index.html">Bootstrapped DQN</a>
          </li>
          <li class="toctree-l1 ">
            <a class="" href="../../value_optimization/n_step/index.html">N-Step Q Learning</a>
          </li>
          <li class="toctree-l1 ">
            <a class="" href="../../value_optimization/naf/index.html">Normalized Advantage Functions</a>
          </li>
          <li class="toctree-l1 ">
            <a class="" href="../pg/index.html">Policy Gradient</a>
          </li>
          <li class="toctree-l1 ">
            <a class="" href="../ac/index.html">Actor-Critic</a>
          </li>
          <li class="toctree-l1 ">
            <a class="" href="../ddpg/index.html">Deep Determinstic Policy Gradients</a>
          </li>
          <li class="toctree-l1 ">
            <a class="" href="../ppo/index.html">Proximal Policy Optimization</a>
          </li>
          <li class="toctree-l1 current">
            <a class="current" href="./index.html">Clipped Proximal Policy Optimization</a>
            <ul>
              <li class="toctree-l3"><a href="#clipped-proximal-policy-optimization">Clipped Proximal Policy Optimization</a></li>
              <li><a class="toctree-l4" href="#network-structure">Network Structure</a></li>
              <li><a class="toctree-l4" href="#algorithm-description">Algorithm Description</a></li>
            </ul>
          </li>
          <li class="toctree-l1 ">
            <a class="" href="../../other/dfp/index.html">Direct Future Prediction</a>
          </li>
          <li class="toctree-l1 ">
            <a class="" href="../../imitation/bc/index.html">Behavioral Cloning</a>
          </li>
          <li class="">
            <a class="" href="../../../design/features/">Features</a>
          </li>
          <li class="">
            <a class="" href="../../../design/control_flow/">Control Flow</a>
          </li>
          <li class="">
            <a class="" href="../../../design/network/">Network</a>
          </li>
          <li class="">
            <a class="" href="../../../design/filters/">Filters</a>
          </li>
          </ul>
          <li>
          </li>
          <li>
          <li class="toctree-l1 ">
            <a class="" href="../../../dashboard/index.html">Coach Dashboard</a>
          </li>
          <li>
          <li>
          <li class="toctree-l1">
            <span class="caption-text">Algorithms</span>
            <ul class="subnav">
              <li class="">
                <a class="" href="../../value_optimization/dqn/">DQN</a>
              </li>
              <li class="">
                <a class="" href="../../value_optimization/double_dqn/">Double DQN</a>
              </li>
              <li class="">
                <a class="" href="../../value_optimization/dueling_dqn/">Dueling DQN</a>
              </li>
              <li class="">
                <a class="" href="../../value_optimization/categorical_dqn/">Categorical DQN</a>
              </li>
              <li class="">
                <a class="" href="../../value_optimization/mmc/">Mixed Monte Carlo</a>
              </li>
              <li class="">
                <a class="" href="../../value_optimization/pal/">Persistent Advantage Learning</a>
              </li>
              <li class="">
                <a class="" href="../../value_optimization/nec/">Neural Episodic Control</a>
              </li>
              <li class="">
                <a class="" href="../../value_optimization/bs_dqn/">Bootstrapped DQN</a>
              </li>
              <li class="">
                <a class="" href="../../value_optimization/n_step/">N-Step Q Learning</a>
              </li>
              <li class="">
                <a class="" href="../../value_optimization/naf/">Normalized Advantage Functions</a>
              </li>
              <li class="">
                <a class="" href="../pg/">Policy Gradient</a>
              </li>
              <li class="">
                <a class="" href="../ac/">Actor-Critic</a>
              </li>
              <li class="">
                <a class="" href="../ddpg/">Deep Determinstic Policy Gradients</a>
              </li>
              <li class="">
                <a class="" href="../ppo/">Proximal Policy Optimization</a>
              </li>
              <li class=" current">
                <a class="current" href="./">Clipped Proximal Policy Optimization</a>
                <ul class="subnav">
                  <li><span>Contributing</span></li>
                  <li class="toctree-l1 ">
                    <a class="" href="../../../contributing/add_agent/index.html">Adding a New Agent</a>
                  <li class="toctree-l3"><a href="#clipped-proximal-policy-optimization">Clipped Proximal Policy Optimization</a></li>
                  <ul>
                  </li>
                  <li><a class="toctree-l4" href="#network-structure">Network Structure</a></li>
                  <li><a class="toctree-l4" href="#algorithm-description">Algorithm Description</a></li>
                  </ul>
                  <li class="toctree-l1 ">
                    <a class="" href="../../../contributing/add_env/index.html">Adding a New Environment</a>
                  </li>
                </ul>
                <li>
                </li>
              <li class="">
                <a class="" href="../../other/dfp/">Direct Future Prediction</a>
              </li>
              <li class="">
                <a class="" href="../../imitation/bc/">Behavioral Cloning</a>
              </li>
            </ul>
          </li>
          <li class="toctree-l1">
            <a class="" href="../../../dashboard/">Coach Dashboard</a>
          </li>
          <li class="toctree-l1">
            <span class="caption-text">Contributing</span>
            <ul class="subnav">
              <li class="">
                <a class="" href="../../../contributing/add_agent/">Adding a New Agent</a>
              </li>
              <li class="">
                <a class="" href="../../../contributing/add_env/">Adding a New Environment</a>
              </li>
            </ul>
          </li>
        </ul>
      </div>
@@ -249,7 +200,7 @@
      <nav class="wy-nav-top" role="navigation" aria-label="top navigation">
        <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
        <a href="../../..">Reinforcement Learning Coach Documentation</a>
        <a href="../../..">Reinforcement Learning Coach</a>
      </nav>
@@ -312,10 +263,10 @@
      <div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
        <a href="../../other/dfp/index.html" class="btn btn-neutral float-right" title="Direct Future Prediction"/>Next <span class="icon icon-circle-arrow-right"></span></a>
        <a href="../../other/dfp/" class="btn btn-neutral float-right" title="Direct Future Prediction">Next <span class="icon icon-circle-arrow-right"></span></a>
        <a href="../ppo/index.html" class="btn btn-neutral" title="Proximal Policy Optimization"><span class="icon icon-circle-arrow-left"></span> Previous</a>
        <a href="../ppo/" class="btn btn-neutral" title="Proximal Policy Optimization"><span class="icon icon-circle-arrow-left"></span> Previous</a>
      </div>
@@ -329,7 +280,7 @@
          Built with <a href="http://www.mkdocs.org">MkDocs</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
        </footer>
      </div>
    </div>
@@ -337,17 +288,22 @@
  </div>
  <div class="rst-versions" role="note" style="cursor: pointer">
  <div class="rst-versions" role="note" style="cursor: pointer">
    <span class="rst-current-version" data-toggle="rst-current-version">
      <span><a href="../ppo/index.html" style="color: #fcfcfc;">« Previous</a></span>
      <span><a href="../ppo/" style="color: #fcfcfc;">« Previous</a></span>
      <span style="margin-left: 15px"><a href="../../other/dfp/index.html" style="color: #fcfcfc">Next »</a></span>
      <span style="margin-left: 15px"><a href="../../other/dfp/" style="color: #fcfcfc">Next »</a></span>
    </span>
  </div>
  <script>var base_url = '../../..';</script>
  <script src="../../../js/theme.js"></script>
  <script src="https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_HTML"></script>
  <script src="../../../search/require.js"></script>
  <script src="../../../search/search.js"></script>

</body>
</html>
docs/algorithms/policy_optimization/ddpg/index.html
@@ -3,33 +3,29 @@
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Deep Determinstic Policy Gradients - Reinforcement Learning Coach Documentation</title>
<link rel="shortcut icon" href="../../../img/favicon.ico">
<title>Deep Determinstic Policy Gradients - Reinforcement Learning Coach</title>
<link href='https://fonts.googleapis.com/css?family=Lato:400,700|Roboto+Slab:400,700|Inconsolata:400,700' rel='stylesheet' type='text/css'>
<link rel="stylesheet" href="../../../css/theme.css" type="text/css" />
<link rel="stylesheet" href="../../../css/theme_extra.css" type="text/css" />
<link rel="stylesheet" href="../../../css/highlight.css">
<link href="../../../extra.css" rel="stylesheet">
<script>
// Current page data
var mkdocs_page_name = "Deep Determinstic Policy Gradients";
var mkdocs_page_input_path = "algorithms/policy_optimization/ddpg.md";
var mkdocs_page_url = "/algorithms/policy_optimization/ddpg/";
</script>
<script src="../../../js/jquery-2.1.1.min.js"></script>
<script src="../../../js/modernizr-2.8.3.min.js"></script>
<script type="text/javascript" src="../../../js/highlight.pack.js"></script>
<script src="../../../js/theme.js"></script>
<script src="https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_HTML"></script>
<script type="text/javascript" src="../../../js/highlight.pack.js"></script>
</head>

@@ -40,7 +36,7 @@
<nav data-toggle="wy-nav-shift" class="wy-nav-side stickynav">
<div class="wy-side-nav-search">
<a href="../../.." class="icon icon-home"> Reinforcement Learning Coach Documentation</a>
<a href="../../.." class="icon icon-home"> Reinforcement Learning Coach</a>
<div role="search">
<form id ="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" />

@@ -49,195 +45,150 @@
</div>
<div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
<ul class="current">
<ul class="current">
<li>
<li class="toctree-l1 ">
<a class="" href="../../..">Home</a>
</li>
<li>
<li class="toctree-l1">
<a class="" href="../../..">Home</a>
</li>
<li>
<li class="toctree-l1 ">
<a class="" href="../../../design/index.html">Design</a>
</li>
<li>
<li class="toctree-l1">
<a class="" href="../../../usage/">Usage</a>
</li>
<li>
<li class="toctree-l1 ">
<a class="" href="../../../usage/index.html">Usage</a>
</li>
<li>
<li>
<li class="toctree-l1">
<span class="caption-text">Design</span>
<ul class="subnav">
<li><span>Algorithms</span></li>
<li class="toctree-l1 ">
<a class="" href="../../value_optimization/dqn/index.html">DQN</a>
</li>
<li class="toctree-l1 ">
<a class="" href="../../value_optimization/double_dqn/index.html">Double DQN</a>
</li>
<li class="toctree-l1 ">
<a class="" href="../../value_optimization/dueling_dqn/index.html">Dueling DQN</a>
</li>
<li class="toctree-l1 ">
<a class="" href="../../value_optimization/categorical_dqn/index.html">Categorical DQN</a>
</li>
<li class="toctree-l1 ">
<a class="" href="../../value_optimization/mmc/index.html">Mixed Monte Carlo</a>
</li>
<li class="toctree-l1 ">
<a class="" href="../../value_optimization/pal/index.html">Persistent Advantage Learning</a>
</li>
<li class="toctree-l1 ">
<a class="" href="../../value_optimization/nec/index.html">Neural Episodic Control</a>
</li>
<li class="toctree-l1 ">
<a class="" href="../../value_optimization/bs_dqn/index.html">Bootstrapped DQN</a>
</li>
<li class="toctree-l1 ">
<a class="" href="../../value_optimization/n_step/index.html">N-Step Q Learning</a>
</li>
<li class="toctree-l1 ">
<a class="" href="../../value_optimization/naf/index.html">Normalized Advantage Functions</a>
</li>
<li class="toctree-l1 ">
<a class="" href="../pg/index.html">Policy Gradient</a>
</li>
<li class="toctree-l1 ">
<a class="" href="../ac/index.html">Actor-Critic</a>
</li>
<li class="toctree-l1 current">
<a class="current" href="./index.html">Deep Determinstic Policy Gradients</a>
<ul>
<li class="toctree-l3"><a href="#deep-deterministic-policy-gradient">Deep Deterministic Policy Gradient</a></li>
<li><a class="toctree-l4" href="#network-structure">Network Structure</a></li>
<li><a class="toctree-l4" href="#algorithm-description">Algorithm Description</a></li>
</ul>
</li>
<li class="toctree-l1 ">
<a class="" href="../ppo/index.html">Proximal Policy Optimization</a>
</li>
<li class="toctree-l1 ">
<a class="" href="../cppo/index.html">Clipped Proximal Policy Optimization</a>
</li>
<li class="toctree-l1 ">
<a class="" href="../../other/dfp/index.html">Direct Future Prediction</a>
</li>
<li class="toctree-l1 ">
<a class="" href="../../imitation/bc/index.html">Behavioral Cloning</a>
</li>
<li class="">
<a class="" href="../../../design/features/">Features</a>
</li>
<li class="">
<a class="" href="../../../design/control_flow/">Control Flow</a>
</li>
<li class="">
<a class="" href="../../../design/network/">Network</a>
</li>
<li class="">
<a class="" href="../../../design/filters/">Filters</a>
</li>
</ul>
<li>
</li>
<li>
<li class="toctree-l1 ">
<a class="" href="../../../dashboard/index.html">Coach Dashboard</a>
</li>
<li>
<li>
<li class="toctree-l1">
<span class="caption-text">Algorithms</span>
<ul class="subnav">
<li class="">
<a class="" href="../../value_optimization/dqn/">DQN</a>
</li>
<li class="">
<a class="" href="../../value_optimization/double_dqn/">Double DQN</a>
</li>
<li class="">
<a class="" href="../../value_optimization/dueling_dqn/">Dueling DQN</a>
</li>
<li class="">
<a class="" href="../../value_optimization/categorical_dqn/">Categorical DQN</a>
</li>
<li class="">
<a class="" href="../../value_optimization/mmc/">Mixed Monte Carlo</a>
</li>
<li class="">
<a class="" href="../../value_optimization/pal/">Persistent Advantage Learning</a>
</li>
<li class="">
<a class="" href="../../value_optimization/nec/">Neural Episodic Control</a>
</li>
<li class="">
<a class="" href="../../value_optimization/bs_dqn/">Bootstrapped DQN</a>
</li>
<li class="">
<a class="" href="../../value_optimization/n_step/">N-Step Q Learning</a>
</li>
<li class="">
<a class="" href="../../value_optimization/naf/">Normalized Advantage Functions</a>
</li>
<li class="">
<a class="" href="../pg/">Policy Gradient</a>
</li>
<li class="">
<a class="" href="../ac/">Actor-Critic</a>
</li>
<li class=" current">
<a class="current" href="./">Deep Determinstic Policy Gradients</a>
<ul class="subnav">
<li><span>Contributing</span></li>
<li class="toctree-l1 ">
<a class="" href="../../../contributing/add_agent/index.html">Adding a New Agent</a>
<li class="toctree-l3"><a href="#deep-deterministic-policy-gradient">Deep Deterministic Policy Gradient</a></li>
<ul>
</li>
<li><a class="toctree-l4" href="#network-structure">Network Structure</a></li>
<li><a class="toctree-l4" href="#algorithm-description">Algorithm Description</a></li>
</ul>
<li class="toctree-l1 ">
<a class="" href="../../../contributing/add_env/index.html">Adding a New Environment</a>
</li>
</ul>
<li>
</li>
<li class="">
<a class="" href="../ppo/">Proximal Policy Optimization</a>
</li>
<li class="">
<a class="" href="../cppo/">Clipped Proximal Policy Optimization</a>
</li>
<li class="">
<a class="" href="../../other/dfp/">Direct Future Prediction</a>
</li>
<li class="">
<a class="" href="../../imitation/bc/">Behavioral Cloning</a>
</li>
</ul>
</li>
<li class="toctree-l1">
<a class="" href="../../../dashboard/">Coach Dashboard</a>
</li>
<li class="toctree-l1">
<span class="caption-text">Contributing</span>
<ul class="subnav">
<li class="">
<a class="" href="../../../contributing/add_agent/">Adding a New Agent</a>
</li>
<li class="">
<a class="" href="../../../contributing/add_env/">Adding a New Environment</a>
</li>
</ul>
</li>
</ul>
</div>

@@ -249,7 +200,7 @@
<nav class="wy-nav-top" role="navigation" aria-label="top navigation">
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../../..">Reinforcement Learning Coach Documentation</a>
<a href="../../..">Reinforcement Learning Coach</a>
</nav>

@@ -310,10 +261,10 @@
<div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
<a href="../ppo/index.html" class="btn btn-neutral float-right" title="Proximal Policy Optimization"/>Next <span class="icon icon-circle-arrow-right"></span></a>
<a href="../ppo/" class="btn btn-neutral float-right" title="Proximal Policy Optimization">Next <span class="icon icon-circle-arrow-right"></span></a>
<a href="../ac/index.html" class="btn btn-neutral" title="Actor-Critic"><span class="icon icon-circle-arrow-left"></span> Previous</a>
<a href="../ac/" class="btn btn-neutral" title="Actor-Critic"><span class="icon icon-circle-arrow-left"></span> Previous</a>
</div>

@@ -327,7 +278,7 @@
Built with <a href="http://www.mkdocs.org">MkDocs</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>

@@ -335,17 +286,22 @@
</div>
<div class="rst-versions" role="note" style="cursor: pointer">
<div class="rst-versions" role="note" style="cursor: pointer">
<span class="rst-current-version" data-toggle="rst-current-version">
<span><a href="../ac/index.html" style="color: #fcfcfc;">« Previous</a></span>
<span><a href="../ac/" style="color: #fcfcfc;">« Previous</a></span>
<span style="margin-left: 15px"><a href="../ppo/index.html" style="color: #fcfcfc">Next »</a></span>
<span style="margin-left: 15px"><a href="../ppo/" style="color: #fcfcfc">Next »</a></span>
</span>
</div>
<script>var base_url = '../../..';</script>
<script src="../../../js/theme.js"></script>
<script src="https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_HTML"></script>
<script src="../../../search/require.js"></script>
<script src="../../../search/search.js"></script>
</body>
</html>
docs/algorithms/policy_optimization/pg/index.html
@@ -3,33 +3,29 @@
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Policy Gradient - Reinforcement Learning Coach Documentation</title>
<link rel="shortcut icon" href="../../../img/favicon.ico">
<title>Policy Gradient - Reinforcement Learning Coach</title>
<link href='https://fonts.googleapis.com/css?family=Lato:400,700|Roboto+Slab:400,700|Inconsolata:400,700' rel='stylesheet' type='text/css'>
<link rel="stylesheet" href="../../../css/theme.css" type="text/css" />
<link rel="stylesheet" href="../../../css/theme_extra.css" type="text/css" />
<link rel="stylesheet" href="../../../css/highlight.css">
<link href="../../../extra.css" rel="stylesheet">
<script>
// Current page data
var mkdocs_page_name = "Policy Gradient";
var mkdocs_page_input_path = "algorithms/policy_optimization/pg.md";
var mkdocs_page_url = "/algorithms/policy_optimization/pg/";
</script>
<script src="../../../js/jquery-2.1.1.min.js"></script>
<script src="../../../js/modernizr-2.8.3.min.js"></script>
<script type="text/javascript" src="../../../js/highlight.pack.js"></script>
<script src="../../../js/theme.js"></script>
<script src="https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_HTML"></script>
<script type="text/javascript" src="../../../js/highlight.pack.js"></script>
</head>

@@ -40,7 +36,7 @@
<nav data-toggle="wy-nav-shift" class="wy-nav-side stickynav">
<div class="wy-side-nav-search">
<a href="../../.." class="icon icon-home"> Reinforcement Learning Coach Documentation</a>
<a href="../../.." class="icon icon-home"> Reinforcement Learning Coach</a>
<div role="search">
<form id ="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" />

@@ -49,195 +45,150 @@
</div>
<div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
<ul class="current">
<ul class="current">
<li>
<li class="toctree-l1 ">
<a class="" href="../../..">Home</a>
</li>
<li>
<li class="toctree-l1">
<a class="" href="../../..">Home</a>
</li>
<li>
<li class="toctree-l1 ">
<a class="" href="../../../design/index.html">Design</a>
</li>
<li>
<li class="toctree-l1">
<a class="" href="../../../usage/">Usage</a>
</li>
<li>
<li class="toctree-l1 ">
<a class="" href="../../../usage/index.html">Usage</a>
</li>
<li>
<li>
<li class="toctree-l1">
<span class="caption-text">Design</span>
<ul class="subnav">
<li><span>Algorithms</span></li>
<li class="toctree-l1 ">
<a class="" href="../../value_optimization/dqn/index.html">DQN</a>
</li>
<li class="toctree-l1 ">
<a class="" href="../../value_optimization/double_dqn/index.html">Double DQN</a>
</li>
<li class="toctree-l1 ">
<a class="" href="../../value_optimization/dueling_dqn/index.html">Dueling DQN</a>
</li>
<li class="toctree-l1 ">
<a class="" href="../../value_optimization/categorical_dqn/index.html">Categorical DQN</a>
</li>
<li class="toctree-l1 ">
<a class="" href="../../value_optimization/mmc/index.html">Mixed Monte Carlo</a>
</li>
<li class="toctree-l1 ">
<a class="" href="../../value_optimization/pal/index.html">Persistent Advantage Learning</a>
</li>
<li class="toctree-l1 ">
<a class="" href="../../value_optimization/nec/index.html">Neural Episodic Control</a>
</li>
<li class="toctree-l1 ">
<a class="" href="../../value_optimization/bs_dqn/index.html">Bootstrapped DQN</a>
</li>
<li class="toctree-l1 ">
<a class="" href="../../value_optimization/n_step/index.html">N-Step Q Learning</a>
</li>
<li class="toctree-l1 ">
<a class="" href="../../value_optimization/naf/index.html">Normalized Advantage Functions</a>
</li>
<li class="toctree-l1 current">
<a class="current" href="./index.html">Policy Gradient</a>
<ul>
<li class="toctree-l3"><a href="#policy-gradient">Policy Gradient</a></li>
<li><a class="toctree-l4" href="#network-structure">Network Structure</a></li>
<li><a class="toctree-l4" href="#algorithm-description">Algorithm Description</a></li>
</ul>
</li>
<li class="toctree-l1 ">
<a class="" href="../ac/index.html">Actor-Critic</a>
</li>
<li class="toctree-l1 ">
<a class="" href="../ddpg/index.html">Deep Determinstic Policy Gradients</a>
</li>
<li class="toctree-l1 ">
<a class="" href="../ppo/index.html">Proximal Policy Optimization</a>
</li>
<li class="toctree-l1 ">
<a class="" href="../cppo/index.html">Clipped Proximal Policy Optimization</a>
</li>
<li class="toctree-l1 ">
<a class="" href="../../other/dfp/index.html">Direct Future Prediction</a>
</li>
<li class="toctree-l1 ">
<a class="" href="../../imitation/bc/index.html">Behavioral Cloning</a>
</li>
<li class="">
<a class="" href="../../../design/features/">Features</a>
</li>
<li class="">
<a class="" href="../../../design/control_flow/">Control Flow</a>
</li>
<li class="">
<a class="" href="../../../design/network/">Network</a>
</li>
<li class="">
<a class="" href="../../../design/filters/">Filters</a>
</li>
</ul>
<li>
</li>
<li>
<li class="toctree-l1 ">
<a class="" href="../../../dashboard/index.html">Coach Dashboard</a>
</li>
<li>
<li>
<li class="toctree-l1">
<span class="caption-text">Algorithms</span>
<ul class="subnav">
<li class="">
<a class="" href="../../value_optimization/dqn/">DQN</a>
</li>
<li class="">
<a class="" href="../../value_optimization/double_dqn/">Double DQN</a>
</li>
<li class="">
<a class="" href="../../value_optimization/dueling_dqn/">Dueling DQN</a>
</li>
<li class="">
<a class="" href="../../value_optimization/categorical_dqn/">Categorical DQN</a>
</li>
<li class="">
<a class="" href="../../value_optimization/mmc/">Mixed Monte Carlo</a>
</li>
<li class="">
<a class="" href="../../value_optimization/pal/">Persistent Advantage Learning</a>
</li>
<li class="">
<a class="" href="../../value_optimization/nec/">Neural Episodic Control</a>
</li>
<li class="">
<a class="" href="../../value_optimization/bs_dqn/">Bootstrapped DQN</a>
</li>
<li class="">
<a class="" href="../../value_optimization/n_step/">N-Step Q Learning</a>
</li>
<li class="">
<a class="" href="../../value_optimization/naf/">Normalized Advantage Functions</a>
</li>
<li class=" current">
<a class="current" href="./">Policy Gradient</a>
<ul class="subnav">
<li><span>Contributing</span></li>
<li class="toctree-l1 ">
<a class="" href="../../../contributing/add_agent/index.html">Adding a New Agent</a>
<li class="toctree-l3"><a href="#policy-gradient">Policy Gradient</a></li>
<ul>
</li>
<li><a class="toctree-l4" href="#network-structure">Network Structure</a></li>
<li><a class="toctree-l4" href="#algorithm-description">Algorithm Description</a></li>
</ul>
<li class="toctree-l1 ">
<a class="" href="../../../contributing/add_env/index.html">Adding a New Environment</a>
</li>
</ul>
<li>
</li>
<li class="">
<a class="" href="../ac/">Actor-Critic</a>
</li>
<li class="">
<a class="" href="../ddpg/">Deep Determinstic Policy Gradients</a>
</li>
<li class="">
<a class="" href="../ppo/">Proximal Policy Optimization</a>
</li>
<li class="">
<a class="" href="../cppo/">Clipped Proximal Policy Optimization</a>
</li>
<li class="">
<a class="" href="../../other/dfp/">Direct Future Prediction</a>
</li>
<li class="">
<a class="" href="../../imitation/bc/">Behavioral Cloning</a>
</li>
</ul>
</li>
<li class="toctree-l1">
<a class="" href="../../../dashboard/">Coach Dashboard</a>
</li>
<li class="toctree-l1">
<span class="caption-text">Contributing</span>
<ul class="subnav">
<li class="">
<a class="" href="../../../contributing/add_agent/">Adding a New Agent</a>
</li>
<li class="">
<a class="" href="../../../contributing/add_env/">Adding a New Environment</a>
</li>
</ul>
</li>
</ul>
</div>

@@ -249,7 +200,7 @@
<nav class="wy-nav-top" role="navigation" aria-label="top navigation">
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../../..">Reinforcement Learning Coach Documentation</a>
<a href="../../..">Reinforcement Learning Coach</a>
</nav>

@@ -302,10 +253,10 @@
<div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
<a href="../ac/index.html" class="btn btn-neutral float-right" title="Actor-Critic"/>Next <span class="icon icon-circle-arrow-right"></span></a>
<a href="../ac/" class="btn btn-neutral float-right" title="Actor-Critic">Next <span class="icon icon-circle-arrow-right"></span></a>
<a href="../../value_optimization/naf/index.html" class="btn btn-neutral" title="Normalized Advantage Functions"><span class="icon icon-circle-arrow-left"></span> Previous</a>
<a href="../../value_optimization/naf/" class="btn btn-neutral" title="Normalized Advantage Functions"><span class="icon icon-circle-arrow-left"></span> Previous</a>
</div>

@@ -319,7 +270,7 @@
Built with <a href="http://www.mkdocs.org">MkDocs</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>

@@ -327,17 +278,22 @@
</div>
<div class="rst-versions" role="note" style="cursor: pointer">
<div class="rst-versions" role="note" style="cursor: pointer">
<span class="rst-current-version" data-toggle="rst-current-version">
<span><a href="../../value_optimization/naf/index.html" style="color: #fcfcfc;">« Previous</a></span>
<span><a href="../../value_optimization/naf/" style="color: #fcfcfc;">« Previous</a></span>
<span style="margin-left: 15px"><a href="../ac/index.html" style="color: #fcfcfc">Next »</a></span>
<span style="margin-left: 15px"><a href="../ac/" style="color: #fcfcfc">Next »</a></span>
</span>
</div>
<script>var base_url = '../../..';</script>
<script src="../../../js/theme.js"></script>
<script src="https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_HTML"></script>
<script src="../../../search/require.js"></script>
<script src="../../../search/search.js"></script>
</body>
</html>
docs/algorithms/policy_optimization/ppo/index.html
@@ -3,33 +3,29 @@
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Proximal Policy Optimization - Reinforcement Learning Coach Documentation</title>
<link rel="shortcut icon" href="../../../img/favicon.ico">
<title>Proximal Policy Optimization - Reinforcement Learning Coach</title>
<link href='https://fonts.googleapis.com/css?family=Lato:400,700|Roboto+Slab:400,700|Inconsolata:400,700' rel='stylesheet' type='text/css'>
<link rel="stylesheet" href="../../../css/theme.css" type="text/css" />
<link rel="stylesheet" href="../../../css/theme_extra.css" type="text/css" />
<link rel="stylesheet" href="../../../css/highlight.css">
<link href="../../../extra.css" rel="stylesheet">
<script>
// Current page data
var mkdocs_page_name = "Proximal Policy Optimization";
var mkdocs_page_input_path = "algorithms/policy_optimization/ppo.md";
var mkdocs_page_url = "/algorithms/policy_optimization/ppo/";
</script>
<script src="../../../js/jquery-2.1.1.min.js"></script>
<script src="../../../js/modernizr-2.8.3.min.js"></script>
<script type="text/javascript" src="../../../js/highlight.pack.js"></script>
<script src="../../../js/theme.js"></script>
<script src="https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_HTML"></script>
<script type="text/javascript" src="../../../js/highlight.pack.js"></script>
</head>

@@ -40,7 +36,7 @@
<nav data-toggle="wy-nav-shift" class="wy-nav-side stickynav">
<div class="wy-side-nav-search">
<a href="../../.." class="icon icon-home"> Reinforcement Learning Coach Documentation</a>
<a href="../../.." class="icon icon-home"> Reinforcement Learning Coach</a>
<div role="search">
<form id ="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" />

@@ -49,195 +45,150 @@
</div>
<div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
<ul class="current">
<ul class="current">
<li>
<li class="toctree-l1 ">
<a class="" href="../../..">Home</a>
</li>
<li>
<li class="toctree-l1">
<a class="" href="../../..">Home</a>
</li>
<li>
<li class="toctree-l1 ">
<a class="" href="../../../design/index.html">Design</a>
</li>
<li>
<li class="toctree-l1">
<a class="" href="../../../usage/">Usage</a>
</li>
<li>
<li class="toctree-l1 ">
<a class="" href="../../../usage/index.html">Usage</a>
</li>
<li>
<li>
<li class="toctree-l1">
<span class="caption-text">Design</span>
<ul class="subnav">
<li><span>Algorithms</span></li>
<li class="toctree-l1 ">
<a class="" href="../../value_optimization/dqn/index.html">DQN</a>
</li>
<li class="toctree-l1 ">
<a class="" href="../../value_optimization/double_dqn/index.html">Double DQN</a>
</li>
<li class="toctree-l1 ">
<a class="" href="../../value_optimization/dueling_dqn/index.html">Dueling DQN</a>
</li>
<li class="toctree-l1 ">
<a class="" href="../../value_optimization/categorical_dqn/index.html">Categorical DQN</a>
</li>
<li class="toctree-l1 ">
<a class="" href="../../value_optimization/mmc/index.html">Mixed Monte Carlo</a>
</li>
<li class="toctree-l1 ">
<a class="" href="../../value_optimization/pal/index.html">Persistent Advantage Learning</a>
</li>
<li class="toctree-l1 ">
<a class="" href="../../value_optimization/nec/index.html">Neural Episodic Control</a>
</li>
<li class="toctree-l1 ">
<a class="" href="../../value_optimization/bs_dqn/index.html">Bootstrapped DQN</a>
</li>
<li class="toctree-l1 ">
<a class="" href="../../value_optimization/n_step/index.html">N-Step Q Learning</a>
</li>
<li class="toctree-l1 ">
<a class="" href="../../value_optimization/naf/index.html">Normalized Advantage Functions</a>
</li>
<li class="toctree-l1 ">
<a class="" href="../pg/index.html">Policy Gradient</a>
</li>
<li class="toctree-l1 ">
<a class="" href="../ac/index.html">Actor-Critic</a>
</li>
<li class="toctree-l1 ">
<a class="" href="../ddpg/index.html">Deep Determinstic Policy Gradients</a>
</li>
<li class="toctree-l1 current">
<a class="current" href="./index.html">Proximal Policy Optimization</a>
<ul>
<li class="toctree-l3"><a href="#proximal-policy-optimization">Proximal Policy Optimization</a></li>
<li><a class="toctree-l4" href="#network-structure">Network Structure</a></li>
<li><a class="toctree-l4" href="#algorithm-description">Algorithm Description</a></li>
</ul>
</li>
<li class="toctree-l1 ">
<a class="" href="../cppo/index.html">Clipped Proximal Policy Optimization</a>
</li>
<li class="toctree-l1 ">
<a class="" href="../../other/dfp/index.html">Direct Future Prediction</a>
</li>
<li class="toctree-l1 ">
<a class="" href="../../imitation/bc/index.html">Behavioral Cloning</a>
</li>
<li class="">
<a class="" href="../../../design/features/">Features</a>
</li>
<li class="">
<a class="" href="../../../design/control_flow/">Control Flow</a>
</li>
<li class="">
<a class="" href="../../../design/network/">Network</a>
</li>
<li class="">
<a class="" href="../../../design/filters/">Filters</a>
</li>
</ul>
<li>
</li>
<li>
<li class="toctree-l1 ">
<a class="" href="../../../dashboard/index.html">Coach Dashboard</a>
</li>
<li>
<li>
<li class="toctree-l1">
<span class="caption-text">Algorithms</span>
<ul class="subnav">
<li class="">
<a class="" href="../../value_optimization/dqn/">DQN</a>
</li>
<li class="">
<a class="" href="../../value_optimization/double_dqn/">Double DQN</a>
</li>
<li class="">
<a class="" href="../../value_optimization/dueling_dqn/">Dueling DQN</a>
</li>
<li class="">
<a class="" href="../../value_optimization/categorical_dqn/">Categorical DQN</a>
</li>
<li class="">
<a class="" href="../../value_optimization/mmc/">Mixed Monte Carlo</a>
</li>
<li class="">
<a class="" href="../../value_optimization/pal/">Persistent Advantage Learning</a>
</li>
<li class="">
<a class="" href="../../value_optimization/nec/">Neural Episodic Control</a>
</li>
<li class="">
<a class="" href="../../value_optimization/bs_dqn/">Bootstrapped DQN</a>
</li>
<li class="">
<a class="" href="../../value_optimization/n_step/">N-Step Q Learning</a>
</li>
<li class="">
<a class="" href="../../value_optimization/naf/">Normalized Advantage Functions</a>
</li>
<li class="">
<a class="" href="../pg/">Policy Gradient</a>
</li>
<li class="">
<a class="" href="../ac/">Actor-Critic</a>
</li>
<li class="">
<a class="" href="../ddpg/">Deep Determinstic Policy Gradients</a>
</li>
<li class=" current">
<a class="current" href="./">Proximal Policy Optimization</a>
<ul class="subnav">
<li><span>Contributing</span></li>
<li class="toctree-l1 ">
<a class="" href="../../../contributing/add_agent/index.html">Adding a New Agent</a>
<li class="toctree-l3"><a href="#proximal-policy-optimization">Proximal Policy Optimization</a></li>
<ul>
</li>
<li><a class="toctree-l4" href="#network-structure">Network Structure</a></li>
<li><a class="toctree-l4" href="#algorithm-description">Algorithm Description</a></li>
</ul>
<li class="toctree-l1 ">
<a class="" href="../../../contributing/add_env/index.html">Adding a New Environment</a>
</li>
</ul>
<li>
</li>
<li class="">
<a class="" href="../cppo/">Clipped Proximal Policy Optimization</a>
</li>
<li class="">
<a class="" href="../../other/dfp/">Direct Future Prediction</a>
</li>
<li class="">
<a class="" href="../../imitation/bc/">Behavioral Cloning</a>
</li>
</ul>
</li>
<li class="toctree-l1">
<a class="" href="../../../dashboard/">Coach Dashboard</a>
</li>
<li class="toctree-l1">
<span class="caption-text">Contributing</span>
<ul class="subnav">
<li class="">
<a class="" href="../../../contributing/add_agent/">Adding a New Agent</a>
</li>
<li class="">
<a class="" href="../../../contributing/add_env/">Adding a New Environment</a>
</li>
</ul>
</li>
</ul>
</div>

@@ -249,7 +200,7 @@
<nav class="wy-nav-top" role="navigation" aria-label="top navigation">
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../../..">Reinforcement Learning Coach Documentation</a>
<a href="../../..">Reinforcement Learning Coach</a>
</nav>

@@ -303,10 +254,10 @@
<div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
<a href="../cppo/index.html" class="btn btn-neutral float-right" title="Clipped Proximal Policy Optimization"/>Next <span class="icon icon-circle-arrow-right"></span></a>
<a href="../cppo/" class="btn btn-neutral float-right" title="Clipped Proximal Policy Optimization">Next <span class="icon icon-circle-arrow-right"></span></a>
<a href="../ddpg/index.html" class="btn btn-neutral" title="Deep Determinstic Policy Gradients"><span class="icon icon-circle-arrow-left"></span> Previous</a>
<a href="../ddpg/" class="btn btn-neutral" title="Deep Determinstic Policy Gradients"><span class="icon icon-circle-arrow-left"></span> Previous</a>
</div>

@@ -320,7 +271,7 @@
Built with <a href="http://www.mkdocs.org">MkDocs</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>

@@ -328,17 +279,22 @@
</div>
<div class="rst-versions" role="note" style="cursor: pointer">
<div class="rst-versions" role="note" style="cursor: pointer">
<span class="rst-current-version" data-toggle="rst-current-version">
<span><a href="../ddpg/index.html" style="color: #fcfcfc;">« Previous</a></span>
<span><a href="../ddpg/" style="color: #fcfcfc;">« Previous</a></span>
<span style="margin-left: 15px"><a href="../cppo/index.html" style="color: #fcfcfc">Next »</a></span>
<span style="margin-left: 15px"><a href="../cppo/" style="color: #fcfcfc">Next »</a></span>
</span>
</div>
<script>var base_url = '../../..';</script>
<script src="../../../js/theme.js"></script>
<script src="https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_HTML"></script>
<script src="../../../search/require.js"></script>
<script src="../../../search/search.js"></script>
</body>
</html>
docs/algorithms/value_optimization/bs_dqn/index.html
@@ -3,33 +3,29 @@
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Bootstrapped DQN - Reinforcement Learning Coach Documentation</title>
<link rel="shortcut icon" href="../../../img/favicon.ico">
<title>Bootstrapped DQN - Reinforcement Learning Coach</title>
<link href='https://fonts.googleapis.com/css?family=Lato:400,700|Roboto+Slab:400,700|Inconsolata:400,700' rel='stylesheet' type='text/css'>
<link rel="stylesheet" href="../../../css/theme.css" type="text/css" />
<link rel="stylesheet" href="../../../css/theme_extra.css" type="text/css" />
<link rel="stylesheet" href="../../../css/highlight.css">
<link href="../../../extra.css" rel="stylesheet">
<script>
// Current page data
var mkdocs_page_name = "Bootstrapped DQN";
var mkdocs_page_input_path = "algorithms/value_optimization/bs_dqn.md";
var mkdocs_page_url = "/algorithms/value_optimization/bs_dqn/";
</script>
<script src="../../../js/jquery-2.1.1.min.js"></script>
<script src="../../../js/modernizr-2.8.3.min.js"></script>
<script type="text/javascript" src="../../../js/highlight.pack.js"></script>
<script src="../../../js/theme.js"></script>
<script src="https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_HTML"></script>
<script type="text/javascript" src="../../../js/highlight.pack.js"></script>
</head>

@@ -40,7 +36,7 @@
<nav data-toggle="wy-nav-shift" class="wy-nav-side stickynav">
<div class="wy-side-nav-search">
<a href="../../.." class="icon icon-home"> Reinforcement Learning Coach Documentation</a>
<a href="../../.." class="icon icon-home"> Reinforcement Learning Coach</a>
<div role="search">
<form id ="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" />

@@ -49,195 +45,150 @@
</div>
<div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
<ul class="current">
<ul class="current">
<li>
<li class="toctree-l1 ">
<a class="" href="../../..">Home</a>
</li>
<li>
<li class="toctree-l1">
<a class="" href="../../..">Home</a>
</li>
<li>
<li class="toctree-l1 ">
<a class="" href="../../../design/index.html">Design</a>
</li>
<li>
<li class="toctree-l1">
<a class="" href="../../../usage/">Usage</a>
</li>
<li>
<li class="toctree-l1 ">
<a class="" href="../../../usage/index.html">Usage</a>
</li>
<li>
<li>
<li class="toctree-l1">
<span class="caption-text">Design</span>
<ul class="subnav">
<li><span>Algorithms</span></li>
<li class="toctree-l1 ">
<a class="" href="../dqn/index.html">DQN</a>
</li>
<li class="toctree-l1 ">
<a class="" href="../double_dqn/index.html">Double DQN</a>
</li>
<li class="toctree-l1 ">
<a class="" href="../dueling_dqn/index.html">Dueling DQN</a>
</li>
<li class="toctree-l1 ">
<a class="" href="../categorical_dqn/index.html">Categorical DQN</a>
</li>
<li class="toctree-l1 ">
<a class="" href="../mmc/index.html">Mixed Monte Carlo</a>
</li>
<li class="toctree-l1 ">
<a class="" href="../pal/index.html">Persistent Advantage Learning</a>
</li>
<li class="toctree-l1 ">
<a class="" href="../nec/index.html">Neural Episodic Control</a>
</li>
<li class="toctree-l1 current">
<a class="current" href="./index.html">Bootstrapped DQN</a>
<ul>
<li class="toctree-l3"><a href="#bootstrapped-dqn">Bootstrapped DQN</a></li>
<li><a class="toctree-l4" href="#network-structure">Network Structure</a></li>
<li><a class="toctree-l4" href="#algorithm-description">Algorithm Description</a></li>
</ul>
</li>
<li class="toctree-l1 ">
<a class="" href="../n_step/index.html">N-Step Q Learning</a>
</li>
<li class="toctree-l1 ">
<a class="" href="../naf/index.html">Normalized Advantage Functions</a>
</li>
<li class="toctree-l1 ">
<a class="" href="../../policy_optimization/pg/index.html">Policy Gradient</a>
</li>
<li class="toctree-l1 ">
<a class="" href="../../policy_optimization/ac/index.html">Actor-Critic</a>
</li>
<li class="toctree-l1 ">
<a class="" href="../../policy_optimization/ddpg/index.html">Deep Determinstic Policy Gradients</a>
</li>
<li class="toctree-l1 ">
<a class="" href="../../policy_optimization/ppo/index.html">Proximal Policy Optimization</a>
</li>
<li class="toctree-l1 ">
<a class="" href="../../policy_optimization/cppo/index.html">Clipped Proximal Policy Optimization</a>
</li>
<li class="toctree-l1 ">
<a class="" href="../../other/dfp/index.html">Direct Future Prediction</a>
</li>
<li class="toctree-l1 ">
<a class="" href="../../imitation/bc/index.html">Behavioral Cloning</a>
</li>
<li class="">
<a class="" href="../../../design/features/">Features</a>
</li>
<li class="">
<a class="" href="../../../design/control_flow/">Control Flow</a>
</li>
<li class="">
<a class="" href="../../../design/network/">Network</a>
</li>
<li class="">
<a class="" href="../../../design/filters/">Filters</a>
</li>
</ul>
<li>
</li>
<li>
<li class="toctree-l1 ">
<a class="" href="../../../dashboard/index.html">Coach Dashboard</a>
</li>
<li>
<li>
<li class="toctree-l1">
<span class="caption-text">Algorithms</span>
<ul class="subnav">
<li class="">
<a class="" href="../dqn/">DQN</a>
</li>
<li class="">
<a class="" href="../double_dqn/">Double DQN</a>
</li>
<li class="">
<a class="" href="../dueling_dqn/">Dueling DQN</a>
</li>
<li class="">
<a class="" href="../categorical_dqn/">Categorical DQN</a>
</li>
<li class="">
<a class="" href="../mmc/">Mixed Monte Carlo</a>
</li>
<li class="">
<a class="" href="../pal/">Persistent Advantage Learning</a>
</li>
<li class="">
<a class="" href="../nec/">Neural Episodic Control</a>
</li>
<li class=" current">
<a class="current" href="./">Bootstrapped DQN</a>
<ul class="subnav">
<li><span>Contributing</span></li>
<li class="toctree-l1 ">
<a class="" href="../../../contributing/add_agent/index.html">Adding a New Agent</a>
<li class="toctree-l3"><a href="#bootstrapped-dqn">Bootstrapped DQN</a></li>
<ul>
</li>
<li><a class="toctree-l4" href="#network-structure">Network Structure</a></li>
<li><a class="toctree-l4" href="#algorithm-description">Algorithm Description</a></li>
</ul>
<li class="toctree-l1 ">
<a class="" href="../../../contributing/add_env/index.html">Adding a New Environment</a>
</li>
</ul>
<li>
</li>
<li class="">
<a class="" href="../n_step/">N-Step Q Learning</a>
</li>
<li class="">
<a class="" href="../naf/">Normalized Advantage Functions</a>
</li>
<li class="">
<a class="" href="../../policy_optimization/pg/">Policy Gradient</a>
</li>
<li class="">
<a class="" href="../../policy_optimization/ac/">Actor-Critic</a>
</li>
<li class="">
<a class="" href="../../policy_optimization/ddpg/">Deep Determinstic Policy Gradients</a>
</li>
<li class="">
<a class="" href="../../policy_optimization/ppo/">Proximal Policy Optimization</a>
</li>
<li class="">
<a class="" href="../../policy_optimization/cppo/">Clipped Proximal Policy Optimization</a>
</li>
<li class="">
<a class="" href="../../other/dfp/">Direct Future Prediction</a>
</li>
<li class="">
<a class="" href="../../imitation/bc/">Behavioral Cloning</a>
</li>
</ul>
</li>
<li class="toctree-l1">
<a class="" href="../../../dashboard/">Coach Dashboard</a>
</li>
<li class="toctree-l1">
<span class="caption-text">Contributing</span>
<ul class="subnav">
<li class="">
<a class="" href="../../../contributing/add_agent/">Adding a New Agent</a>
</li>
<li class="">
<a class="" href="../../../contributing/add_env/">Adding a New Environment</a>
</li>
</ul>
</li>
</ul>
</div>

@@ -249,7 +200,7 @@
<nav class="wy-nav-top" role="navigation" aria-label="top navigation">
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../../..">Reinforcement Learning Coach Documentation</a>
<a href="../../..">Reinforcement Learning Coach</a>
</nav>

@@ -304,10 +255,10 @@
<div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
<a href="../n_step/index.html" class="btn btn-neutral float-right" title="N-Step Q Learning"/>Next <span class="icon icon-circle-arrow-right"></span></a>
<a href="../n_step/" class="btn btn-neutral float-right" title="N-Step Q Learning">Next <span class="icon icon-circle-arrow-right"></span></a>
<a href="../nec/index.html" class="btn btn-neutral" title="Neural Episodic Control"><span class="icon icon-circle-arrow-left"></span> Previous</a>
<a href="../nec/" class="btn btn-neutral" title="Neural Episodic Control"><span class="icon icon-circle-arrow-left"></span> Previous</a>
</div>

@@ -321,7 +272,7 @@
Built with <a href="http://www.mkdocs.org">MkDocs</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>

@@ -329,17 +280,22 @@
</div>
<div class="rst-versions" role="note" style="cursor: pointer">
<div class="rst-versions" role="note" style="cursor: pointer">
<span class="rst-current-version" data-toggle="rst-current-version">
<span><a href="../nec/index.html" style="color: #fcfcfc;">« Previous</a></span>
<span><a href="../nec/" style="color: #fcfcfc;">« Previous</a></span>
<span style="margin-left: 15px"><a href="../n_step/index.html" style="color: #fcfcfc">Next »</a></span>
<span style="margin-left: 15px"><a href="../n_step/" style="color: #fcfcfc">Next »</a></span>
</span>
</div>
<script>var base_url = '../../..';</script>
<script src="../../../js/theme.js"></script>
<script src="https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_HTML"></script>
<script src="../../../search/require.js"></script>
<script src="../../../search/search.js"></script>
</body>
</html>
docs/algorithms/value_optimization/categorical_dqn/index.html
@@ -3,33 +3,29 @@
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Categorical DQN - Reinforcement Learning Coach Documentation</title>
<link rel="shortcut icon" href="../../../img/favicon.ico">
<title>Categorical DQN - Reinforcement Learning Coach</title>
<link href='https://fonts.googleapis.com/css?family=Lato:400,700|Roboto+Slab:400,700|Inconsolata:400,700' rel='stylesheet' type='text/css'>
<link rel="stylesheet" href="../../../css/theme.css" type="text/css" />
<link rel="stylesheet" href="../../../css/theme_extra.css" type="text/css" />
<link rel="stylesheet" href="../../../css/highlight.css">
<link href="../../../extra.css" rel="stylesheet">
<script>
// Current page data
var mkdocs_page_name = "Categorical DQN";
var mkdocs_page_input_path = "algorithms/value_optimization/categorical_dqn.md";
var mkdocs_page_url = "/algorithms/value_optimization/categorical_dqn/";
</script>
<script src="../../../js/jquery-2.1.1.min.js"></script>
<script src="../../../js/modernizr-2.8.3.min.js"></script>
<script type="text/javascript" src="../../../js/highlight.pack.js"></script>
<script src="../../../js/theme.js"></script>
<script src="https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_HTML"></script>
<script type="text/javascript" src="../../../js/highlight.pack.js"></script>
</head>

@@ -40,7 +36,7 @@
<nav data-toggle="wy-nav-shift" class="wy-nav-side stickynav">
<div class="wy-side-nav-search">
<a href="../../.." class="icon icon-home"> Reinforcement Learning Coach Documentation</a>
<a href="../../.." class="icon icon-home"> Reinforcement Learning Coach</a>
<div role="search">
<form id ="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" />

@@ -49,195 +45,150 @@
</div>
<div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
<ul class="current">
<ul class="current">
<li>
<li class="toctree-l1 ">
<a class="" href="../../..">Home</a>
</li>
<li>
<li class="toctree-l1">
<a class="" href="../../..">Home</a>
</li>
<li>
<li class="toctree-l1 ">
<a class="" href="../../../design/index.html">Design</a>
</li>
<li>
<li class="toctree-l1">
<a class="" href="../../../usage/">Usage</a>
</li>
<li>
<li class="toctree-l1 ">
<a class="" href="../../../usage/index.html">Usage</a>
</li>
<li>
<li>
<li class="toctree-l1">
<span class="caption-text">Design</span>
<ul class="subnav">
<li><span>Algorithms</span></li>
<li class="toctree-l1 ">
<a class="" href="../dqn/index.html">DQN</a>
</li>
<li class="toctree-l1 ">
<a class="" href="../double_dqn/index.html">Double DQN</a>
</li>
<li class="toctree-l1 ">
<a class="" href="../dueling_dqn/index.html">Dueling DQN</a>
</li>
<li class="toctree-l1 current">
<a class="current" href="./index.html">Categorical DQN</a>
<ul>
<li class="toctree-l3"><a href="#categorical-dqn">Categorical DQN</a></li>
<li><a class="toctree-l4" href="#network-structure">Network Structure</a></li>
<li><a class="toctree-l4" href="#algorithm-description">Algorithm Description</a></li>
</ul>
</li>
<li class="toctree-l1 ">
<a class="" href="../mmc/index.html">Mixed Monte Carlo</a>
</li>
<li class="toctree-l1 ">
<a class="" href="../pal/index.html">Persistent Advantage Learning</a>
</li>
<li class="toctree-l1 ">
<a class="" href="../nec/index.html">Neural Episodic Control</a>
</li>
<li class="toctree-l1 ">
<a class="" href="../bs_dqn/index.html">Bootstrapped DQN</a>
</li>
<li class="toctree-l1 ">
<a class="" href="../n_step/index.html">N-Step Q Learning</a>
</li>
<li class="toctree-l1 ">
<a class="" href="../naf/index.html">Normalized Advantage Functions</a>
</li>
<li class="toctree-l1 ">
<a class="" href="../../policy_optimization/pg/index.html">Policy Gradient</a>
</li>
<li class="toctree-l1 ">
<a class="" href="../../policy_optimization/ac/index.html">Actor-Critic</a>
</li>
<li class="toctree-l1 ">
<a class="" href="../../policy_optimization/ddpg/index.html">Deep Determinstic Policy Gradients</a>
</li>
<li class="toctree-l1 ">
<a class="" href="../../policy_optimization/ppo/index.html">Proximal Policy Optimization</a>
</li>
<li class="toctree-l1 ">
<a class="" href="../../policy_optimization/cppo/index.html">Clipped Proximal Policy Optimization</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../../other/dfp/index.html">Direct Future Prediction</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../../imitation/bc/index.html">Behavioral Cloning</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../../design/features/">Features</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../../design/control_flow/">Control Flow</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../../design/network/">Network</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../../design/filters/">Filters</a>
|
||||
</li>
|
||||
</ul>
|
||||
<li>
|
||||
</li>
|
||||
|
||||
<li>
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../../../dashboard/index.html">Coach Dashboard</a>
|
||||
|
||||
</li>
|
||||
<li>
|
||||
|
||||
<li>
|
||||
<li class="toctree-l1">
|
||||
|
||||
<span class="caption-text">Algorithms</span>
|
||||
<ul class="subnav">
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../dqn/">DQN</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../double_dqn/">Double DQN</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../dueling_dqn/">Dueling DQN</a>
|
||||
</li>
|
||||
<li class=" current">
|
||||
|
||||
<a class="current" href="./">Categorical DQN</a>
|
||||
<ul class="subnav">
|
||||
<li><span>Contributing</span></li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../../../contributing/add_agent/index.html">Adding a New Agent</a>
|
||||
<li class="toctree-l3"><a href="#categorical-dqn">Categorical DQN</a></li>
|
||||
|
||||
<ul>
|
||||
|
||||
</li>
|
||||
<li><a class="toctree-l4" href="#network-structure">Network Structure</a></li>
|
||||
|
||||
<li><a class="toctree-l4" href="#algorithm-description">Algorithm Description</a></li>
|
||||
|
||||
</ul>
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../../../contributing/add_env/index.html">Adding a New Environment</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
</ul>
|
||||
<li>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../mmc/">Mixed Monte Carlo</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../pal/">Persistent Advantage Learning</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../nec/">Neural Episodic Control</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../bs_dqn/">Bootstrapped DQN</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../n_step/">N-Step Q Learning</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../naf/">Normalized Advantage Functions</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../policy_optimization/pg/">Policy Gradient</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../policy_optimization/ac/">Actor-Critic</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../policy_optimization/ddpg/">Deep Determinstic Policy Gradients</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../policy_optimization/ppo/">Proximal Policy Optimization</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../policy_optimization/cppo/">Clipped Proximal Policy Optimization</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../other/dfp/">Direct Future Prediction</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../imitation/bc/">Behavioral Cloning</a>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
|
||||
<li class="toctree-l1">
|
||||
|
||||
<a class="" href="../../../dashboard/">Coach Dashboard</a>
|
||||
</li>
|
||||
|
||||
<li class="toctree-l1">
|
||||
|
||||
<span class="caption-text">Contributing</span>
|
||||
<ul class="subnav">
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../../contributing/add_agent/">Adding a New Agent</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../../contributing/add_env/">Adding a New Environment</a>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</div>
|
||||
@@ -249,7 +200,7 @@
|
||||
|
||||
<nav class="wy-nav-top" role="navigation" aria-label="top navigation">
|
||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||||
<a href="../../..">Reinforcement Learning Coach Documentation</a>
|
||||
<a href="../../..">Reinforcement Learning Coach</a>
|
||||
</nav>
|
||||
|
||||
|
||||
@@ -313,10 +264,10 @@
|
||||
|
||||
<div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
|
||||
|
||||
<a href="../mmc/index.html" class="btn btn-neutral float-right" title="Mixed Monte Carlo"/>Next <span class="icon icon-circle-arrow-right"></span></a>
|
||||
<a href="../mmc/" class="btn btn-neutral float-right" title="Mixed Monte Carlo">Next <span class="icon icon-circle-arrow-right"></span></a>
|
||||
|
||||
|
||||
<a href="../dueling_dqn/index.html" class="btn btn-neutral" title="Dueling DQN"><span class="icon icon-circle-arrow-left"></span> Previous</a>
|
||||
<a href="../dueling_dqn/" class="btn btn-neutral" title="Dueling DQN"><span class="icon icon-circle-arrow-left"></span> Previous</a>
|
||||
|
||||
</div>
|
||||
|
||||
@@ -330,7 +281,7 @@
|
||||
|
||||
Built with <a href="http://www.mkdocs.org">MkDocs</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||||
</footer>
|
||||
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -338,17 +289,22 @@
|
||||
|
||||
</div>
|
||||
|
||||
<div class="rst-versions" role="note" style="cursor: pointer">
|
||||
<div class="rst-versions" role="note" style="cursor: pointer">
|
||||
<span class="rst-current-version" data-toggle="rst-current-version">
|
||||
|
||||
|
||||
<span><a href="../dueling_dqn/index.html" style="color: #fcfcfc;">« Previous</a></span>
|
||||
<span><a href="../dueling_dqn/" style="color: #fcfcfc;">« Previous</a></span>
|
||||
|
||||
|
||||
<span style="margin-left: 15px"><a href="../mmc/index.html" style="color: #fcfcfc">Next »</a></span>
|
||||
<span style="margin-left: 15px"><a href="../mmc/" style="color: #fcfcfc">Next »</a></span>
|
||||
|
||||
</span>
|
||||
</div>
|
||||
<script>var base_url = '../../..';</script>
|
||||
<script src="../../../js/theme.js"></script>
|
||||
<script src="https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_HTML"></script>
|
||||
<script src="../../../search/require.js"></script>
|
||||
<script src="../../../search/search.js"></script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
||||
@@ -3,33 +3,29 @@
[same <head> rework as on the Categorical DQN page above, with this page's title and data:]
<title>Double DQN - Reinforcement Learning Coach Documentation</title>
<title>Double DQN - Reinforcement Learning Coach</title>
<script>
// Current page data
var mkdocs_page_name = "Double DQN";
var mkdocs_page_input_path = "algorithms/value_optimization/double_dqn.md";
var mkdocs_page_url = "/algorithms/value_optimization/double_dqn/";
</script>
@@ -40,7 +36,7 @@
[sidebar search header retitled from "Reinforcement Learning Coach Documentation" to "Reinforcement Learning Coach", as above]
@@ -49,195 +45,150 @@
[same sidebar menu rebuild as above; the current entry's in-page contents:]
<li class="toctree-l3"><a href="#double-dqn">Double DQN</a></li>
<li><a class="toctree-l4" href="#network-structure">Network Structure</a></li>
<li><a class="toctree-l4" href="#algorithm-description">Algorithm Description</a></li>
@@ -249,7 +200,7 @@
[top navigation bar retitled, as above]
@@ -308,10 +259,10 @@
<a href="../dueling_dqn/index.html" class="btn btn-neutral float-right" title="Dueling DQN"/>Next <span class="icon icon-circle-arrow-right"></span></a>
<a href="../dueling_dqn/" class="btn btn-neutral float-right" title="Dueling DQN">Next <span class="icon icon-circle-arrow-right"></span></a>
<a href="../dqn/index.html" class="btn btn-neutral" title="DQN"><span class="icon icon-circle-arrow-left"></span> Previous</a>
<a href="../dqn/" class="btn btn-neutral" title="DQN"><span class="icon icon-circle-arrow-left"></span> Previous</a>
@@ -325,7 +276,7 @@
["Built with MkDocs ..." footer credit context, as above]
@@ -333,17 +284,22 @@
<span><a href="../dqn/index.html" style="color: #fcfcfc;">« Previous</a></span>
<span><a href="../dqn/" style="color: #fcfcfc;">« Previous</a></span>
<span style="margin-left: 15px"><a href="../dueling_dqn/index.html" style="color: #fcfcfc">Next »</a></span>
<span style="margin-left: 15px"><a href="../dueling_dqn/" style="color: #fcfcfc">Next »</a></span>
[version-switcher footer and trailing base_url / theme.js / MathJax / require.js / search.js script block, as above]
</body>
</html>
@@ -3,33 +3,29 @@
[same <head> rework as on the Categorical DQN page above, with this page's title and data:]
<title>DQN - Reinforcement Learning Coach Documentation</title>
<title>DQN - Reinforcement Learning Coach</title>
<script>
// Current page data
var mkdocs_page_name = "DQN";
var mkdocs_page_input_path = "algorithms/value_optimization/dqn.md";
var mkdocs_page_url = "/algorithms/value_optimization/dqn/";
</script>
@@ -40,7 +36,7 @@
[sidebar search header retitled from "Reinforcement Learning Coach Documentation" to "Reinforcement Learning Coach", as above]
@@ -49,195 +45,150 @@
[same sidebar menu rebuild as above; the current entry's in-page contents:]
<li class="toctree-l3"><a href="#deep-q-networks">Deep Q Networks</a></li>
<li><a class="toctree-l4" href="#network-structure">Network Structure</a></li>
<li><a class="toctree-l4" href="#algorithm-description">Algorithm Description</a></li>
@@ -249,7 +200,7 @@
[top navigation bar retitled, as above]
@@ -307,10 +258,10 @@
<a href="../double_dqn/index.html" class="btn btn-neutral float-right" title="Double DQN"/>Next <span class="icon icon-circle-arrow-right"></span></a>
<a href="../double_dqn/" class="btn btn-neutral float-right" title="Double DQN">Next <span class="icon icon-circle-arrow-right"></span></a>
<a href="../../../usage/index.html" class="btn btn-neutral" title="Usage"><span class="icon icon-circle-arrow-left"></span> Previous</a>
<a href="../../../design/filters/" class="btn btn-neutral" title="Filters"><span class="icon icon-circle-arrow-left"></span> Previous</a>
@@ -324,7 +275,7 @@
["Built with MkDocs ..." footer credit context, as above]
@@ -332,17 +283,22 @@
<span><a href="../../../usage/index.html" style="color: #fcfcfc;">« Previous</a></span>
<span><a href="../../../design/filters/" style="color: #fcfcfc;">« Previous</a></span>
<span style="margin-left: 15px"><a href="../double_dqn/index.html" style="color: #fcfcfc">Next »</a></span>
<span style="margin-left: 15px"><a href="../double_dqn/" style="color: #fcfcfc">Next »</a></span>
[version-switcher footer and trailing base_url / theme.js / MathJax / require.js / search.js script block, as above]
</body>
</html>
@@ -3,33 +3,29 @@
[same <head> rework as on the Categorical DQN page above, with this page's title and data:]
<title>Dueling DQN - Reinforcement Learning Coach Documentation</title>
<title>Dueling DQN - Reinforcement Learning Coach</title>
<script>
// Current page data
var mkdocs_page_name = "Dueling DQN";
var mkdocs_page_input_path = "algorithms/value_optimization/dueling_dqn.md";
var mkdocs_page_url = "/algorithms/value_optimization/dueling_dqn/";
</script>
@@ -40,7 +36,7 @@
[sidebar search header retitled from "Reinforcement Learning Coach Documentation" to "Reinforcement Learning Coach", as above]
@@ -49,195 +45,150 @@
[same sidebar menu rebuild as above; the current entry's in-page contents:]
<li class="toctree-l3"><a href="#dueling-dqn">Dueling DQN</a></li>
<li><a class="toctree-l4" href="#network-structure">Network Structure</a></li>
<li><a class="toctree-l4" href="#general-description">General Description</a></li>
@@ -249,7 +200,7 @@
[top navigation bar retitled, as above]
@@ -297,10 +248,10 @@ This is especially important in environments where there are many actions to cho
<a href="../categorical_dqn/index.html" class="btn btn-neutral float-right" title="Categorical DQN"/>Next <span class="icon icon-circle-arrow-right"></span></a>
<a href="../categorical_dqn/" class="btn btn-neutral float-right" title="Categorical DQN">Next <span class="icon icon-circle-arrow-right"></span></a>
<a href="../double_dqn/index.html" class="btn btn-neutral" title="Double DQN"><span class="icon icon-circle-arrow-left"></span> Previous</a>
<a href="../double_dqn/" class="btn btn-neutral" title="Double DQN"><span class="icon icon-circle-arrow-left"></span> Previous</a>
@@ -314,7 +265,7 @@ This is especially important in environments where there are many actions to cho
["Built with MkDocs ..." footer credit context, as above]
@@ -322,17 +273,22 @@ This is especially important in environments where there are many actions to cho
<span><a href="../double_dqn/index.html" style="color: #fcfcfc;">« Previous</a></span>
<span><a href="../double_dqn/" style="color: #fcfcfc;">« Previous</a></span>
<span style="margin-left: 15px"><a href="../categorical_dqn/index.html" style="color: #fcfcfc">Next »</a></span>
<span style="margin-left: 15px"><a href="../categorical_dqn/" style="color: #fcfcfc">Next »</a></span>
[version-switcher footer and trailing base_url / theme.js / MathJax / require.js / search.js script block, as above]
</body>
</html>
@@ -3,33 +3,29 @@
|
||||
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<meta http-equiv="X-UA-Compatible" content="IE=edge">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>Mixed Monte Carlo - Reinforcement Learning Coach Documentation</title>
|
||||
|
||||
|
||||
<link rel="shortcut icon" href="../../../img/favicon.ico">
|
||||
|
||||
|
||||
<title>Mixed Monte Carlo - Reinforcement Learning Coach</title>
|
||||
<link href='https://fonts.googleapis.com/css?family=Lato:400,700|Roboto+Slab:400,700|Inconsolata:400,700' rel='stylesheet' type='text/css'>
|
||||
|
||||
<link rel="stylesheet" href="../../../css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../../../css/theme_extra.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../../../css/highlight.css">
|
||||
<link href="../../../extra.css" rel="stylesheet">
|
||||
|
||||
|
||||
<script>
|
||||
// Current page data
|
||||
var mkdocs_page_name = "Mixed Monte Carlo";
|
||||
var mkdocs_page_input_path = "algorithms/value_optimization/mmc.md";
|
||||
var mkdocs_page_url = "/algorithms/value_optimization/mmc/";
|
||||
</script>
|
||||
|
||||
<script src="../../../js/jquery-2.1.1.min.js"></script>
|
||||
<script src="../../../js/modernizr-2.8.3.min.js"></script>
|
||||
<script type="text/javascript" src="../../../js/highlight.pack.js"></script>
|
||||
<script src="../../../js/theme.js"></script>
|
||||
<script src="https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_HTML"></script>
|
||||
|
||||
<script type="text/javascript" src="../../../js/highlight.pack.js"></script>
|
||||
|
||||
</head>
|
||||
|
||||
@@ -40,7 +36,7 @@
|
||||
|
||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side stickynav">
|
||||
<div class="wy-side-nav-search">
|
||||
<a href="../../.." class="icon icon-home"> Reinforcement Learning Coach Documentation</a>
|
||||
<a href="../../.." class="icon icon-home"> Reinforcement Learning Coach</a>
|
||||
<div role="search">
|
||||
<form id ="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
|
||||
<input type="text" name="q" placeholder="Search docs" />
|
||||
@@ -49,195 +45,150 @@
|
||||
</div>
|
||||
|
||||
<div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
|
||||
<ul class="current">
|
||||
<ul class="current">
|
||||
|
||||
|
||||
<li>
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../../..">Home</a>
|
||||
|
||||
</li>
|
||||
<li>
|
||||
<li class="toctree-l1">
|
||||
|
||||
<a class="" href="../../..">Home</a>
|
||||
</li>
|
||||
|
||||
<li>
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../../../design/index.html">Design</a>
|
||||
|
||||
</li>
|
||||
<li>
|
||||
<li class="toctree-l1">
|
||||
|
||||
<a class="" href="../../../usage/">Usage</a>
|
||||
</li>
|
||||
|
||||
<li>
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../../../usage/index.html">Usage</a>
|
||||
|
||||
</li>
|
||||
<li>
|
||||
|
||||
<li>
|
||||
<li class="toctree-l1">
|
||||
|
||||
<span class="caption-text">Design</span>
|
||||
<ul class="subnav">
|
||||
<li><span>Algorithms</span></li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../dqn/index.html">DQN</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../double_dqn/index.html">Double DQN</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../dueling_dqn/index.html">Dueling DQN</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../categorical_dqn/index.html">Categorical DQN</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 current">
|
||||
<a class="current" href="./index.html">Mixed Monte Carlo</a>
|
||||
|
||||
<ul>
|
||||
|
||||
<li class="toctree-l3"><a href="#mixed-monte-carlo">Mixed Monte Carlo</a></li>
|
||||
|
||||
<li><a class="toctree-l4" href="#network-structure">Network Structure</a></li>
|
||||
|
||||
<li><a class="toctree-l4" href="#algorithm-description">Algorithm Description</a></li>
|
||||
|
||||
|
||||
</ul>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../pal/index.html">Persistent Advantage Learning</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../nec/index.html">Neural Episodic Control</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../bs_dqn/index.html">Bootstrapped DQN</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../n_step/index.html">N-Step Q Learning</a>
|
        [remainder of the sidebar navigation markup — the same generated page chrome repeated on every page, here with "Mixed Monte Carlo" as the current entry; the regenerated links drop the "index.html" suffix]
      </ul>
    </div>
@@ -249,7 +200,7 @@
<nav class="wy-nav-top" role="navigation" aria-label="top navigation">
  <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
  <a href="../../..">Reinforcement Learning Coach Documentation</a>
  <a href="../../..">Reinforcement Learning Coach</a>
</nav>
@@ -309,10 +260,10 @@ Once in every few thousand steps, copy the weights from the online network to th
<div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
  <a href="../pal/index.html" class="btn btn-neutral float-right" title="Persistent Advantage Learning"/>Next <span class="icon icon-circle-arrow-right"></span></a>
  <a href="../pal/" class="btn btn-neutral float-right" title="Persistent Advantage Learning">Next <span class="icon icon-circle-arrow-right"></span></a>
  <a href="../categorical_dqn/index.html" class="btn btn-neutral" title="Categorical DQN"><span class="icon icon-circle-arrow-left"></span> Previous</a>
  <a href="../categorical_dqn/" class="btn btn-neutral" title="Categorical DQN"><span class="icon icon-circle-arrow-left"></span> Previous</a>
</div>
@@ -326,7 +277,7 @@ Once in every few thousand steps, copy the weights from the online network to th
  Built with <a href="http://www.mkdocs.org">MkDocs</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
@@ -334,17 +285,22 @@ Once in every few thousand steps, copy the weights from the online network to th
</div>
<div class="rst-versions" role="note" style="cursor: pointer">
  <span class="rst-current-version" data-toggle="rst-current-version">
    <span><a href="../categorical_dqn/index.html" style="color: #fcfcfc;">« Previous</a></span>
    <span><a href="../categorical_dqn/" style="color: #fcfcfc;">« Previous</a></span>
    <span style="margin-left: 15px"><a href="../pal/index.html" style="color: #fcfcfc">Next »</a></span>
    <span style="margin-left: 15px"><a href="../pal/" style="color: #fcfcfc">Next »</a></span>
  </span>
</div>
<script>var base_url = '../../..';</script>
<script src="../../../js/theme.js"></script>
<script src="https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_HTML"></script>
<script src="../../../search/require.js"></script>
<script src="../../../search/search.js"></script>
</body>
</html>
@@ -3,33 +3,29 @@
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
<head>
  <meta charset="utf-8">
  <meta http-equiv="X-UA-Compatible" content="IE=edge">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>N-Step Q Learning - Reinforcement Learning Coach Documentation</title>
  <link rel="shortcut icon" href="../../../img/favicon.ico">
  <title>N-Step Q Learning - Reinforcement Learning Coach</title>
  <link href='https://fonts.googleapis.com/css?family=Lato:400,700|Roboto+Slab:400,700|Inconsolata:400,700' rel='stylesheet' type='text/css'>
  <link rel="stylesheet" href="../../../css/theme.css" type="text/css" />
  <link rel="stylesheet" href="../../../css/theme_extra.css" type="text/css" />
  <link rel="stylesheet" href="../../../css/highlight.css">
  <link href="../../../extra.css" rel="stylesheet">
  <script>
    // Current page data
    var mkdocs_page_name = "N-Step Q Learning";
    var mkdocs_page_input_path = "algorithms/value_optimization/n_step.md";
    var mkdocs_page_url = "/algorithms/value_optimization/n_step/";
  </script>
  <script src="../../../js/jquery-2.1.1.min.js"></script>
  <script src="../../../js/modernizr-2.8.3.min.js"></script>
  <script type="text/javascript" src="../../../js/highlight.pack.js"></script>
  <script src="../../../js/theme.js"></script>
  <script src="https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_HTML"></script>
  <script type="text/javascript" src="../../../js/highlight.pack.js"></script>
</head>
@@ -40,7 +36,7 @@
<nav data-toggle="wy-nav-shift" class="wy-nav-side stickynav">
  <div class="wy-side-nav-search">
    <a href="../../.." class="icon icon-home"> Reinforcement Learning Coach Documentation</a>
    <a href="../../.." class="icon icon-home"> Reinforcement Learning Coach</a>
    <div role="search">
      <form id ="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
        <input type="text" name="q" placeholder="Search docs" />
@@ -49,195 +45,150 @@
    </div>
    <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
      [sidebar navigation markup — the same generated page chrome repeated on every page, here with "N-Step Q Learning" as the current entry; the regenerated links drop the "index.html" suffix]
    </div>
@@ -249,7 +200,7 @@
<nav class="wy-nav-top" role="navigation" aria-label="top navigation">
  <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
  <a href="../../..">Reinforcement Learning Coach Documentation</a>
  <a href="../../..">Reinforcement Learning Coach</a>
</nav>
@@ -308,10 +259,10 @@ where <script type="math/tex">k</script> is <script type="math/tex">T_{max} - St
<div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
  <a href="../naf/index.html" class="btn btn-neutral float-right" title="Normalized Advantage Functions"/>Next <span class="icon icon-circle-arrow-right"></span></a>
  <a href="../naf/" class="btn btn-neutral float-right" title="Normalized Advantage Functions">Next <span class="icon icon-circle-arrow-right"></span></a>
  <a href="../bs_dqn/index.html" class="btn btn-neutral" title="Bootstrapped DQN"><span class="icon icon-circle-arrow-left"></span> Previous</a>
  <a href="../bs_dqn/" class="btn btn-neutral" title="Bootstrapped DQN"><span class="icon icon-circle-arrow-left"></span> Previous</a>
</div>
@@ -325,7 +276,7 @@ where <script type="math/tex">k</script> is <script type="math/tex">T_{max} - St
  Built with <a href="http://www.mkdocs.org">MkDocs</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
@@ -333,17 +284,22 @@ where <script type="math/tex">k</script> is <script type="math/tex">T_{max} - St
</div>
<div class="rst-versions" role="note" style="cursor: pointer">
  <span class="rst-current-version" data-toggle="rst-current-version">
    <span><a href="../bs_dqn/index.html" style="color: #fcfcfc;">« Previous</a></span>
    <span><a href="../bs_dqn/" style="color: #fcfcfc;">« Previous</a></span>
    <span style="margin-left: 15px"><a href="../naf/index.html" style="color: #fcfcfc">Next »</a></span>
    <span style="margin-left: 15px"><a href="../naf/" style="color: #fcfcfc">Next »</a></span>
  </span>
</div>
<script>var base_url = '../../..';</script>
<script src="../../../js/theme.js"></script>
<script src="https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_HTML"></script>
<script src="../../../search/require.js"></script>
<script src="../../../search/search.js"></script>
</body>
</html>
@@ -3,33 +3,29 @@
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
<head>
  <meta charset="utf-8">
  <meta http-equiv="X-UA-Compatible" content="IE=edge">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Normalized Advantage Functions - Reinforcement Learning Coach Documentation</title>
  <link rel="shortcut icon" href="../../../img/favicon.ico">
  <title>Normalized Advantage Functions - Reinforcement Learning Coach</title>
  <link href='https://fonts.googleapis.com/css?family=Lato:400,700|Roboto+Slab:400,700|Inconsolata:400,700' rel='stylesheet' type='text/css'>
  <link rel="stylesheet" href="../../../css/theme.css" type="text/css" />
  <link rel="stylesheet" href="../../../css/theme_extra.css" type="text/css" />
  <link rel="stylesheet" href="../../../css/highlight.css">
  <link href="../../../extra.css" rel="stylesheet">
  <script>
    // Current page data
    var mkdocs_page_name = "Normalized Advantage Functions";
    var mkdocs_page_input_path = "algorithms/value_optimization/naf.md";
    var mkdocs_page_url = "/algorithms/value_optimization/naf/";
  </script>
  <script src="../../../js/jquery-2.1.1.min.js"></script>
  <script src="../../../js/modernizr-2.8.3.min.js"></script>
  <script type="text/javascript" src="../../../js/highlight.pack.js"></script>
  <script src="../../../js/theme.js"></script>
  <script src="https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_HTML"></script>
  <script type="text/javascript" src="../../../js/highlight.pack.js"></script>
</head>
@@ -40,7 +36,7 @@
<nav data-toggle="wy-nav-shift" class="wy-nav-side stickynav">
  <div class="wy-side-nav-search">
    <a href="../../.." class="icon icon-home"> Reinforcement Learning Coach Documentation</a>
    <a href="../../.." class="icon icon-home"> Reinforcement Learning Coach</a>
    <div role="search">
      <form id ="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
        <input type="text" name="q" placeholder="Search docs" />
@@ -49,195 +45,150 @@
    </div>
    <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
      [sidebar navigation markup — the same generated page chrome repeated on every page, here with "Normalized Advantage Functions" as the current entry; the regenerated links drop the "index.html" suffix]
    </div>
@@ -249,7 +200,7 @@
<nav class="wy-nav-top" role="navigation" aria-label="top navigation">
  <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
  <a href="../../..">Reinforcement Learning Coach Documentation</a>
  <a href="../../..">Reinforcement Learning Coach</a>
</nav>
@@ -300,10 +251,10 @@ After every training step, use a soft update in order to copy the weights from t
<div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
  <a href="../../policy_optimization/pg/index.html" class="btn btn-neutral float-right" title="Policy Gradient"/>Next <span class="icon icon-circle-arrow-right"></span></a>
  <a href="../../policy_optimization/pg/" class="btn btn-neutral float-right" title="Policy Gradient">Next <span class="icon icon-circle-arrow-right"></span></a>
  <a href="../n_step/index.html" class="btn btn-neutral" title="N-Step Q Learning"><span class="icon icon-circle-arrow-left"></span> Previous</a>
  <a href="../n_step/" class="btn btn-neutral" title="N-Step Q Learning"><span class="icon icon-circle-arrow-left"></span> Previous</a>
</div>
@@ -317,7 +268,7 @@ After every training step, use a soft update in order to copy the weights from t
  Built with <a href="http://www.mkdocs.org">MkDocs</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
@@ -325,17 +276,22 @@ After every training step, use a soft update in order to copy the weights from t
</div>
<div class="rst-versions" role="note" style="cursor: pointer">
  <span class="rst-current-version" data-toggle="rst-current-version">
    <span><a href="../n_step/index.html" style="color: #fcfcfc;">« Previous</a></span>
    <span><a href="../n_step/" style="color: #fcfcfc;">« Previous</a></span>
    <span style="margin-left: 15px"><a href="../../policy_optimization/pg/index.html" style="color: #fcfcfc">Next »</a></span>
    <span style="margin-left: 15px"><a href="../../policy_optimization/pg/" style="color: #fcfcfc">Next »</a></span>
  </span>
</div>
<script>var base_url = '../../..';</script>
<script src="../../../js/theme.js"></script>
<script src="https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_HTML"></script>
<script src="../../../search/require.js"></script>
<script src="../../../search/search.js"></script>
</body>
</html>
@@ -3,33 +3,29 @@
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
<head>
  <meta charset="utf-8">
  <meta http-equiv="X-UA-Compatible" content="IE=edge">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Neural Episodic Control - Reinforcement Learning Coach Documentation</title>
  <link rel="shortcut icon" href="../../../img/favicon.ico">
  <title>Neural Episodic Control - Reinforcement Learning Coach</title>
  <link href='https://fonts.googleapis.com/css?family=Lato:400,700|Roboto+Slab:400,700|Inconsolata:400,700' rel='stylesheet' type='text/css'>
  <link rel="stylesheet" href="../../../css/theme.css" type="text/css" />
  <link rel="stylesheet" href="../../../css/theme_extra.css" type="text/css" />
  <link rel="stylesheet" href="../../../css/highlight.css">
  <link href="../../../extra.css" rel="stylesheet">
  <script>
    // Current page data
    var mkdocs_page_name = "Neural Episodic Control";
    var mkdocs_page_input_path = "algorithms/value_optimization/nec.md";
    var mkdocs_page_url = "/algorithms/value_optimization/nec/";
  </script>
  <script src="../../../js/jquery-2.1.1.min.js"></script>
  <script src="../../../js/modernizr-2.8.3.min.js"></script>
  <script type="text/javascript" src="../../../js/highlight.pack.js"></script>
  <script src="../../../js/theme.js"></script>
  <script src="https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_HTML"></script>
  <script type="text/javascript" src="../../../js/highlight.pack.js"></script>
</head>
@@ -40,7 +36,7 @@
<nav data-toggle="wy-nav-shift" class="wy-nav-side stickynav">
  <div class="wy-side-nav-search">
    <a href="../../.." class="icon icon-home"> Reinforcement Learning Coach Documentation</a>
    <a href="../../.." class="icon icon-home"> Reinforcement Learning Coach</a>
    <div role="search">
      <form id ="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
        <input type="text" name="q" placeholder="Search docs" />
@@ -49,195 +45,150 @@
    </div>
    <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
      [sidebar navigation markup — the same generated page chrome repeated on every page, here with "Neural Episodic Control" as the current entry; the regenerated links drop the "index.html" suffix]
    </div>
@@ -249,7 +200,7 @@
<nav class="wy-nav-top" role="navigation" aria-label="top navigation">
  <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
  <a href="../../..">Reinforcement Learning Coach Documentation</a>
  <a href="../../..">Reinforcement Learning Coach</a>
</nav>
@@ -307,10 +258,10 @@
<div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
  <a href="../bs_dqn/index.html" class="btn btn-neutral float-right" title="Bootstrapped DQN"/>Next <span class="icon icon-circle-arrow-right"></span></a>
  <a href="../bs_dqn/" class="btn btn-neutral float-right" title="Bootstrapped DQN">Next <span class="icon icon-circle-arrow-right"></span></a>
  <a href="../pal/index.html" class="btn btn-neutral" title="Persistent Advantage Learning"><span class="icon icon-circle-arrow-left"></span> Previous</a>
  <a href="../pal/" class="btn btn-neutral" title="Persistent Advantage Learning"><span class="icon icon-circle-arrow-left"></span> Previous</a>
</div>
@@ -324,7 +275,7 @@
  Built with <a href="http://www.mkdocs.org">MkDocs</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
@@ -332,17 +283,22 @@
</div>
<div class="rst-versions" role="note" style="cursor: pointer">
  <span class="rst-current-version" data-toggle="rst-current-version">
    <span><a href="../pal/index.html" style="color: #fcfcfc;">« Previous</a></span>
    <span><a href="../pal/" style="color: #fcfcfc;">« Previous</a></span>
    <span style="margin-left: 15px"><a href="../bs_dqn/index.html" style="color: #fcfcfc">Next »</a></span>
    <span style="margin-left: 15px"><a href="../bs_dqn/" style="color: #fcfcfc">Next »</a></span>
  </span>
</div>
<script>var base_url = '../../..';</script>
<script src="../../../js/theme.js"></script>
<script src="https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_HTML"></script>
<script src="../../../search/require.js"></script>
<script src="../../../search/search.js"></script>
</body>
</html>
@@ -3,33 +3,29 @@
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
<head>
  <meta charset="utf-8">
  <meta http-equiv="X-UA-Compatible" content="IE=edge">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Persistent Advantage Learning - Reinforcement Learning Coach Documentation</title>
  <link rel="shortcut icon" href="../../../img/favicon.ico">
  <title>Persistent Advantage Learning - Reinforcement Learning Coach</title>
  <link href='https://fonts.googleapis.com/css?family=Lato:400,700|Roboto+Slab:400,700|Inconsolata:400,700' rel='stylesheet' type='text/css'>
  <link rel="stylesheet" href="../../../css/theme.css" type="text/css" />
  <link rel="stylesheet" href="../../../css/theme_extra.css" type="text/css" />
  <link rel="stylesheet" href="../../../css/highlight.css">
  <link href="../../../extra.css" rel="stylesheet">
  <script>
    // Current page data
    var mkdocs_page_name = "Persistent Advantage Learning";
    var mkdocs_page_input_path = "algorithms/value_optimization/pal.md";
    var mkdocs_page_url = "/algorithms/value_optimization/pal/";
  </script>
  <script src="../../../js/jquery-2.1.1.min.js"></script>
  <script src="../../../js/modernizr-2.8.3.min.js"></script>
  <script type="text/javascript" src="../../../js/highlight.pack.js"></script>
  <script src="../../../js/theme.js"></script>
  <script src="https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_HTML"></script>
  <script type="text/javascript" src="../../../js/highlight.pack.js"></script>
</head>
@@ -40,7 +36,7 @@
<nav data-toggle="wy-nav-shift" class="wy-nav-side stickynav">
  <div class="wy-side-nav-search">
    <a href="../../.." class="icon icon-home"> Reinforcement Learning Coach Documentation</a>
    <a href="../../.." class="icon icon-home"> Reinforcement Learning Coach</a>
    <div role="search">
      <form id ="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
        <input type="text" name="q" placeholder="Search docs" />
@@ -49,195 +45,150 @@
    </div>
    <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
      [sidebar navigation markup — the same generated page chrome repeated on every page, here with "Persistent Advantage Learning" as the current entry; the regenerated links drop the "index.html" suffix]
    </div>
@@ -249,7 +200,7 @@
<nav class="wy-nav-top" role="navigation" aria-label="top navigation">
  <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
  <a href="../../..">Reinforcement Learning Coach Documentation</a>
  <a href="../../..">Reinforcement Learning Coach</a>
</nav>
@@ -321,10 +272,10 @@
<div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
  <a href="../nec/index.html" class="btn btn-neutral float-right" title="Neural Episodic Control"/>Next <span class="icon icon-circle-arrow-right"></span></a>
  <a href="../nec/" class="btn btn-neutral float-right" title="Neural Episodic Control">Next <span class="icon icon-circle-arrow-right"></span></a>
  <a href="../mmc/index.html" class="btn btn-neutral" title="Mixed Monte Carlo"><span class="icon icon-circle-arrow-left"></span> Previous</a>
  <a href="../mmc/" class="btn btn-neutral" title="Mixed Monte Carlo"><span class="icon icon-circle-arrow-left"></span> Previous</a>
</div>
@@ -338,7 +289,7 @@
  Built with <a href="http://www.mkdocs.org">MkDocs</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
@@ -346,17 +297,22 @@
</div>
<div class="rst-versions" role="note" style="cursor: pointer">
  <span class="rst-current-version" data-toggle="rst-current-version">
    <span><a href="../mmc/index.html" style="color: #fcfcfc;">« Previous</a></span>
    <span><a href="../mmc/" style="color: #fcfcfc;">« Previous</a></span>
    <span style="margin-left: 15px"><a href="../nec/index.html" style="color: #fcfcfc">Next »</a></span>
    <span style="margin-left: 15px"><a href="../nec/" style="color: #fcfcfc">Next »</a></span>
  </span>
</div>
<script>var base_url = '../../..';</script>
<script src="../../../js/theme.js"></script>
<script src="https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_HTML"></script>
<script src="../../../search/require.js"></script>
<script src="../../../search/search.js"></script>
</body>
</html>
@@ -3,33 +3,29 @@
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
<head>
  <meta charset="utf-8">
  <meta http-equiv="X-UA-Compatible" content="IE=edge">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Adding a New Agent - Reinforcement Learning Coach Documentation</title>
  <link rel="shortcut icon" href="../../img/favicon.ico">
  <title>Adding a New Agent - Reinforcement Learning Coach</title>
  <link href='https://fonts.googleapis.com/css?family=Lato:400,700|Roboto+Slab:400,700|Inconsolata:400,700' rel='stylesheet' type='text/css'>
  <link rel="stylesheet" href="../../css/theme.css" type="text/css" />
  <link rel="stylesheet" href="../../css/theme_extra.css" type="text/css" />
  <link rel="stylesheet" href="../../css/highlight.css">
  <link href="../../extra.css" rel="stylesheet">
  <script>
    // Current page data
    var mkdocs_page_name = "Adding a New Agent";
    var mkdocs_page_input_path = "contributing/add_agent.md";
    var mkdocs_page_url = "/contributing/add_agent/";
  </script>
  <script src="../../js/jquery-2.1.1.min.js"></script>
  <script src="../../js/modernizr-2.8.3.min.js"></script>
  <script type="text/javascript" src="../../js/highlight.pack.js"></script>
  <script src="../../js/theme.js"></script>
  <script src="https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_HTML"></script>
  <script type="text/javascript" src="../../js/highlight.pack.js"></script>
</head>
@@ -40,7 +36,7 @@
<nav data-toggle="wy-nav-shift" class="wy-nav-side stickynav">
  <div class="wy-side-nav-search">
    <a href="../.." class="icon icon-home"> Reinforcement Learning Coach Documentation</a>
    <a href="../.." class="icon icon-home"> Reinforcement Learning Coach</a>
    <div role="search">
      <form id ="rtd-search-form" class="wy-form" action="../../search.html" method="get">
        <input type="text" name="q" placeholder="Search docs" />
@@ -49,188 +45,139 @@
    </div>
    <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
      [sidebar navigation markup — the same generated page chrome, with links prefixed "../../algorithms/..."; the source breaks off partway through this list]
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../../algorithms/value_optimization/bs_dqn/index.html">Bootstrapped DQN</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../../algorithms/value_optimization/n_step/index.html">N-Step Q Learning</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../../algorithms/value_optimization/naf/index.html">Normalized Advantage Functions</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../../algorithms/policy_optimization/pg/index.html">Policy Gradient</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../../algorithms/policy_optimization/ac/index.html">Actor-Critic</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../../algorithms/policy_optimization/ddpg/index.html">Deep Determinstic Policy Gradients</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../../algorithms/policy_optimization/ppo/index.html">Proximal Policy Optimization</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../../algorithms/policy_optimization/cppo/index.html">Clipped Proximal Policy Optimization</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../../algorithms/other/dfp/index.html">Direct Future Prediction</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../../algorithms/imitation/bc/index.html">Behavioral Cloning</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../design/features/">Features</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../design/control_flow/">Control Flow</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../design/network/">Network</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../design/filters/">Filters</a>
|
||||
</li>
|
||||
</ul>
|
||||
<li>
|
||||
</li>
|
||||
|
||||
<li>
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../../dashboard/index.html">Coach Dashboard</a>
|
||||
|
||||
</li>
|
||||
<li>
|
||||
|
||||
<li>
|
||||
<li class="toctree-l1">
|
||||
|
||||
<span class="caption-text">Algorithms</span>
|
||||
<ul class="subnav">
|
||||
<li><span>Contributing</span></li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 current">
|
||||
<a class="current" href="./index.html">Adding a New Agent</a>
|
||||
|
||||
<ul>
|
||||
|
||||
</ul>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../add_env/index.html">Adding a New Environment</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../algorithms/value_optimization/dqn/">DQN</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../algorithms/value_optimization/double_dqn/">Double DQN</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../algorithms/value_optimization/dueling_dqn/">Dueling DQN</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../algorithms/value_optimization/categorical_dqn/">Categorical DQN</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../algorithms/value_optimization/mmc/">Mixed Monte Carlo</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../algorithms/value_optimization/pal/">Persistent Advantage Learning</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../algorithms/value_optimization/nec/">Neural Episodic Control</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../algorithms/value_optimization/bs_dqn/">Bootstrapped DQN</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../algorithms/value_optimization/n_step/">N-Step Q Learning</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../algorithms/value_optimization/naf/">Normalized Advantage Functions</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../algorithms/policy_optimization/pg/">Policy Gradient</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../algorithms/policy_optimization/ac/">Actor-Critic</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../algorithms/policy_optimization/ddpg/">Deep Determinstic Policy Gradients</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../algorithms/policy_optimization/ppo/">Proximal Policy Optimization</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../algorithms/policy_optimization/cppo/">Clipped Proximal Policy Optimization</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../algorithms/other/dfp/">Direct Future Prediction</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../algorithms/imitation/bc/">Behavioral Cloning</a>
|
||||
</li>
|
||||
</ul>
|
||||
<li>
|
||||
</li>
|
||||
|
||||
<li class="toctree-l1">
|
||||
|
||||
<a class="" href="../../dashboard/">Coach Dashboard</a>
|
||||
</li>
|
||||
|
||||
<li class="toctree-l1">
|
||||
|
||||
<span class="caption-text">Contributing</span>
|
||||
<ul class="subnav">
|
||||
<li class=" current">
|
||||
|
||||
<a class="current" href="./">Adding a New Agent</a>
|
||||
<ul class="subnav">
|
||||
|
||||
</ul>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../add_env/">Adding a New Environment</a>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</div>
|
||||
@@ -242,7 +189,7 @@
|
||||
|
||||
<nav class="wy-nav-top" role="navigation" aria-label="top navigation">
|
||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||||
<a href="../..">Reinforcement Learning Coach Documentation</a>
|
||||
<a href="../..">Reinforcement Learning Coach</a>
|
||||
</nav>
|
||||
|
||||
|
||||
@@ -273,42 +220,72 @@
|
||||
<p>Coach's modularity makes adding an agent a simple and clean task that involves the following steps:</p>
|
||||
<ol>
|
||||
<li>
|
||||
<p>Implement your algorithm in a new file under the agents directory. The agent can inherit base classes such as <strong>ValueOptimizationAgent</strong> or <strong>ActorCriticAgent</strong>, or the more generic <strong>Agent</strong> base class.</p>
|
||||
<p>Implement your algorithm in a new file. The agent can inherit base classes such as <strong>ValueOptimizationAgent</strong> or
|
||||
<strong>ActorCriticAgent</strong>, or the more generic <strong>Agent</strong> base class.</p>
|
||||
<ul>
|
||||
<li>
|
||||
<p><strong>ValueOptimizationAgent</strong>, <strong>PolicyOptimizationAgent</strong> and <strong>Agent</strong> are abstract classes.
|
||||
learn_from_batch() should be overridden with the desired behavior for the algorithm being implemented. If inheriting from <strong>Agent</strong>, choose_action() should also be overridden.</p>
|
||||
<pre><code>def learn_from_batch(self, batch):
|
||||
<li><strong>ValueOptimizationAgent</strong>, <strong>PolicyOptimizationAgent</strong> and <strong>Agent</strong> are abstract classes.
|
||||
learn_from_batch() should be overridden with the desired behavior for the algorithm being implemented.
|
||||
If inheriting from <strong>Agent</strong>, choose_action() should also be overridden.<pre><code>def learn_from_batch(self, batch) -> Tuple[float, List, List]:
|
||||
"""
|
||||
Given a batch of transitions, calculates their target values and updates the network.
|
||||
:param batch: A list of transitions
|
||||
:return: The loss of the training
|
||||
:return: The total loss of the training, the loss per head and the unclipped gradients
|
||||
"""
|
||||
pass
|
||||
|
||||
def choose_action(self, curr_state, phase=RunPhase.TRAIN):
|
||||
def choose_action(self, curr_state):
|
||||
"""
|
||||
Choose an action to act with in the current episode being played. Different behavior might be exhibited when training
|
||||
or testing.
|
||||
|
||||
:param curr_state: the current state to act upon.
|
||||
:param phase: the current phase: training or testing.
|
||||
:param curr_state: the current state to act upon.
|
||||
:return: chosen action, some action value describing the action (q-value, probability, etc)
|
||||
"""
|
||||
pass
|
||||
</code></pre>
|
||||
</li>
|
||||
<li>
|
||||
<p>Make sure to add your new agent to <strong>agents/__init__.py</strong> (see the sketch below).</p>
|
||||
</li>
|
||||
</ul>
|
||||
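<p>To make these overrides concrete, here is a minimal sketch of a new agent file. The module and class names (<strong>my_agent.py</strong>, <strong>MyAgent</strong>) are hypothetical, and the method body is a placeholder rather than a working algorithm; only the signatures follow the API shown above.</p>
<pre><code># agents/my_agent.py -- hypothetical file name and import path
from agents.value_optimization_agent import ValueOptimizationAgent


class MyAgent(ValueOptimizationAgent):
    def learn_from_batch(self, batch):
        # compute target values for the sampled transitions, run a
        # training step on the network, and report the training loss
        total_loss, losses, unclipped_grads = 0.0, [], []  # placeholders
        return total_loss, losses, unclipped_grads
</code></pre>
<p>The matching registration line in <strong>agents/__init__.py</strong> would then be something like:</p>
<pre><code>from agents.my_agent import *
</code></pre>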
</li>
|
||||
<li>
|
||||
<p>Implement your agent's specific network head, if needed, in the implementation for the framework of your choice. For example, <strong>architectures/neon_components/heads.py</strong>. The head will inherit the generic base class Head.
|
||||
A new output type should be added to configurations.py, and a mapping between the new head and output type should be defined in the get_output_head() function at <strong>architectures/neon_components/general_network.py</strong></p>
|
||||
<p>Implement your agent's specific network head, if needed, in the implementation for the framework of your choice.
|
||||
For example, <strong>architectures/neon_components/heads.py</strong>. The head will inherit the generic base class Head.
|
||||
A new output type should be added to configurations.py, and a mapping between the new head and output type should
|
||||
be defined in the get_output_head() function at <strong>architectures/neon_components/general_network.py</strong></p>
|
||||
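<p>Loosely sketched, such a head could look like the following; the class name is hypothetical, and the real constructor arguments and layers depend on the Head interface in <strong>architectures/neon_components/heads.py</strong>:</p>
<pre><code>class MyHead(Head):  # hypothetical head for the new agent's output
    def __init__(self):
        super().__init__()
        # build the output layers specific to the new algorithm here
</code></pre>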
</li>
|
||||
<li>
|
||||
<p>Define a new parameters class that inherits AgentParameters.
|
||||
The parameters class defines all the hyperparameters for the agent, and is initialized with 4 main components:</p>
|
||||
<ul>
|
||||
<li><strong>algorithm</strong>: A class inheriting AlgorithmParameters which defines any algorithm-specific parameters</li>
|
||||
<li><strong>exploration</strong>: A class inheriting ExplorationParameters which defines the exploration policy parameters.
|
||||
There are several common built-in exploration policies which you can use, defined under
|
||||
the exploration subdirectory. You can also define your own custom exploration policy.</li>
|
||||
<li><strong>memory</strong>: A class inheriting MemoryParameters which defines the memory parameters.
|
||||
There are several common built-in memory types which you can use, defined under the memories
|
||||
subdirectory. You can also define your own custom memory.</li>
|
||||
<li><strong>networks</strong>: A dictionary defining all the networks that will be used by the agent. The keys of the dictionary
|
||||
define the network name and will be used to access each network through the agent class.
|
||||
The dictionary values are a class inheriting NetworkParameters, which define the network structure
|
||||
and parameters.</li>
|
||||
</ul>
|
||||
<p>Additionally, set the path property to return the path to your agent class in the following format:</p>
|
||||
<pre><code> <path to python module>:<name of agent class>
|
||||
</code></pre>
|
||||
<p>For example,</p>
|
||||
<pre><code> class RainbowAgentParameters(AgentParameters):
|
||||
def __init__(self):
|
||||
super().__init__(algorithm=RainbowAlgorithmParameters(),
|
||||
exploration=RainbowExplorationParameters(),
|
||||
memory=RainbowMemoryParameters(),
|
||||
networks={"main": RainbowNetworkParameters()})
|
||||
|
||||
@property
|
||||
def path(self):
|
||||
return 'rainbow.rainbow_agent:RainbowAgent'
|
||||
</code></pre>
|
||||
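<p>Keeping the path as a plain string presumably lets Coach locate and import the agent class on demand, without every agent module being loaded up front.</p>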
</li>
|
||||
<li>
|
||||
<p>(Optional) Define a preset using the new agent type with a given environment, and the hyperparameters that should
|
||||
be used for training on that environment.</p>
|
||||
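<p>Loosely sketched, reusing the hypothetical RainbowAgentParameters from above and the environment-parameter style shown in "Adding a New Environment" (the file name, hyperparameter, and level are made up):</p>
<pre><code># presets/my_preset.py -- hypothetical
agent_params = RainbowAgentParameters()
agent_params.algorithm.discount = 0.99  # hypothetical hyperparameter override

env_params = GymEnvironmentParameters()
env_params.level = "CartPole-v0"
</code></pre>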
</li>
|
||||
<li>Define a new configuration class at configurations.py, which includes the new agent name in the <strong>type</strong> field, the new output type in the <strong>output_types</strong> field, and assigning default values to hyperparameters.</li>
|
||||
<li>(Optional) Define a preset using the new agent type with a given environment, and the hyperparameters that should be used for training on that environment.</li>
|
||||
</ol>
|
||||
|
||||
</div>
|
||||
@@ -317,10 +294,10 @@ def choose_action(self, curr_state, phase=RunPhase.TRAIN):
|
||||
|
||||
<div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
|
||||
|
||||
<a href="../add_env/index.html" class="btn btn-neutral float-right" title="Adding a New Environment"/>Next <span class="icon icon-circle-arrow-right"></span></a>
|
||||
<a href="../add_env/" class="btn btn-neutral float-right" title="Adding a New Environment">Next <span class="icon icon-circle-arrow-right"></span></a>
|
||||
|
||||
|
||||
<a href="../../dashboard/index.html" class="btn btn-neutral" title="Coach Dashboard"><span class="icon icon-circle-arrow-left"></span> Previous</a>
|
||||
<a href="../../dashboard/" class="btn btn-neutral" title="Coach Dashboard"><span class="icon icon-circle-arrow-left"></span> Previous</a>
|
||||
|
||||
</div>
|
||||
|
||||
@@ -334,7 +311,7 @@ def choose_action(self, curr_state, phase=RunPhase.TRAIN):
|
||||
|
||||
Built with <a href="http://www.mkdocs.org">MkDocs</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||||
</footer>
|
||||
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -342,17 +319,22 @@ def choose_action(self, curr_state, phase=RunPhase.TRAIN):
|
||||
|
||||
</div>
|
||||
|
||||
<div class="rst-versions" role="note" style="cursor: pointer">
|
||||
<div class="rst-versions" role="note" style="cursor: pointer">
|
||||
<span class="rst-current-version" data-toggle="rst-current-version">
|
||||
|
||||
|
||||
<span><a href="../../dashboard/index.html" style="color: #fcfcfc;">« Previous</a></span>
|
||||
<span><a href="../../dashboard/" style="color: #fcfcfc;">« Previous</a></span>
|
||||
|
||||
|
||||
<span style="margin-left: 15px"><a href="../add_env/index.html" style="color: #fcfcfc">Next »</a></span>
|
||||
<span style="margin-left: 15px"><a href="../add_env/" style="color: #fcfcfc">Next »</a></span>
|
||||
|
||||
</span>
|
||||
</div>
|
||||
<script>var base_url = '../..';</script>
|
||||
<script src="../../js/theme.js"></script>
|
||||
<script src="https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_HTML"></script>
|
||||
<script src="../../search/require.js"></script>
|
||||
<script src="../../search/search.js"></script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
||||
@@ -3,33 +3,29 @@
|
||||
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<meta http-equiv="X-UA-Compatible" content="IE=edge">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>Adding a New Environment - Reinforcement Learning Coach Documentation</title>
|
||||
|
||||
|
||||
<link rel="shortcut icon" href="../../img/favicon.ico">
|
||||
|
||||
|
||||
<title>Adding a New Environment - Reinforcement Learning Coach</title>
|
||||
<link href='https://fonts.googleapis.com/css?family=Lato:400,700|Roboto+Slab:400,700|Inconsolata:400,700' rel='stylesheet' type='text/css'>
|
||||
|
||||
<link rel="stylesheet" href="../../css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../../css/theme_extra.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../../css/highlight.css">
|
||||
<link href="../../extra.css" rel="stylesheet">
|
||||
|
||||
|
||||
<script>
|
||||
// Current page data
|
||||
var mkdocs_page_name = "Adding a New Environment";
|
||||
var mkdocs_page_input_path = "contributing/add_env.md";
|
||||
var mkdocs_page_url = "/contributing/add_env/";
|
||||
</script>
|
||||
|
||||
<script src="../../js/jquery-2.1.1.min.js"></script>
|
||||
<script src="../../js/modernizr-2.8.3.min.js"></script>
|
||||
<script type="text/javascript" src="../../js/highlight.pack.js"></script>
|
||||
<script src="../../js/theme.js"></script>
|
||||
<script src="https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_HTML"></script>
|
||||
|
||||
<script type="text/javascript" src="../../js/highlight.pack.js"></script>
|
||||
|
||||
</head>
|
||||
|
||||
@@ -40,7 +36,7 @@
|
||||
|
||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side stickynav">
|
||||
<div class="wy-side-nav-search">
|
||||
<a href="../.." class="icon icon-home"> Reinforcement Learning Coach Documentation</a>
|
||||
<a href="../.." class="icon icon-home"> Reinforcement Learning Coach</a>
|
||||
<div role="search">
|
||||
<form id ="rtd-search-form" class="wy-form" action="../../search.html" method="get">
|
||||
<input type="text" name="q" placeholder="Search docs" />
|
||||
@@ -49,188 +45,145 @@
|
||||
</div>
|
||||
|
||||
<div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
|
||||
<ul class="current">
|
||||
<ul class="current">
|
||||
|
||||
|
||||
<li>
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../..">Home</a>
|
||||
|
||||
</li>
|
||||
<li>
|
||||
<li class="toctree-l1">
|
||||
|
||||
<a class="" href="../..">Home</a>
|
||||
</li>
|
||||
|
||||
<li>
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../../design/index.html">Design</a>
|
||||
|
||||
</li>
|
||||
<li>
|
||||
<li class="toctree-l1">
|
||||
|
||||
<a class="" href="../../usage/">Usage</a>
|
||||
</li>
|
||||
|
||||
<li>
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../../usage/index.html">Usage</a>
|
||||
|
||||
</li>
|
||||
<li>
|
||||
|
||||
<li>
|
||||
<li class="toctree-l1">
|
||||
|
||||
<span class="caption-text">Design</span>
|
||||
<ul class="subnav">
|
||||
<li><span>Algorithms</span></li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../../algorithms/value_optimization/dqn/index.html">DQN</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../../algorithms/value_optimization/double_dqn/index.html">Double DQN</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../../algorithms/value_optimization/dueling_dqn/index.html">Dueling DQN</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../../algorithms/value_optimization/categorical_dqn/index.html">Categorical DQN</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../../algorithms/value_optimization/mmc/index.html">Mixed Monte Carlo</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../../algorithms/value_optimization/pal/index.html">Persistent Advantage Learning</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../../algorithms/value_optimization/nec/index.html">Neural Episodic Control</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../../algorithms/value_optimization/bs_dqn/index.html">Bootstrapped DQN</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../../algorithms/value_optimization/n_step/index.html">N-Step Q Learning</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../../algorithms/value_optimization/naf/index.html">Normalized Advantage Functions</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../../algorithms/policy_optimization/pg/index.html">Policy Gradient</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../../algorithms/policy_optimization/ac/index.html">Actor-Critic</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../../algorithms/policy_optimization/ddpg/index.html">Deep Determinstic Policy Gradients</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../../algorithms/policy_optimization/ppo/index.html">Proximal Policy Optimization</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../../algorithms/policy_optimization/cppo/index.html">Clipped Proximal Policy Optimization</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../../algorithms/other/dfp/index.html">Direct Future Prediction</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../../algorithms/imitation/bc/index.html">Behavioral Cloning</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../design/features/">Features</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../design/control_flow/">Control Flow</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../design/network/">Network</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../design/filters/">Filters</a>
|
||||
</li>
|
||||
</ul>
|
||||
<li>
|
||||
</li>
|
||||
|
||||
<li>
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../../dashboard/index.html">Coach Dashboard</a>
|
||||
|
||||
</li>
|
||||
<li>
|
||||
|
||||
<li>
|
||||
<li class="toctree-l1">
|
||||
|
||||
<span class="caption-text">Algorithms</span>
|
||||
<ul class="subnav">
|
||||
<li><span>Contributing</span></li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../add_agent/index.html">Adding a New Agent</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 current">
|
||||
<a class="current" href="./index.html">Adding a New Environment</a>
|
||||
|
||||
<ul>
|
||||
|
||||
</ul>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../algorithms/value_optimization/dqn/">DQN</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../algorithms/value_optimization/double_dqn/">Double DQN</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../algorithms/value_optimization/dueling_dqn/">Dueling DQN</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../algorithms/value_optimization/categorical_dqn/">Categorical DQN</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../algorithms/value_optimization/mmc/">Mixed Monte Carlo</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../algorithms/value_optimization/pal/">Persistent Advantage Learning</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../algorithms/value_optimization/nec/">Neural Episodic Control</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../algorithms/value_optimization/bs_dqn/">Bootstrapped DQN</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../algorithms/value_optimization/n_step/">N-Step Q Learning</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../algorithms/value_optimization/naf/">Normalized Advantage Functions</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../algorithms/policy_optimization/pg/">Policy Gradient</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../algorithms/policy_optimization/ac/">Actor-Critic</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../algorithms/policy_optimization/ddpg/">Deep Determinstic Policy Gradients</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../algorithms/policy_optimization/ppo/">Proximal Policy Optimization</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../algorithms/policy_optimization/cppo/">Clipped Proximal Policy Optimization</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../algorithms/other/dfp/">Direct Future Prediction</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../algorithms/imitation/bc/">Behavioral Cloning</a>
|
||||
</li>
|
||||
</ul>
|
||||
<li>
|
||||
</li>
|
||||
|
||||
<li class="toctree-l1">
|
||||
|
||||
<a class="" href="../../dashboard/">Coach Dashboard</a>
|
||||
</li>
|
||||
|
||||
<li class="toctree-l1">
|
||||
|
||||
<span class="caption-text">Contributing</span>
|
||||
<ul class="subnav">
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../add_agent/">Adding a New Agent</a>
|
||||
</li>
|
||||
<li class=" current">
|
||||
|
||||
<a class="current" href="./">Adding a New Environment</a>
|
||||
<ul class="subnav">
|
||||
|
||||
<li class="toctree-l3"><a href="#using-the-openai-gym-api">Using the OpenAI Gym API</a></li>
|
||||
|
||||
|
||||
<li class="toctree-l3"><a href="#using-the-coach-api">Using the Coach API</a></li>
|
||||
|
||||
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</div>
|
||||
@@ -242,7 +195,7 @@
|
||||
|
||||
<nav class="wy-nav-top" role="navigation" aria-label="top navigation">
|
||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||||
<a href="../..">Reinforcement Learning Coach Documentation</a>
|
||||
<a href="../..">Reinforcement Learning Coach</a>
|
||||
</nav>
|
||||
|
||||
|
||||
@@ -269,74 +222,81 @@
|
||||
<div class="section">
|
||||
|
||||
<p>Adding a new environment to Coach is as easy as solving CartPole. </p>
|
||||
<p>There are essentially two ways to integrate new environments into Coach:</p>
|
||||
<h2 id="using-the-openai-gym-api">Using the OpenAI Gym API</h2>
|
||||
<p>If your environment already uses the OpenAI Gym API, you are good to go.
|
||||
When selecting the environment parameters in the preset, use GymEnvironmentParameters(),
|
||||
and pass the path to your environment source code using the level parameter.
|
||||
You can specify additional parameters for your environment using the additional_simulator_parameters parameter.
|
||||
Take for example the definition used in the Pendulum_HAC preset:</p>
|
||||
<pre><code> env_params = GymEnvironmentParameters()
|
||||
env_params.level = "rl_coach.environments.mujoco.pendulum_with_goals:PendulumWithGoals"
|
||||
env_params.additional_simulator_parameters = {"time_limit": 1000}
|
||||
</code></pre>
|
||||
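<p>With the parameters set this way, training should then run as with any built-in preset, e.g. something like <code>coach -p Pendulum_HAC</code> from the command line.</p>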
<h2 id="using-the-coach-api">Using the Coach API</h2>
|
||||
<p>There are a few simple steps to follow, and we will walk through them one by one.</p>
|
||||
<ol>
|
||||
<li>
|
||||
<p>Coach defines a simple API for implementing a new environment, defined in environment/environment_wrapper.py.
|
||||
There are several functions to implement, but only some of them are mandatory. </p>
|
||||
<p>Create a new class for your environment, and inherit the Environment class.</p>
|
||||
</li>
|
||||
<li>
|
||||
<p>Coach defines a simple API for implementing a new environment, which is defined in environment/environment.py.
|
||||
There are several functions to implement, but only some of them are mandatory.</p>
|
||||
<p>Here are the important ones:</p>
|
||||
<pre><code> def _take_action(self, action_idx):
|
||||
<pre><code> def _take_action(self, action_idx: ActionType) -> None:
|
||||
"""
|
||||
An environment-dependent function that sends an action to the simulator.
|
||||
:param action_idx: the action to perform on the environment.
|
||||
:param action_idx: the action to perform on the environment
|
||||
:return: None
|
||||
"""
|
||||
pass
|
||||
|
||||
def _preprocess_observation(self, observation):
|
||||
"""
|
||||
Do initial observation preprocessing such as cropping, rgb2gray, rescale etc.
|
||||
Implementing this function is optional.
|
||||
:param observation: a raw observation from the environment
|
||||
:return: the preprocessed observation
|
||||
"""
|
||||
return observation
|
||||
|
||||
def _update_state(self):
|
||||
def _update_state(self) -> None:
|
||||
"""
|
||||
Updates the state from the environment.
|
||||
Should update self.observation, self.reward, self.done, self.measurements and self.info
|
||||
:return: None
|
||||
"""
|
||||
pass
|
||||
|
||||
def _restart_environment_episode(self, force_environment_reset=False):
|
||||
def _restart_environment_episode(self, force_environment_reset=False) -> None:
|
||||
"""
|
||||
Restarts the simulator episode
|
||||
:param force_environment_reset: Force the environment to reset even if the episode is not done yet.
|
||||
:return:
|
||||
:return: None
|
||||
"""
|
||||
pass
|
||||
|
||||
def get_rendered_image(self):
|
||||
def _render(self) -> None:
|
||||
"""
|
||||
Renders the environment using the native simulator renderer
|
||||
:return: None
|
||||
"""
|
||||
|
||||
def get_rendered_image(self) -> np.ndarray:
|
||||
"""
|
||||
Return a numpy array containing the image that will be rendered to the screen.
|
||||
This can be different from the observation. For example, mujoco's observation is a measurements vector.
|
||||
:return: numpy array containing the image that will be rendered to the screen
|
||||
"""
|
||||
return self.observation
|
||||
</code></pre>
|
||||
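<p>Tying the mandatory pieces together, a skeleton environment might look like this. The class name and every <code>self.sim.*</code> call are hypothetical stand-ins for your simulator's own API:</p>
<pre><code>class MySimEnvironment(Environment):  # hypothetical environment class
    def _take_action(self, action_idx):
        self.sim.act(action_idx)  # hypothetical simulator call

    def _update_state(self):
        # update the fields the rest of Coach reads after every step
        self.observation = self.sim.get_observation()  # hypothetical
        self.reward = self.sim.get_reward()            # hypothetical
        self.done = self.sim.is_episode_over()         # hypothetical

    def _restart_environment_episode(self, force_environment_reset=False):
        self.sim.reset()  # hypothetical
</code></pre>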
</li>
|
||||
<li>
|
||||
<p>Make sure to import the environment in environments/__init__.py:</p>
|
||||
<pre><code>from doom_environment_wrapper import *
|
||||
</code></pre>
|
||||
<p>Also, a new entry should be added to the EnvTypes enum mapping the environment name to the wrapper's class name:</p>
|
||||
<pre><code>Doom = "DoomEnvironmentWrapper"
|
||||
<p>Create a new parameters class for your environment, which inherits the EnvironmentParameters class.
|
||||
In the <strong>init</strong> of your class, define all the parameters you used in your Environment class.
|
||||
Additionally, fill the path property of the class with the path to your Environment class.
|
||||
For example, take a look at the EnvironmentParameters class used for Doom:</p>
|
||||
<pre><code> class DoomEnvironmentParameters(EnvironmentParameters):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.default_input_filter = DoomInputFilter
|
||||
self.default_output_filter = DoomOutputFilter
|
||||
self.cameras = [DoomEnvironment.CameraTypes.OBSERVATION]
|
||||
|
||||
@property
|
||||
def path(self):
|
||||
return 'rl_coach.environments.doom_environment:DoomEnvironment'
|
||||
</code></pre>
|
||||
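<p>As with agents, the <code>path</code> property follows the same module-path:class-name convention described in "Adding a New Agent".</p>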
</li>
|
||||
<li>
|
||||
<p>In addition, a new configuration class should be implemented for defining the environment's parameters, and placed in configurations.py.
|
||||
For instance, the following is used for Doom:</p>
|
||||
<pre><code>class Doom(EnvironmentParameters):
|
||||
type = 'Doom'
|
||||
frame_skip = 4
|
||||
observation_stack_size = 3
|
||||
desired_observation_height = 60
|
||||
desired_observation_width = 76
|
||||
</code></pre>
|
||||
</li>
|
||||
<li>
|
||||
<p>And that's it, you're done. Now just add a new preset with your newly created environment, and start training an agent on top of it. </p>
|
||||
<p>And that's it, you're done. Now just add a new preset with your newly created environment, and start training an agent on top of it.</p>
|
||||
</li>
|
||||
</ol>
|
||||
|
||||
@@ -347,7 +307,7 @@ For instance, the following is used for Doom:</p>
|
||||
<div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
|
||||
|
||||
|
||||
<a href="../add_agent/index.html" class="btn btn-neutral" title="Adding a New Agent"><span class="icon icon-circle-arrow-left"></span> Previous</a>
|
||||
<a href="../add_agent/" class="btn btn-neutral" title="Adding a New Agent"><span class="icon icon-circle-arrow-left"></span> Previous</a>
|
||||
|
||||
</div>
|
||||
|
||||
@@ -361,7 +321,7 @@ For instance, the following is used for Doom:</p>
|
||||
|
||||
Built with <a href="http://www.mkdocs.org">MkDocs</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||||
</footer>
|
||||
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -369,15 +329,20 @@ For instance, the following is used for Doom:</p>
|
||||
|
||||
</div>
|
||||
|
||||
<div class="rst-versions" role="note" style="cursor: pointer">
|
||||
<div class="rst-versions" role="note" style="cursor: pointer">
|
||||
<span class="rst-current-version" data-toggle="rst-current-version">
|
||||
|
||||
|
||||
<span><a href="../add_agent/index.html" style="color: #fcfcfc;">« Previous</a></span>
|
||||
<span><a href="../add_agent/" style="color: #fcfcfc;">« Previous</a></span>
|
||||
|
||||
|
||||
</span>
|
||||
</div>
|
||||
<script>var base_url = '../..';</script>
|
||||
<script src="../../js/theme.js"></script>
|
||||
<script src="https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_HTML"></script>
|
||||
<script src="../../search/require.js"></script>
|
||||
<script src="../../search/search.js"></script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
||||
@@ -8,7 +8,6 @@ github.com style (c) Vasily Polovnyov <vast@whiteants.net>
|
||||
.hljs {
|
||||
display: block;
|
||||
overflow-x: auto;
|
||||
padding: 0.5em;
|
||||
color: #333;
|
||||
-webkit-text-size-adjust: none;
|
||||
}
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
* theme. To aid upgradability this file should *not* be edited.
|
||||
* modifications we need should be included in theme_extra.css.
|
||||
*
|
||||
* https://github.com/rtfd/readthedocs.org/blob/master/media/css/sphinx_rtd_theme.css
|
||||
* https://github.com/rtfd/readthedocs.org/blob/master/readthedocs/core/static/core/css/theme.css
|
||||
*/
|
||||
|
||||
*{-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box}article,aside,details,figcaption,figure,footer,header,hgroup,nav,section{display:block}audio,canvas,video{display:inline-block;*display:inline;*zoom:1}audio:not([controls]){display:none}[hidden]{display:none}*{-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box}html{font-size:100%;-webkit-text-size-adjust:100%;-ms-text-size-adjust:100%}body{margin:0}a:hover,a:active{outline:0}abbr[title]{border-bottom:1px dotted}b,strong{font-weight:bold}blockquote{margin:0}dfn{font-style:italic}ins{background:#ff9;color:#000;text-decoration:none}mark{background:#ff0;color:#000;font-style:italic;font-weight:bold}pre,code,.rst-content tt,kbd,samp{font-family:monospace,serif;_font-family:"courier new",monospace;font-size:1em}pre{white-space:pre}q{quotes:none}q:before,q:after{content:"";content:none}small{font-size:85%}sub,sup{font-size:75%;line-height:0;position:relative;vertical-align:baseline}sup{top:-0.5em}sub{bottom:-0.25em}ul,ol,dl{margin:0;padding:0;list-style:none;list-style-image:none}li{list-style:none}dd{margin:0}img{border:0;-ms-interpolation-mode:bicubic;vertical-align:middle;max-width:100%}svg:not(:root){overflow:hidden}figure{margin:0}form{margin:0}fieldset{border:0;margin:0;padding:0}label{cursor:pointer}legend{border:0;*margin-left:-7px;padding:0;white-space:normal}button,input,select,textarea{font-size:100%;margin:0;vertical-align:baseline;*vertical-align:middle}button,input{line-height:normal}button,input[type="button"],input[type="reset"],input[type="submit"]{cursor:pointer;-webkit-appearance:button;*overflow:visible}button[disabled],input[disabled]{cursor:default}input[type="checkbox"],input[type="radio"]{box-sizing:border-box;padding:0;*width:13px;*height:13px}input[type="search"]{-webkit-appearance:textfield;-moz-box-sizing:content-box;-webkit-box-sizing:content-box;box-sizing:content-box}input[type="search"]::-webkit-search-decoration,input[type="search"]::-webkit-search-cancel-button{-webkit-appearance:none}button::-moz-focus-inner,input::-moz-focus-inner{border:0;padding:0}textarea{overflow:auto;vertical-align:top;resize:vertical}table{border-collapse:collapse;border-spacing:0}td{vertical-align:top}.chromeframe{margin:0.2em 0;background:#ccc;color:#000;padding:0.2em 0}.ir{display:block;border:0;text-indent:-999em;overflow:hidden;background-color:transparent;background-repeat:no-repeat;text-align:left;direction:ltr;*line-height:0}.ir br{display:none}.hidden{display:none !important;visibility:hidden}.visuallyhidden{border:0;clip:rect(0 0 0 0);height:1px;margin:-1px;overflow:hidden;padding:0;position:absolute;width:1px}.visuallyhidden.focusable:active,.visuallyhidden.focusable:focus{clip:auto;height:auto;margin:0;overflow:visible;position:static;width:auto}.invisible{visibility:hidden}.relative{position:relative}big,small{font-size:100%}@media print{html,body,section{background:none !important}*{box-shadow:none !important;text-shadow:none !important;filter:none !important;-ms-filter:none !important}a,a:visited{text-decoration:underline}.ir a:after,a[href^="javascript:"]:after,a[href^="#"]:after{content:""}pre,blockquote{page-break-inside:avoid}thead{display:table-header-group}tr,img{page-break-inside:avoid}img{max-width:100% !important}@page{margin:0.5cm}p,h2,h3{orphans:3;widows:3}h2,h3{page-break-after:avoid}}.fa:before,.rst-content .admonition-title:before,.rst-content h1 .headerlink:before,.rst-content h2 .headerlink:before,.rst-content h3 .headerlink:before,.rst-content h4 
.headerlink:before,.rst-content h5 .headerlink:before,.rst-content h6 .headerlink:before,.rst-content dl dt .headerlink:before,.icon:before,.wy-dropdown .caret:before,.wy-inline-validate.wy-inline-validate-success .wy-input-context:before,.wy-inline-validate.wy-inline-validate-danger .wy-input-context:before,.wy-inline-validate.wy-inline-validate-warning .wy-input-context:before,.wy-inline-validate.wy-inline-validate-info .wy-input-context:before,.wy-alert,.rst-content .note,.rst-content .attention,.rst-content .caution,.rst-content .danger,.rst-content .error,.rst-content .hint,.rst-content .important,.rst-content .tip,.rst-content .warning,.rst-content .seealso,.rst-content .admonition-todo,.btn,input[type="text"],input[type="password"],input[type="email"],input[type="url"],input[type="date"],input[type="month"],input[type="time"],input[type="datetime"],input[type="datetime-local"],input[type="week"],input[type="number"],input[type="search"],input[type="tel"],input[type="color"],select,textarea,.wy-menu-vertical li.on a,.wy-menu-vertical li.current>a,.wy-side-nav-search>a,.wy-side-nav-search .wy-dropdown>a,.wy-nav-top a{-webkit-font-smoothing:antialiased}.clearfix{*zoom:1}.clearfix:before,.clearfix:after{display:table;content:""}.clearfix:after{clear:both}/*!
|
||||
|
||||
@@ -1,15 +1,3 @@
|
||||
/*
|
||||
 * Tweak the overall size to better match RTD.
|
||||
*/
|
||||
body {
|
||||
font-size: 90%;
|
||||
}
|
||||
|
||||
h3, h4, h5, h6 {
|
||||
color: #2980b9;
|
||||
font-weight: 300
|
||||
}
|
||||
|
||||
/*
|
||||
* Sphinx doesn't have support for section dividers like we do in
|
||||
* MkDocs, this styles the section titles in the nav
|
||||
@@ -34,10 +22,25 @@ h3, h4, h5, h6 {
|
||||
* area doesn't scroll.
|
||||
*
|
||||
* https://github.com/mkdocs/mkdocs/pull/202
|
||||
*
|
||||
* Builds upon pull 202 https://github.com/mkdocs/mkdocs/pull/202
|
||||
 * to make the toc scrollbar end before the navigation buttons so they do not overlap.
|
||||
*/
|
||||
.wy-nav-side {
|
||||
height: 100%;
|
||||
height: calc(100% - 45px);
|
||||
overflow-y: auto;
|
||||
min-height: 0;
|
||||
}
|
||||
|
||||
.rst-versions{
|
||||
border-top: 0;
|
||||
height: 45px;
|
||||
}
|
||||
|
||||
@media screen and (max-width: 768px) {
|
||||
.wy-nav-side {
|
||||
height: 100%;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -50,23 +53,49 @@ h3, h4, h5, h6 {
|
||||
margin-bottom: 2em;
|
||||
}
|
||||
|
||||
/*
|
||||
* Fix wrapping in the code highlighting
|
||||
*
|
||||
* https://github.com/mkdocs/mkdocs/issues/233
|
||||
*/
|
||||
code {
|
||||
white-space: pre;
|
||||
}
|
||||
|
||||
/*
|
||||
 * Wrap inline code samples, otherwise they shoot off the side and
|
||||
* can't be read at all.
|
||||
*
|
||||
* https://github.com/mkdocs/mkdocs/issues/313
|
||||
* https://github.com/mkdocs/mkdocs/issues/233
|
||||
* https://github.com/mkdocs/mkdocs/issues/834
|
||||
*/
|
||||
p code {
|
||||
code {
|
||||
white-space: pre-wrap;
|
||||
word-wrap: break-word;
|
||||
padding: 2px 5px;
|
||||
}
|
||||
|
||||
/**
|
||||
* Make code blocks display as blocks and give them the appropriate
|
||||
* font size and padding.
|
||||
*
|
||||
* https://github.com/mkdocs/mkdocs/issues/855
|
||||
* https://github.com/mkdocs/mkdocs/issues/834
|
||||
* https://github.com/mkdocs/mkdocs/issues/233
|
||||
*/
|
||||
pre code {
|
||||
white-space: pre;
|
||||
word-wrap: normal;
|
||||
display: block;
|
||||
padding: 12px;
|
||||
font-size: 12px;
|
||||
}
|
||||
|
||||
/*
|
||||
* Fix link colors when the link text is inline code.
|
||||
*
|
||||
* https://github.com/mkdocs/mkdocs/issues/718
|
||||
*/
|
||||
a code {
|
||||
color: #2980B9;
|
||||
}
|
||||
a:hover code {
|
||||
color: #3091d1;
|
||||
}
|
||||
a:visited code {
|
||||
color: #9B59B6;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -76,7 +105,7 @@ p code {
|
||||
*
|
||||
* https://github.com/mkdocs/mkdocs/issues/411
|
||||
*/
|
||||
code.cs, code.c {
|
||||
pre .cs, pre .c {
|
||||
font-weight: inherit;
|
||||
font-style: inherit;
|
||||
}
|
||||
@@ -99,21 +128,20 @@ code.cs, code.c {
|
||||
* Additions specific to the search functionality provided by MkDocs
|
||||
*/
|
||||
|
||||
#mkdocs-search-results article h3
|
||||
{
|
||||
.search-results article {
|
||||
margin-top: 23px;
|
||||
border-top: 1px solid #E1E4E5;
|
||||
padding-top: 24px;
|
||||
}
|
||||
|
||||
#mkdocs-search-results article:first-child h3 {
|
||||
.search-results article:first-child {
|
||||
border-top: none;
|
||||
}
|
||||
|
||||
#mkdocs-search-query{
|
||||
form .search-query {
|
||||
width: 100%;
|
||||
border-radius: 50px;
|
||||
padding: 6px 12px;
|
||||
padding: 6px 12px; /* csslint allow: box-model */
|
||||
border-color: #D1D4D5;
|
||||
}
|
||||
|
||||
@@ -124,3 +152,43 @@ code.cs, code.c {
|
||||
.wy-menu-vertical li ul.subnav ul.subnav{
|
||||
padding-left: 1em;
|
||||
}
|
||||
|
||||
.wy-menu-vertical .subnav li.current > a {
|
||||
padding-left: 2.42em;
|
||||
}
|
||||
.wy-menu-vertical .subnav li.current > ul li a {
|
||||
padding-left: 3.23em;
|
||||
}
|
||||
|
||||
/*
|
||||
* Improve inline code blocks within admonitions.
|
||||
*
|
||||
* https://github.com/mkdocs/mkdocs/issues/656
|
||||
*/
|
||||
.admonition code {
|
||||
color: #404040;
|
||||
border: 1px solid #c7c9cb;
|
||||
border: 1px solid rgba(0, 0, 0, 0.2);
|
||||
background: #f8fbfd;
|
||||
background: rgba(255, 255, 255, 0.7);
|
||||
}
|
||||
|
||||
/*
|
||||
* Account for wide tables which go off the side.
|
||||
 * Override borders to avoid weirdness on narrow tables.
|
||||
*
|
||||
* https://github.com/mkdocs/mkdocs/issues/834
|
||||
* https://github.com/mkdocs/mkdocs/pull/1034
|
||||
*/
|
||||
.rst-content .section .docutils {
|
||||
width: 100%;
|
||||
overflow: auto;
|
||||
display: block;
|
||||
border: none;
|
||||
}
|
||||
|
||||
td, th {
|
||||
border: 1px solid #e1e4e5 !important; /* csslint allow: important */
|
||||
border-collapse: collapse;
|
||||
}
|
||||
|
||||
|
||||
@@ -3,33 +3,29 @@
|
||||
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<meta http-equiv="X-UA-Compatible" content="IE=edge">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>Coach Dashboard - Reinforcement Learning Coach Documentation</title>
|
||||
|
||||
|
||||
<link rel="shortcut icon" href="../img/favicon.ico">
|
||||
|
||||
|
||||
<title>Coach Dashboard - Reinforcement Learning Coach</title>
|
||||
<link href='https://fonts.googleapis.com/css?family=Lato:400,700|Roboto+Slab:400,700|Inconsolata:400,700' rel='stylesheet' type='text/css'>
|
||||
|
||||
<link rel="stylesheet" href="../css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../css/theme_extra.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../css/highlight.css">
|
||||
<link href="../extra.css" rel="stylesheet">
|
||||
|
||||
|
||||
<script>
|
||||
// Current page data
|
||||
var mkdocs_page_name = "Coach Dashboard";
|
||||
var mkdocs_page_input_path = "dashboard.md";
|
||||
var mkdocs_page_url = "/dashboard/";
|
||||
</script>
|
||||
|
||||
<script src="../js/jquery-2.1.1.min.js"></script>
|
||||
<script src="../js/modernizr-2.8.3.min.js"></script>
|
||||
<script type="text/javascript" src="../js/highlight.pack.js"></script>
|
||||
<script src="../js/theme.js"></script>
|
||||
<script src="https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_HTML"></script>
|
||||
|
||||
<script type="text/javascript" src="../js/highlight.pack.js"></script>
|
||||
|
||||
</head>
|
||||
|
||||
@@ -40,7 +36,7 @@
|
||||
|
||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side stickynav">
|
||||
<div class="wy-side-nav-search">
|
||||
<a href="../index.html" class="icon icon-home"> Reinforcement Learning Coach Documentation</a>
|
||||
<a href=".." class="icon icon-home"> Reinforcement Learning Coach</a>
|
||||
<div role="search">
|
||||
<form id ="rtd-search-form" class="wy-form" action="../search.html" method="get">
|
||||
<input type="text" name="q" placeholder="Search docs" />
|
||||
@@ -49,197 +45,148 @@
|
||||
</div>
|
||||
|
||||
<div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
|
||||
<ul class="current">
|
||||
<ul class="current">
|
||||
|
||||
|
||||
<li>
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../index.html">Home</a>
|
||||
|
||||
</li>
|
||||
<li>
|
||||
<li class="toctree-l1">
|
||||
|
||||
<a class="" href="..">Home</a>
|
||||
</li>
|
||||
|
||||
<li>
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../design/index.html">Design</a>
|
||||
|
||||
</li>
|
||||
<li>
|
||||
<li class="toctree-l1">
|
||||
|
||||
<a class="" href="../usage/">Usage</a>
|
||||
</li>
|
||||
|
||||
<li>
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../usage/index.html">Usage</a>
|
||||
|
||||
</li>
|
||||
<li>
|
||||
|
||||
<li>
|
||||
<li class="toctree-l1">
|
||||
|
||||
<span class="caption-text">Design</span>
|
||||
<ul class="subnav">
|
||||
<li><span>Algorithms</span></li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../algorithms/value_optimization/dqn/index.html">DQN</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../algorithms/value_optimization/double_dqn/index.html">Double DQN</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../algorithms/value_optimization/dueling_dqn/index.html">Dueling DQN</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../algorithms/value_optimization/categorical_dqn/index.html">Categorical DQN</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../algorithms/value_optimization/mmc/index.html">Mixed Monte Carlo</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../algorithms/value_optimization/pal/index.html">Persistent Advantage Learning</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../algorithms/value_optimization/nec/index.html">Neural Episodic Control</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../algorithms/value_optimization/bs_dqn/index.html">Bootstrapped DQN</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../algorithms/value_optimization/n_step/index.html">N-Step Q Learning</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../algorithms/value_optimization/naf/index.html">Normalized Advantage Functions</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../algorithms/policy_optimization/pg/index.html">Policy Gradient</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../algorithms/policy_optimization/ac/index.html">Actor-Critic</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../algorithms/policy_optimization/ddpg/index.html">Deep Determinstic Policy Gradients</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../algorithms/policy_optimization/ppo/index.html">Proximal Policy Optimization</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../algorithms/policy_optimization/cppo/index.html">Clipped Proximal Policy Optimization</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../algorithms/other/dfp/index.html">Direct Future Prediction</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../algorithms/imitation/bc/index.html">Behavioral Cloning</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../design/features/">Features</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../design/control_flow/">Control Flow</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../design/network/">Network</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../design/filters/">Filters</a>
|
||||
</li>
|
||||
</ul>
|
||||
<li>
|
||||
</li>
|
||||
|
||||
<li>
|
||||
<li class="toctree-l1 current">
|
||||
<a class="current" href="./index.html">Coach Dashboard</a>
|
||||
|
||||
<ul>
|
||||
|
||||
<li class="toctree-l3"><a href="#visualizing-signals">Visualizing Signals</a></li>
|
||||
|
||||
|
||||
<li class="toctree-l3"><a href="#tracking-statistics">Tracking Statistics</a></li>
|
||||
|
||||
|
||||
<li class="toctree-l3"><a href="#comparing-runs">Comparing Runs</a></li>
|
||||
|
||||
|
||||
</ul>
|
||||
|
||||
</li>
|
||||
<li>
|
||||
|
||||
<li>
|
||||
<li class="toctree-l1">
|
||||
|
||||
<span class="caption-text">Algorithms</span>
|
||||
<ul class="subnav">
|
||||
<li><span>Contributing</span></li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../contributing/add_agent/index.html">Adding a New Agent</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="../contributing/add_env/index.html">Adding a New Environment</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../algorithms/value_optimization/dqn/">DQN</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../algorithms/value_optimization/double_dqn/">Double DQN</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../algorithms/value_optimization/dueling_dqn/">Dueling DQN</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../algorithms/value_optimization/categorical_dqn/">Categorical DQN</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../algorithms/value_optimization/mmc/">Mixed Monte Carlo</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../algorithms/value_optimization/pal/">Persistent Advantage Learning</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../algorithms/value_optimization/nec/">Neural Episodic Control</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../algorithms/value_optimization/bs_dqn/">Bootstrapped DQN</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../algorithms/value_optimization/n_step/">N-Step Q Learning</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../algorithms/value_optimization/naf/">Normalized Advantage Functions</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../algorithms/policy_optimization/pg/">Policy Gradient</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../algorithms/policy_optimization/ac/">Actor-Critic</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../algorithms/policy_optimization/ddpg/">Deep Determinstic Policy Gradients</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../algorithms/policy_optimization/ppo/">Proximal Policy Optimization</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../algorithms/policy_optimization/cppo/">Clipped Proximal Policy Optimization</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../algorithms/other/dfp/">Direct Future Prediction</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../algorithms/imitation/bc/">Behavioral Cloning</a>
|
||||
</li>
|
||||
</ul>
|
||||
<li>
|
||||
</li>
|
||||
|
||||
<li class="toctree-l1 current">
|
||||
|
||||
<a class="current" href="./">Coach Dashboard</a>
|
||||
<ul class="subnav">
|
||||
|
||||
<li class="toctree-l2"><a href="#visualizing-signals">Visualizing Signals</a></li>
|
||||
|
||||
|
||||
<li class="toctree-l2"><a href="#tracking-statistics">Tracking Statistics</a></li>
|
||||
|
||||
|
||||
<li class="toctree-l2"><a href="#comparing-runs">Comparing Runs</a></li>
|
||||
|
||||
|
||||
</ul>
|
||||
</li>
|
||||
|
||||
<li class="toctree-l1">
|
||||
|
||||
<span class="caption-text">Contributing</span>
|
||||
<ul class="subnav">
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../contributing/add_agent/">Adding a New Agent</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../contributing/add_env/">Adding a New Environment</a>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</div>
|
||||
@@ -251,7 +198,7 @@
       <nav class="wy-nav-top" role="navigation" aria-label="top navigation">
         <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
-        <a href="../index.html">Reinforcement Learning Coach Documentation</a>
+        <a href="..">Reinforcement Learning Coach</a>
       </nav>
@@ -259,7 +206,7 @@
     <div class="rst-content">
       <div role="navigation" aria-label="breadcrumbs navigation">
         <ul class="wy-breadcrumbs">
-          <li><a href="../index.html">Docs</a> »</li>
+          <li><a href="..">Docs</a> »</li>
@@ -352,10 +299,10 @@
       <div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
-        <a href="../contributing/add_agent/index.html" class="btn btn-neutral float-right" title="Adding a New Agent"/>Next <span class="icon icon-circle-arrow-right"></span></a>
+        <a href="../contributing/add_agent/" class="btn btn-neutral float-right" title="Adding a New Agent">Next <span class="icon icon-circle-arrow-right"></span></a>
-        <a href="../algorithms/imitation/bc/index.html" class="btn btn-neutral" title="Behavioral Cloning"><span class="icon icon-circle-arrow-left"></span> Previous</a>
+        <a href="../algorithms/imitation/bc/" class="btn btn-neutral" title="Behavioral Cloning"><span class="icon icon-circle-arrow-left"></span> Previous</a>
       </div>
@@ -369,7 +316,7 @@
           Built with <a href="http://www.mkdocs.org">MkDocs</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
       </footer>
     </div>
   </div>
@@ -377,17 +324,22 @@
     </div>
     <div class="rst-versions" role="note" style="cursor: pointer">
       <span class="rst-current-version" data-toggle="rst-current-version">
-        <span><a href="../algorithms/imitation/bc/index.html" style="color: #fcfcfc;">« Previous</a></span>
+        <span><a href="../algorithms/imitation/bc/" style="color: #fcfcfc;">« Previous</a></span>
-        <span style="margin-left: 15px"><a href="../contributing/add_agent/index.html" style="color: #fcfcfc">Next »</a></span>
+        <span style="margin-left: 15px"><a href="../contributing/add_agent/" style="color: #fcfcfc">Next »</a></span>
       </span>
     </div>
     <script>var base_url = '..';</script>
     <script src="../js/theme.js"></script>
     <script src="https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_HTML"></script>
     <script src="../search/require.js"></script>
     <script src="../search/search.js"></script>
   </body>
 </html>
367
docs/design/control_flow/index.html
Normal file
@@ -0,0 +1,367 @@
<!-- language-all: python -->

<h1 id="coach-control-flow">Coach Control Flow</h1>
<p>Coach is built in a modular way, encouraging module reuse and reducing the amount of boilerplate code needed
for developing new algorithms or for integrating a new challenge as an environment.
On the other hand, this modularity can make it overwhelming for new users to ramp up on the code.
To help with that, here's a short overview of the control flow.</p>
<h2 id="graph-manager">Graph Manager</h2>
|
||||
<p>The main entry point for Coach is <strong>coach.py</strong>.
|
||||
The main functionality of this script is to parse the command line arguments and invoke all the sub-processes needed
|
||||
for the given experiment.
|
||||
<strong>coach.py</strong> executes the given <strong>preset</strong> file which returns a <strong>GraphManager</strong> object.</p>
|
||||
<p>A <strong>preset</strong> is a design pattern that is intended for concentrating the entire definition of an experiment in a single
|
||||
file. This helps with experiments reproducibility, improves readability and prevents confusion.
|
||||
The outcome of a preset is a <strong>GraphManager</strong> which will usually be instantiated in the final lines of the preset.</p>
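<p>As a rough illustration, a minimal preset could look like the sketch below. The module paths and class names
are assumptions based on Coach's general structure (this is a pre-release document), not a guaranteed API:</p>
<pre><code class="python"># hypothetical minimal preset -- module paths and class names are assumptions, not the final API
from rl_coach.agents.dqn_agent import DQNAgentParameters
from rl_coach.environments.gym_environment import GymVectorEnvironment
from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
from rl_coach.graph_managers.graph_manager import SimpleSchedule

agent_params = DQNAgentParameters()                      # which algorithm to run
env_params = GymVectorEnvironment(level='CartPole-v0')   # which environment to solve

# the preset ends by instantiating the graph manager that ties the experiment together
graph_manager = BasicRLGraphManager(agent_params=agent_params,
                                    env_params=env_params,
                                    schedule_params=SimpleSchedule())
</code></pre>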
<p>A <strong>GraphManager</strong> is an object that holds all the agents and environments of an experiment, and is mostly responsible
for scheduling their work. Why is it called a <strong>graph</strong> manager? Because agents and environments are structured into
a graph of interactions. For example, in hierarchical reinforcement learning schemes, there will often be a master
policy agent that controls a sub-policy agent, which in turn interacts with the environment. Other schemes can have
much more complex graphs of control, such as several hierarchy layers, each with multiple agents.
The graph manager's main loop is the improve loop.</p>
<p style="text-align: center;">
  <img src="../../img/improve.png" alt="Improve loop" style="width: 400px;"/>
</p>

<p>The improve loop cycles between 3 main phases - heatup, training and evaluation - as summarized in the sketch after the following list:</p>
<ul>
<li>
<p><strong>Heatup</strong> - the goal of this phase is to collect initial data for populating the replay buffers. The heatup phase
takes place only at the beginning of the experiment, and the agents act completely randomly during it.
Importantly, the agents do not train their networks during this phase. DQN, for example, uses 50k random steps in order
to initialize the replay buffers.</p>
</li>
<li>
<p><strong>Training</strong> - the training phase is the main phase of the experiment. Its details can change between agent types,
but it essentially consists of repeated cycles of acting, collecting data from the environment, and training the agent's
networks. During this phase, the agent uses its exploration policy in training mode, which adds noise to its
actions in order to improve its knowledge of the environment's state space.</p>
</li>
<li>
<p><strong>Evaluation</strong> - the evaluation phase is intended for evaluating the current performance of the agent. The agents
act greedily in order to exploit the knowledge aggregated so far, and the performance is averaged over multiple
evaluation episodes in order to reduce the stochasticity effects of all the components.</p>
</li>
</ul>
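<p>In pseudo-Python, the improve loop boils down to something like the following sketch (the method names and
step counts are illustrative, not the exact Coach code):</p>
<pre><code class="python"># illustrative sketch of the improve loop -- names and numbers are assumptions
def improve(graph_manager):
    graph_manager.heatup(steps=50000)             # act randomly, fill the replay buffers, no training
    while not graph_manager.reached_total_steps():
        graph_manager.train_and_act(steps=10000)  # act with exploration noise and train the networks
        graph_manager.evaluate(episodes=10)       # act greedily and average the achieved returns
</code></pre>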
<h2 id="level-manager">Level Manager</h2>
<p>In each of the 3 phases described above, the graph manager invokes all the hierarchy levels in the graph in a
synchronized manner. In Coach, agents do not interact directly with the environment. Instead, they go through a
<em>LevelManager</em>, which is a proxy that manages their interaction. The level manager passes the current state and reward
from the environment to the agent, and the actions from the agent to the environment.</p>
<p>The motivation for having a level manager is to disentangle the code of the environment and the agent, so as to allow
more complex interactions. Each level can have multiple agents which interact with the environment, and the level manager
controls which of them gets to choose the action at each step.
Additionally, each level manager can act as an environment for the hierarchy level above it, so that each hierarchy
level can be seen as an interaction between an agent and an environment, even if that environment is just more agents in
a lower hierarchy level (see the sketch below).</p>
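<p>In outline, a single synchronized step through one hierarchy level might look like the following sketch
(a conceptual illustration of the proxy role only; the names are not Coach's actual API):</p>
<pre><code class="python"># conceptual sketch of a level manager acting as a proxy -- names are illustrative
def level_step(level_manager):
    agent = level_manager.pick_acting_agent()       # which agent chooses the action at this step
    action = agent.act()                            # the agent acts based on its filtered internal state
    env_response = level_manager.environment.step(action)  # a real environment, or a lower level manager
    agent.observe(env_response)                     # the proxy routes the response back to the agent
    return env_response
</code></pre>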
<h2 id="agent">Agent</h2>
|
||||
<p>The base agent class has 3 main function that will be used during those phases - observe, act and train.</p>
|
||||
<ul>
|
||||
<li><strong>Observe</strong> - this function gets the latest response from the environment as input, and updates the internal state
|
||||
of the agent with the new information. The environment response will
|
||||
be first passed through the agent's <strong>InputFilter</strong> object, which will process the values in the response, according
|
||||
to the specific agent definition. The environment response will then be converted into a
|
||||
<strong>Transition</strong> which will contain the information from a single step
|
||||
(<script type="math/tex"> s_{t}, a_{t}, r_{t}, s_{t+1}, terminal signal </script>), and store it in the memory.</li>
|
||||
</ul>
|
||||
<p><img src="../../img/observe.png" alt="Observe" style="width: 700px;"/></p>
|
||||
<ul>
|
||||
<li><strong>Act</strong> - this function uses the current internal state of the agent in order to select the next action to take on
|
||||
the environment. This function will call the per-agent custom function <strong>choose_action</strong> that will use the network
|
||||
and the exploration policy in order to select an action. The action will be stored, together with any additional
|
||||
information (like the action value for example) in an <strong>ActionInfo</strong> object. The ActionInfo object will then be
|
||||
passed through the agent's <strong>OutputFilter</strong> to allow any processing of the action (like discretization,
|
||||
or shifting, for example), before passing it to the environment.</li>
|
||||
</ul>
|
||||
<p><img src="../../img/act.png" alt="Act" style="width: 700px;"/></p>
|
||||
<ul>
|
||||
<li><strong>Train</strong> - this function will sample a batch from the memory and train on it. The batch of transitions will be
|
||||
first wrapped into a <strong>Batch</strong> object to allow efficient querying of the batch values. It will then be passed into
|
||||
the agent specific <strong>learn_from_batch</strong> function, that will extract network target values from the batch and will
|
||||
train the networks accordingly. Lastly, if there's a target network defined for the agent, it will sync the target
|
||||
network weights with the online network.</li>
|
||||
</ul>
|
||||
<p><img src="../../img/train.png" alt="Train" style="width: 700px;"/></p>
328
docs/design/features/index.html
Normal file
@@ -0,0 +1,328 @@
<h1 id="coach-features">Coach Features</h1>
|
||||
<h2 id="supported-algorithms">Supported Algorithms</h2>
|
||||
<p>Coach supports many state-of-the-art reinforcement learning algorithms, which are separated into two main classes -
|
||||
value optimization and policy optimization. A detailed description of those algorithms may be found in the algorithms
|
||||
section.</p>
|
||||
<p style="text-align: center;">
|
||||
|
||||
<img src="../../img/algorithms.png" alt="Supported Algorithms" style="width: 600px;"/>
|
||||
|
||||
</p>
<h2 id="supported-environments">Supported Environments</h2>
<p>Coach supports a large number of environments which can be solved using reinforcement learning:</p>
<ul>
<li>
<p><strong><a href="https://github.com/deepmind/dm_control">DeepMind Control Suite</a></strong> - a set of reinforcement learning environments
powered by the MuJoCo physics engine.</p>
</li>
<li>
<p><strong><a href="https://github.com/deepmind/pysc2">Blizzard Starcraft II</a></strong> - a popular strategy game which was wrapped with a
Python interface by DeepMind.</p>
</li>
<li>
<p><strong><a href="http://vizdoom.cs.put.edu.pl/">ViZDoom</a></strong> - a Doom-based AI research platform for reinforcement learning
from raw visual information.</p>
</li>
<li>
<p><strong><a href="https://github.com/carla-simulator/carla">CARLA</a></strong> - an open-source simulator for autonomous driving research.</p>
</li>
<li>
<p><strong><a href="https://gym.openai.com/">OpenAI Gym</a></strong> - a library which consists of a set of environments, from games to robotics.
Additionally, it can be extended using the API defined by the authors.</p>
</li>
</ul>
<p>In Coach, we support all the native environments in Gym, along with several extensions such as:</p>
<ul>
<li>
<p><strong><a href="https://github.com/openai/roboschool">Roboschool</a></strong> - a set of environments powered by the PyBullet engine,
that offer a free alternative to MuJoCo.</p>
</li>
<li>
<p><strong><a href="https://github.com/Breakend/gym-extensions">Gym Extensions</a></strong> - a set of environments that extends Gym for
auxiliary tasks (multitask learning, transfer learning, inverse reinforcement learning, etc.).</p>
</li>
<li>
<p><strong><a href="https://github.com/bulletphysics/bullet3/tree/master/examples/pybullet">PyBullet</a></strong> - a physics engine that
includes a set of robotics environments.</p>
</li>
</ul>
416
docs/design/filters/index.html
Normal file
@@ -0,0 +1,416 @@
<h1 id="filters">Filters</h1>
|
||||
<p>Filters are a mechanism in Coach that allows doing pre-processing and post-processing of the internal agent information.
|
||||
There are two filter categories -</p>
|
||||
<ul>
|
||||
<li>
|
||||
<p><strong>Input filters</strong> - these are filters that process the information passed <strong>into</strong> the agent from the environment.
|
||||
This information includes the observation and the reward. Input filters therefore allow rescaling observations,
|
||||
normalizing rewards, stack observations, etc.</p>
|
||||
</li>
|
||||
<li>
|
||||
<p><strong>Output filters</strong> - these are filters that process the information going <strong>out</strong> of the agent into the environment.
|
||||
This information includes the action the agent chooses to take. Output filters therefore allow conversion of
|
||||
actions from one space into another. For example, the agent can take <script type="math/tex"> N </script> discrete actions, that will be mapped by
|
||||
the output filter onto <script type="math/tex"> N </script> continuous actions.</p>
|
||||
</li>
|
||||
</ul>
|
||||
<p>Filters can be stacked on top of each other in order to build complex processing flows of the inputs or outputs.</p>
|
||||
<p style="text-align: center;">
|
||||
|
||||
<img src="../../img/filters.png" alt="Filters mechanism" style="width: 350px;"/>
|
||||
|
||||
</p>
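<p>As an illustration, a DQN-style Atari input filter stack could be assembled along the following lines.
The filter class names match the lists below, but the construction API and the constructor arguments shown
here are simplified assumptions:</p>
<pre><code class="python"># hypothetical input filter stack -- the construction API and arguments are simplified assumptions
input_filter = InputFilter()

# reward pre-processing: clip the Atari rewards into the range -1 and 1
input_filter.add_reward_filter('clipping', RewardClippingFilter(-1.0, 1.0))

# observation pre-processing, applied in order; stacking must come last
input_filter.add_observation_filter('observation', 'rescaling', ObservationRescaleToSizeFilter((84, 84)))
input_filter.add_observation_filter('observation', 'to_grayscale', ObservationRGBToYFilter())
input_filter.add_observation_filter('observation', 'to_uint8', ObservationUint8Filter())
input_filter.add_observation_filter('observation', 'stacking', ObservationStackingFilter(4))
</code></pre>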
<h2 id="input-filters">Input Filters</h2>
<p>The input filters are separated into two categories - <strong>observation filters</strong> and <strong>reward filters</strong>.</p>
<h3 id="observation-filters">Observation Filters</h3>
<ul>
<li>
<p><strong>ObservationClippingFilter</strong> - Clips the observation values to a given range of values. For example, if the
observation consists of measurements in an arbitrary range, and we want to control the minimum and maximum values
of these observations, we can define a range and clip the values of the measurements.</p>
</li>
<li>
<p><strong>ObservationCropFilter</strong> - Crops the observation to a given crop window. For example, in Atari, the
observations are images with a shape of 210x160. Usually, we will want to crop the observation to a
square of 160x160 before rescaling it.</p>
</li>
<li>
<p><strong>ObservationMoveAxisFilter</strong> - Reorders the axes of the observation. This can be useful when the observation is an
image, and we want to move the channel axis to be the last axis instead of the first axis.</p>
</li>
<li>
<p><strong>ObservationNormalizationFilter</strong> - Normalizes the observation values with a running mean and standard deviation of
all the observations seen so far. The normalization is performed element-wise. Additionally, when working with
multiple workers, the statistics used for the normalization operation are accumulated over all the workers.</p>
</li>
<li>
<p><strong>ObservationReductionBySubPartsNameFilter</strong> - Allows keeping only parts of the observation, by specifying their
names. For example, the CARLA environment extracts multiple measurements that can be used by the agent, such as
speed and location. If we want to use only the speed, it can be done using this filter.</p>
</li>
<li>
<p><strong>ObservationRescaleSizeByFactorFilter</strong> - Rescales an image observation by some factor. For example, the image size
can be reduced by a factor of 2.</p>
</li>
<li>
<p><strong>ObservationRescaleToSizeFilter</strong> - Rescales an image observation to a given size. The target size does not
necessarily keep the aspect ratio of the original observation.</p>
</li>
<li>
<p><strong>ObservationRGBToYFilter</strong> - Converts a color image observation given in RGB encoding into a grayscale
image observation, by keeping only the luminance (Y) channel of the YUV encoding. This can be useful if the colors
in the original image are not relevant for solving the task at hand.</p>
</li>
<li>
<p><strong>ObservationSqueezeFilter</strong> - Removes redundant axes from the observation, i.e. axes with a dimension of 1.</p>
</li>
<li>
<p><strong>ObservationStackingFilter</strong> - Stacks several observations on top of each other. For image observations this
creates a 3D blob. The stacking is done in a lazy manner in order to reduce memory consumption, by using a
LazyStack object to wrap the observations in the stack. For this reason, the
ObservationStackingFilter <strong>must</strong> be the last filter in the input filters stack.</p>
</li>
<li>
<p><strong>ObservationUint8Filter</strong> - Converts a floating point observation into an unsigned 8-bit integer observation. This is
mostly useful for reducing memory consumption, and is usually used for image observations. The filter first
spreads the observation values over the range 0-255 and then discretizes them into integer values.</p>
</li>
</ul>
<h3 id="reward-filters">Reward Filters</h3>
<ul>
<li>
<p><strong>RewardClippingFilter</strong> - Clips the reward values into a given range. For example, in DQN, the Atari rewards are
clipped into the range -1 and 1 in order to control the scale of the returns.</p>
</li>
<li>
<p><strong>RewardNormalizationFilter</strong> - Normalizes the reward values with a running mean and standard deviation of
all the rewards seen so far (see the sketch after this list). When working with multiple workers, the statistics
used for the normalization operation are accumulated over all the workers.</p>
</li>
<li>
<p><strong>RewardRescaleFilter</strong> - Rescales the reward by a given factor. Rescaling the rewards of the environment has been
observed to have a large effect (negative or positive) on the behavior of the learning process.</p>
</li>
</ul>
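<p>The running normalization used by the normalization filters above can be summarized by the following
self-contained sketch (an illustration of the idea, not Coach's implementation):</p>
<pre><code class="python"># illustrative running normalization -- not Coach's implementation
class RunningNormalizer:
    def __init__(self):
        self.count, self.mean, self.m2 = 0, 0.0, 0.0

    def update(self, x):
        # Welford's online algorithm for the running mean and variance
        self.count += 1
        delta = x - self.mean
        self.mean += delta / self.count
        self.m2 += delta * (x - self.mean)

    def normalize(self, x):
        std = (self.m2 / max(self.count - 1, 1)) ** 0.5
        return (x - self.mean) / (std + 1e-8)
</code></pre>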
<h2 id="output-filters">Output Filters</h2>
<p>The output filters only process the actions.</p>
<h3 id="action-filters">Action Filters</h3>
<ul>
<li>
<p><strong>AttentionDiscretization</strong> - Discretizes an <strong>AttentionActionSpace</strong>. The attention action space defines the actions
as choosing sub-boxes in a given box. For example, consider an image of size 100x100, where the action is choosing
a crop window of size 20x20 to attend to in the image. AttentionDiscretization allows discretizing the possible crop
windows into a finite number of options, and maps a discrete action space onto those crop windows.</p>
</li>
<li>
<p><strong>BoxDiscretization</strong> - Discretizes a continuous action space into a discrete action space, allowing the usage of
agents such as DQN for continuous environments such as MuJoCo. Given the number of bins to discretize into, the
original continuous action space is uniformly separated into the given number of bins, each mapped to a discrete
action index. For example, if the original action space is between -1 and 1 and 5 bins were selected, the new action
space will consist of 5 actions mapped to -1, -0.5, 0, 0.5 and 1.</p>
</li>
<li>
<p><strong>BoxMasking</strong> - Masks part of the action space to force the agent to work in a defined sub-space. For example,
if the original action space is between -1 and 1, then this filter can be used in order to constrain the agent's actions
to the range 0 and 1 instead. This essentially masks the range -1 and 0 from the agent.</p>
</li>
<li>
<p><strong>PartialDiscreteActionSpaceMap</strong> - A partial map between two countable action spaces. For example, consider an environment
with a MultiSelect action space (select multiple actions at the same time, such as jump and go right), with 8 actual
MultiSelect actions. If we want the agent to be able to select only 5 of those actions by their index (0-4), we can
map a discrete action space with 5 actions onto the 5 selected MultiSelect actions. This both allows the agent to
use regular discrete actions, and masks 3 of the actions from the agent.</p>
</li>
<li>
<p><strong>FullDiscreteActionSpaceMap</strong> - A full map between two countable action spaces. This works in a similar way to the
PartialDiscreteActionSpaceMap, but maps the entire source action space onto the entire target action space, without
masking any actions.</p>
</li>
<li>
<p><strong>LinearBoxToBoxMap</strong> - A linear mapping between two box action spaces. For example, if the action space of the
environment consists of continuous actions between 0 and 1, and we want the agent to choose actions between -1 and 1,
the LinearBoxToBoxMap can be used to linearly map the range -1 and 1 onto the range 0 and 1. This means that the
action -1 will be mapped to 0, the action 1 will be mapped to 1, and the rest of the actions will be linearly mapped
between those values (see the worked sketch after this list).</p>
</li>
</ul>
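<p>The math behind the BoxDiscretization and LinearBoxToBoxMap examples above is simple enough to spell out.
The snippet below is a self-contained illustration of that math, not Coach's implementation:</p>
<pre><code class="python"># illustrative math behind the discretization and linear mapping examples -- not Coach's code
import numpy as np

# BoxDiscretization: 5 bins over the range [-1, 1] yield the discrete targets from the example above
bins = np.linspace(-1.0, 1.0, num=5)   # -> [-1. , -0.5,  0. ,  0.5,  1. ]

# LinearBoxToBoxMap: linearly map an agent action in [-1, 1] to an environment action in [0, 1]
def linear_box_to_box(action, src=(-1.0, 1.0), dst=(0.0, 1.0)):
    return dst[0] + (action - src[0]) * (dst[1] - dst[0]) / (src[1] - src[0])

assert linear_box_to_box(-1.0) == 0.0   # the source lower bound maps to the target lower bound
assert linear_box_to_box(1.0) == 1.0    # the source upper bound maps to the target upper bound
assert linear_box_to_box(0.0) == 0.5    # intermediate actions are mapped linearly
</code></pre>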
@@ -1,363 +0,0 @@
<h1 id="coach-design">Coach Design</h1>
|
||||
<h2 id="network-design">Network Design</h2>
|
||||
<p>Each agent has at least one neural network, used as the function approximator, for choosing the actions. The network is designed in a modular way to allow reusability in different agents. It is separated into three main parts:</p>
|
||||
<ul>
|
||||
<li>
|
||||
<p><strong>Input Embedders</strong> - This is the first stage of the network, meant to convert the input into a feature vector representation. It is possible to combine several instances of any of the supported embedders, in order to allow varied combinations of inputs. </p>
|
||||
<p>There are two main types of input embedders: </p>
|
||||
<ol>
|
||||
<li>Image embedder - Convolutional neural network. </li>
|
||||
<li>Vector embedder - Multi-layer perceptron. </li>
|
||||
</ol>
|
||||
</li>
|
||||
<li>
|
||||
<p><strong>Middlewares</strong> - The middleware takes the output of the input embedders and processes it into a different representation domain, before sending it through the output head. Its goal is to merge the outputs of several input embedders and apply some extra processing to the combined result, for instance an LSTM or a plain fully connected layer.</p>
|
||||
</li>
|
||||
<li>
|
||||
<p><strong>Output Heads</strong> - The output head is used to predict the values required from the network. These might include action-values, state-values or a policy. As with the input embedders, it is possible to use several output heads in the same network. For example, the <em>Actor Critic</em> agent combines two heads - a policy head and a state-value head.
|
||||
In addition, each output head defines the loss function according to its type.</p>
|
||||
</li>
|
||||
</ul>
|
||||
<p></p>
|
||||
<p style="text-align: center;">
|
||||
|
||||
<img src="../img/network.png" alt="Network Design" style="width: 400px;"/>
|
||||
|
||||
</p>
|
||||
|
||||
<h2 id="keeping-network-copies-in-sync">Keeping Network Copies in Sync</h2>
|
||||
<p>Most reinforcement learning agents include more than one copy of the neural network. These copies serve as counterparts of the main network that are updated at different rates, and they are often synchronized either locally or between parallel workers. To make this synchronization easier, a wrapper around the copies exposes a simplified API that hides these complexities from the agent.</p>
|
||||
<p style="text-align: center;">
|
||||
|
||||
<img src="../img/distributed.png" alt="Distributed Training" style="width: 600px;"/>
|
||||
|
||||
</p>
|
||||
|
||||
<h2 id="supported-algorithms">Supported Algorithms</h2>
|
||||
<p>Coach supports many state-of-the-art reinforcement learning algorithms, which are separated into two main classes - value optimization and policy optimization. A detailed description of these algorithms can be found in the algorithms section.</p>
|
||||
<p style="text-align: center;">
|
||||
|
||||
<img src="../img/algorithms.png" alt="Supported Algorithms" style="width: 600px;"/>
|
||||
|
||||
</p>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<footer>
|
||||
|
||||
<div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
|
||||
|
||||
<a href="../usage/index.html" class="btn btn-neutral float-right" title="Usage"/>Next <span class="icon icon-circle-arrow-right"></span></a>
|
||||
|
||||
|
||||
<a href="../index.html" class="btn btn-neutral" title="Home"><span class="icon icon-circle-arrow-left"></span> Previous</a>
|
||||
|
||||
</div>
|
||||
|
||||
|
||||
<hr/>
|
||||
|
||||
<div role="contentinfo">
|
||||
<!-- Copyright etc -->
|
||||
|
||||
</div>
|
||||
|
||||
Built with <a href="http://www.mkdocs.org">MkDocs</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||||
</footer>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</section>
|
||||
|
||||
</div>
|
||||
|
||||
<div class="rst-versions" role="note" style="cursor: pointer">
|
||||
<span class="rst-current-version" data-toggle="rst-current-version">
|
||||
|
||||
|
||||
<span><a href="../index.html" style="color: #fcfcfc;">« Previous</a></span>
|
||||
|
||||
|
||||
<span style="margin-left: 15px"><a href="../usage/index.html" style="color: #fcfcfc">Next »</a></span>
|
||||
|
||||
</span>
|
||||
</div>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
310
docs/design/network/index.html
Normal file
@@ -0,0 +1,310 @@
|
||||
<!DOCTYPE html>
|
||||
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
|
||||
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<meta http-equiv="X-UA-Compatible" content="IE=edge">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
|
||||
<link rel="shortcut icon" href="../../img/favicon.ico">
|
||||
<title>Network - Reinforcement Learning Coach</title>
|
||||
<link href='https://fonts.googleapis.com/css?family=Lato:400,700|Roboto+Slab:400,700|Inconsolata:400,700' rel='stylesheet' type='text/css'>
|
||||
|
||||
<link rel="stylesheet" href="../../css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../../css/theme_extra.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../../css/highlight.css">
|
||||
<link href="../../extra.css" rel="stylesheet">
|
||||
|
||||
<script>
|
||||
// Current page data
|
||||
var mkdocs_page_name = "Network";
|
||||
var mkdocs_page_input_path = "design/network.md";
|
||||
var mkdocs_page_url = "/design/network/";
|
||||
</script>
|
||||
|
||||
<script src="../../js/jquery-2.1.1.min.js"></script>
|
||||
<script src="../../js/modernizr-2.8.3.min.js"></script>
|
||||
<script type="text/javascript" src="../../js/highlight.pack.js"></script>
|
||||
|
||||
</head>
|
||||
|
||||
<body class="wy-body-for-nav" role="document">
|
||||
|
||||
<div class="wy-grid-for-nav">
|
||||
|
||||
|
||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side stickynav">
|
||||
<div class="wy-side-nav-search">
|
||||
<a href="../.." class="icon icon-home"> Reinforcement Learning Coach</a>
|
||||
<div role="search">
|
||||
<form id ="rtd-search-form" class="wy-form" action="../../search.html" method="get">
|
||||
<input type="text" name="q" placeholder="Search docs" />
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
|
||||
<ul class="current">
|
||||
|
||||
|
||||
<li class="toctree-l1">
|
||||
|
||||
<a class="" href="../..">Home</a>
|
||||
</li>
|
||||
|
||||
<li class="toctree-l1">
|
||||
|
||||
<a class="" href="../../usage/">Usage</a>
|
||||
</li>
|
||||
|
||||
<li class="toctree-l1">
|
||||
|
||||
<span class="caption-text">Design</span>
|
||||
<ul class="subnav">
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../features/">Features</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../control_flow/">Control Flow</a>
|
||||
</li>
|
||||
<li class=" current">
|
||||
|
||||
<a class="current" href="./">Network</a>
|
||||
<ul class="subnav">
|
||||
|
||||
<li class="toctree-l3"><a href="#network-design">Network Design</a></li>
|
||||
|
||||
<ul>
|
||||
|
||||
<li><a class="toctree-l4" href="#keeping-network-copies-in-sync">Keeping Network Copies in Sync</a></li>
|
||||
|
||||
</ul>
|
||||
|
||||
|
||||
</ul>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../filters/">Filters</a>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
|
||||
<li class="toctree-l1">
|
||||
|
||||
<span class="caption-text">Algorithms</span>
|
||||
<ul class="subnav">
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../algorithms/value_optimization/dqn/">DQN</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../algorithms/value_optimization/double_dqn/">Double DQN</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../algorithms/value_optimization/dueling_dqn/">Dueling DQN</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../algorithms/value_optimization/categorical_dqn/">Categorical DQN</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../algorithms/value_optimization/mmc/">Mixed Monte Carlo</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../algorithms/value_optimization/pal/">Persistent Advantage Learning</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../algorithms/value_optimization/nec/">Neural Episodic Control</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../algorithms/value_optimization/bs_dqn/">Bootstrapped DQN</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../algorithms/value_optimization/n_step/">N-Step Q Learning</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../algorithms/value_optimization/naf/">Normalized Advantage Functions</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../algorithms/policy_optimization/pg/">Policy Gradient</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../algorithms/policy_optimization/ac/">Actor-Critic</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../algorithms/policy_optimization/ddpg/">Deep Determinstic Policy Gradients</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../algorithms/policy_optimization/ppo/">Proximal Policy Optimization</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../algorithms/policy_optimization/cppo/">Clipped Proximal Policy Optimization</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../algorithms/other/dfp/">Direct Future Prediction</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../algorithms/imitation/bc/">Behavioral Cloning</a>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
|
||||
<li class="toctree-l1">
|
||||
|
||||
<a class="" href="../../dashboard/">Coach Dashboard</a>
|
||||
</li>
|
||||
|
||||
<li class="toctree-l1">
|
||||
|
||||
<span class="caption-text">Contributing</span>
|
||||
<ul class="subnav">
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../contributing/add_agent/">Adding a New Agent</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="../../contributing/add_env/">Adding a New Environment</a>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
</nav>
|
||||
|
||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
|
||||
|
||||
|
||||
<nav class="wy-nav-top" role="navigation" aria-label="top navigation">
|
||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||||
<a href="../..">Reinforcement Learning Coach</a>
|
||||
</nav>
|
||||
|
||||
|
||||
<div class="wy-nav-content">
|
||||
<div class="rst-content">
|
||||
<div role="navigation" aria-label="breadcrumbs navigation">
|
||||
<ul class="wy-breadcrumbs">
|
||||
<li><a href="../..">Docs</a> »</li>
|
||||
|
||||
|
||||
|
||||
<li>Design »</li>
|
||||
|
||||
|
||||
|
||||
<li>Network</li>
|
||||
<li class="wy-breadcrumbs-aside">
|
||||
|
||||
</li>
|
||||
</ul>
|
||||
<hr/>
|
||||
</div>
|
||||
<div role="main">
|
||||
<div class="section">
|
||||
|
||||
<h1 id="network-design">Network Design</h1>
|
||||
<p>Each agent has at least one neural network that serves as its function approximator for choosing actions. The network is designed in a modular way, to allow reuse across different agents. It is separated into three main parts:</p>
|
||||
<ul>
|
||||
<li>
|
||||
<p><strong>Input Embedders</strong> - This is the first stage of the network, and it converts the input into a feature vector representation. Several instances of any of the supported embedders can be combined, to allow varied combinations of inputs.</p>
|
||||
<p>There are two main types of input embedders: </p>
|
||||
<ol>
|
||||
<li>Image embedder - Convolutional neural network. </li>
|
||||
<li>Vector embedder - Multi-layer perceptron. </li>
|
||||
</ol>
|
||||
</li>
|
||||
<li>
|
||||
<p><strong>Middlewares</strong> - The middleware takes the output of the input embedders and processes it into a different representation domain, before sending it through the output head. Its goal is to merge the outputs of several input embedders and apply some extra processing to the combined result, for instance an LSTM or a plain fully connected layer.</p>
|
||||
</li>
|
||||
<li>
|
||||
<p><strong>Output Heads</strong> - The output head is used to predict the values required from the network. These might include action-values, state-values or a policy. As with the input embedders, it is possible to use several output heads in the same network. For example, the <em>Actor Critic</em> agent combines two heads - a policy head and a state-value head (see the sketch right after this list).
|
||||
In addition, each output head defines the loss function according to its type.</p>
|
||||
</li>
|
||||
</ul>
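<p>To make this modular structure concrete, the following is a minimal sketch in plain Python. It is not Coach's actual API - the class names (<code>VectorEmbedder</code>, <code>FCMiddleware</code>, <code>QHead</code>) and their interfaces are illustrative assumptions - but it shows how two input embedders, one middleware and one output head could be composed:</p>
<pre><code class="python"># A minimal sketch of the three-part network structure described above.
# These class names are illustrative assumptions, not Coach's actual API.
import numpy as np

class VectorEmbedder:
    """Vector embedder - a single fully connected layer standing in for an MLP."""
    def __init__(self, input_size, embedding_size):
        self.w = np.random.randn(input_size, embedding_size) * 0.01

    def __call__(self, x):
        return np.maximum(0.0, x @ self.w)  # ReLU(x W)

class FCMiddleware:
    """Middleware - merges the embedder outputs and applies extra processing."""
    def __init__(self, input_size, output_size):
        self.w = np.random.randn(input_size, output_size) * 0.01

    def __call__(self, embeddings):
        merged = np.concatenate(embeddings, axis=-1)  # combine several embedders
        return np.maximum(0.0, merged @ self.w)

class QHead:
    """Output head - predicts action-values; the head type implies the loss."""
    def __init__(self, input_size, num_actions):
        self.w = np.random.randn(input_size, num_actions) * 0.01

    def __call__(self, middleware_output):
        return middleware_output @ self.w

# Compose: two input embedders feed one middleware, which feeds one output head.
observation_embedder = VectorEmbedder(input_size=10, embedding_size=32)
goal_embedder = VectorEmbedder(input_size=4, embedding_size=32)
middleware = FCMiddleware(input_size=64, output_size=64)
q_head = QHead(input_size=64, num_actions=6)

observation = np.random.randn(1, 10)
goal = np.random.randn(1, 4)
q_values = q_head(middleware([observation_embedder(observation), goal_embedder(goal)]))
print(q_values.shape)  # (1, 6) - one action-value per action
</code></pre>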
|
||||
<p></p>
|
||||
<p style="text-align: center;">
|
||||
|
||||
<img src="../../img/network.png" alt="Network Design" style="width: 400px;"/>
|
||||
|
||||
</p>
|
||||
|
||||
<h2 id="keeping-network-copies-in-sync">Keeping Network Copies in Sync</h2>
|
||||
<p>Most reinforcement learning agents include more than one copy of the neural network. These copies serve as counterparts of the main network that are updated at different rates, and they are often synchronized either locally or between parallel workers. To make this synchronization easier, a wrapper around the copies exposes a simplified API that hides these complexities from the agent.</p>
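<p>As an illustration only, here is a minimal sketch of such a wrapper. The names <code>NetworkWrapper</code>, <code>TinyNet</code> and <code>sync</code> are assumptions and not Coach's actual API; the point is that the agent interacts with one object, while the online and target copies stay in sync behind it:</p>
<pre><code class="python"># A minimal sketch of a wrapper that hides copy synchronization - an
# illustrative assumption, not Coach's actual API.
import copy

import numpy as np

class TinyNet:
    """Stand-in network: a dict of weight arrays plus a stub update step."""
    def __init__(self):
        self.weights = {"w": np.zeros((4, 2))}

    def update(self, batch):
        self.weights["w"] += 0.01  # stub for a real gradient step

class NetworkWrapper:
    """Wraps an online network and a target copy behind a simple sync API."""
    def __init__(self, network):
        self.online_network = network
        self.target_network = copy.deepcopy(network)

    def train(self, batch):
        # Only the online copy is updated by training steps.
        self.online_network.update(batch)

    def sync(self, rate=1.0):
        # rate=1.0 copies the weights outright; a smaller rate does a soft update.
        for name, w in self.online_network.weights.items():
            old = self.target_network.weights[name]
            self.target_network.weights[name] = (1 - rate) * old + rate * w

wrapper = NetworkWrapper(TinyNet())
wrapper.train(batch=None)   # updates only the online copy
wrapper.sync(rate=0.1)      # the agent never touches the copies directly
</code></pre>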
|
||||
<p style="text-align: center;">
|
||||
|
||||
<img src="../../img/distributed.png" alt="Distributed Training" style="width: 600px;"/>
|
||||
|
||||
</p>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<footer>
|
||||
|
||||
<div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
|
||||
|
||||
<a href="../filters/" class="btn btn-neutral float-right" title="Filters">Next <span class="icon icon-circle-arrow-right"></span></a>
|
||||
|
||||
|
||||
<a href="../control_flow/" class="btn btn-neutral" title="Control Flow"><span class="icon icon-circle-arrow-left"></span> Previous</a>
|
||||
|
||||
</div>
|
||||
|
||||
|
||||
<hr/>
|
||||
|
||||
<div role="contentinfo">
|
||||
<!-- Copyright etc -->
|
||||
|
||||
</div>
|
||||
|
||||
Built with <a href="http://www.mkdocs.org">MkDocs</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||||
</footer>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</section>
|
||||
|
||||
</div>
|
||||
|
||||
<div class="rst-versions" role="note" style="cursor: pointer">
|
||||
<span class="rst-current-version" data-toggle="rst-current-version">
|
||||
|
||||
|
||||
<span><a href="../control_flow/" style="color: #fcfcfc;">« Previous</a></span>
|
||||
|
||||
|
||||
<span style="margin-left: 15px"><a href="../filters/" style="color: #fcfcfc">Next »</a></span>
|
||||
|
||||
</span>
|
||||
</div>
|
||||
<script>var base_url = '../..';</script>
|
||||
<script src="../../js/theme.js"></script>
|
||||
<script src="https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_HTML"></script>
|
||||
<script src="../../search/require.js"></script>
|
||||
<script src="../../search/search.js"></script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
1
docs/diagrams.xml
Normal file
|
Before Width: | Height: | Size: 355 KiB After Width: | Height: | Size: 193 KiB |
BIN
docs/img/act.png
Normal file
|
After Width: | Height: | Size: 49 KiB |
BIN
docs/img/filters.png
Normal file
|
After Width: | Height: | Size: 21 KiB |
BIN
docs/img/graph.png
Normal file
|
After Width: | Height: | Size: 29 KiB |
BIN
docs/img/improve.png
Normal file
|
After Width: | Height: | Size: 32 KiB |
BIN
docs/img/level.png
Normal file
|
After Width: | Height: | Size: 24 KiB |
BIN
docs/img/observe.png
Normal file
|
After Width: | Height: | Size: 40 KiB |
BIN
docs/img/train.png
Normal file
|
After Width: | Height: | Size: 39 KiB |
360
docs/index.html
@@ -3,33 +3,29 @@
|
||||
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<meta http-equiv="X-UA-Compatible" content="IE=edge">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<meta name="description" content="Reinforcement Learning Coach by Intel Nervana.">
|
||||
|
||||
<title>Reinforcement Learning Coach Documentation</title>
|
||||
|
||||
|
||||
<link rel="shortcut icon" href="./img/favicon.ico">
|
||||
|
||||
|
||||
<title>Home - Reinforcement Learning Coach</title>
|
||||
<link href='https://fonts.googleapis.com/css?family=Lato:400,700|Roboto+Slab:400,700|Inconsolata:400,700' rel='stylesheet' type='text/css'>
|
||||
|
||||
<link rel="stylesheet" href="./css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="./css/theme_extra.css" type="text/css" />
|
||||
<link rel="stylesheet" href="./css/highlight.css">
|
||||
<link href="./extra.css" rel="stylesheet">
|
||||
|
||||
|
||||
<script>
|
||||
// Current page data
|
||||
var mkdocs_page_name = "None";
|
||||
var mkdocs_page_name = "Home";
|
||||
var mkdocs_page_input_path = "index.md";
|
||||
var mkdocs_page_url = "/";
|
||||
</script>
|
||||
|
||||
<script src="./js/jquery-2.1.1.min.js"></script>
|
||||
<script src="./js/modernizr-2.8.3.min.js"></script>
|
||||
<script type="text/javascript" src="./js/highlight.pack.js"></script>
|
||||
<script src="./js/theme.js"></script>
|
||||
<script src="https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_HTML"></script>
|
||||
|
||||
<script type="text/javascript" src="./js/highlight.pack.js"></script>
|
||||
|
||||
</head>
|
||||
|
||||
@@ -40,7 +36,7 @@
|
||||
|
||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side stickynav">
|
||||
<div class="wy-side-nav-search">
|
||||
<a href="./index.html" class="icon icon-home"> Reinforcement Learning Coach Documentation</a>
|
||||
<a href="." class="icon icon-home"> Reinforcement Learning Coach</a>
|
||||
<div role="search">
|
||||
<form id ="rtd-search-form" class="wy-form" action="./search.html" method="get">
|
||||
<input type="text" name="q" placeholder="Search docs" />
|
||||
@@ -49,197 +45,152 @@
|
||||
</div>
|
||||
|
||||
<div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
|
||||
<ul class="current">
|
||||
<ul class="current">
|
||||
|
||||
|
||||
<li>
|
||||
<li class="toctree-l1 current">
|
||||
<a class="current" href="./index.html">Home</a>
|
||||
|
||||
<ul>
|
||||
|
||||
<li class="toctree-l3"><a href="#what-is-coach">What is Coach?</a></li>
|
||||
|
||||
<li><a class="toctree-l4" href="#motivation">Motivation</a></li>
|
||||
|
||||
<li><a class="toctree-l4" href="#solution">Solution</a></li>
|
||||
|
||||
<li><a class="toctree-l4" href="#design">Design</a></li>
|
||||
|
||||
|
||||
</ul>
|
||||
|
||||
</li>
|
||||
<li>
|
||||
|
||||
<li>
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="design/index.html">Design</a>
|
||||
|
||||
</li>
|
||||
<li>
|
||||
|
||||
<li>
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="usage/index.html">Usage</a>
|
||||
|
||||
</li>
|
||||
<li>
|
||||
|
||||
<li>
|
||||
<li class="toctree-l1 current">
|
||||
|
||||
<a class="current" href=".">Home</a>
|
||||
<ul class="subnav">
|
||||
<li><span>Algorithms</span></li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="algorithms/value_optimization/dqn/index.html">DQN</a>
|
||||
<li class="toctree-l2"><a href="#what-is-coach">What is Coach?</a></li>
|
||||
|
||||
<ul>
|
||||
|
||||
</li>
|
||||
<li><a class="toctree-l3" href="#motivation">Motivation</a></li>
|
||||
|
||||
<li><a class="toctree-l3" href="#solution">Solution</a></li>
|
||||
|
||||
<li><a class="toctree-l3" href="#design">Design</a></li>
|
||||
|
||||
</ul>
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="algorithms/value_optimization/double_dqn/index.html">Double DQN</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="algorithms/value_optimization/dueling_dqn/index.html">Dueling DQN</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="algorithms/value_optimization/categorical_dqn/index.html">Categorical DQN</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="algorithms/value_optimization/mmc/index.html">Mixed Monte Carlo</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="algorithms/value_optimization/pal/index.html">Persistent Advantage Learning</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="algorithms/value_optimization/nec/index.html">Neural Episodic Control</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="algorithms/value_optimization/bs_dqn/index.html">Bootstrapped DQN</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="algorithms/value_optimization/n_step/index.html">N-Step Q Learning</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="algorithms/value_optimization/naf/index.html">Normalized Advantage Functions</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="algorithms/policy_optimization/pg/index.html">Policy Gradient</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="algorithms/policy_optimization/ac/index.html">Actor-Critic</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="algorithms/policy_optimization/ddpg/index.html">Deep Determinstic Policy Gradients</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="algorithms/policy_optimization/ppo/index.html">Proximal Policy Optimization</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="algorithms/policy_optimization/cppo/index.html">Clipped Proximal Policy Optimization</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="algorithms/other/dfp/index.html">Direct Future Prediction</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="algorithms/imitation/bc/index.html">Behavioral Cloning</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
</ul>
|
||||
<li>
|
||||
</li>
|
||||
|
||||
<li>
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="dashboard/index.html">Coach Dashboard</a>
|
||||
|
||||
</li>
|
||||
<li>
|
||||
<li class="toctree-l1">
|
||||
|
||||
<a class="" href="usage/">Usage</a>
|
||||
</li>
|
||||
|
||||
<li>
|
||||
<li class="toctree-l1">
|
||||
|
||||
<span class="caption-text">Design</span>
|
||||
<ul class="subnav">
|
||||
<li><span>Contributing</span></li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="contributing/add_agent/index.html">Adding a New Agent</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="contributing/add_env/index.html">Adding a New Environment</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
<li class="">
|
||||
|
||||
<a class="" href="design/features/">Features</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="design/control_flow/">Control Flow</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="design/network/">Network</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="design/filters/">Filters</a>
|
||||
</li>
|
||||
</ul>
|
||||
<li>
|
||||
</li>
|
||||
|
||||
<li class="toctree-l1">
|
||||
|
||||
<span class="caption-text">Algorithms</span>
|
||||
<ul class="subnav">
|
||||
<li class="">
|
||||
|
||||
<a class="" href="algorithms/value_optimization/dqn/">DQN</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="algorithms/value_optimization/double_dqn/">Double DQN</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="algorithms/value_optimization/dueling_dqn/">Dueling DQN</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="algorithms/value_optimization/categorical_dqn/">Categorical DQN</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="algorithms/value_optimization/mmc/">Mixed Monte Carlo</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="algorithms/value_optimization/pal/">Persistent Advantage Learning</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="algorithms/value_optimization/nec/">Neural Episodic Control</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="algorithms/value_optimization/bs_dqn/">Bootstrapped DQN</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="algorithms/value_optimization/n_step/">N-Step Q Learning</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="algorithms/value_optimization/naf/">Normalized Advantage Functions</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="algorithms/policy_optimization/pg/">Policy Gradient</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="algorithms/policy_optimization/ac/">Actor-Critic</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="algorithms/policy_optimization/ddpg/">Deep Determinstic Policy Gradients</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="algorithms/policy_optimization/ppo/">Proximal Policy Optimization</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="algorithms/policy_optimization/cppo/">Clipped Proximal Policy Optimization</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="algorithms/other/dfp/">Direct Future Prediction</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="algorithms/imitation/bc/">Behavioral Cloning</a>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
|
||||
<li class="toctree-l1">
|
||||
|
||||
<a class="" href="dashboard/">Coach Dashboard</a>
|
||||
</li>
|
||||
|
||||
<li class="toctree-l1">
|
||||
|
||||
<span class="caption-text">Contributing</span>
|
||||
<ul class="subnav">
|
||||
<li class="">
|
||||
|
||||
<a class="" href="contributing/add_agent/">Adding a New Agent</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="contributing/add_env/">Adding a New Environment</a>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</div>
|
||||
@@ -251,7 +202,7 @@
|
||||
|
||||
<nav class="wy-nav-top" role="navigation" aria-label="top navigation">
|
||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||||
<a href="./index.html">Reinforcement Learning Coach Documentation</a>
|
||||
<a href=".">Reinforcement Learning Coach</a>
|
||||
</nav>
|
||||
|
||||
|
||||
@@ -259,7 +210,7 @@
|
||||
<div class="rst-content">
|
||||
<div role="navigation" aria-label="breadcrumbs navigation">
|
||||
<ul class="wy-breadcrumbs">
|
||||
<li><a href="./index.html">Docs</a> »</li>
|
||||
<li><a href=".">Docs</a> »</li>
|
||||
|
||||
|
||||
|
||||
@@ -281,7 +232,7 @@
|
||||
With Coach, it is possible to model an agent by combining various building blocks, and training the agent on multiple environments.
|
||||
The available environments allow testing the agent in different practical fields such as robotics, autonomous driving, games and more.
|
||||
Coach collects statistics from the training process and supports advanced visualization techniques for debugging the agent being trained.</p>
|
||||
<p>Blog post from the Intel® Nervana™ website can be found <a href="https://www.intelnervana.com/reinforcement-learning-coach-intel">here</a>. </p>
|
||||
<p>Blog post from the Intel® AI website can be found <a href="https://ai.intel.com/reinforcement-learning-coach-intel/">here</a>.</p>
|
||||
<p>GitHub repository is <a href="https://github.com/NervanaSystems/coach">here</a>. </p>
|
||||
<h2 id="design">Design</h2>
|
||||
<p><img src="img/design.png" alt="Coach Design" style="width: 800px;"/></p>
|
||||
@@ -292,7 +243,7 @@ Coach collects statistics from the training process and supports advanced visual
|
||||
|
||||
<div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
|
||||
|
||||
<a href="design/index.html" class="btn btn-neutral float-right" title="Design"/>Next <span class="icon icon-circle-arrow-right"></span></a>
|
||||
<a href="usage/" class="btn btn-neutral float-right" title="Usage">Next <span class="icon icon-circle-arrow-right"></span></a>
|
||||
|
||||
|
||||
</div>
|
||||
@@ -307,7 +258,7 @@ Coach collects statistics from the training process and supports advanced visual
|
||||
|
||||
Built with <a href="http://www.mkdocs.org">MkDocs</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||||
</footer>
|
||||
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -315,20 +266,25 @@ Coach collects statistics from the training process and supports advanced visual
|
||||
|
||||
</div>
|
||||
|
||||
<div class="rst-versions" role="note" style="cursor: pointer">
|
||||
<div class="rst-versions" role="note" style="cursor: pointer">
|
||||
<span class="rst-current-version" data-toggle="rst-current-version">
|
||||
|
||||
|
||||
|
||||
<span style="margin-left: 15px"><a href="design/index.html" style="color: #fcfcfc">Next »</a></span>
|
||||
<span style="margin-left: 15px"><a href="usage/" style="color: #fcfcfc">Next »</a></span>
|
||||
|
||||
</span>
|
||||
</div>
|
||||
<script>var base_url = '.';</script>
|
||||
<script src="./js/theme.js"></script>
|
||||
<script src="https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_HTML"></script>
|
||||
<script src="./search/require.js"></script>
|
||||
<script src="./search/search.js"></script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
||||
<!--
|
||||
MkDocs version : 0.14.0
|
||||
Build Date UTC : 2017-12-18 18:59:45.506407
|
||||
MkDocs version : 0.17.5
|
||||
Build Date UTC : 2018-08-09 12:14:19
|
||||
-->
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
$( document ).ready(function() {
|
||||
|
||||
// Shift nav in mobile when clicking the menu.
|
||||
$(document).on('click', "[data-toggle='wy-nav-top']", function() {
|
||||
$("[data-toggle='wy-nav-shift']").toggleClass("shift");
|
||||
@@ -12,6 +11,23 @@ $( document ).ready(function() {
|
||||
$("[data-toggle='rst-versions']").toggleClass("shift");
|
||||
});
|
||||
|
||||
// Keyboard navigation
|
||||
document.addEventListener("keydown", function(e) {
|
||||
if ($(e.target).is(':input')) return true;
|
||||
var key = e.which || e.keyCode || window.event && window.event.keyCode;
|
||||
var page;
|
||||
switch (key) {
|
||||
case 39: // right arrow
|
||||
page = $('[role="navigation"] a:contains(Next):first').prop('href');
|
||||
break;
|
||||
case 37: // left arrow
|
||||
page = $('[role="navigation"] a:contains(Previous):first').prop('href');
|
||||
break;
|
||||
default: break;
|
||||
}
|
||||
if (page) window.location.href = page;
|
||||
});
|
||||
|
||||
$(document).on('click', "[data-toggle='rst-current-version']", function() {
|
||||
$("[data-toggle='rst-versions']").toggleClass("shift-up");
|
||||
});
|
||||
@@ -53,3 +69,31 @@ window.SphinxRtdTheme = (function (jquery) {
|
||||
StickyNav : stickyNav
|
||||
};
|
||||
}($));
|
||||
|
||||
// The code below is a copy of @seanmadsen code posted Jan 10, 2017 on issue 803.
|
||||
// https://github.com/mkdocs/mkdocs/issues/803
|
||||
// This just incorporates the auto scroll into the theme itself without
|
||||
// the need for additional custom.js file.
|
||||
//
|
||||
$(function() {
|
||||
$.fn.isFullyWithinViewport = function(){
|
||||
var viewport = {};
|
||||
viewport.top = $(window).scrollTop();
|
||||
viewport.bottom = viewport.top + $(window).height();
|
||||
var bounds = {};
|
||||
bounds.top = this.offset().top;
|
||||
bounds.bottom = bounds.top + this.outerHeight();
|
||||
return ( ! (
|
||||
(bounds.top <= viewport.top) ||
|
||||
(bounds.bottom >= viewport.bottom)
|
||||
) );
|
||||
};
|
||||
if( $('li.toctree-l1.current').length && !$('li.toctree-l1.current').isFullyWithinViewport() ) {
|
||||
$('.wy-nav-side')
|
||||
.scrollTop(
|
||||
$('li.toctree-l1.current').offset().top -
|
||||
$('.wy-nav-side').offset().top -
|
||||
60
|
||||
);
|
||||
}
|
||||
});
|
||||
|
||||
7
docs/mkdocs/js/lunr-0.5.7.min.js
vendored
326
docs/search.html
@@ -3,31 +3,22 @@
|
||||
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<meta http-equiv="X-UA-Compatible" content="IE=edge">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>Reinforcement Learning Coach Documentation</title>
|
||||
|
||||
|
||||
<link rel="shortcut icon" href="./img/favicon.ico">
|
||||
|
||||
|
||||
<title>Reinforcement Learning Coach</title>
|
||||
<link href='https://fonts.googleapis.com/css?family=Lato:400,700|Roboto+Slab:400,700|Inconsolata:400,700' rel='stylesheet' type='text/css'>
|
||||
|
||||
<link rel="stylesheet" href="./css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="./css/theme_extra.css" type="text/css" />
|
||||
<link rel="stylesheet" href="./css/highlight.css">
|
||||
<link href="./extra.css" rel="stylesheet">
|
||||
|
||||
|
||||
<script src="./js/jquery-2.1.1.min.js"></script>
|
||||
<script src="./js/modernizr-2.8.3.min.js"></script>
|
||||
<script type="text/javascript" src="./js/highlight.pack.js"></script>
|
||||
<script src="./js/theme.js"></script>
|
||||
<script>var base_url = '.';</script>
|
||||
<script data-main="./mkdocs/js/search.js" src="./mkdocs/js/require.js"></script>
|
||||
|
||||
<script src="https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_HTML"></script>
|
||||
|
||||
<script type="text/javascript" src="./js/highlight.pack.js"></script>
|
||||
|
||||
</head>
|
||||
|
||||
@@ -38,7 +29,7 @@
|
||||
|
||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side stickynav">
|
||||
<div class="wy-side-nav-search">
|
||||
<a href="./index.html" class="icon icon-home"> Reinforcement Learning Coach Documentation</a>
|
||||
<a href="." class="icon icon-home"> Reinforcement Learning Coach</a>
|
||||
<div role="search">
|
||||
<form id ="rtd-search-form" class="wy-form" action="./search.html" method="get">
|
||||
<input type="text" name="q" placeholder="Search docs" />
|
||||
@@ -47,184 +38,136 @@
|
||||
</div>
|
||||
|
||||
<div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
|
||||
<ul class="current">
|
||||
<ul class="current">
|
||||
|
||||
|
||||
<li>
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="./index.html">Home</a>
|
||||
|
||||
</li>
|
||||
<li>
|
||||
<li class="toctree-l1">
|
||||
|
||||
<a class="" href=".">Home</a>
|
||||
</li>
|
||||
|
||||
<li>
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="design/index.html">Design</a>
|
||||
|
||||
</li>
|
||||
<li>
|
||||
<li class="toctree-l1">
|
||||
|
||||
<a class="" href="usage/">Usage</a>
|
||||
</li>
|
||||
|
||||
<li>
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="usage/index.html">Usage</a>
|
||||
|
||||
</li>
|
||||
<li>
|
||||
|
||||
<li>
|
||||
<li class="toctree-l1">
|
||||
|
||||
<span class="caption-text">Design</span>
|
||||
<ul class="subnav">
|
||||
<li><span>Algorithms</span></li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="algorithms/value_optimization/dqn/index.html">DQN</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="algorithms/value_optimization/double_dqn/index.html">Double DQN</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="algorithms/value_optimization/dueling_dqn/index.html">Dueling DQN</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="algorithms/value_optimization/categorical_dqn/index.html">Categorical DQN</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="algorithms/value_optimization/mmc/index.html">Mixed Monte Carlo</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="algorithms/value_optimization/pal/index.html">Persistent Advantage Learning</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="algorithms/value_optimization/nec/index.html">Neural Episodic Control</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="algorithms/value_optimization/bs_dqn/index.html">Bootstrapped DQN</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="algorithms/value_optimization/n_step/index.html">N-Step Q Learning</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="algorithms/value_optimization/naf/index.html">Normalized Advantage Functions</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="algorithms/policy_optimization/pg/index.html">Policy Gradient</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="algorithms/policy_optimization/ac/index.html">Actor-Critic</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="algorithms/policy_optimization/ddpg/index.html">Deep Determinstic Policy Gradients</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="algorithms/policy_optimization/ppo/index.html">Proximal Policy Optimization</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="algorithms/policy_optimization/cppo/index.html">Clipped Proximal Policy Optimization</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="algorithms/other/dfp/index.html">Direct Future Prediction</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="algorithms/imitation/bc/index.html">Behavioral Cloning</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
<li class="">
|
||||
|
||||
<a class="" href="design/features/">Features</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="design/control_flow/">Control Flow</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="design/network/">Network</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="design/filters/">Filters</a>
|
||||
</li>
|
||||
</ul>
|
||||
<li>
|
||||
</li>
|
||||
|
||||
<li>
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="dashboard/index.html">Coach Dashboard</a>
|
||||
|
||||
</li>
|
||||
<li>
|
||||
|
||||
<li>
|
||||
<li class="toctree-l1">
|
||||
|
||||
<span class="caption-text">Algorithms</span>
|
||||
<ul class="subnav">
|
||||
<li><span>Contributing</span></li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="contributing/add_agent/index.html">Adding a New Agent</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
<li class="toctree-l1 ">
|
||||
<a class="" href="contributing/add_env/index.html">Adding a New Environment</a>
|
||||
|
||||
</li>
|
||||
|
||||
|
||||
<li class="">
|
||||
|
||||
<a class="" href="algorithms/value_optimization/dqn/">DQN</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="algorithms/value_optimization/double_dqn/">Double DQN</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="algorithms/value_optimization/dueling_dqn/">Dueling DQN</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="algorithms/value_optimization/categorical_dqn/">Categorical DQN</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="algorithms/value_optimization/mmc/">Mixed Monte Carlo</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="algorithms/value_optimization/pal/">Persistent Advantage Learning</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="algorithms/value_optimization/nec/">Neural Episodic Control</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="algorithms/value_optimization/bs_dqn/">Bootstrapped DQN</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="algorithms/value_optimization/n_step/">N-Step Q Learning</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="algorithms/value_optimization/naf/">Normalized Advantage Functions</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="algorithms/policy_optimization/pg/">Policy Gradient</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="algorithms/policy_optimization/ac/">Actor-Critic</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="algorithms/policy_optimization/ddpg/">Deep Determinstic Policy Gradients</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="algorithms/policy_optimization/ppo/">Proximal Policy Optimization</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="algorithms/policy_optimization/cppo/">Clipped Proximal Policy Optimization</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="algorithms/other/dfp/">Direct Future Prediction</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="algorithms/imitation/bc/">Behavioral Cloning</a>
|
||||
</li>
|
||||
</ul>
|
||||
<li>
|
||||
</li>
|
||||
|
||||
<li class="toctree-l1">
|
||||
|
||||
<a class="" href="dashboard/">Coach Dashboard</a>
|
||||
</li>
|
||||
|
||||
<li class="toctree-l1">
|
||||
|
||||
<span class="caption-text">Contributing</span>
|
||||
<ul class="subnav">
|
||||
<li class="">
|
||||
|
||||
<a class="" href="contributing/add_agent/">Adding a New Agent</a>
|
||||
</li>
|
||||
<li class="">
|
||||
|
||||
<a class="" href="contributing/add_env/">Adding a New Environment</a>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</div>
|
||||
@@ -236,7 +179,7 @@
|
||||
|
||||
<nav class="wy-nav-top" role="navigation" aria-label="top navigation">
|
||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||||
<a href="./index.html">Reinforcement Learning Coach Documentation</a>
|
||||
<a href=".">Reinforcement Learning Coach</a>
|
||||
</nav>
|
||||
|
||||
|
||||
@@ -244,7 +187,7 @@
|
||||
<div class="rst-content">
|
||||
<div role="navigation" aria-label="breadcrumbs navigation">
|
||||
<ul class="wy-breadcrumbs">
|
||||
<li><a href="./index.html">Docs</a> »</li>
|
||||
<li><a href=".">Docs</a> »</li>
|
||||
|
||||
|
||||
<li class="wy-breadcrumbs-aside">
|
||||
@@ -264,8 +207,8 @@
|
||||
<input name="q" id="mkdocs-search-query" type="text" class="search_input search-query ui-autocomplete-input" placeholder="Search the Docs" autocomplete="off" autofocus>
|
||||
</form>
|
||||
|
||||
<div id="mkdocs-search-results">
|
||||
Sorry, page not found.
|
||||
<div id="mkdocs-search-results" class="search-results">
|
||||
Searching...
|
||||
</div>
|
||||
|
||||
|
||||
@@ -283,7 +226,7 @@
|
||||
|
||||
Built with <a href="http://www.mkdocs.org">MkDocs</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||||
</footer>
|
||||
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -291,13 +234,18 @@
|
||||
|
||||
</div>
|
||||
|
||||
<div class="rst-versions" role="note" style="cursor: pointer">
|
||||
<div class="rst-versions" role="note" style="cursor: pointer">
|
||||
<span class="rst-current-version" data-toggle="rst-current-version">
|
||||
|
||||
|
||||
|
||||
</span>
|
||||
</div>
|
||||
<script>var base_url = '.';</script>
|
||||
<script src="./js/theme.js"></script>
|
||||
<script src="https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_HTML"></script>
|
||||
<script src="./search/require.js"></script>
|
||||
<script src="./search/search.js"></script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
||||
7
docs/search/lunr.min.js
vendored
Normal file
@@ -1,8 +1,12 @@
|
||||
require.config({
|
||||
baseUrl: base_url + "/search/"
|
||||
});
|
||||
|
||||
require([
|
||||
base_url + '/mkdocs/js/mustache.min.js',
|
||||
base_url + '/mkdocs/js/lunr-0.5.7.min.js',
|
||||
'mustache.min',
|
||||
'lunr.min',
|
||||
'text!search-results-template.mustache',
|
||||
'text!../search_index.txt',
|
||||
'text!search_index.json',
|
||||
], function (Mustache, lunr, results_template, data) {
|
||||
"use strict";
|
||||
|
||||
@@ -70,7 +74,7 @@ require([
|
||||
*/
|
||||
jQuery('#mkdocs_search_modal a').click(function(){
|
||||
jQuery('#mkdocs_search_modal').modal('hide');
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
};
|
||||
@@ -83,6 +87,6 @@ require([
|
||||
search();
|
||||
}
|
||||
|
||||
search_input.addEventListener("keyup", search);
|
||||
if (search_input){search_input.addEventListener("keyup", search);}
|
||||
|
||||
});
|
||||
704
docs/search/search_index.json
Normal file
147
docs/sitemap.xml
@@ -1,158 +1,133 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
|
||||
|
||||
|
||||
<url>
|
||||
<loc>None/</loc>
|
||||
<lastmod>2017-12-18</lastmod>
|
||||
<loc>/</loc>
|
||||
<lastmod>2018-08-09</lastmod>
|
||||
<changefreq>daily</changefreq>
|
||||
</url>
|
||||
|
||||
|
||||
|
||||
<url>
|
||||
<loc>None/design/</loc>
|
||||
<lastmod>2017-12-18</lastmod>
|
||||
<loc>/usage/</loc>
|
||||
<lastmod>2018-08-09</lastmod>
|
||||
<changefreq>daily</changefreq>
|
||||
</url>
|
||||
|
||||
|
||||
|
||||
<url>
|
||||
<loc>None/usage/</loc>
|
||||
<lastmod>2017-12-18</lastmod>
|
||||
<loc>/design/features/</loc>
|
||||
<lastmod>2018-08-09</lastmod>
|
||||
<changefreq>daily</changefreq>
|
||||
</url>
|
||||
|
||||
|
||||
|
||||
|
||||
<url>
|
||||
<loc>None/algorithms/value_optimization/dqn/</loc>
|
||||
<lastmod>2017-12-18</lastmod>
|
||||
<loc>/design/control_flow/</loc>
|
||||
<lastmod>2018-08-09</lastmod>
|
||||
<changefreq>daily</changefreq>
|
||||
</url>
|
||||
|
||||
<url>
|
||||
<loc>None/algorithms/value_optimization/double_dqn/</loc>
|
||||
<lastmod>2017-12-18</lastmod>
|
||||
<loc>/design/network/</loc>
|
||||
<lastmod>2018-08-09</lastmod>
|
||||
<changefreq>daily</changefreq>
|
||||
</url>
|
||||
|
||||
<url>
|
||||
<loc>None/algorithms/value_optimization/dueling_dqn/</loc>
|
||||
<lastmod>2017-12-18</lastmod>
|
||||
<loc>/design/filters/</loc>
|
||||
<lastmod>2018-08-09</lastmod>
|
||||
<changefreq>daily</changefreq>
|
||||
</url>
|
||||
|
||||
<url>
|
||||
<loc>None/algorithms/value_optimization/categorical_dqn/</loc>
|
||||
<lastmod>2017-12-18</lastmod>
|
||||
<loc>/algorithms/value_optimization/dqn/</loc>
|
||||
<lastmod>2018-08-09</lastmod>
|
||||
<changefreq>daily</changefreq>
|
||||
</url>
|
||||
|
||||
<url>
|
||||
<loc>None/algorithms/value_optimization/mmc/</loc>
|
||||
<lastmod>2017-12-18</lastmod>
|
||||
<loc>/algorithms/value_optimization/double_dqn/</loc>
|
||||
<lastmod>2018-08-09</lastmod>
|
||||
<changefreq>daily</changefreq>
|
||||
</url>
|
||||
|
||||
<url>
|
||||
<loc>None/algorithms/value_optimization/pal/</loc>
|
||||
<lastmod>2017-12-18</lastmod>
|
||||
<loc>/algorithms/value_optimization/dueling_dqn/</loc>
|
||||
<lastmod>2018-08-09</lastmod>
|
||||
<changefreq>daily</changefreq>
|
||||
</url>
|
||||
|
||||
<url>
|
||||
<loc>None/algorithms/value_optimization/nec/</loc>
|
||||
<lastmod>2017-12-18</lastmod>
|
||||
<loc>/algorithms/value_optimization/categorical_dqn/</loc>
|
||||
<lastmod>2018-08-09</lastmod>
|
||||
<changefreq>daily</changefreq>
|
||||
</url>
|
||||
|
||||
<url>
|
||||
<loc>None/algorithms/value_optimization/bs_dqn/</loc>
|
||||
<lastmod>2017-12-18</lastmod>
|
||||
<loc>/algorithms/value_optimization/mmc/</loc>
|
||||
<lastmod>2018-08-09</lastmod>
|
||||
<changefreq>daily</changefreq>
|
||||
</url>
|
||||
|
||||
<url>
|
||||
<loc>None/algorithms/value_optimization/n_step/</loc>
|
||||
<lastmod>2017-12-18</lastmod>
|
||||
<loc>/algorithms/value_optimization/pal/</loc>
|
||||
<lastmod>2018-08-09</lastmod>
|
||||
<changefreq>daily</changefreq>
|
||||
</url>
|
||||
|
||||
<url>
|
||||
<loc>None/algorithms/value_optimization/naf/</loc>
|
||||
<lastmod>2017-12-18</lastmod>
|
||||
<loc>/algorithms/value_optimization/nec/</loc>
|
||||
<lastmod>2018-08-09</lastmod>
|
||||
<changefreq>daily</changefreq>
|
||||
</url>
|
||||
|
||||
<url>
|
||||
<loc>None/algorithms/policy_optimization/pg/</loc>
|
||||
<lastmod>2017-12-18</lastmod>
|
||||
<loc>/algorithms/value_optimization/bs_dqn/</loc>
|
||||
<lastmod>2018-08-09</lastmod>
|
||||
<changefreq>daily</changefreq>
|
||||
</url>
|
||||
|
||||
<url>
|
||||
<loc>None/algorithms/policy_optimization/ac/</loc>
|
||||
<lastmod>2017-12-18</lastmod>
|
||||
<loc>/algorithms/value_optimization/n_step/</loc>
|
||||
<lastmod>2018-08-09</lastmod>
|
||||
<changefreq>daily</changefreq>
|
||||
</url>
|
||||
|
||||
<url>
|
||||
<loc>None/algorithms/policy_optimization/ddpg/</loc>
|
||||
<lastmod>2017-12-18</lastmod>
|
||||
<loc>/algorithms/value_optimization/naf/</loc>
|
||||
<lastmod>2018-08-09</lastmod>
|
||||
<changefreq>daily</changefreq>
|
||||
</url>
|
||||
|
||||
<url>
|
||||
<loc>None/algorithms/policy_optimization/ppo/</loc>
|
||||
<lastmod>2017-12-18</lastmod>
|
||||
<loc>/algorithms/policy_optimization/pg/</loc>
|
||||
<lastmod>2018-08-09</lastmod>
|
||||
<changefreq>daily</changefreq>
|
||||
</url>
|
||||
|
||||
<url>
|
||||
<loc>None/algorithms/policy_optimization/cppo/</loc>
|
||||
<lastmod>2017-12-18</lastmod>
|
||||
<loc>/algorithms/policy_optimization/ac/</loc>
|
||||
<lastmod>2018-08-09</lastmod>
|
||||
<changefreq>daily</changefreq>
|
||||
</url>
|
||||
|
||||
<url>
|
||||
<loc>None/algorithms/other/dfp/</loc>
|
||||
<lastmod>2017-12-18</lastmod>
|
||||
<loc>/algorithms/policy_optimization/ddpg/</loc>
|
||||
<lastmod>2018-08-09</lastmod>
|
||||
<changefreq>daily</changefreq>
|
||||
</url>
|
||||
|
||||
<url>
|
||||
<loc>None/algorithms/imitation/bc/</loc>
|
||||
<lastmod>2017-12-18</lastmod>
|
||||
<loc>/algorithms/policy_optimization/ppo/</loc>
|
||||
<lastmod>2018-08-09</lastmod>
|
||||
<changefreq>daily</changefreq>
|
||||
</url>
|
||||
|
||||
|
||||
|
||||
|
||||
<url>
|
||||
<loc>None/dashboard/</loc>
|
||||
<lastmod>2017-12-18</lastmod>
|
||||
<loc>/algorithms/policy_optimization/cppo/</loc>
|
||||
<lastmod>2018-08-09</lastmod>
|
||||
<changefreq>daily</changefreq>
|
||||
</url>
|
||||
|
||||
|
||||
|
||||
|
||||
<url>
|
||||
<loc>None/contributing/add_agent/</loc>
|
||||
<lastmod>2017-12-18</lastmod>
|
||||
<loc>/algorithms/other/dfp/</loc>
|
||||
<lastmod>2018-08-09</lastmod>
|
||||
<changefreq>daily</changefreq>
|
||||
</url>
|
||||
|
||||
<url>
|
||||
<loc>None/contributing/add_env/</loc>
|
||||
<lastmod>2017-12-18</lastmod>
|
||||
<loc>/algorithms/imitation/bc/</loc>
|
||||
<lastmod>2018-08-09</lastmod>
|
||||
<changefreq>daily</changefreq>
|
||||
</url>
|
||||
<url>
|
||||
<loc>/dashboard/</loc>
|
||||
<lastmod>2018-08-09</lastmod>
|
||||
<changefreq>daily</changefreq>
|
||||
</url>
|
||||
<url>
|
||||
<loc>/contributing/add_agent/</loc>
|
||||
<lastmod>2018-08-09</lastmod>
|
||||
<changefreq>daily</changefreq>
|
||||
</url>
|
||||
<url>
|
||||
<loc>/contributing/add_env/</loc>
|
||||
<lastmod>2018-08-09</lastmod>
|
||||
<changefreq>daily</changefreq>
|
||||
</url>
|
||||
|
||||
|
||||
|
||||
</urlset>
|
||||
@@ -3,33 +3,29 @@
|
||||
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<meta http-equiv="X-UA-Compatible" content="IE=edge">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>Usage - Reinforcement Learning Coach Documentation</title>
|
||||
|
||||
|
||||
<link rel="shortcut icon" href="../img/favicon.ico">
|
||||
|
||||
|
||||
<title>Usage - Reinforcement Learning Coach</title>
|
||||
<link href='https://fonts.googleapis.com/css?family=Lato:400,700|Roboto+Slab:400,700|Inconsolata:400,700' rel='stylesheet' type='text/css'>
|
||||
|
||||
<link rel="stylesheet" href="../css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../css/theme_extra.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../css/highlight.css">
|
||||
<link href="../extra.css" rel="stylesheet">
|
||||
|
||||
|
||||
<script>
|
||||
// Current page data
|
||||
var mkdocs_page_name = "Usage";
|
||||
var mkdocs_page_input_path = "usage.md";
|
||||
var mkdocs_page_url = "/usage/";
|
||||
</script>
|
||||
|
||||
<script src="../js/jquery-2.1.1.min.js"></script>
|
||||
<script src="../js/modernizr-2.8.3.min.js"></script>
|
||||
<script type="text/javascript" src="../js/highlight.pack.js"></script>
|
||||
<script src="../js/theme.js"></script>
|
||||
<script src="https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_HTML"></script>
|
||||
|
||||
<script type="text/javascript" src="../js/highlight.pack.js"></script>
|
||||
|
||||
</head>
|
||||
|
||||
@@ -40,7 +36,7 @@
|
||||
|
||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side stickynav">
|
||||
<div class="wy-side-nav-search">
|
||||
<a href="../index.html" class="icon icon-home"> Reinforcement Learning Coach Documentation</a>
|
||||
<a href=".." class="icon icon-home"> Reinforcement Learning Coach</a>
|
||||
<div role="search">
|
||||
<form id ="rtd-search-form" class="wy-form" action="../search.html" method="get">
|
||||
<input type="text" name="q" placeholder="Search docs" />
|
||||
@@ -49,205 +45,160 @@
 </div>
 <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
 <ul class="current">
-<li>
-<li class="toctree-l1 ">
-<a class="" href="../index.html">Home</a>
-</li>
-<li>
-<li class="toctree-l1 ">
-<a class="" href="../design/index.html">Design</a>
-</li>
-<li>
-<li class="toctree-l1 current">
-<a class="current" href="./index.html">Usage</a>
-<ul>
-<li class="toctree-l3"><a href="#coach-usage">Coach Usage</a></li>
-<li><a class="toctree-l4" href="#training-an-agent">Training an Agent</a></li>
-<li><a class="toctree-l4" href="#evaluating-an-agent">Evaluating an Agent</a></li>
-<li><a class="toctree-l4" href="#playing-with-the-environment-as-a-human">Playing with the Environment as a Human</a></li>
-<li><a class="toctree-l4" href="#learning-through-imitation-learning">Learning Through Imitation Learning</a></li>
-<li><a class="toctree-l4" href="#visualizations">Visualizations</a></li>
-<li><a class="toctree-l4" href="#switching-between-deep-learning-frameworks">Switching between deep learning frameworks</a></li>
-<li><a class="toctree-l4" href="#additional-flags">Additional Flags</a></li>
-</ul>
-</li>
-<li>
-<li><span>Algorithms</span></li>
-<li class="toctree-l1 ">
-<a class="" href="../algorithms/value_optimization/dqn/index.html">DQN</a>
-</li>
-<li class="toctree-l1 ">
-<a class="" href="../algorithms/value_optimization/double_dqn/index.html">Double DQN</a>
-</li>
-<li class="toctree-l1 ">
-<a class="" href="../algorithms/value_optimization/dueling_dqn/index.html">Dueling DQN</a>
-</li>
-<li class="toctree-l1 ">
-<a class="" href="../algorithms/value_optimization/categorical_dqn/index.html">Categorical DQN</a>
-</li>
-<li class="toctree-l1 ">
-<a class="" href="../algorithms/value_optimization/mmc/index.html">Mixed Monte Carlo</a>
-</li>
-<li class="toctree-l1 ">
-<a class="" href="../algorithms/value_optimization/pal/index.html">Persistent Advantage Learning</a>
-</li>
-<li class="toctree-l1 ">
-<a class="" href="../algorithms/value_optimization/nec/index.html">Neural Episodic Control</a>
-</li>
-<li class="toctree-l1 ">
-<a class="" href="../algorithms/value_optimization/bs_dqn/index.html">Bootstrapped DQN</a>
-</li>
-<li class="toctree-l1 ">
-<a class="" href="../algorithms/value_optimization/n_step/index.html">N-Step Q Learning</a>
-</li>
-<li class="toctree-l1 ">
-<a class="" href="../algorithms/value_optimization/naf/index.html">Normalized Advantage Functions</a>
-</li>
-<li class="toctree-l1 ">
-<a class="" href="../algorithms/policy_optimization/pg/index.html">Policy Gradient</a>
-</li>
-<li class="toctree-l1 ">
-<a class="" href="../algorithms/policy_optimization/ac/index.html">Actor-Critic</a>
-</li>
-<li class="toctree-l1 ">
-<a class="" href="../algorithms/policy_optimization/ddpg/index.html">Deep Determinstic Policy Gradients</a>
-</li>
-<li class="toctree-l1 ">
-<a class="" href="../algorithms/policy_optimization/ppo/index.html">Proximal Policy Optimization</a>
-</li>
-<li class="toctree-l1 ">
-<a class="" href="../algorithms/policy_optimization/cppo/index.html">Clipped Proximal Policy Optimization</a>
-</li>
-<li class="toctree-l1 ">
-<a class="" href="../algorithms/other/dfp/index.html">Direct Future Prediction</a>
-</li>
-<li class="toctree-l1 ">
-<a class="" href="../algorithms/imitation/bc/index.html">Behavioral Cloning</a>
-</li>
-<li>
-<li class="toctree-l1 ">
-<a class="" href="../dashboard/index.html">Coach Dashboard</a>
-</li>
-<li>
-<li><span>Contributing</span></li>
-<li class="toctree-l1 ">
-<a class="" href="../contributing/add_agent/index.html">Adding a New Agent</a>
-</li>
-<li class="toctree-l1 ">
-<a class="" href="../contributing/add_env/index.html">Adding a New Environment</a>
-</li>
+<li class="toctree-l1">
+<a class="" href="..">Home</a>
+</li>
+<li class="toctree-l1 current">
+<a class="current" href="./">Usage</a>
+<ul class="subnav">
+<li class="toctree-l2"><a href="#coach-usage">Coach Usage</a></li>
+<ul>
+<li><a class="toctree-l3" href="#training-an-agent">Training an Agent</a></li>
+<li><a class="toctree-l3" href="#evaluating-an-agent">Evaluating an Agent</a></li>
+<li><a class="toctree-l3" href="#playing-with-the-environment-as-a-human">Playing with the Environment as a Human</a></li>
+<li><a class="toctree-l3" href="#learning-through-imitation-learning">Learning Through Imitation Learning</a></li>
+<li><a class="toctree-l3" href="#visualizations">Visualizations</a></li>
+<li><a class="toctree-l3" href="#switching-between-deep-learning-frameworks">Switching between deep learning frameworks</a></li>
+<li><a class="toctree-l3" href="#additional-flags">Additional Flags</a></li>
+</ul>
+</ul>
+</li>
+<li class="toctree-l1">
+<span class="caption-text">Design</span>
+<ul class="subnav">
+<li class="">
+<a class="" href="../design/features/">Features</a>
+</li>
+<li class="">
+<a class="" href="../design/control_flow/">Control Flow</a>
+</li>
+<li class="">
+<a class="" href="../design/network/">Network</a>
+</li>
+<li class="">
+<a class="" href="../design/filters/">Filters</a>
+</li>
+</ul>
+</li>
+<li class="toctree-l1">
+<span class="caption-text">Algorithms</span>
+<ul class="subnav">
+<li class="">
+<a class="" href="../algorithms/value_optimization/dqn/">DQN</a>
+</li>
+<li class="">
+<a class="" href="../algorithms/value_optimization/double_dqn/">Double DQN</a>
+</li>
+<li class="">
+<a class="" href="../algorithms/value_optimization/dueling_dqn/">Dueling DQN</a>
+</li>
+<li class="">
+<a class="" href="../algorithms/value_optimization/categorical_dqn/">Categorical DQN</a>
+</li>
+<li class="">
+<a class="" href="../algorithms/value_optimization/mmc/">Mixed Monte Carlo</a>
+</li>
+<li class="">
+<a class="" href="../algorithms/value_optimization/pal/">Persistent Advantage Learning</a>
+</li>
+<li class="">
+<a class="" href="../algorithms/value_optimization/nec/">Neural Episodic Control</a>
+</li>
+<li class="">
+<a class="" href="../algorithms/value_optimization/bs_dqn/">Bootstrapped DQN</a>
+</li>
+<li class="">
+<a class="" href="../algorithms/value_optimization/n_step/">N-Step Q Learning</a>
+</li>
+<li class="">
+<a class="" href="../algorithms/value_optimization/naf/">Normalized Advantage Functions</a>
+</li>
+<li class="">
+<a class="" href="../algorithms/policy_optimization/pg/">Policy Gradient</a>
+</li>
+<li class="">
+<a class="" href="../algorithms/policy_optimization/ac/">Actor-Critic</a>
+</li>
+<li class="">
+<a class="" href="../algorithms/policy_optimization/ddpg/">Deep Determinstic Policy Gradients</a>
+</li>
+<li class="">
+<a class="" href="../algorithms/policy_optimization/ppo/">Proximal Policy Optimization</a>
+</li>
+<li class="">
+<a class="" href="../algorithms/policy_optimization/cppo/">Clipped Proximal Policy Optimization</a>
+</li>
+<li class="">
+<a class="" href="../algorithms/other/dfp/">Direct Future Prediction</a>
+</li>
+<li class="">
+<a class="" href="../algorithms/imitation/bc/">Behavioral Cloning</a>
+</li>
+</ul>
+</li>
+<li class="toctree-l1">
+<a class="" href="../dashboard/">Coach Dashboard</a>
+</li>
+<li class="toctree-l1">
+<span class="caption-text">Contributing</span>
+<ul class="subnav">
+<li class="">
+<a class="" href="../contributing/add_agent/">Adding a New Agent</a>
+</li>
+<li class="">
+<a class="" href="../contributing/add_env/">Adding a New Environment</a>
+</li>
+</ul>
+</li>
 </ul>
 </div>
@@ -259,7 +210,7 @@
 <nav class="wy-nav-top" role="navigation" aria-label="top navigation">
 <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
-<a href="../index.html">Reinforcement Learning Coach Documentation</a>
+<a href="..">Reinforcement Learning Coach</a>
 </nav>
@@ -267,7 +218,7 @@
 <div class="rst-content">
 <div role="navigation" aria-label="breadcrumbs navigation">
 <ul class="wy-breadcrumbs">
-<li><a href="../index.html">Docs</a> »</li>
+<li><a href="..">Docs</a> »</li>
@@ -463,10 +414,10 @@ The most up to date description can be found by using the <code>-h</code> flag.<
 <div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
-<a href="../algorithms/value_optimization/dqn/index.html" class="btn btn-neutral float-right" title="DQN"/>Next <span class="icon icon-circle-arrow-right"></span></a>
-<a href="../design/index.html" class="btn btn-neutral" title="Design"><span class="icon icon-circle-arrow-left"></span> Previous</a>
+<a href="../design/features/" class="btn btn-neutral float-right" title="Features">Next <span class="icon icon-circle-arrow-right"></span></a>
+<a href=".." class="btn btn-neutral" title="Home"><span class="icon icon-circle-arrow-left"></span> Previous</a>
 </div>
@@ -480,7 +431,7 @@ The most up to date description can be found by using the <code>-h</code> flag.<
 Built with <a href="http://www.mkdocs.org">MkDocs</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
 </footer>
 </div>
 </div>
@@ -488,17 +439,22 @@ The most up to date description can be found by using the <code>-h</code> flag.<
 </div>
 <div class="rst-versions" role="note" style="cursor: pointer">
 <span class="rst-current-version" data-toggle="rst-current-version">
-<span><a href="../design/index.html" style="color: #fcfcfc;">« Previous</a></span>
-<span style="margin-left: 15px"><a href="../algorithms/value_optimization/dqn/index.html" style="color: #fcfcfc">Next »</a></span>
+<span><a href=".." style="color: #fcfcfc;">« Previous</a></span>
+<span style="margin-left: 15px"><a href="../design/features/" style="color: #fcfcfc">Next »</a></span>
 </span>
 </div>
 <script>var base_url = '..';</script>
 <script src="../js/theme.js"></script>
 <script src="https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_HTML"></script>
+<script src="../search/require.js"></script>
+<script src="../search/search.js"></script>
 </body>
 </html>
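Taken together, these hunks show one mechanical change: the docs were regenerated, the nav gained Design, Algorithms, and Contributing subsections, every internal link dropped its explicit `index.html` suffix in favor of directory-style URLs, and `require.js`/`search.js` are now loaded for search. As a rough sketch of the kind of MkDocs configuration that would produce this menu — the project's actual `mkdocs.yml` is not part of this diff, so the markdown file paths, theme choice, and option values below are assumptions inferred from the link targets:

```yaml
# Hypothetical mkdocs.yml, reconstructed from the regenerated nav in this diff.
# Only the menu labels and URL paths come from the diff; file names and theme
# settings are assumed. "nav:" is MkDocs >= 1.0 syntax; older releases used "pages:".
site_name: Reinforcement Learning Coach
theme: readthedocs
use_directory_urls: true  # MkDocs default; newer versions emit "page/" links rather than "page/index.html"
nav:
  - Home: index.md
  - Usage: usage.md
  - Design:
      - Features: design/features.md
      - Control Flow: design/control_flow.md
      - Network: design/network.md
      - Filters: design/filters.md
  - Algorithms:
      - DQN: algorithms/value_optimization/dqn.md
      - Double DQN: algorithms/value_optimization/double_dqn.md
      # ... the remaining algorithm pages follow the same pattern ...
      - Behavioral Cloning: algorithms/imitation/bc.md
  - Coach Dashboard: dashboard.md
  - Contributing:
      - Adding a New Agent: contributing/add_agent.md
      - Adding a New Environment: contributing/add_env.md
```

If this reading is right, every `href` substitution above falls out of the build tool rather than hand-edited HTML, which is why the same changes repeat identically on every page of the generated docs.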