1
0
mirror of https://github.com/gryf/coach.git synced 2025-12-18 11:40:18 +01:00

pre-release 0.10.0

This commit is contained in:
Gal Novik
2018-08-13 17:11:34 +03:00
parent d44c329bb8
commit 19ca5c24b1
485 changed files with 33292 additions and 16770 deletions

View File

@@ -3,33 +3,29 @@
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta name="description" content="Reinforcement Learning Coach by Intel Nervana.">
<title>Reinforcement Learning Coach Documentation</title>
<link rel="shortcut icon" href="./img/favicon.ico">
<title>Home - Reinforcement Learning Coach</title>
<link href='https://fonts.googleapis.com/css?family=Lato:400,700|Roboto+Slab:400,700|Inconsolata:400,700' rel='stylesheet' type='text/css'>
<link rel="stylesheet" href="./css/theme.css" type="text/css" />
<link rel="stylesheet" href="./css/theme_extra.css" type="text/css" />
<link rel="stylesheet" href="./css/highlight.css">
<link href="./extra.css" rel="stylesheet">
<script>
// Current page data
var mkdocs_page_name = "None";
var mkdocs_page_name = "Home";
var mkdocs_page_input_path = "index.md";
var mkdocs_page_url = "/";
</script>
<script src="./js/jquery-2.1.1.min.js"></script>
<script src="./js/modernizr-2.8.3.min.js"></script>
<script type="text/javascript" src="./js/highlight.pack.js"></script>
<script src="./js/theme.js"></script>
<script src="https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_HTML"></script>
<script type="text/javascript" src="./js/highlight.pack.js"></script>
</head>
@@ -40,7 +36,7 @@
<nav data-toggle="wy-nav-shift" class="wy-nav-side stickynav">
<div class="wy-side-nav-search">
<a href="./index.html" class="icon icon-home"> Reinforcement Learning Coach Documentation</a>
<a href="." class="icon icon-home"> Reinforcement Learning Coach</a>
<div role="search">
<form id ="rtd-search-form" class="wy-form" action="./search.html" method="get">
<input type="text" name="q" placeholder="Search docs" />
@@ -49,197 +45,152 @@
</div>
<div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
<ul class="current">
<ul class="current">
<li>
<li class="toctree-l1 current">
<a class="current" href="./index.html">Home</a>
<ul>
<li class="toctree-l3"><a href="#what-is-coach">What is Coach?</a></li>
<li><a class="toctree-l4" href="#motivation">Motivation</a></li>
<li><a class="toctree-l4" href="#solution">Solution</a></li>
<li><a class="toctree-l4" href="#design">Design</a></li>
</ul>
</li>
<li>
<li>
<li class="toctree-l1 ">
<a class="" href="design/index.html">Design</a>
</li>
<li>
<li>
<li class="toctree-l1 ">
<a class="" href="usage/index.html">Usage</a>
</li>
<li>
<li>
<li class="toctree-l1 current">
<a class="current" href=".">Home</a>
<ul class="subnav">
<li><span>Algorithms</span></li>
<li class="toctree-l1 ">
<a class="" href="algorithms/value_optimization/dqn/index.html">DQN</a>
<li class="toctree-l2"><a href="#what-is-coach">What is Coach?</a></li>
<ul>
</li>
<li><a class="toctree-l3" href="#motivation">Motivation</a></li>
<li><a class="toctree-l3" href="#solution">Solution</a></li>
<li><a class="toctree-l3" href="#design">Design</a></li>
</ul>
<li class="toctree-l1 ">
<a class="" href="algorithms/value_optimization/double_dqn/index.html">Double DQN</a>
</li>
<li class="toctree-l1 ">
<a class="" href="algorithms/value_optimization/dueling_dqn/index.html">Dueling DQN</a>
</li>
<li class="toctree-l1 ">
<a class="" href="algorithms/value_optimization/categorical_dqn/index.html">Categorical DQN</a>
</li>
<li class="toctree-l1 ">
<a class="" href="algorithms/value_optimization/mmc/index.html">Mixed Monte Carlo</a>
</li>
<li class="toctree-l1 ">
<a class="" href="algorithms/value_optimization/pal/index.html">Persistent Advantage Learning</a>
</li>
<li class="toctree-l1 ">
<a class="" href="algorithms/value_optimization/nec/index.html">Neural Episodic Control</a>
</li>
<li class="toctree-l1 ">
<a class="" href="algorithms/value_optimization/bs_dqn/index.html">Bootstrapped DQN</a>
</li>
<li class="toctree-l1 ">
<a class="" href="algorithms/value_optimization/n_step/index.html">N-Step Q Learning</a>
</li>
<li class="toctree-l1 ">
<a class="" href="algorithms/value_optimization/naf/index.html">Normalized Advantage Functions</a>
</li>
<li class="toctree-l1 ">
<a class="" href="algorithms/policy_optimization/pg/index.html">Policy Gradient</a>
</li>
<li class="toctree-l1 ">
<a class="" href="algorithms/policy_optimization/ac/index.html">Actor-Critic</a>
</li>
<li class="toctree-l1 ">
<a class="" href="algorithms/policy_optimization/ddpg/index.html">Deep Deterministic Policy Gradients</a>
</li>
<li class="toctree-l1 ">
<a class="" href="algorithms/policy_optimization/ppo/index.html">Proximal Policy Optimization</a>
</li>
<li class="toctree-l1 ">
<a class="" href="algorithms/policy_optimization/cppo/index.html">Clipped Proximal Policy Optimization</a>
</li>
<li class="toctree-l1 ">
<a class="" href="algorithms/other/dfp/index.html">Direct Future Prediction</a>
</li>
<li class="toctree-l1 ">
<a class="" href="algorithms/imitation/bc/index.html">Behavioral Cloning</a>
</li>
</ul>
<li>
</li>
<li>
<li class="toctree-l1 ">
<a class="" href="dashboard/index.html">Coach Dashboard</a>
</li>
<li>
<li class="toctree-l1">
<a class="" href="usage/">Usage</a>
</li>
<li>
<li class="toctree-l1">
<span class="caption-text">Design</span>
<ul class="subnav">
<li><span>Contributing</span></li>
<li class="toctree-l1 ">
<a class="" href="contributing/add_agent/index.html">Adding a New Agent</a>
</li>
<li class="toctree-l1 ">
<a class="" href="contributing/add_env/index.html">Adding a New Environment</a>
</li>
<li class="">
<a class="" href="design/features/">Features</a>
</li>
<li class="">
<a class="" href="design/control_flow/">Control Flow</a>
</li>
<li class="">
<a class="" href="design/network/">Network</a>
</li>
<li class="">
<a class="" href="design/filters/">Filters</a>
</li>
</ul>
<li>
</li>
<li class="toctree-l1">
<span class="caption-text">Algorithms</span>
<ul class="subnav">
<li class="">
<a class="" href="algorithms/value_optimization/dqn/">DQN</a>
</li>
<li class="">
<a class="" href="algorithms/value_optimization/double_dqn/">Double DQN</a>
</li>
<li class="">
<a class="" href="algorithms/value_optimization/dueling_dqn/">Dueling DQN</a>
</li>
<li class="">
<a class="" href="algorithms/value_optimization/categorical_dqn/">Categorical DQN</a>
</li>
<li class="">
<a class="" href="algorithms/value_optimization/mmc/">Mixed Monte Carlo</a>
</li>
<li class="">
<a class="" href="algorithms/value_optimization/pal/">Persistent Advantage Learning</a>
</li>
<li class="">
<a class="" href="algorithms/value_optimization/nec/">Neural Episodic Control</a>
</li>
<li class="">
<a class="" href="algorithms/value_optimization/bs_dqn/">Bootstrapped DQN</a>
</li>
<li class="">
<a class="" href="algorithms/value_optimization/n_step/">N-Step Q Learning</a>
</li>
<li class="">
<a class="" href="algorithms/value_optimization/naf/">Normalized Advantage Functions</a>
</li>
<li class="">
<a class="" href="algorithms/policy_optimization/pg/">Policy Gradient</a>
</li>
<li class="">
<a class="" href="algorithms/policy_optimization/ac/">Actor-Critic</a>
</li>
<li class="">
<a class="" href="algorithms/policy_optimization/ddpg/">Deep Deterministic Policy Gradients</a>
</li>
<li class="">
<a class="" href="algorithms/policy_optimization/ppo/">Proximal Policy Optimization</a>
</li>
<li class="">
<a class="" href="algorithms/policy_optimization/cppo/">Clipped Proximal Policy Optimization</a>
</li>
<li class="">
<a class="" href="algorithms/other/dfp/">Direct Future Prediction</a>
</li>
<li class="">
<a class="" href="algorithms/imitation/bc/">Behavioral Cloning</a>
</li>
</ul>
</li>
<li class="toctree-l1">
<a class="" href="dashboard/">Coach Dashboard</a>
</li>
<li class="toctree-l1">
<span class="caption-text">Contributing</span>
<ul class="subnav">
<li class="">
<a class="" href="contributing/add_agent/">Adding a New Agent</a>
</li>
<li class="">
<a class="" href="contributing/add_env/">Adding a New Environment</a>
</li>
</ul>
</li>
</ul>
</div>
@@ -251,7 +202,7 @@
<nav class="wy-nav-top" role="navigation" aria-label="top navigation">
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="./index.html">Reinforcement Learning Coach Documentation</a>
<a href=".">Reinforcement Learning Coach</a>
</nav>
@@ -259,7 +210,7 @@
<div class="rst-content">
<div role="navigation" aria-label="breadcrumbs navigation">
<ul class="wy-breadcrumbs">
<li><a href="./index.html">Docs</a> &raquo;</li>
<li><a href=".">Docs</a> &raquo;</li>
@@ -281,7 +232,7 @@
With Coach, it is possible to model an agent by combining various building blocks, and training the agent on multiple environments.
The available environments allow testing the agent in different practical fields such as robotics, autonomous driving, games and more.
Coach collects statistics from the training process and supports advanced visualization techniques for debugging the agent being trained.</p>
<p>Blog post from the Intel® Nervana™ website can be found <a href="https://www.intelnervana.com/reinforcement-learning-coach-intel">here</a>. </p>
<p>Blog post from the Intel® AI website can be found <a href="https://ai.intel.com/reinforcement-learning-coach-intel/">here</a>.</p>
<p>GitHub repository is <a href="https://github.com/NervanaSystems/coach">here</a>. </p>
<h2 id="design">Design</h2>
<p><img src="img/design.png" alt="Coach Design" style="width: 800px;"/></p>
@@ -292,7 +243,7 @@ Coach collects statistics from the training process and supports advanced visual
<div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
<a href="design/index.html" class="btn btn-neutral float-right" title="Design">Next <span class="icon icon-circle-arrow-right"></span></a>
<a href="usage/" class="btn btn-neutral float-right" title="Usage">Next <span class="icon icon-circle-arrow-right"></span></a>
</div>
@@ -307,7 +258,7 @@ Coach collects statistics from the training process and supports advanced visual
Built with <a href="http://www.mkdocs.org">MkDocs</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
@@ -315,20 +266,25 @@ Coach collects statistics from the training process and supports advanced visual
</div>
<div class="rst-versions" role="note" style="cursor: pointer">
<div class="rst-versions" role="note" style="cursor: pointer">
<span class="rst-current-version" data-toggle="rst-current-version">
<span style="margin-left: 15px"><a href="design/index.html" style="color: #fcfcfc">Next &raquo;</a></span>
<span style="margin-left: 15px"><a href="usage/" style="color: #fcfcfc">Next &raquo;</a></span>
</span>
</div>
<script>var base_url = '.';</script>
<script src="./js/theme.js"></script>
<script src="https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_HTML"></script>
<script src="./search/require.js"></script>
<script src="./search/search.js"></script>
</body>
</html>
<!--
MkDocs version : 0.14.0
Build Date UTC : 2017-12-18 18:59:45.506407
MkDocs version : 0.17.5
Build Date UTC : 2018-08-09 12:14:19
-->