1
0
mirror of https://github.com/gryf/coach.git synced 2025-12-17 11:10:20 +01:00
Files
coach/docs/index.html
2018-04-23 09:14:20 +03:00

335 lines
9.2 KiB
HTML

<!DOCTYPE html>
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Reinforcement Learning Coach Documentation</title>
  <link rel="shortcut icon" href="./img/favicon.ico">

  <!-- Web fonts, then theme stylesheets. extra.css comes last, so at equal
       specificity its rules win over the theme's. -->
  <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Lato:400,700|Roboto+Slab:400,700|Inconsolata:400,700">
  <link rel="stylesheet" href="./css/theme.css">
  <link rel="stylesheet" href="./css/theme_extra.css">
  <link rel="stylesheet" href="./css/highlight.css">
  <link rel="stylesheet" href="./extra.css">

  <script>
  // Current page data
  var mkdocs_page_name = "None";
  </script>

  <!-- Script order is preserved: jQuery and Modernizr first, then the
       highlighter and the theme script. -->
  <script src="./js/jquery-2.1.1.min.js"></script>
  <script src="./js/modernizr-2.8.3.min.js"></script>
  <script src="./js/highlight.pack.js"></script>
  <script src="./js/theme.js"></script>

  <!-- MathJax 2.x from cdnjs with a pinned version. The former
       cdn.mathjax.org host was retired in 2017 and must not be used. -->
  <script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.7/MathJax.js?config=TeX-AMS_HTML"></script>
</head>
<body class="wy-body-for-nav" role="document">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side stickynav">
<div class="wy-side-nav-search">
  <!-- Site title linking back to the home page. -->
  <a href="./index.html" class="icon icon-home"> Reinforcement Learning Coach Documentation</a>
  <!-- Search form: sends the query as the "q" parameter to search.html via GET. -->
  <div role="search">
    <form id="rtd-search-form" class="wy-form" action="./search.html" method="get">
      <!-- A placeholder is not an accessible name, so aria-label provides one. -->
      <input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
    </form>
  </div>
</div>
<div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
  <!-- Main table of contents. Every top-level entry is exactly one <li>;
       grouped sections (Algorithms, Contributing) wrap their "subnav" list
       in a single, explicitly closed <li> so the list contains no empty items. -->
  <ul class="current">
    <!-- Current page, with links to its in-page sections. -->
    <li class="toctree-l1 current">
      <a class="current" href="./index.html">Home</a>
      <ul>
        <li class="toctree-l3"><a href="#what-is-coach">What is Coach?</a></li>
        <li><a class="toctree-l4" href="#motivation">Motivation</a></li>
        <li><a class="toctree-l4" href="#solution">Solution</a></li>
        <li><a class="toctree-l4" href="#design">Design</a></li>
      </ul>
    </li>
    <li class="toctree-l1 ">
      <a class="" href="design/index.html">Design</a>
    </li>
    <li class="toctree-l1 ">
      <a class="" href="usage/index.html">Usage</a>
    </li>
    <!-- Algorithms group. -->
    <li>
      <ul class="subnav">
        <li><span>Algorithms</span></li>
        <li class="toctree-l1 ">
          <a class="" href="algorithms/value_optimization/dqn/index.html">DQN</a>
        </li>
        <li class="toctree-l1 ">
          <a class="" href="algorithms/value_optimization/double_dqn/index.html">Double DQN</a>
        </li>
        <li class="toctree-l1 ">
          <a class="" href="algorithms/value_optimization/dueling_dqn/index.html">Dueling DQN</a>
        </li>
        <li class="toctree-l1 ">
          <a class="" href="algorithms/value_optimization/categorical_dqn/index.html">Categorical DQN</a>
        </li>
        <li class="toctree-l1 ">
          <a class="" href="algorithms/value_optimization/mmc/index.html">Mixed Monte Carlo</a>
        </li>
        <li class="toctree-l1 ">
          <a class="" href="algorithms/value_optimization/pal/index.html">Persistent Advantage Learning</a>
        </li>
        <li class="toctree-l1 ">
          <a class="" href="algorithms/value_optimization/nec/index.html">Neural Episodic Control</a>
        </li>
        <li class="toctree-l1 ">
          <a class="" href="algorithms/value_optimization/bs_dqn/index.html">Bootstrapped DQN</a>
        </li>
        <li class="toctree-l1 ">
          <a class="" href="algorithms/value_optimization/n_step/index.html">N-Step Q Learning</a>
        </li>
        <li class="toctree-l1 ">
          <a class="" href="algorithms/value_optimization/naf/index.html">Normalized Advantage Functions</a>
        </li>
        <li class="toctree-l1 ">
          <a class="" href="algorithms/policy_optimization/pg/index.html">Policy Gradient</a>
        </li>
        <li class="toctree-l1 ">
          <a class="" href="algorithms/policy_optimization/ac/index.html">Actor-Critic</a>
        </li>
        <li class="toctree-l1 ">
          <a class="" href="algorithms/policy_optimization/ddpg/index.html">Deep Deterministic Policy Gradients</a>
        </li>
        <li class="toctree-l1 ">
          <a class="" href="algorithms/policy_optimization/ppo/index.html">Proximal Policy Optimization</a>
        </li>
        <li class="toctree-l1 ">
          <a class="" href="algorithms/policy_optimization/cppo/index.html">Clipped Proximal Policy Optimization</a>
        </li>
        <li class="toctree-l1 ">
          <a class="" href="algorithms/other/dfp/index.html">Direct Future Prediction</a>
        </li>
        <li class="toctree-l1 ">
          <a class="" href="algorithms/imitation/bc/index.html">Behavioral Cloning</a>
        </li>
      </ul>
    </li>
    <li class="toctree-l1 ">
      <a class="" href="dashboard/index.html">Coach Dashboard</a>
    </li>
    <!-- Contributing group. -->
    <li>
      <ul class="subnav">
        <li><span>Contributing</span></li>
        <li class="toctree-l1 ">
          <a class="" href="contributing/add_agent/index.html">Adding a New Agent</a>
        </li>
        <li class="toctree-l1 ">
          <a class="" href="contributing/add_env/index.html">Adding a New Environment</a>
        </li>
      </ul>
    </li>
  </ul>
</div>
&nbsp;
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
<nav class="wy-nav-top" role="navigation" aria-label="top navigation">
  <!-- Top bar: an icon carrying the "wy-nav-top" toggle hook (presumably wired
       up by the theme script; confirm in theme.js), then the site title link. -->
  <i class="fa fa-bars" data-toggle="wy-nav-top"></i>
  <a href="./index.html">Reinforcement Learning Coach Documentation</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="breadcrumbs navigation">
  <!-- Breadcrumb trail for this page: Docs, then Home. -->
  <ul class="wy-breadcrumbs">
    <li><a href="./index.html">Docs</a> &raquo;</li>
    <li>Home</li>
    <!-- Aside slot; it holds no content on this page. -->
    <li class="wy-breadcrumbs-aside">
    </li>
  </ul>
  <hr>
</div>
<div role="main">
  <!-- Page body. The heading ids are the targets of the in-page links in the
       side navigation (#what-is-coach, #motivation, #solution, #design). -->
  <div class="section">
    <h1 id="what-is-coach">What is Coach?</h1>

    <h2 id="motivation">Motivation</h2>
    <p>Train and evaluate reinforcement learning agents by harnessing the power of multi-core CPU processing to achieve state-of-the-art results. Provide a sandbox for easing the development process of new algorithms through a modular design and an elegant set of APIs.</p>

    <h2 id="solution">Solution</h2>
    <p>Coach is a Python environment which models the interaction between an agent and an environment in a modular way.
With Coach, it is possible to model an agent by combining various building blocks, and training the agent on multiple environments.
The available environments allow testing the agent in different practical fields such as robotics, autonomous driving, games and more.
Coach collects statistics from the training process and supports advanced visualization techniques for debugging the agent being trained.</p>
    <!-- Link text names the destination so each link makes sense out of context. -->
    <p>See the <a href="https://www.intelnervana.com/reinforcement-learning-coach-intel">Coach blog post on the Intel® Nervana™ website</a>.</p>
    <p>See the <a href="https://github.com/NervanaSystems/coach">Coach GitHub repository</a>.</p>

    <h2 id="design">Design</h2>
    <p><img src="img/design.png" alt="Coach Design" style="width: 800px;"/></p>
  </div>
</div>
<footer>
  <!-- Footer navigation: this page only has a "Next" target (Design). -->
  <div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
    <!-- <a> is not a void element, so it has no self-closing slash; the arrow
         icon is decorative and hidden from assistive technology. -->
    <a href="design/index.html" class="btn btn-neutral float-right" title="Design">Next <span class="icon icon-circle-arrow-right" aria-hidden="true"></span></a>
  </div>
  <hr>
  <div role="contentinfo">
    <!-- Copyright etc -->
  </div>
  Built with <a href="https://www.mkdocs.org">MkDocs</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<div class="rst-versions" role="note" style="cursor: pointer">
  <!-- Strip containing a "Next" link to design/index.html. -->
  <span class="rst-current-version" data-toggle="rst-current-version">
    <span style="margin-left: 15px"><a href="design/index.html" style="color: #fcfcfc">Next &raquo;</a></span>
  </span>
</div>
</body>
</html>
<!--
MkDocs version : 0.14.0
Build Date UTC : 2017-12-18 18:59:45.506407
-->