mirror of
https://github.com/gryf/coach.git
synced 2025-12-17 11:10:20 +01:00
394 lines
12 KiB
HTML
394 lines
12 KiB
HTML
<!DOCTYPE html>
|
|
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
|
|
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
|
|
<head>
|
|
<meta charset="utf-8">
|
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
|
|
|
<title>Coach Dashboard - Reinforcement Learning Coach Documentation</title>
|
|
|
|
|
|
<link rel="shortcut icon" href="../img/favicon.ico">
|
|
|
|
|
|
<link href='https://fonts.googleapis.com/css?family=Lato:400,700|Roboto+Slab:400,700|Inconsolata:400,700' rel='stylesheet' type='text/css'>
|
|
|
|
<link rel="stylesheet" href="../css/theme.css" type="text/css" />
|
|
<link rel="stylesheet" href="../css/theme_extra.css" type="text/css" />
|
|
<link rel="stylesheet" href="../css/highlight.css">
|
|
<link href="../extra.css" rel="stylesheet">
|
|
|
|
|
|
<script>
|
|
// Current page data
|
|
var mkdocs_page_name = "Coach Dashboard";
|
|
</script>
|
|
|
|
<script src="../js/jquery-2.1.1.min.js"></script>
|
|
<script src="../js/modernizr-2.8.3.min.js"></script>
|
|
<script type="text/javascript" src="../js/highlight.pack.js"></script>
|
|
<script src="../js/theme.js"></script>
|
|
<script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.1/MathJax.js?config=TeX-AMS_HTML"></script>
|
|
|
|
|
|
</head>
|
|
|
|
<body class="wy-body-for-nav" role="document">
|
|
|
|
<div class="wy-grid-for-nav">
|
|
|
|
|
|
<nav data-toggle="wy-nav-shift" class="wy-nav-side stickynav">
|
|
<div class="wy-side-nav-search">
|
|
<a href="../index.html" class="icon icon-home"> Reinforcement Learning Coach Documentation</a>
|
|
<div role="search">
|
|
<form id ="rtd-search-form" class="wy-form" action="../search.html" method="get">
|
|
<input type="text" name="q" placeholder="Search docs" />
|
|
</form>
|
|
</div>
|
|
</div>
|
|
|
|
<div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
|
|
<ul class="current">
|
|
|
|
<li>
|
|
<li class="toctree-l1 ">
|
|
<a class="" href="../index.html">Home</a>
|
|
|
|
</li>
|
|
<li>
|
|
|
|
<li>
|
|
<li class="toctree-l1 ">
|
|
<a class="" href="../design/index.html">Design</a>
|
|
|
|
</li>
|
|
<li>
|
|
|
|
<li>
|
|
<li class="toctree-l1 ">
|
|
<a class="" href="../usage/index.html">Usage</a>
|
|
|
|
</li>
|
|
<li>
|
|
|
|
<li>
|
|
<ul class="subnav">
|
|
<li><span>Algorithms</span></li>
|
|
|
|
|
|
|
|
<li class="toctree-l1 ">
|
|
<a class="" href="../algorithms/value_optimization/dqn/index.html">DQN</a>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
<li class="toctree-l1 ">
|
|
<a class="" href="../algorithms/value_optimization/double_dqn/index.html">Double DQN</a>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
<li class="toctree-l1 ">
|
|
<a class="" href="../algorithms/value_optimization/dueling_dqn/index.html">Dueling DQN</a>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
<li class="toctree-l1 ">
|
|
<a class="" href="../algorithms/value_optimization/categorical_dqn/index.html">Categorical DQN</a>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
<li class="toctree-l1 ">
|
|
<a class="" href="../algorithms/value_optimization/mmc/index.html">Mixed Monte Carlo</a>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
<li class="toctree-l1 ">
|
|
<a class="" href="../algorithms/value_optimization/pal/index.html">Persistent Advantage Learning</a>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
<li class="toctree-l1 ">
|
|
<a class="" href="../algorithms/value_optimization/nec/index.html">Neural Episodic Control</a>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
<li class="toctree-l1 ">
|
|
<a class="" href="../algorithms/value_optimization/bs_dqn/index.html">Bootstrapped DQN</a>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
<li class="toctree-l1 ">
|
|
<a class="" href="../algorithms/value_optimization/n_step/index.html">N-Step Q Learning</a>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
<li class="toctree-l1 ">
|
|
<a class="" href="../algorithms/value_optimization/naf/index.html">Normalized Advantage Functions</a>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
<li class="toctree-l1 ">
|
|
<a class="" href="../algorithms/policy_optimization/pg/index.html">Policy Gradient</a>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
<li class="toctree-l1 ">
|
|
<a class="" href="../algorithms/policy_optimization/ac/index.html">Actor-Critic</a>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
<li class="toctree-l1 ">
|
|
<a class="" href="../algorithms/policy_optimization/ddpg/index.html">Deep Deterministic Policy Gradients</a>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
<li class="toctree-l1 ">
|
|
<a class="" href="../algorithms/policy_optimization/ppo/index.html">Proximal Policy Optimization</a>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
<li class="toctree-l1 ">
|
|
<a class="" href="../algorithms/policy_optimization/cppo/index.html">Clipped Proximal Policy Optimization</a>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
<li class="toctree-l1 ">
|
|
<a class="" href="../algorithms/other/dfp/index.html">Direct Future Prediction</a>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
<li class="toctree-l1 ">
|
|
<a class="" href="../algorithms/imitation/bc/index.html">Behavioral Cloning</a>
|
|
|
|
</li>
|
|
|
|
|
|
</ul>
|
|
</li>
|
|
|
|
<li>
|
|
<li class="toctree-l1 current">
|
|
<a class="current" href="./index.html">Coach Dashboard</a>
|
|
|
|
<ul>
|
|
|
|
<li class="toctree-l3"><a href="#visualizing-signals">Visualizing Signals</a></li>
|
|
|
|
|
|
<li class="toctree-l3"><a href="#tracking-statistics">Tracking Statistics</a></li>
|
|
|
|
|
|
<li class="toctree-l3"><a href="#comparing-runs">Comparing Runs</a></li>
|
|
|
|
|
|
</ul>
|
|
|
|
</li>
|
|
<li>
|
|
|
|
<li>
|
|
<ul class="subnav">
|
|
<li><span>Contributing</span></li>
|
|
|
|
|
|
|
|
<li class="toctree-l1 ">
|
|
<a class="" href="../contributing/add_agent/index.html">Adding a New Agent</a>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
<li class="toctree-l1 ">
|
|
<a class="" href="../contributing/add_env/index.html">Adding a New Environment</a>
|
|
|
|
</li>
|
|
|
|
|
|
</ul>
|
|
</li>
|
|
|
|
</ul>
|
|
</div>
|
|
|
|
</nav>
|
|
|
|
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
|
|
|
|
|
|
<nav class="wy-nav-top" role="navigation" aria-label="top navigation">
|
|
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
|
<a href="../index.html">Reinforcement Learning Coach Documentation</a>
|
|
</nav>
|
|
|
|
|
|
<div class="wy-nav-content">
|
|
<div class="rst-content">
|
|
<div role="navigation" aria-label="breadcrumbs navigation">
|
|
<ul class="wy-breadcrumbs">
|
|
<li><a href="../index.html">Docs</a> »</li>
|
|
|
|
|
|
|
|
<li>Coach Dashboard</li>
|
|
<li class="wy-breadcrumbs-aside">
|
|
|
|
</li>
|
|
</ul>
|
|
<hr/>
|
|
</div>
|
|
<div role="main">
|
|
<div class="section">
|
|
|
|
<p>Reinforcement learning algorithms are neat. That is - when they work. But when they don't, RL algorithms are often quite tricky to debug. </p>
|
|
<p>Finding the root cause for why things break in RL is rather difficult. Moreover, different RL algorithms shine in some aspects but fall short in others. Comparing the algorithms faithfully is also a hard task, which requires the right tools.</p>
|
|
<p>Coach Dashboard is a visualization tool which simplifies the analysis of the training process. Each run of Coach extracts a lot of information from within the algorithm and stores it in the experiment directory. This information is very valuable for debugging, analyzing and comparing different algorithms. But without a good visualization tool, this information cannot be utilized. This is where Coach Dashboard comes in.</p>
|
|
<h3 id="visualizing-signals">Visualizing Signals</h3>
|
|
<p>Coach Dashboard exposes a convenient user interface for visualizing the training signals. The signals are updated dynamically during the agent's training. Additionally, it allows selecting a subset of the available signals, and then overlaying them on top of each other. </p>
|
|
<p style="text-align: center;">
|
|
|
|
<img src="../img/updating_dynamically.gif" alt="Updating Dynamically" style="width: 800px;"/>
|
|
|
|
</p>
|
|
|
|
<ul>
|
|
<li>Holding the CTRL key, while selecting signals, will allow visualizing more than one signal. </li>
|
|
<li>Signals can be visualized, using either of the Y-axes, in order to visualize signals with different scales. To move a signal to the second Y-axis, select it and press the 'Toggle Second Axis' button.</li>
|
|
</ul>
|
|
<h3 id="tracking-statistics">Tracking Statistics</h3>
|
|
<p>When running parallel algorithms, such as A3C, it often helps to visualize the learning of all the workers at the same time. Coach Dashboard allows viewing multiple signals (and even smoothing them out, if required) from multiple workers. In addition, it supports viewing the mean and standard deviation of the same signal, across different workers, using Bollinger bands. </p>
|
|
<p style="text-align: center;">
|
|
<table style="box-shadow: none;">
|
|
<tr>
|
|
<td style="width: 450px; text-align: center;">
|
|
<img src="../img/bollinger_bands.png" alt="Bollinger Bands" style="width: 400px;"/>
|
|
<b>Displaying Bollinger Bands</b>
|
|
</td>
|
|
<td style="width: 450px; text-align: center;">
|
|
<img src="../img/separate_signals.png" alt="Separate Signals" style="width: 400px;"/>
|
|
<b>Displaying All The Workers</b>
|
|
</td>
|
|
</tr>
|
|
</table>
|
|
|
|
|
|
|
|
|
|
|
|
</p>
|
|
|
|
<h3 id="comparing-runs">Comparing Runs</h3>
|
|
<p>Reinforcement learning algorithms are notoriously unstable and suffer from high run-to-run variance. This makes benchmarking and comparing different algorithms even harder. To ease this process, it is common to execute several runs of the same algorithm and average over them. This is easy to do with Coach Dashboard, by centralizing all the experiment directories in a single directory, and then loading them as a single group. Loading several groups of different algorithms then allows comparing the averaged signals, such as the total episode reward. </p>
|
|
<p>In RL, there are several interesting performance metrics to consider, and this is easy to do by controlling the X-axis units in Coach Dashboard. It is possible to switch between several options such as the total number of steps or the total training time.</p>
|
|
<p style="text-align: center;">
|
|
|
|
|
|
|
|
|
|
|
|
<table style="box-shadow: none;">
|
|
<tr>
|
|
<td style="width: 450px; text-align: center;">
|
|
|
|
<img src="../img/compare_by_time.png" alt="Comparing By Time" style="width: 400px;"/>
|
|
|
|
|
|
<b>Comparing Several Algorithms According to the Time Passed</b>
|
|
|
|
|
|
</td>
|
|
<td style="width: 450px; text-align: center;">
|
|
|
|
<img src="../img/compare_by_num_episodes.png" alt="Comparing By Number of Episodes" style="width: 400px;"/>
|
|
|
|
|
|
<b>Comparing Several Algorithms According to the Number of Episodes Played</b>
|
|
|
|
|
|
</td>
|
|
</tr>
|
|
</table>
|
|
|
|
|
|
|
|
</p>
|
|
|
|
</div>
|
|
</div>
|
|
<footer>
|
|
|
|
<div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
|
|
|
|
<a href="../contributing/add_agent/index.html" class="btn btn-neutral float-right" title="Adding a New Agent">Next <span class="icon icon-circle-arrow-right"></span></a>
|
|
|
|
|
|
<a href="../algorithms/imitation/bc/index.html" class="btn btn-neutral" title="Behavioral Cloning"><span class="icon icon-circle-arrow-left"></span> Previous</a>
|
|
|
|
</div>
|
|
|
|
|
|
<hr/>
|
|
|
|
<div role="contentinfo">
|
|
<!-- Copyright etc -->
|
|
|
|
</div>
|
|
|
|
Built with <a href="http://www.mkdocs.org">MkDocs</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
|
</footer>
|
|
|
|
</div>
|
|
</div>
|
|
|
|
</section>
|
|
|
|
</div>
|
|
|
|
<div class="rst-versions" role="note" style="cursor: pointer">
|
|
<span class="rst-current-version" data-toggle="rst-current-version">
|
|
|
|
|
|
<span><a href="../algorithms/imitation/bc/index.html" style="color: #fcfcfc;">« Previous</a></span>
|
|
|
|
|
|
<span style="margin-left: 15px"><a href="../contributing/add_agent/index.html" style="color: #fcfcfc">Next »</a></span>
|
|
|
|
</span>
|
|
</div>
|
|
|
|
</body>
|
|
</html>
|