<!DOCTYPE html>
|
|
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
|
|
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
|
|
<head>
|
|
<meta charset="utf-8">
|
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
|
|
|
<title>Usage - Reinforcement Learning Coach Documentation</title>
|
|
|
|
|
|
<link rel="shortcut icon" href="../img/favicon.ico">
|
|
|
|
|
|
<link href='https://fonts.googleapis.com/css?family=Lato:400,700|Roboto+Slab:400,700|Inconsolata:400,700' rel='stylesheet' type='text/css'>
|
|
|
|
<link rel="stylesheet" href="../css/theme.css" type="text/css" />
|
|
<link rel="stylesheet" href="../css/theme_extra.css" type="text/css" />
|
|
<link rel="stylesheet" href="../css/highlight.css">
|
|
<link href="../extra.css" rel="stylesheet">
|
|
|
|
|
|
<script>
|
|
// Current page data
|
|
var mkdocs_page_name = "Usage";
|
|
</script>
|
|
|
|
<script src="../js/jquery-2.1.1.min.js"></script>
|
|
<script src="../js/modernizr-2.8.3.min.js"></script>
|
|
<script type="text/javascript" src="../js/highlight.pack.js"></script>
|
|
<script src="../js/theme.js"></script>
|
|
<script src="https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_HTML"></script>
|
|
|
|
|
|
</head>
|
|
|
|
<body class="wy-body-for-nav" role="document">
|
|
|
|
<div class="wy-grid-for-nav">
|
|
|
|
|
|
<nav data-toggle="wy-nav-shift" class="wy-nav-side stickynav">
|
|
<div class="wy-side-nav-search">
|
|
<a href="../index.html" class="icon icon-home"> Reinforcement Learning Coach Documentation</a>
|
|
<div role="search">
|
|
<form id ="rtd-search-form" class="wy-form" action="../search.html" method="get">
|
|
<input type="text" name="q" placeholder="Search docs" />
|
|
</form>
|
|
</div>
|
|
</div>
|
|
|
|
<div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
|
|
<ul class="current">
|
|
|
|
<li>
|
|
<li class="toctree-l1 ">
|
|
<a class="" href="../index.html">Home</a>
|
|
|
|
</li>
|
|
<li>
|
|
|
|
<li>
|
|
<li class="toctree-l1 ">
|
|
<a class="" href="../design/index.html">Design</a>
|
|
|
|
</li>
|
|
<li>
|
|
|
|
<li>
|
|
<li class="toctree-l1 current">
|
|
<a class="current" href="./index.html">Usage</a>
|
|
|
|
<ul>
|
|
|
|
<li class="toctree-l3"><a href="#coach-usage">Coach Usage</a></li>
|
|
|
|
<li><a class="toctree-l4" href="#training-an-agent">Training an Agent</a></li>
|
|
|
|
<li><a class="toctree-l4" href="#evaluating-an-agent">Evaluating an Agent</a></li>
|
|
|
|
<li><a class="toctree-l4" href="#playing-with-the-environment-as-a-human">Playing with the Environment as a Human</a></li>
|
|
|
|
<li><a class="toctree-l4" href="#learning-through-imitation-learning">Learning Through Imitation Learning</a></li>
|
|
|
|
<li><a class="toctree-l4" href="#visualizations">Visualizations</a></li>
|
|
|
|
<li><a class="toctree-l4" href="#switching-between-deep-learning-frameworks">Switching between deep learning frameworks</a></li>
|
|
|
|
<li><a class="toctree-l4" href="#additional-flags">Additional Flags</a></li>
|
|
|
|
|
|
</ul>
|
|
|
|
</li>
|
|
<li>
|
|
|
|
<li>
|
|
<ul class="subnav">
|
|
<li><span>Algorithms</span></li>
|
|
|
|
|
|
|
|
<li class="toctree-l1 ">
|
|
<a class="" href="../algorithms/value_optimization/dqn/index.html">DQN</a>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
<li class="toctree-l1 ">
|
|
<a class="" href="../algorithms/value_optimization/double_dqn/index.html">Double DQN</a>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
<li class="toctree-l1 ">
|
|
<a class="" href="../algorithms/value_optimization/dueling_dqn/index.html">Dueling DQN</a>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
<li class="toctree-l1 ">
|
|
<a class="" href="../algorithms/value_optimization/categorical_dqn/index.html">Categorical DQN</a>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
<li class="toctree-l1 ">
|
|
<a class="" href="../algorithms/value_optimization/mmc/index.html">Mixed Monte Carlo</a>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
<li class="toctree-l1 ">
|
|
<a class="" href="../algorithms/value_optimization/pal/index.html">Persistent Advantage Learning</a>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
<li class="toctree-l1 ">
|
|
<a class="" href="../algorithms/value_optimization/nec/index.html">Neural Episodic Control</a>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
<li class="toctree-l1 ">
|
|
<a class="" href="../algorithms/value_optimization/bs_dqn/index.html">Bootstrapped DQN</a>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
<li class="toctree-l1 ">
|
|
<a class="" href="../algorithms/value_optimization/n_step/index.html">N-Step Q Learning</a>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
<li class="toctree-l1 ">
|
|
<a class="" href="../algorithms/value_optimization/naf/index.html">Normalized Advantage Functions</a>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
<li class="toctree-l1 ">
|
|
<a class="" href="../algorithms/policy_optimization/pg/index.html">Policy Gradient</a>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
<li class="toctree-l1 ">
|
|
<a class="" href="../algorithms/policy_optimization/ac/index.html">Actor-Critic</a>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
<li class="toctree-l1 ">
|
|
<a class="" href="../algorithms/policy_optimization/ddpg/index.html">Deep Determinstic Policy Gradients</a>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
<li class="toctree-l1 ">
|
|
<a class="" href="../algorithms/policy_optimization/ppo/index.html">Proximal Policy Optimization</a>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
<li class="toctree-l1 ">
|
|
<a class="" href="../algorithms/policy_optimization/cppo/index.html">Clipped Proximal Policy Optimization</a>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
<li class="toctree-l1 ">
|
|
<a class="" href="../algorithms/other/dfp/index.html">Direct Future Prediction</a>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
<li class="toctree-l1 ">
|
|
<a class="" href="../algorithms/imitation/bc/index.html">Behavioral Cloning</a>
|
|
|
|
</li>
|
|
|
|
|
|
</ul>
|
|
<li>
|
|
|
|
<li>
|
|
<li class="toctree-l1 ">
|
|
<a class="" href="../dashboard/index.html">Coach Dashboard</a>
|
|
|
|
</li>
|
|
<li>
|
|
|
|
<li>
|
|
<ul class="subnav">
|
|
<li><span>Contributing</span></li>
|
|
|
|
|
|
|
|
<li class="toctree-l1 ">
|
|
<a class="" href="../contributing/add_agent/index.html">Adding a New Agent</a>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
<li class="toctree-l1 ">
|
|
<a class="" href="../contributing/add_env/index.html">Adding a New Environment</a>
|
|
|
|
</li>
|
|
|
|
|
|
</ul>
|
|
<li>
|
|
|
|
</ul>
|
|
</div>
|
|
|
|
</nav>
|
|
|
|
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
|
|
|
|
|
|
<nav class="wy-nav-top" role="navigation" aria-label="top navigation">
|
|
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
|
<a href="../index.html">Reinforcement Learning Coach Documentation</a>
|
|
</nav>
|
|
|
|
|
|
<div class="wy-nav-content">
|
|
<div class="rst-content">
|
|
<div role="navigation" aria-label="breadcrumbs navigation">
|
|
<ul class="wy-breadcrumbs">
|
|
<li><a href="../index.html">Docs</a> »</li>
|
|
|
|
|
|
|
|
<li>Usage</li>
|
|
<li class="wy-breadcrumbs-aside">
|
|
|
|
</li>
|
|
</ul>
|
|
<hr/>
|
|
</div>
|
|
<div role="main">
|
|
<div class="section">
|
|
|
|
<h1 id="coach-usage">Coach Usage</h1>
|
|
<h2 id="training-an-agent">Training an Agent</h2>
|
|
<h3 id="single-threaded-algorithms">Single-threaded Algorithms</h3>
|
|
<p>This is the most common case. Just choose a preset using the <code>-p</code> flag and press enter.</p>
|
|
<p><em>Example:</em></p>
|
|
<p><code>python coach.py -p CartPole_DQN</code></p>
|
|
<h3 id="multi-threaded-algorithms">Multi-threaded Algorithms</h3>
|
|
<p>Multi-threaded algorithms are very common these days.
They typically achieve the best results, and scale gracefully with the number of threads.
In Coach, running such algorithms is done by selecting a suitable preset and choosing the number of workers to run using the <code>-n</code> flag.</p>
|
|
<p><em>Example:</em></p>
|
|
<p><code>python coach.py -p CartPole_A3C -n 8</code></p>
|
|
<h2 id="evaluating-an-agent">Evaluating an Agent</h2>
|
|
<p>There are several options for evaluating an agent during training:</p>
|
|
<ul>
|
|
<li>
|
|
<p>For multi-threaded runs, an evaluation agent will constantly run in the background and evaluate the model during training.</p>
|
|
</li>
|
|
<li>
|
|
<p>For single-threaded runs, it is possible to define an evaluation period through the preset. This will run several evaluation episodes once in a while (see the sketch after this list).</p>
|
|
</li>
|
|
</ul>
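<p>For reference, here is a minimal sketch of how such an evaluation period might be configured inside a preset. The class and attribute names used here (<code>evaluate_every_x_episodes</code>, <code>evaluation_episodes</code>, the <code>Preset</code> constructor arguments) are assumptions for illustration and may differ between Coach versions; check <code>presets.py</code> and <code>configurations.py</code> for the exact names.</p>
<pre><code class="python"># presets.py -- illustrative sketch only, not verbatim from Coach.
class CartPole_DQN(Preset):
    def __init__(self):
        Preset.__init__(self, DQN, GymVectorObservation, ExplorationParameters)
        self.env.level = 'CartPole-v0'
        # assumed parameter names: run 5 evaluation episodes every 100 training episodes
        self.evaluate_every_x_episodes = 100
        self.evaluation_episodes = 5
</code></pre>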
|
|
<p>Additionally, it is possible to save checkpoints of the agent's networks and then run Coach in evaluation mode only.
Saving checkpoints is done by specifying the number of seconds between stored checkpoints using the <code>-s</code> flag.
The checkpoints will be saved into the experiment directory.
Loading a model for evaluation is done by specifying the <code>-crd</code> flag with the experiment directory, and the <code>--evaluate</code> flag to disable training.</p>
|
|
<p><em>Example:</em></p>
|
|
<p><code>python coach.py -p CartPole_DQN -s 60</code>
|
|
<code>python coach.py -p CartPole_DQN --evaluate -crd CHECKPOINT_RESTORE_DIR</code></p>
|
|
<h2 id="playing-with-the-environment-as-a-human">Playing with the Environment as a Human</h2>
|
|
<p>Interacting with the environment as a human can be useful for understanding its difficulties and for collecting data for imitation learning.
In Coach, this can easily be done by selecting a preset that defines the environment to use and specifying the <code>--play</code> flag.
When the environment is loaded, the available keyboard buttons will be printed to the screen.
Pressing the Escape key when finished will end the simulation and store the replay buffer in the experiment directory.</p>
|
|
<p><em>Example:</em></p>
|
|
<p><code>python coach.py -p Breakout_DQN --play</code></p>
|
|
<h2 id="learning-through-imitation-learning">Learning Through Imitation Learning</h2>
|
|
<p>Learning through imitation of human behavior is a good way to speed up learning.
In Coach, this can be done in two steps:</p>
|
|
<ol>
|
|
<li>
|
|
<p>Create a dataset of demonstrations by playing with the environment as a human.
To do so, select an environment type and level through the command line, and specify the <code>--play</code> flag.
After this step, a pickle of the replay buffer containing your game play will be stored in the experiment directory,
and the path to this replay buffer will be printed to the screen.</p>
|
|
<p><em>Example:</em></p>
|
|
<p><code>python coach.py -et Doom -lvl Basic --play</code></p>
|
|
</li>
|
|
<li>
|
|
<p>Next, use an imitation learning preset and set the replay buffer path accordingly.
The path can be set either from the command line or from the preset itself (see the preset sketch after this list).</p>
|
|
<p><em>Example:</em></p>
|
|
<p><code>python coach.py -p Doom_Basic_BC -cp='agent.load_memory_from_file_path=\"<experiment dir>/replay_buffer.p\"'</code></p>
|
|
</li>
|
|
</ol>
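<p>If you prefer setting the path from the preset itself, here is a minimal sketch. It reuses the <code>agent.load_memory_from_file_path</code> parameter from the <code>-cp</code> example above; the surrounding class structure and constructor arguments are assumptions for illustration and may differ between Coach versions.</p>
<pre><code class="python"># presets.py -- illustrative sketch only, not verbatim from Coach.
class Doom_Basic_BC(Preset):
    def __init__(self):
        Preset.__init__(self, BC, Doom, ExplorationParameters)
        self.env.level = 'Basic'
        # same parameter as in the -cp example; use the replay buffer path printed in step 1
        self.agent.load_memory_from_file_path = 'path/to/replay_buffer.p'
</code></pre>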
|
|
<h2 id="visualizations">Visualizations</h2>
|
|
<h3 id="rendering-the-environment">Rendering the Environment</h3>
|
|
<p>Rendering the environment can be done by using the <code>-r</code> flag.
When working with multi-threaded algorithms, the rendered image will show the game play of the evaluation worker.
When working with single-threaded algorithms, the rendered image will show the single worker, which can be either training or evaluating.
Keep in mind that rendering the environment in single-threaded algorithms may slow down training to some extent.
When playing with the environment using the <code>--play</code> flag, the environment will be rendered automatically, without the need to specify the <code>-r</code> flag.</p>
|
|
<p><em>Example:</em></p>
|
|
<p><code>python coach.py -p Breakout_DQN -r</code></p>
|
|
<h3 id="dumping-gifs">Dumping GIFs</h3>
|
|
<p>Coach allows storing GIFs of the agent's game play.
To dump GIF files, use the <code>-dg</code> flag.
The files are dumped after every evaluation episode, and are saved into the experiment directory, under a <code>gifs</code> sub-directory.</p>
|
|
<p><em>Example:</em></p>
|
|
<p><code>python coach.py -p Breakout_A3C -n 4 -dg</code></p>
|
|
<h2 id="switching-between-deep-learning-frameworks">Switching between deep learning frameworks</h2>
|
|
<p>Coach uses TensorFlow as its main backend framework, but it also supports neon for some of the algorithms.
|
|
By default, TensorFlow will be used. It is possible to switch to neon using the <code>-f</code> flag.</p>
|
|
<p><em>Example:</em></p>
|
|
<p><code>python coach.py -p Doom_Basic_DQN -f neon</code></p>
|
|
<h2 id="additional-flags">Additional Flags</h2>
|
|
<p>There are several convenient flags which are important to know about.
Most of them are listed here, but they may change from time to time,
so the most up-to-date description can always be found by running Coach with the <code>-h</code> flag.</p>
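<p>For instance, the full and current list of flags can be printed with:</p>
<p><code>python coach.py -h</code></p>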
|
|
<table>
|
|
<thead>
|
|
<tr>
|
|
<th>Flag</th>
|
|
<th>Type</th>
|
|
<th>Description</th>
|
|
</tr>
|
|
</thead>
|
|
<tbody>
|
|
<tr>
|
|
<td><code>-p PRESET</code>, <code>--preset PRESET</code></td>
|
|
<td>string</td>
|
|
<td>Name of a preset to run (as configured in presets.py)</td>
|
|
</tr>
|
|
<tr>
|
|
<td><code>-l</code>, <code>--list</code></td>
|
|
<td>flag</td>
|
|
<td>List all available presets</td>
|
|
</tr>
|
|
<tr>
|
|
<td><code>-e EXPERIMENT_NAME</code>, <code>--experiment_name EXPERIMENT_NAME</code></td>
|
|
<td>string</td>
|
|
<td>Experiment name to be used to store the results.</td>
|
|
</tr>
|
|
<tr>
|
|
<td><code>-r</code>, <code>--render</code></td>
|
|
<td>flag</td>
|
|
<td>Render environment</td>
|
|
</tr>
|
|
<tr>
|
|
<td><code>-f FRAMEWORK</code>, <code>--framework FRAMEWORK</code></td>
|
|
<td>string</td>
|
|
<td>Neural network framework. Available values: tensorflow, neon</td>
|
|
</tr>
|
|
<tr>
|
|
<td><code>-n NUM_WORKERS</code>, <code>--num_workers NUM_WORKERS</code></td>
|
|
<td>int</td>
|
|
<td>Number of workers for multi-process based agents, e.g. A3C</td>
|
|
</tr>
|
|
<tr>
|
|
<td><code>--play</code></td>
|
|
<td>flag</td>
|
|
<td>Play as a human by controlling the game with the keyboard. This option will save a replay buffer with the game play.</td>
|
|
</tr>
|
|
<tr>
|
|
<td><code>--evaluate</code></td>
|
|
<td>flag</td>
|
|
<td>Run evaluation only. This is a convenient way to disable training in order to evaluate an existing checkpoint.</td>
|
|
</tr>
|
|
<tr>
|
|
<td><code>-v</code>, <code>--verbose</code></td>
|
|
<td>flag</td>
|
|
<td>Don't suppress TensorFlow debug prints.</td>
|
|
</tr>
|
|
<tr>
|
|
<td><code>-s SAVE_MODEL_SEC</code>, <code>--save_model_sec SAVE_MODEL_SEC</code></td>
|
|
<td>int</td>
|
|
<td>Time in seconds between saving checkpoints of the model.</td>
|
|
</tr>
|
|
<tr>
|
|
<td><code>-crd CHECKPOINT_RESTORE_DIR</code>, <code>--checkpoint_restore_dir CHECKPOINT_RESTORE_DIR</code></td>
|
|
<td>string</td>
|
|
<td>Path to a folder containing a checkpoint to restore the model from.</td>
|
|
</tr>
|
|
<tr>
|
|
<td><code>-dg</code>, <code>--dump_gifs</code></td>
|
|
<td>flag</td>
|
|
<td>Enable the gif saving functionality.</td>
|
|
</tr>
|
|
<tr>
|
|
<td><code>-at AGENT_TYPE</code>, <code>--agent_type AGENT_TYPE</code></td>
|
|
<td>string</td>
|
|
<td>Choose an agent type class to override on top of the selected preset. If no preset is defined, a preset can be assembled from the command line by combining the <code>--agent_type</code>, <code>--environment_type</code> and <code>--exploration_policy_type</code> settings.</td>
|
|
</tr>
|
|
<tr>
|
|
<td><code>-et ENVIRONMENT_TYPE</code>, <code>--environment_type ENVIRONMENT_TYPE</code></td>
|
|
<td>string</td>
|
|
<td>Choose an environment type class to override on top of the selected preset. If no preset is defined, a preset can be assembled from the command line by combining the <code>--agent_type</code>, <code>--environment_type</code> and <code>--exploration_policy_type</code> settings.</td>
|
|
</tr>
|
|
<tr>
|
|
<td><code>-ept EXPLORATION_POLICY_TYPE</code>, <code>--exploration_policy_type EXPLORATION_POLICY_TYPE</code></td>
|
|
<td>string</td>
|
|
<td>Choose an exploration policy type class to override on top of the selected preset. If no preset is defined, a preset can be assembled from the command line by combining the <code>--agent_type</code>, <code>--environment_type</code> and <code>--exploration_policy_type</code> settings.</td>
|
|
</tr>
|
|
<tr>
|
|
<td><code>-lvl LEVEL</code>, <code>--level LEVEL</code></td>
|
|
<td>string</td>
|
|
<td>Choose the level that will be played in the environment that was selected. This value will override the level parameter in the environment class.</td>
|
|
</tr>
|
|
<tr>
|
|
<td><code>-cp CUSTOM_PARAMETER</code>, <code>--custom_parameter CUSTOM_PARAMETER</code></td>
|
|
<td>string</td>
|
|
<td>Semicolon-separated parameters used to override specific parameters on top of the selected preset (or on top of the one assembled from the command line). Whenever a parameter value is a string, it should be passed as <code>'\"string\"'</code>. For example: <code>"visualization.render=False;</code> <code>num_training_iterations=500;</code> <code>optimizer='rmsprop'"</code></td>
|
|
</tr>
|
|
</tbody>
|
|
</table>
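<p>As an illustration (this combination is not taken from the original examples), several of the flags documented above can be combined in a single run: training the <code>Breakout_A3C</code> preset with 8 workers, naming the experiment, saving a checkpoint every 300 seconds, and dumping GIFs of the evaluation episodes.</p>
<p><code>python coach.py -p Breakout_A3C -n 8 -e breakout_a3c_run -s 300 -dg</code></p>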
|
|
|
|
</div>
|
|
</div>
|
|
<footer>
|
|
|
|
<div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
|
|
|
|
<a href="../algorithms/value_optimization/dqn/index.html" class="btn btn-neutral float-right" title="DQN"/>Next <span class="icon icon-circle-arrow-right"></span></a>
|
|
|
|
|
|
<a href="../design/index.html" class="btn btn-neutral" title="Design"><span class="icon icon-circle-arrow-left"></span> Previous</a>
|
|
|
|
</div>
|
|
|
|
|
|
<hr/>
|
|
|
|
<div role="contentinfo">
|
|
<!-- Copyright etc -->
|
|
|
|
</div>
|
|
|
|
Built with <a href="http://www.mkdocs.org">MkDocs</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
|
</footer>
|
|
|
|
</div>
|
|
</div>
|
|
|
|
</section>
|
|
|
|
</div>
|
|
|
|
<div class="rst-versions" role="note" style="cursor: pointer">
|
|
<span class="rst-current-version" data-toggle="rst-current-version">
|
|
|
|
|
|
<span><a href="../design/index.html" style="color: #fcfcfc;">« Previous</a></span>
|
|
|
|
|
|
<span style="margin-left: 15px"><a href="../algorithms/value_optimization/dqn/index.html" style="color: #fcfcfc">Next »</a></span>
|
|
|
|
</span>
|
|
</div>
|
|
|
|
</body>
|
|
</html>
|