1
0
mirror of https://github.com/gryf/coach.git synced 2025-12-17 11:10:20 +01:00
Files
coach/docs/components/additional_parameters.html
Gal Leibovich 7eb884c5b2 TD3 (#338)
2019-06-16 11:11:21 +03:00

381 lines
23 KiB
HTML
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
<!DOCTYPE html>
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Additional Parameters &mdash; Reinforcement Learning Coach 0.12.0 documentation</title>
<script type="text/javascript" src="../_static/js/modernizr.min.js"></script>
<script type="text/javascript" id="documentation_options" data-url_root="../" src="../_static/documentation_options.js"></script>
<script type="text/javascript" src="../_static/jquery.js"></script>
<script type="text/javascript" src="../_static/underscore.js"></script>
<script type="text/javascript" src="../_static/doctools.js"></script>
<script type="text/javascript" src="../_static/language_data.js"></script>
<script async="async" type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/latest.js?config=TeX-AMS-MML_HTMLorMML"></script>
<script type="text/javascript" src="../_static/js/theme.js"></script>
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../_static/css/custom.css" type="text/css" />
<link rel="index" title="Index" href="../genindex.html" />
<link rel="search" title="Search" href="../search.html" />
<link rel="prev" title="Spaces" href="spaces.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../index.html" class="icon icon-home"> Reinforcement Learning Coach
<img src="../_static/dark_logo.png" class="logo" alt="Logo"/>
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div>
<div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../dashboard.html">Coach Dashboard</a></li>
</ul>
<p class="caption"><span class="caption-text">Design</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../contributing/add_agent.html">Adding a New Agent</a></li>
<li class="toctree-l1"><a class="reference internal" href="../contributing/add_env.html">Adding a New Environment</a></li>
</ul>
<p class="caption"><span class="caption-text">Components</span></p>
<ul class="current">
<li class="toctree-l1"><a class="reference internal" href="agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="spaces.html">Spaces</a></li>
<li class="toctree-l1 current"><a class="current reference internal" href="#">Additional Parameters</a><ul>
<li class="toctree-l2"><a class="reference internal" href="#visualizationparameters">VisualizationParameters</a></li>
<li class="toctree-l2"><a class="reference internal" href="#presetvalidationparameters">PresetValidationParameters</a></li>
<li class="toctree-l2"><a class="reference internal" href="#taskparameters">TaskParameters</a></li>
<li class="toctree-l2"><a class="reference internal" href="#distributedtaskparameters">DistributedTaskParameters</a></li>
</ul>
</li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
<nav class="wy-nav-top" aria-label="top navigation">
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../index.html">Reinforcement Learning Coach</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="breadcrumbs navigation">
<ul class="wy-breadcrumbs">
<li><a href="../index.html">Docs</a> &raquo;</li>
<li>Additional Parameters</li>
<li class="wy-breadcrumbs-aside">
<a href="../_sources/components/additional_parameters.rst.txt" rel="nofollow"> View page source</a>
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<div class="section" id="additional-parameters">
<h1>Additional Parameters<a class="headerlink" href="#additional-parameters" title="Permalink to this headline"></a></h1>
<div class="section" id="visualizationparameters">
<h2>VisualizationParameters<a class="headerlink" href="#visualizationparameters" title="Permalink to this headline"></a></h2>
<dl class="class">
<dt id="rl_coach.base_parameters.VisualizationParameters">
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.base_parameters.</code><code class="sig-name descname">VisualizationParameters</code><span class="sig-paren">(</span><em class="sig-param">print_networks_summary=False</em>, <em class="sig-param">dump_csv=True</em>, <em class="sig-param">dump_signals_to_csv_every_x_episodes=5</em>, <em class="sig-param">dump_gifs=False</em>, <em class="sig-param">dump_mp4=False</em>, <em class="sig-param">video_dump_methods=None</em>, <em class="sig-param">dump_in_episode_signals=False</em>, <em class="sig-param">dump_parameters_documentation=True</em>, <em class="sig-param">render=False</em>, <em class="sig-param">native_rendering=False</em>, <em class="sig-param">max_fps_for_human_control=10</em>, <em class="sig-param">tensorboard=False</em>, <em class="sig-param">add_rendered_image_to_env_response=False</em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/rl_coach/base_parameters.html#VisualizationParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.base_parameters.VisualizationParameters" title="Permalink to this definition"></a></dt>
<dd><dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>print_networks_summary</strong> If set to True, a summary of the structure of all the networks will be printed at the beginning of the experiment</p></li>
<li><p><strong>dump_csv</strong> If set to True, the logger will dump logs to a csv file once in every dump_signals_to_csv_every_x_episodes
episodes. The logs can be later used to visualize the training process using Coach Dashboard.</p></li>
<li><p><strong>dump_signals_to_csv_every_x_episodes</strong> Defines the number of episodes between writing new data to the csv log files. Lower values can affect
performance, as writing to disk may take time, and it is done synchronously.</p></li>
<li><p><strong>dump_gifs</strong> If set to True, GIF videos of the environment will be stored into the experiment directory according to
the filters defined in video_dump_methods.</p></li>
<li><p><strong>dump_mp4</strong> If set to True, MP4 videos of the environment will be stored into the experiment directory according to
the filters defined in video_dump_methods.</p></li>
<li><p><strong>dump_in_episode_signals</strong> If set to True, csv files will be dumped for each episode for inspecting different metrics within the
episode. This means that for each step in each episode, different metrics such as the reward, the
future return, etc. will be saved. Setting this to True may affect performance severely, and therefore
this should be used only for debugging purposes.</p></li>
<li><p><strong>dump_parameters_documentation</strong> If set to True, a json file containing all the agent parameters will be saved in the experiment directory.
This may be very useful for inspecting the values defined for each parameter and making sure that all
the parameters are defined as expected.</p></li>
<li><p><strong>render</strong> If set to True, the environment render function will be called for each step, rendering the image of the
environment. This may affect the performance of training, and is highly dependent on the environment.
By default, Coach uses PyGame to render the environment image instead of the environment-specific renderer.
To change this, use the native_rendering flag.</p></li>
<li><p><strong>native_rendering</strong> If set to True, the environment native renderer will be used for rendering the environment image.
In some cases this can be slower than rendering using PyGame through Coach, but in other cases the
environment opens its native renderer by default, so rendering with PyGame is an unnecessary overhead.</p></li>
<li><p><strong>max_fps_for_human_control</strong> The maximum number of frames per second used while playing the environment as a human. This only has
an effect while using the play flag for Coach.</p></li>
<li><p><strong>tensorboard</strong> If set to True, TensorBoard summaries will be stored in the experiment directory. This can later be
loaded in TensorBoard in order to visualize the training process.</p></li>
<li><p><strong>video_dump_methods</strong> A list of dump methods that will be used as filters for deciding when to save videos.
The filters in the list will be checked one after the other until the first dump method that returns
false for should_dump() in the environment class. This list will only be used if dump_mp4 or dump_gifs are
set to True.</p></li>
<li><p><strong>add_rendered_image_to_env_response</strong> Some environments have a different observation compared to the one displayed while rendering.
For some cases it can be useful to pass the rendered image to the agent for visualization purposes.
If this flag is set to True, the rendered image will be added to the environment EnvResponse object,
which will be passed to the agent and allow using those images.</p></li>
</ul>
</dd>
</dl>
</dd></dl>
</div>
<div class="section" id="presetvalidationparameters">
<h2>PresetValidationParameters<a class="headerlink" href="#presetvalidationparameters" title="Permalink to this headline"></a></h2>
<dl class="class">
<dt id="rl_coach.base_parameters.PresetValidationParameters">
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.base_parameters.</code><code class="sig-name descname">PresetValidationParameters</code><span class="sig-paren">(</span><em class="sig-param">test=False</em>, <em class="sig-param">min_reward_threshold=0</em>, <em class="sig-param">max_episodes_to_achieve_reward=1</em>, <em class="sig-param">num_workers=1</em>, <em class="sig-param">reward_test_level=None</em>, <em class="sig-param">test_using_a_trace_test=True</em>, <em class="sig-param">trace_test_levels=None</em>, <em class="sig-param">trace_max_env_steps=5000</em>, <em class="sig-param">read_csv_tries=200</em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/rl_coach/base_parameters.html#PresetValidationParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.base_parameters.PresetValidationParameters" title="Permalink to this definition"></a></dt>
<dd><dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>test</strong> A flag which specifies if the preset should be tested as part of the validation process.</p></li>
<li><p><strong>min_reward_threshold</strong> The minimum reward that the agent should pass after max_episodes_to_achieve_reward episodes when the
preset is run.</p></li>
<li><p><strong>max_episodes_to_achieve_reward</strong> The maximum number of episodes that the agent should train using the preset in order to achieve the
reward specified by min_reward_threshold.</p></li>
<li><p><strong>num_workers</strong> The number of workers that should be used when running this preset in the test suite for validation.</p></li>
<li><p><strong>reward_test_level</strong> The environment level or levels, given by a list of strings, that should be tested as part of the
reward tests suite.</p></li>
<li><p><strong>test_using_a_trace_test</strong> A flag that specifies if the preset should be run as part of the trace tests suite.</p></li>
<li><p><strong>trace_test_levels</strong> The environment level or levels, given by a list of strings, that should be tested as part of the
trace tests suite.</p></li>
<li><p><strong>trace_max_env_steps</strong> An integer representing the maximum number of environment steps to run when running this preset as part
of the trace tests suite.</p></li>
<li><p><strong>read_csv_tries</strong> The number of retries to attempt for reading the experiment csv file, before declaring failure.</p></li>
</ul>
</dd>
</dl>
</dd></dl>
</div>
<div class="section" id="taskparameters">
<h2>TaskParameters<a class="headerlink" href="#taskparameters" title="Permalink to this headline"></a></h2>
<dl class="class">
<dt id="rl_coach.base_parameters.TaskParameters">
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.base_parameters.</code><code class="sig-name descname">TaskParameters</code><span class="sig-paren">(</span><em class="sig-param">framework_type: rl_coach.base_parameters.Frameworks = &lt;Frameworks.tensorflow: 'TensorFlow'&gt;</em>, <em class="sig-param">evaluate_only: int = None</em>, <em class="sig-param">use_cpu: bool = False</em>, <em class="sig-param">experiment_path='/tmp'</em>, <em class="sig-param">seed=None</em>, <em class="sig-param">checkpoint_save_secs=None</em>, <em class="sig-param">checkpoint_restore_dir=None</em>, <em class="sig-param">checkpoint_restore_path=None</em>, <em class="sig-param">checkpoint_save_dir=None</em>, <em class="sig-param">export_onnx_graph: bool = False</em>, <em class="sig-param">apply_stop_condition: bool = False</em>, <em class="sig-param">num_gpu: int = 1</em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/rl_coach/base_parameters.html#TaskParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.base_parameters.TaskParameters" title="Permalink to this definition"></a></dt>
<dd><dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>framework_type</strong> deep learning framework type. currently only tensorflow is supported</p></li>
<li><p><strong>evaluate_only</strong> if not None, the task will be used only for evaluating the model for the given number of steps.
A value of 0 means that the task will be evaluated for an infinite number of steps.</p></li>
<li><p><strong>use_cpu</strong> use the cpu for this task</p></li>
<li><p><strong>experiment_path</strong> the path to the directory which will store all the experiment outputs</p></li>
<li><p><strong>seed</strong> a seed to use for the random number generator</p></li>
<li><p><strong>checkpoint_save_secs</strong> the number of seconds between each checkpoint saving</p></li>
<li><p><strong>checkpoint_restore_dir</strong> [DEPRECATED - will be removed in one of the next releases - switch to checkpoint_restore_path]
the dir to restore the checkpoints from</p></li>
<li><p><strong>checkpoint_restore_path</strong> the path to restore the checkpoints from</p></li>
<li><p><strong>checkpoint_save_dir</strong> the directory to store the checkpoints in</p></li>
<li><p><strong>export_onnx_graph</strong> If set to True, this will export an onnx graph each time a checkpoint is saved</p></li>
<li><p><strong>apply_stop_condition</strong> If set to True, this will apply the stop condition defined by reaching a target success rate</p></li>
<li><p><strong>num_gpu</strong> number of GPUs to use</p></li>
</ul>
</dd>
</dl>
</dd></dl>
</div>
<div class="section" id="distributedtaskparameters">
<h2>DistributedTaskParameters<a class="headerlink" href="#distributedtaskparameters" title="Permalink to this headline"></a></h2>
<dl class="class">
<dt id="rl_coach.base_parameters.DistributedTaskParameters">
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.base_parameters.</code><code class="sig-name descname">DistributedTaskParameters</code><span class="sig-paren">(</span><em class="sig-param">framework_type: rl_coach.base_parameters.Frameworks</em>, <em class="sig-param">parameters_server_hosts: str</em>, <em class="sig-param">worker_hosts: str</em>, <em class="sig-param">job_type: str</em>, <em class="sig-param">task_index: int</em>, <em class="sig-param">evaluate_only: int = None</em>, <em class="sig-param">num_tasks: int = None</em>, <em class="sig-param">num_training_tasks: int = None</em>, <em class="sig-param">use_cpu: bool = False</em>, <em class="sig-param">experiment_path=None</em>, <em class="sig-param">dnd=None</em>, <em class="sig-param">shared_memory_scratchpad=None</em>, <em class="sig-param">seed=None</em>, <em class="sig-param">checkpoint_save_secs=None</em>, <em class="sig-param">checkpoint_restore_path=None</em>, <em class="sig-param">checkpoint_save_dir=None</em>, <em class="sig-param">export_onnx_graph: bool = False</em>, <em class="sig-param">apply_stop_condition: bool = False</em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/rl_coach/base_parameters.html#DistributedTaskParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.base_parameters.DistributedTaskParameters" title="Permalink to this definition"></a></dt>
<dd><dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>framework_type</strong> deep learning framework type. currently only tensorflow is supported</p></li>
<li><p><strong>evaluate_only</strong> if not None, the task will be used only for evaluating the model for the given number of steps.
A value of 0 means that the task will be evaluated for an infinite number of steps.</p></li>
<li><p><strong>parameters_server_hosts</strong> comma-separated list of hostname:port pairs to which the parameter servers are
assigned</p></li>
<li><p><strong>worker_hosts</strong> comma-separated list of hostname:port pairs to which the workers are assigned</p></li>
<li><p><strong>job_type</strong> the job type - either ps (short for parameters server) or worker</p></li>
<li><p><strong>task_index</strong> the index of the process</p></li>
<li><p><strong>num_tasks</strong> the number of total tasks that are running (not including the parameters server)</p></li>
<li><p><strong>num_training_tasks</strong> the number of tasks that are training (not including the parameters server)</p></li>
<li><p><strong>use_cpu</strong> use the cpu for this task</p></li>
<li><p><strong>experiment_path</strong> the path to the directory which will store all the experiment outputs</p></li>
<li><p><strong>dnd</strong> an external DND to use for NEC. This is a workaround needed for a shared DND not using the scratchpad.</p></li>
<li><p><strong>seed</strong> a seed to use for the random number generator</p></li>
<li><p><strong>checkpoint_save_secs</strong> the number of seconds between each checkpoint saving</p></li>
<li><p><strong>checkpoint_restore_path</strong> the path to restore the checkpoints from</p></li>
<li><p><strong>checkpoint_save_dir</strong> the directory to store the checkpoints in</p></li>
<li><p><strong>export_onnx_graph</strong> If set to True, this will export an onnx graph each time a checkpoint is saved</p></li>
<li><p><strong>apply_stop_condition</strong> If set to True, this will apply the stop condition defined by reaching a target success rate</p></li>
</ul>
</dd>
</dl>
</dd></dl>
</div>
</div>
</div>
</div>
<footer>
<div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
<a href="spaces.html" class="btn btn-neutral float-left" title="Spaces" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left"></span> Previous</a>
</div>
<hr/>
<div role="contentinfo">
<p>
&copy; Copyright 2018-2019, Intel AI Lab
</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script type="text/javascript">
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>