TD3 (#338)
@@ -195,7 +195,7 @@
 <h2>VisualizationParameters<a class="headerlink" href="#visualizationparameters" title="Permalink to this headline">¶</a></h2>
 <dl class="class">
 <dt id="rl_coach.base_parameters.VisualizationParameters">
-<em class="property">class </em><code class="descclassname">rl_coach.base_parameters.</code><code class="descname">VisualizationParameters</code><span class="sig-paren">(</span><em>print_networks_summary=False</em>, <em>dump_csv=True</em>, <em>dump_signals_to_csv_every_x_episodes=5</em>, <em>dump_gifs=False</em>, <em>dump_mp4=False</em>, <em>video_dump_methods=None</em>, <em>dump_in_episode_signals=False</em>, <em>dump_parameters_documentation=True</em>, <em>render=False</em>, <em>native_rendering=False</em>, <em>max_fps_for_human_control=10</em>, <em>tensorboard=False</em>, <em>add_rendered_image_to_env_response=False</em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/rl_coach/base_parameters.html#VisualizationParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.base_parameters.VisualizationParameters" title="Permalink to this definition">¶</a></dt>
+<em class="property">class </em><code class="sig-prename descclassname">rl_coach.base_parameters.</code><code class="sig-name descname">VisualizationParameters</code><span class="sig-paren">(</span><em class="sig-param">print_networks_summary=False</em>, <em class="sig-param">dump_csv=True</em>, <em class="sig-param">dump_signals_to_csv_every_x_episodes=5</em>, <em class="sig-param">dump_gifs=False</em>, <em class="sig-param">dump_mp4=False</em>, <em class="sig-param">video_dump_methods=None</em>, <em class="sig-param">dump_in_episode_signals=False</em>, <em class="sig-param">dump_parameters_documentation=True</em>, <em class="sig-param">render=False</em>, <em class="sig-param">native_rendering=False</em>, <em class="sig-param">max_fps_for_human_control=10</em>, <em class="sig-param">tensorboard=False</em>, <em class="sig-param">add_rendered_image_to_env_response=False</em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/rl_coach/base_parameters.html#VisualizationParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.base_parameters.VisualizationParameters" title="Permalink to this definition">¶</a></dt>
 <dd><dl class="field-list simple">
 <dt class="field-odd">Parameters</dt>
 <dd class="field-odd"><ul class="simple">
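The rendered signature above maps to plain keyword-argument construction. A minimal sketch, assuming an installed rl_coach package; the chosen values are illustrative, not defaults recommended by the commit:

    from rl_coach.base_parameters import VisualizationParameters

    # Illustrative values; every keyword comes from the signature above.
    vis_params = VisualizationParameters(
        dump_gifs=True,                          # store episode GIFs
        dump_signals_to_csv_every_x_episodes=5,  # CSV dump frequency (the default)
        render=False,                            # no on-screen rendering
        tensorboard=True,                        # also write TensorBoard summaries
    )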
@@ -244,7 +244,7 @@ which will be passed to the agent and allow using those images.</p></li>
 <h2>PresetValidationParameters<a class="headerlink" href="#presetvalidationparameters" title="Permalink to this headline">¶</a></h2>
 <dl class="class">
 <dt id="rl_coach.base_parameters.PresetValidationParameters">
-<em class="property">class </em><code class="descclassname">rl_coach.base_parameters.</code><code class="descname">PresetValidationParameters</code><span class="sig-paren">(</span><em>test=False</em>, <em>min_reward_threshold=0</em>, <em>max_episodes_to_achieve_reward=1</em>, <em>num_workers=1</em>, <em>reward_test_level=None</em>, <em>test_using_a_trace_test=True</em>, <em>trace_test_levels=None</em>, <em>trace_max_env_steps=5000</em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/rl_coach/base_parameters.html#PresetValidationParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.base_parameters.PresetValidationParameters" title="Permalink to this definition">¶</a></dt>
+<em class="property">class </em><code class="sig-prename descclassname">rl_coach.base_parameters.</code><code class="sig-name descname">PresetValidationParameters</code><span class="sig-paren">(</span><em class="sig-param">test=False</em>, <em class="sig-param">min_reward_threshold=0</em>, <em class="sig-param">max_episodes_to_achieve_reward=1</em>, <em class="sig-param">num_workers=1</em>, <em class="sig-param">reward_test_level=None</em>, <em class="sig-param">test_using_a_trace_test=True</em>, <em class="sig-param">trace_test_levels=None</em>, <em class="sig-param">trace_max_env_steps=5000</em>, <em class="sig-param">read_csv_tries=200</em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/rl_coach/base_parameters.html#PresetValidationParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.base_parameters.PresetValidationParameters" title="Permalink to this definition">¶</a></dt>
 <dd><dl class="field-list simple">
 <dt class="field-odd">Parameters</dt>
 <dd class="field-odd"><ul class="simple">
@@ -261,6 +261,7 @@ reward tests suite.</p></li>
 trace tests suite.</p></li>
 <li><p><strong>trace_max_env_steps</strong> – An integer representing the maximum number of environment steps to run when running this preset as part
 of the trace tests suite.</p></li>
+<li><p><strong>read_csv_tries</strong> – The number of retries to attempt for reading the experiment csv file, before declaring failure.</p></li>
 </ul>
 </dd>
 </dl>
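These two hunks are where the commit touches PresetValidationParameters: the signature gains read_csv_tries=200 and the parameter list gains its description. A hedged sketch of a preset-test configuration using it; the threshold values are hypothetical:

    from rl_coach.base_parameters import PresetValidationParameters

    preset_validation_params = PresetValidationParameters(
        test=True,                          # include this preset in the reward tests suite
        min_reward_threshold=150,           # hypothetical pass threshold
        max_episodes_to_achieve_reward=250, # hypothetical episode budget
        read_csv_tries=200,                 # the parameter added by this commit
    )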
@@ -271,7 +272,7 @@ of the trace tests suite.</p></li>
 <h2>TaskParameters<a class="headerlink" href="#taskparameters" title="Permalink to this headline">¶</a></h2>
 <dl class="class">
 <dt id="rl_coach.base_parameters.TaskParameters">
-<em class="property">class </em><code class="descclassname">rl_coach.base_parameters.</code><code class="descname">TaskParameters</code><span class="sig-paren">(</span><em>framework_type: rl_coach.base_parameters.Frameworks = <Frameworks.tensorflow: 'TensorFlow'></em>, <em>evaluate_only: int = None</em>, <em>use_cpu: bool = False</em>, <em>experiment_path='/tmp'</em>, <em>seed=None</em>, <em>checkpoint_save_secs=None</em>, <em>checkpoint_restore_dir=None</em>, <em>checkpoint_restore_path=None</em>, <em>checkpoint_save_dir=None</em>, <em>export_onnx_graph: bool = False</em>, <em>apply_stop_condition: bool = False</em>, <em>num_gpu: int = 1</em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/rl_coach/base_parameters.html#TaskParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.base_parameters.TaskParameters" title="Permalink to this definition">¶</a></dt>
+<em class="property">class </em><code class="sig-prename descclassname">rl_coach.base_parameters.</code><code class="sig-name descname">TaskParameters</code><span class="sig-paren">(</span><em class="sig-param">framework_type: rl_coach.base_parameters.Frameworks = <Frameworks.tensorflow: 'TensorFlow'></em>, <em class="sig-param">evaluate_only: int = None</em>, <em class="sig-param">use_cpu: bool = False</em>, <em class="sig-param">experiment_path='/tmp'</em>, <em class="sig-param">seed=None</em>, <em class="sig-param">checkpoint_save_secs=None</em>, <em class="sig-param">checkpoint_restore_dir=None</em>, <em class="sig-param">checkpoint_restore_path=None</em>, <em class="sig-param">checkpoint_save_dir=None</em>, <em class="sig-param">export_onnx_graph: bool = False</em>, <em class="sig-param">apply_stop_condition: bool = False</em>, <em class="sig-param">num_gpu: int = 1</em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/rl_coach/base_parameters.html#TaskParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.base_parameters.TaskParameters" title="Permalink to this definition">¶</a></dt>
 <dd><dl class="field-list simple">
 <dt class="field-odd">Parameters</dt>
 <dd class="field-odd"><ul class="simple">
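For context, a TaskParameters instance is what a Coach run is launched with. A minimal single-process sketch; the experiment path is hypothetical, and the final line shows the common (but here hedged) hand-off to a preset's graph manager:

    from rl_coach.base_parameters import TaskParameters

    task_params = TaskParameters(
        use_cpu=True,
        experiment_path='/tmp/my_experiment',  # hypothetical output directory
        checkpoint_save_secs=600,              # checkpoint every ten minutes
    )
    # Typical usage in Coach tutorials: graph_manager.create_graph(task_params)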
@@ -299,7 +300,7 @@ the dir to restore the checkpoints from</p></li>
 <h2>DistributedTaskParameters<a class="headerlink" href="#distributedtaskparameters" title="Permalink to this headline">¶</a></h2>
 <dl class="class">
 <dt id="rl_coach.base_parameters.DistributedTaskParameters">
-<em class="property">class </em><code class="descclassname">rl_coach.base_parameters.</code><code class="descname">DistributedTaskParameters</code><span class="sig-paren">(</span><em>framework_type: rl_coach.base_parameters.Frameworks</em>, <em>parameters_server_hosts: str</em>, <em>worker_hosts: str</em>, <em>job_type: str</em>, <em>task_index: int</em>, <em>evaluate_only: int = None</em>, <em>num_tasks: int = None</em>, <em>num_training_tasks: int = None</em>, <em>use_cpu: bool = False</em>, <em>experiment_path=None</em>, <em>dnd=None</em>, <em>shared_memory_scratchpad=None</em>, <em>seed=None</em>, <em>checkpoint_save_secs=None</em>, <em>checkpoint_restore_path=None</em>, <em>checkpoint_save_dir=None</em>, <em>export_onnx_graph: bool = False</em>, <em>apply_stop_condition: bool = False</em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/rl_coach/base_parameters.html#DistributedTaskParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.base_parameters.DistributedTaskParameters" title="Permalink to this definition">¶</a></dt>
+<em class="property">class </em><code class="sig-prename descclassname">rl_coach.base_parameters.</code><code class="sig-name descname">DistributedTaskParameters</code><span class="sig-paren">(</span><em class="sig-param">framework_type: rl_coach.base_parameters.Frameworks</em>, <em class="sig-param">parameters_server_hosts: str</em>, <em class="sig-param">worker_hosts: str</em>, <em class="sig-param">job_type: str</em>, <em class="sig-param">task_index: int</em>, <em class="sig-param">evaluate_only: int = None</em>, <em class="sig-param">num_tasks: int = None</em>, <em class="sig-param">num_training_tasks: int = None</em>, <em class="sig-param">use_cpu: bool = False</em>, <em class="sig-param">experiment_path=None</em>, <em class="sig-param">dnd=None</em>, <em class="sig-param">shared_memory_scratchpad=None</em>, <em class="sig-param">seed=None</em>, <em class="sig-param">checkpoint_save_secs=None</em>, <em class="sig-param">checkpoint_restore_path=None</em>, <em class="sig-param">checkpoint_save_dir=None</em>, <em class="sig-param">export_onnx_graph: bool = False</em>, <em class="sig-param">apply_stop_condition: bool = False</em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/rl_coach/base_parameters.html#DistributedTaskParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.base_parameters.DistributedTaskParameters" title="Permalink to this definition">¶</a></dt>
 <dd><dl class="field-list simple">
 <dt class="field-odd">Parameters</dt>
 <dd class="field-odd"><ul class="simple">
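DistributedTaskParameters extends the same idea to parameter-server training, where each process is told its role and index. A sketch with hypothetical host addresses; job_type values follow the usual TensorFlow 'ps'/'worker' convention:

    from rl_coach.base_parameters import DistributedTaskParameters, Frameworks

    worker_params = DistributedTaskParameters(
        framework_type=Frameworks.tensorflow,
        parameters_server_hosts='10.0.0.1:2222',     # hypothetical PS host
        worker_hosts='10.0.0.2:2222,10.0.0.3:2222',  # hypothetical workers
        job_type='worker',
        task_index=0,                                # this process's index
    )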
@@ -124,6 +124,7 @@
 <li class="toctree-l2"><a class="reference internal" href="cil.html">Conditional Imitation Learning</a></li>
 <li class="toctree-l2"><a class="reference internal" href="../policy_optimization/cppo.html">Clipped Proximal Policy Optimization</a></li>
 <li class="toctree-l2"><a class="reference internal" href="../policy_optimization/ddpg.html">Deep Deterministic Policy Gradient</a></li>
+<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/td3.html">Twin Delayed Deep Deterministic Policy Gradient</a></li>
 <li class="toctree-l2"><a class="reference internal" href="../policy_optimization/sac.html">Soft Actor-Critic</a></li>
 <li class="toctree-l2"><a class="reference internal" href="../other/dfp.html">Direct Future Prediction</a></li>
 <li class="toctree-l2"><a class="reference internal" href="../value_optimization/double_dqn.html">Double DQN</a></li>
@@ -239,7 +240,7 @@ the expert for each state.</p>
 </ol>
 <dl class="class">
 <dt id="rl_coach.agents.bc_agent.BCAlgorithmParameters">
-<em class="property">class </em><code class="descclassname">rl_coach.agents.bc_agent.</code><code class="descname">BCAlgorithmParameters</code><a class="reference internal" href="../../../_modules/rl_coach/agents/bc_agent.html#BCAlgorithmParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.bc_agent.BCAlgorithmParameters" title="Permalink to this definition">¶</a></dt>
+<em class="property">class </em><code class="sig-prename descclassname">rl_coach.agents.bc_agent.</code><code class="sig-name descname">BCAlgorithmParameters</code><a class="reference internal" href="../../../_modules/rl_coach/agents/bc_agent.html#BCAlgorithmParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.bc_agent.BCAlgorithmParameters" title="Permalink to this definition">¶</a></dt>
 <dd></dd></dl>

 </div>
@@ -124,6 +124,7 @@
 </li>
 <li class="toctree-l2"><a class="reference internal" href="../policy_optimization/cppo.html">Clipped Proximal Policy Optimization</a></li>
 <li class="toctree-l2"><a class="reference internal" href="../policy_optimization/ddpg.html">Deep Deterministic Policy Gradient</a></li>
+<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/td3.html">Twin Delayed Deep Deterministic Policy Gradient</a></li>
 <li class="toctree-l2"><a class="reference internal" href="../policy_optimization/sac.html">Soft Actor-Critic</a></li>
 <li class="toctree-l2"><a class="reference internal" href="../other/dfp.html">Direct Future Prediction</a></li>
 <li class="toctree-l2"><a class="reference internal" href="../value_optimization/double_dqn.html">Double DQN</a></li>
@@ -245,7 +246,7 @@ so that the loss for the other heads will be zeroed out.</p></li>
 </ol>
 <dl class="class">
 <dt id="rl_coach.agents.cil_agent.CILAlgorithmParameters">
-<em class="property">class </em><code class="descclassname">rl_coach.agents.cil_agent.</code><code class="descname">CILAlgorithmParameters</code><a class="reference internal" href="../../../_modules/rl_coach/agents/cil_agent.html#CILAlgorithmParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.cil_agent.CILAlgorithmParameters" title="Permalink to this definition">¶</a></dt>
+<em class="property">class </em><code class="sig-prename descclassname">rl_coach.agents.cil_agent.</code><code class="sig-name descname">CILAlgorithmParameters</code><a class="reference internal" href="../../../_modules/rl_coach/agents/cil_agent.html#CILAlgorithmParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.cil_agent.CILAlgorithmParameters" title="Permalink to this definition">¶</a></dt>
 <dd><dl class="field-list simple">
 <dt class="field-odd">Parameters</dt>
 <dd class="field-odd"><p><strong>state_key_with_the_class_index</strong> – (str)
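The parameter documented above, state_key_with_the_class_index, names the state entry whose value selects which output head receives the loss (the hunk context notes the other heads are zeroed out). A hedged sketch; the key name is hypothetical:

    from rl_coach.agents.cil_agent import CILAlgorithmParameters

    cil_algorithm_params = CILAlgorithmParameters()
    # Route each sample's loss to the head indexed by this state entry
    # (the key name here is a hypothetical example).
    cil_algorithm_params.state_key_with_the_class_index = 'high_level_command'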
@@ -117,6 +117,7 @@
 <li class="toctree-l2"><a class="reference internal" href="imitation/cil.html">Conditional Imitation Learning</a></li>
 <li class="toctree-l2"><a class="reference internal" href="policy_optimization/cppo.html">Clipped Proximal Policy Optimization</a></li>
 <li class="toctree-l2"><a class="reference internal" href="policy_optimization/ddpg.html">Deep Deterministic Policy Gradient</a></li>
+<li class="toctree-l2"><a class="reference internal" href="policy_optimization/td3.html">Twin Delayed Deep Deterministic Policy Gradient</a></li>
 <li class="toctree-l2"><a class="reference internal" href="policy_optimization/sac.html">Soft Actor-Critic</a></li>
 <li class="toctree-l2"><a class="reference internal" href="other/dfp.html">Direct Future Prediction</a></li>
 <li class="toctree-l2"><a class="reference internal" href="value_optimization/double_dqn.html">Double DQN</a></li>
@@ -225,6 +226,7 @@ A detailed description of those algorithms can be found by navigating to each of
 <li class="toctree-l1"><a class="reference internal" href="imitation/cil.html">Conditional Imitation Learning</a></li>
 <li class="toctree-l1"><a class="reference internal" href="policy_optimization/cppo.html">Clipped Proximal Policy Optimization</a></li>
 <li class="toctree-l1"><a class="reference internal" href="policy_optimization/ddpg.html">Deep Deterministic Policy Gradient</a></li>
+<li class="toctree-l1"><a class="reference internal" href="policy_optimization/td3.html">Twin Delayed Deep Deterministic Policy Gradient</a></li>
 <li class="toctree-l1"><a class="reference internal" href="policy_optimization/sac.html">Soft Actor-Critic</a></li>
 <li class="toctree-l1"><a class="reference internal" href="other/dfp.html">Direct Future Prediction</a></li>
 <li class="toctree-l1"><a class="reference internal" href="value_optimization/double_dqn.html">Double DQN</a></li>
@@ -243,7 +245,7 @@ A detailed description of those algorithms can be found by navigating to each of
 </div>
 <dl class="class">
 <dt id="rl_coach.base_parameters.AgentParameters">
-<em class="property">class </em><code class="descclassname">rl_coach.base_parameters.</code><code class="descname">AgentParameters</code><span class="sig-paren">(</span><em>algorithm: rl_coach.base_parameters.AlgorithmParameters, exploration: ExplorationParameters, memory: MemoryParameters, networks: Dict[str, rl_coach.base_parameters.NetworkParameters], visualization: rl_coach.base_parameters.VisualizationParameters = <rl_coach.base_parameters.VisualizationParameters object></em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/base_parameters.html#AgentParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.base_parameters.AgentParameters" title="Permalink to this definition">¶</a></dt>
+<em class="property">class </em><code class="sig-prename descclassname">rl_coach.base_parameters.</code><code class="sig-name descname">AgentParameters</code><span class="sig-paren">(</span><em class="sig-param">algorithm: rl_coach.base_parameters.AlgorithmParameters, exploration: ExplorationParameters, memory: MemoryParameters, networks: Dict[str, rl_coach.base_parameters.NetworkParameters], visualization: rl_coach.base_parameters.VisualizationParameters = <rl_coach.base_parameters.VisualizationParameters object></em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/base_parameters.html#AgentParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.base_parameters.AgentParameters" title="Permalink to this definition">¶</a></dt>
 <dd><dl class="field-list simple">
 <dt class="field-odd">Parameters</dt>
 <dd class="field-odd"><ul class="simple">
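AgentParameters bundles the four component parameter sets named in the signature above. In common Coach presets it is rarely called directly; each agent ships a pre-wired subclass whose fields are then adjusted. A sketch assuming the DQN agent's default components and the field names used by published presets:

    from rl_coach.agents.dqn_agent import DQNAgentParameters
    from rl_coach.schedules import LinearSchedule

    agent_params = DQNAgentParameters()  # an AgentParameters subclass
    # Tune the 'main' network and the exploration schedule in place.
    agent_params.network_wrappers['main'].learning_rate = 0.00025
    agent_params.exploration.epsilon_schedule = LinearSchedule(1.0, 0.01, 1000000)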
@@ -270,7 +272,7 @@ used for visualization purposes, such as printing to the screen, rendering, and

 <dl class="class">
 <dt id="rl_coach.agents.agent.Agent">
-<em class="property">class </em><code class="descclassname">rl_coach.agents.agent.</code><code class="descname">Agent</code><span class="sig-paren">(</span><em>agent_parameters: rl_coach.base_parameters.AgentParameters</em>, <em>parent: Union[LevelManager</em>, <em>CompositeAgent] = None</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent" title="Permalink to this definition">¶</a></dt>
+<em class="property">class </em><code class="sig-prename descclassname">rl_coach.agents.agent.</code><code class="sig-name descname">Agent</code><span class="sig-paren">(</span><em class="sig-param">agent_parameters: rl_coach.base_parameters.AgentParameters</em>, <em class="sig-param">parent: Union[LevelManager</em>, <em class="sig-param">CompositeAgent] = None</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent" title="Permalink to this definition">¶</a></dt>
 <dd><dl class="field-list simple">
 <dt class="field-odd">Parameters</dt>
 <dd class="field-odd"><p><strong>agent_parameters</strong> – A AgentParameters class instance with all the agent parameters</p>
@@ -278,7 +280,7 @@ used for visualization purposes, such as printing to the screen, rendering, and
 </dl>
 <dl class="method">
 <dt id="rl_coach.agents.agent.Agent.act">
-<code class="descname">act</code><span class="sig-paren">(</span><em>action: Union[None</em>, <em>int</em>, <em>float</em>, <em>numpy.ndarray</em>, <em>List] = None</em><span class="sig-paren">)</span> → rl_coach.core_types.ActionInfo<a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.act"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.act" title="Permalink to this definition">¶</a></dt>
+<code class="sig-name descname">act</code><span class="sig-paren">(</span><em class="sig-param">action: Union[None</em>, <em class="sig-param">int</em>, <em class="sig-param">float</em>, <em class="sig-param">numpy.ndarray</em>, <em class="sig-param">List] = None</em><span class="sig-paren">)</span> → rl_coach.core_types.ActionInfo<a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.act"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.act" title="Permalink to this definition">¶</a></dt>
 <dd><p>Given the agents current knowledge, decide on the next action to apply to the environment</p>
 <dl class="field-list simple">
 <dt class="field-odd">Parameters</dt>
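act() returns an rl_coach.core_types.ActionInfo, which carries the chosen action. A hand-driven interaction sketch only; in normal use Coach's orchestration layer drives this loop, and `env` here is a hypothetical environment handle:

    # One step of a hand-rolled act/observe loop (sketch, not the Coach run loop).
    action_info = agent.act()                    # decide on the next action
    env_response = env.step(action_info.action)  # hypothetical environment handle
    agent.observe(env_response)                  # observe() is documented further below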
@@ -292,7 +294,7 @@ used for visualization purposes, such as printing to the screen, rendering, and

 <dl class="method">
 <dt id="rl_coach.agents.agent.Agent.call_memory">
-<code class="descname">call_memory</code><span class="sig-paren">(</span><em>func</em>, <em>args=()</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.call_memory"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.call_memory" title="Permalink to this definition">¶</a></dt>
+<code class="sig-name descname">call_memory</code><span class="sig-paren">(</span><em class="sig-param">func</em>, <em class="sig-param">args=()</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.call_memory"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.call_memory" title="Permalink to this definition">¶</a></dt>
 <dd><p>This function is a wrapper to allow having the same calls for shared or unshared memories.
 It should be used instead of calling the memory directly in order to allow different algorithms to work
 both with a shared and a local memory.</p>
@@ -311,7 +313,7 @@ both with a shared and a local memory.</p>

 <dl class="method">
 <dt id="rl_coach.agents.agent.Agent.choose_action">
-<code class="descname">choose_action</code><span class="sig-paren">(</span><em>curr_state</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.choose_action"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.choose_action" title="Permalink to this definition">¶</a></dt>
+<code class="sig-name descname">choose_action</code><span class="sig-paren">(</span><em class="sig-param">curr_state</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.choose_action"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.choose_action" title="Permalink to this definition">¶</a></dt>
 <dd><p>choose an action to act with in the current episode being played. Different behavior might be exhibited when
 training or testing.</p>
 <dl class="field-list simple">
@@ -326,7 +328,7 @@ training or testing.</p>

 <dl class="method">
 <dt id="rl_coach.agents.agent.Agent.collect_savers">
-<code class="descname">collect_savers</code><span class="sig-paren">(</span><em>parent_path_suffix: str</em><span class="sig-paren">)</span> → rl_coach.saver.SaverCollection<a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.collect_savers"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.collect_savers" title="Permalink to this definition">¶</a></dt>
+<code class="sig-name descname">collect_savers</code><span class="sig-paren">(</span><em class="sig-param">parent_path_suffix: str</em><span class="sig-paren">)</span> → rl_coach.saver.SaverCollection<a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.collect_savers"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.collect_savers" title="Permalink to this definition">¶</a></dt>
 <dd><p>Collect all of agent’s network savers
 :param parent_path_suffix: path suffix of the parent of the agent
 (could be name of level manager or composite agent)
@@ -335,7 +337,7 @@ training or testing.</p>

 <dl class="method">
 <dt id="rl_coach.agents.agent.Agent.create_networks">
-<code class="descname">create_networks</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → Dict[str, rl_coach.architectures.network_wrapper.NetworkWrapper]<a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.create_networks"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.create_networks" title="Permalink to this definition">¶</a></dt>
+<code class="sig-name descname">create_networks</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → Dict[str, rl_coach.architectures.network_wrapper.NetworkWrapper]<a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.create_networks"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.create_networks" title="Permalink to this definition">¶</a></dt>
 <dd><p>Create all the networks of the agent.
 The network creation will be done after setting the environment parameters for the agent, since they are needed
 for creating the network.</p>
@@ -346,9 +348,16 @@ for creating the network.</p>
 </dl>
 </dd></dl>

+<dl class="method">
+<dt id="rl_coach.agents.agent.Agent.freeze_memory">
+<code class="sig-name descname">freeze_memory</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.freeze_memory"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.freeze_memory" title="Permalink to this definition">¶</a></dt>
+<dd><p>Shuffle episodes in the memory and freeze it to make sure that no extra data is being pushed anymore.
+:return: None</p>
+</dd></dl>
+
 <dl class="method">
 <dt id="rl_coach.agents.agent.Agent.get_predictions">
-<code class="descname">get_predictions</code><span class="sig-paren">(</span><em>states: List[Dict[str, numpy.ndarray]], prediction_type: rl_coach.core_types.PredictionType</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.get_predictions"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.get_predictions" title="Permalink to this definition">¶</a></dt>
+<code class="sig-name descname">get_predictions</code><span class="sig-paren">(</span><em class="sig-param">states: List[Dict[str, numpy.ndarray]], prediction_type: rl_coach.core_types.PredictionType</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.get_predictions"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.get_predictions" title="Permalink to this definition">¶</a></dt>
 <dd><p>Get a prediction from the agent with regard to the requested prediction_type.
 If the agent cannot predict this type of prediction_type, or if there is more than possible way to do so,
 raise a ValueException.</p>
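freeze_memory() is one of the methods these regenerated docs add. Together with load_memory_from_file(), documented further down this page, it supports a batch-style flow: fill the replay memory from disk, then freeze it so training sees a fixed, shuffled dataset. A sketch, assuming `agent` is an already-constructed Agent:

    # Both calls are documented on this page; `agent` is assumed to exist.
    agent.load_memory_from_file()  # pull stored transitions into the memory
    agent.freeze_memory()          # shuffle episodes and block further pushes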
@@ -367,7 +376,7 @@ raise a ValueException.</p>

 <dl class="method">
 <dt id="rl_coach.agents.agent.Agent.get_state_embedding">
-<code class="descname">get_state_embedding</code><span class="sig-paren">(</span><em>state: dict</em><span class="sig-paren">)</span> → numpy.ndarray<a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.get_state_embedding"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.get_state_embedding" title="Permalink to this definition">¶</a></dt>
+<code class="sig-name descname">get_state_embedding</code><span class="sig-paren">(</span><em class="sig-param">state: dict</em><span class="sig-paren">)</span> → numpy.ndarray<a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.get_state_embedding"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.get_state_embedding" title="Permalink to this definition">¶</a></dt>
 <dd><p>Given a state, get the corresponding state embedding from the main network</p>
 <dl class="field-list simple">
 <dt class="field-odd">Parameters</dt>
@@ -381,7 +390,7 @@ raise a ValueException.</p>

 <dl class="method">
 <dt id="rl_coach.agents.agent.Agent.handle_episode_ended">
-<code class="descname">handle_episode_ended</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → None<a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.handle_episode_ended"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.handle_episode_ended" title="Permalink to this definition">¶</a></dt>
+<code class="sig-name descname">handle_episode_ended</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → None<a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.handle_episode_ended"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.handle_episode_ended" title="Permalink to this definition">¶</a></dt>
 <dd><p>Make any changes needed when each episode is ended.
 This includes incrementing counters, updating full episode dependent values, updating logs, etc.
 This function is called right after each episode is ended.</p>
@@ -394,7 +403,7 @@ This function is called right after each episode is ended.</p>

 <dl class="method">
 <dt id="rl_coach.agents.agent.Agent.init_environment_dependent_modules">
-<code class="descname">init_environment_dependent_modules</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → None<a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.init_environment_dependent_modules"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.init_environment_dependent_modules" title="Permalink to this definition">¶</a></dt>
+<code class="sig-name descname">init_environment_dependent_modules</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → None<a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.init_environment_dependent_modules"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.init_environment_dependent_modules" title="Permalink to this definition">¶</a></dt>
 <dd><p>Initialize any modules that depend on knowing information about the environment such as the action space or
 the observation space</p>
 <dl class="field-list simple">
@@ -404,9 +413,20 @@ the observation space</p>
 </dl>
 </dd></dl>

+<dl class="method">
+<dt id="rl_coach.agents.agent.Agent.initialize_session_dependent_components">
+<code class="sig-name descname">initialize_session_dependent_components</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.initialize_session_dependent_components"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.initialize_session_dependent_components" title="Permalink to this definition">¶</a></dt>
+<dd><p>Initialize components which require a session as part of their initialization.</p>
+<dl class="field-list simple">
+<dt class="field-odd">Returns</dt>
+<dd class="field-odd"><p>None</p>
+</dd>
+</dl>
+</dd></dl>
+
 <dl class="method">
 <dt id="rl_coach.agents.agent.Agent.learn_from_batch">
-<code class="descname">learn_from_batch</code><span class="sig-paren">(</span><em>batch</em><span class="sig-paren">)</span> → Tuple[float, List, List]<a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.learn_from_batch"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.learn_from_batch" title="Permalink to this definition">¶</a></dt>
+<code class="sig-name descname">learn_from_batch</code><span class="sig-paren">(</span><em class="sig-param">batch</em><span class="sig-paren">)</span> → Tuple[float, List, List]<a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.learn_from_batch"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.learn_from_batch" title="Permalink to this definition">¶</a></dt>
 <dd><p>Given a batch of transitions, calculates their target values and updates the network.</p>
 <dl class="field-list simple">
 <dt class="field-odd">Parameters</dt>
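learn_from_batch() is the hook concrete algorithms override, and its documented return type is Tuple[float, List, List]. A skeletal sketch of a hypothetical subclass, matching that shape:

    from rl_coach.agents.agent import Agent

    class MyAgent(Agent):
        # Hypothetical subclass; real agents compute targets and run an update here.
        def learn_from_batch(self, batch):
            total_loss, losses, unclipped_grads = 0.0, [], []
            # ... derive target values from `batch` and train the networks ...
            return total_loss, losses, unclipped_grads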
@@ -418,9 +438,20 @@ the observation space</p>
 </dl>
 </dd></dl>

+<dl class="method">
+<dt id="rl_coach.agents.agent.Agent.load_memory_from_file">
+<code class="sig-name descname">load_memory_from_file</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.load_memory_from_file"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.load_memory_from_file" title="Permalink to this definition">¶</a></dt>
+<dd><p>Load memory transitions from a file.</p>
+<dl class="field-list simple">
+<dt class="field-odd">Returns</dt>
+<dd class="field-odd"><p>None</p>
+</dd>
+</dl>
+</dd></dl>
+
 <dl class="method">
 <dt id="rl_coach.agents.agent.Agent.log_to_screen">
-<code class="descname">log_to_screen</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → None<a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.log_to_screen"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.log_to_screen" title="Permalink to this definition">¶</a></dt>
+<code class="sig-name descname">log_to_screen</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → None<a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.log_to_screen"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.log_to_screen" title="Permalink to this definition">¶</a></dt>
 <dd><p>Write an episode summary line to the terminal</p>
 <dl class="field-list simple">
 <dt class="field-odd">Returns</dt>
@@ -431,7 +462,7 @@ the observation space</p>

 <dl class="method">
 <dt id="rl_coach.agents.agent.Agent.observe">
-<code class="descname">observe</code><span class="sig-paren">(</span><em>env_response: rl_coach.core_types.EnvResponse</em><span class="sig-paren">)</span> → bool<a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.observe"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.observe" title="Permalink to this definition">¶</a></dt>
+<code class="sig-name descname">observe</code><span class="sig-paren">(</span><em class="sig-param">env_response: rl_coach.core_types.EnvResponse</em><span class="sig-paren">)</span> → bool<a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.observe"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.observe" title="Permalink to this definition">¶</a></dt>
 <dd><p>Given a response from the environment, distill the observation from it and store it for later use.
 The response should be a dictionary containing the performed action, the new observation and measurements,
 the reward, a game over flag and any additional information necessary.</p>
@@ -446,9 +477,9 @@ given observation</p>
 </dl>
 </dd></dl>

-<dl class="attribute">
+<dl class="method">
 <dt id="rl_coach.agents.agent.Agent.parent">
-<code class="descname">parent</code><a class="headerlink" href="#rl_coach.agents.agent.Agent.parent" title="Permalink to this definition">¶</a></dt>
+<em class="property">property </em><code class="sig-name descname">parent</code><a class="headerlink" href="#rl_coach.agents.agent.Agent.parent" title="Permalink to this definition">¶</a></dt>
 <dd><p>Get the parent class of the agent</p>
 <dl class="field-list simple">
 <dt class="field-odd">Returns</dt>
@@ -457,9 +488,9 @@ given observation</p>
 </dl>
 </dd></dl>

-<dl class="attribute">
+<dl class="method">
 <dt id="rl_coach.agents.agent.Agent.phase">
-<code class="descname">phase</code><a class="headerlink" href="#rl_coach.agents.agent.Agent.phase" title="Permalink to this definition">¶</a></dt>
+<em class="property">property </em><code class="sig-name descname">phase</code><a class="headerlink" href="#rl_coach.agents.agent.Agent.phase" title="Permalink to this definition">¶</a></dt>
 <dd><p>The current running phase of the agent</p>
 <dl class="field-list simple">
 <dt class="field-odd">Returns</dt>
@@ -470,7 +501,7 @@ given observation</p>

 <dl class="method">
 <dt id="rl_coach.agents.agent.Agent.post_training_commands">
-<code class="descname">post_training_commands</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → None<a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.post_training_commands"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.post_training_commands" title="Permalink to this definition">¶</a></dt>
+<code class="sig-name descname">post_training_commands</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → None<a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.post_training_commands"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.post_training_commands" title="Permalink to this definition">¶</a></dt>
 <dd><p>A function which allows adding any functionality that is required to run right after the training phase ends.</p>
 <dl class="field-list simple">
 <dt class="field-odd">Returns</dt>
@@ -481,7 +512,7 @@ given observation</p>

 <dl class="method">
 <dt id="rl_coach.agents.agent.Agent.prepare_batch_for_inference">
-<code class="descname">prepare_batch_for_inference</code><span class="sig-paren">(</span><em>states: Union[Dict[str, numpy.ndarray], List[Dict[str, numpy.ndarray]]], network_name: str</em><span class="sig-paren">)</span> → Dict[str, numpy.array]<a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.prepare_batch_for_inference"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.prepare_batch_for_inference" title="Permalink to this definition">¶</a></dt>
+<code class="sig-name descname">prepare_batch_for_inference</code><span class="sig-paren">(</span><em class="sig-param">states: Union[Dict[str, numpy.ndarray], List[Dict[str, numpy.ndarray]]], network_name: str</em><span class="sig-paren">)</span> → Dict[str, numpy.core.multiarray.array]<a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.prepare_batch_for_inference"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.prepare_batch_for_inference" title="Permalink to this definition">¶</a></dt>
 <dd><p>Convert curr_state into input tensors tensorflow is expecting. i.e. if we have several inputs states, stack all
 observations together, measurements together, etc.</p>
 <dl class="field-list simple">
@@ -501,7 +532,7 @@ the observation relevant for the network from the states.</p></li>

 <dl class="method">
 <dt id="rl_coach.agents.agent.Agent.register_signal">
-<code class="descname">register_signal</code><span class="sig-paren">(</span><em>signal_name: str</em>, <em>dump_one_value_per_episode: bool = True</em>, <em>dump_one_value_per_step: bool = False</em><span class="sig-paren">)</span> → rl_coach.utils.Signal<a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.register_signal"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.register_signal" title="Permalink to this definition">¶</a></dt>
+<code class="sig-name descname">register_signal</code><span class="sig-paren">(</span><em class="sig-param">signal_name: str</em>, <em class="sig-param">dump_one_value_per_episode: bool = True</em>, <em class="sig-param">dump_one_value_per_step: bool = False</em><span class="sig-paren">)</span> → rl_coach.utils.Signal<a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.register_signal"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.register_signal" title="Permalink to this definition">¶</a></dt>
 <dd><p>Register a signal such that its statistics will be dumped and be viewable through dashboard</p>
 <dl class="field-list simple">
 <dt class="field-odd">Parameters</dt>
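register_signal() returns an rl_coach.utils.Signal whose statistics feed the experiment CSV and the Coach dashboard. A hedged sketch of instrumenting an agent subclass; the subclass, attribute, and signal name are all illustrative:

    from rl_coach.agents.dqn_agent import DQNAgent

    class InstrumentedDQNAgent(DQNAgent):
        # Hypothetical subclass that tracks an extra per-step statistic.
        def __init__(self, agent_parameters, parent=None):
            super().__init__(agent_parameters, parent)
            # Dump a value on every step, not only once per episode.
            self.q_value_gap = self.register_signal(
                'Q Value Gap', dump_one_value_per_step=True)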
@@ -519,7 +550,7 @@ the observation relevant for the network from the states.</p></li>

 <dl class="method">
 <dt id="rl_coach.agents.agent.Agent.reset_evaluation_state">
-<code class="descname">reset_evaluation_state</code><span class="sig-paren">(</span><em>val: rl_coach.core_types.RunPhase</em><span class="sig-paren">)</span> → None<a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.reset_evaluation_state"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.reset_evaluation_state" title="Permalink to this definition">¶</a></dt>
+<code class="sig-name descname">reset_evaluation_state</code><span class="sig-paren">(</span><em class="sig-param">val: rl_coach.core_types.RunPhase</em><span class="sig-paren">)</span> → None<a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.reset_evaluation_state"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.reset_evaluation_state" title="Permalink to this definition">¶</a></dt>
 <dd><p>Perform accumulators initialization when entering an evaluation phase, and signal dumping when exiting an
 evaluation phase. Entering or exiting the evaluation phase is determined according to the new phase given
 by val, and by the current phase set in self.phase.</p>
@@ -535,7 +566,7 @@ by val, and by the current phase set in self.phase.</p>

 <dl class="method">
 <dt id="rl_coach.agents.agent.Agent.reset_internal_state">
-<code class="descname">reset_internal_state</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → None<a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.reset_internal_state"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.reset_internal_state" title="Permalink to this definition">¶</a></dt>
+<code class="sig-name descname">reset_internal_state</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → None<a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.reset_internal_state"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.reset_internal_state" title="Permalink to this definition">¶</a></dt>
 <dd><p>Reset all the episodic parameters. This function is called right before each episode starts.</p>
 <dl class="field-list simple">
 <dt class="field-odd">Returns</dt>
@@ -546,7 +577,7 @@ by val, and by the current phase set in self.phase.</p>

 <dl class="method">
 <dt id="rl_coach.agents.agent.Agent.restore_checkpoint">
-<code class="descname">restore_checkpoint</code><span class="sig-paren">(</span><em>checkpoint_dir: str</em><span class="sig-paren">)</span> → None<a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.restore_checkpoint"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.restore_checkpoint" title="Permalink to this definition">¶</a></dt>
+<code class="sig-name descname">restore_checkpoint</code><span class="sig-paren">(</span><em class="sig-param">checkpoint_dir: str</em><span class="sig-paren">)</span> → None<a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.restore_checkpoint"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.restore_checkpoint" title="Permalink to this definition">¶</a></dt>
 <dd><p>Allows agents to store additional information when saving checkpoints.</p>
 <dl class="field-list simple">
 <dt class="field-odd">Parameters</dt>
@@ -560,7 +591,7 @@ by val, and by the current phase set in self.phase.</p>

 <dl class="method">
 <dt id="rl_coach.agents.agent.Agent.run_off_policy_evaluation">
-<code class="descname">run_off_policy_evaluation</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → None<a class="headerlink" href="#rl_coach.agents.agent.Agent.run_off_policy_evaluation" title="Permalink to this definition">¶</a></dt>
+<code class="sig-name descname">run_off_policy_evaluation</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → None<a class="headerlink" href="#rl_coach.agents.agent.Agent.run_off_policy_evaluation" title="Permalink to this definition">¶</a></dt>
 <dd><p>Run off-policy evaluation estimators to evaluate the trained policy performance against a dataset.
 Should only be implemented for off-policy RL algorithms.</p>
 <dl class="field-list simple">
@@ -572,7 +603,7 @@ Should only be implemented for off-policy RL algorithms.</p>

 <dl class="method">
 <dt id="rl_coach.agents.agent.Agent.run_pre_network_filter_for_inference">
-<code class="descname">run_pre_network_filter_for_inference</code><span class="sig-paren">(</span><em>state: Dict[str, numpy.ndarray], update_filter_internal_state: bool = True</em><span class="sig-paren">)</span> → Dict[str, numpy.ndarray]<a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.run_pre_network_filter_for_inference"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.run_pre_network_filter_for_inference" title="Permalink to this definition">¶</a></dt>
+<code class="sig-name descname">run_pre_network_filter_for_inference</code><span class="sig-paren">(</span><em class="sig-param">state: Dict[str, numpy.ndarray], update_filter_internal_state: bool = True</em><span class="sig-paren">)</span> → Dict[str, numpy.ndarray]<a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.run_pre_network_filter_for_inference"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.run_pre_network_filter_for_inference" title="Permalink to this definition">¶</a></dt>
 <dd><p>Run filters which where defined for being applied right before using the state for inference.</p>
 <dl class="field-list simple">
 <dt class="field-odd">Parameters</dt>
@@ -589,7 +620,7 @@ Should only be implemented for off-policy RL algorithms.</p>

 <dl class="method">
 <dt id="rl_coach.agents.agent.Agent.save_checkpoint">
-<code class="descname">save_checkpoint</code><span class="sig-paren">(</span><em>checkpoint_prefix: str</em><span class="sig-paren">)</span> → None<a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.save_checkpoint"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.save_checkpoint" title="Permalink to this definition">¶</a></dt>
+<code class="sig-name descname">save_checkpoint</code><span class="sig-paren">(</span><em class="sig-param">checkpoint_prefix: str</em><span class="sig-paren">)</span> → None<a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.save_checkpoint"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.save_checkpoint" title="Permalink to this definition">¶</a></dt>
 <dd><p>Allows agents to store additional information when saving checkpoints.</p>
 <dl class="field-list simple">
 <dt class="field-odd">Parameters</dt>
@@ -603,7 +634,7 @@ Should only be implemented for off-policy RL algorithms.</p>

 <dl class="method">
 <dt id="rl_coach.agents.agent.Agent.set_environment_parameters">
-<code class="descname">set_environment_parameters</code><span class="sig-paren">(</span><em>spaces: rl_coach.spaces.SpacesDefinition</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.set_environment_parameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.set_environment_parameters" title="Permalink to this definition">¶</a></dt>
+<code class="sig-name descname">set_environment_parameters</code><span class="sig-paren">(</span><em class="sig-param">spaces: rl_coach.spaces.SpacesDefinition</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.set_environment_parameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.set_environment_parameters" title="Permalink to this definition">¶</a></dt>
 <dd><p>Sets the parameters that are environment dependent. As a side effect, initializes all the components that are
 dependent on those values, by calling init_environment_dependent_modules</p>
 <dl class="field-list simple">
@@ -618,7 +649,7 @@ dependent on those values, by calling init_environment_dependent_modules</p>

 <dl class="method">
 <dt id="rl_coach.agents.agent.Agent.set_incoming_directive">
-<code class="descname">set_incoming_directive</code><span class="sig-paren">(</span><em>action: Union[int, float, numpy.ndarray, List]</em><span class="sig-paren">)</span> → None<a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.set_incoming_directive"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.set_incoming_directive" title="Permalink to this definition">¶</a></dt>
+<code class="sig-name descname">set_incoming_directive</code><span class="sig-paren">(</span><em class="sig-param">action: Union[int, float, numpy.ndarray, List]</em><span class="sig-paren">)</span> → None<a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.set_incoming_directive"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.set_incoming_directive" title="Permalink to this definition">¶</a></dt>
 <dd><p>Allows setting a directive for the agent to follow. This is useful in hierarchy structures, where the agent
 has another master agent that is controlling it. In such cases, the master agent can define the goals for the
 slave agent, define it’s observation, possible actions, etc. The directive type is defined by the agent
@@ -635,7 +666,7 @@ in-action-space.</p>

 <dl class="method">
 <dt id="rl_coach.agents.agent.Agent.set_session">
-<code class="descname">set_session</code><span class="sig-paren">(</span><em>sess</em><span class="sig-paren">)</span> → None<a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.set_session"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.set_session" title="Permalink to this definition">¶</a></dt>
+<code class="sig-name descname">set_session</code><span class="sig-paren">(</span><em class="sig-param">sess</em><span class="sig-paren">)</span> → None<a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.set_session"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.set_session" title="Permalink to this definition">¶</a></dt>
 <dd><p>Set the deep learning framework session for all the agents in the composite agent</p>
 <dl class="field-list simple">
 <dt class="field-odd">Returns</dt>
@@ -646,7 +677,7 @@ in-action-space.</p>

 <dl class="method">
 <dt id="rl_coach.agents.agent.Agent.setup_logger">
-<code class="descname">setup_logger</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → None<a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.setup_logger"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.setup_logger" title="Permalink to this definition">¶</a></dt>
+<code class="sig-name descname">setup_logger</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → None<a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.setup_logger"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.setup_logger" title="Permalink to this definition">¶</a></dt>
 <dd><p>Setup the logger for the agent</p>
 <dl class="field-list simple">
 <dt class="field-odd">Returns</dt>
@@ -657,7 +688,7 @@ in-action-space.</p>

 <dl class="method">
 <dt id="rl_coach.agents.agent.Agent.sync">
-<code class="descname">sync</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → None<a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.sync"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.sync" title="Permalink to this definition">¶</a></dt>
+<code class="sig-name descname">sync</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → None<a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.sync"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.sync" title="Permalink to this definition">¶</a></dt>
 <dd><p>Sync the global network parameters to local networks</p>
 <dl class="field-list simple">
 <dt class="field-odd">Returns</dt>
@@ -668,7 +699,7 @@ in-action-space.</p>
|
||||
|
||||
<dl class="method">
|
||||
<dt id="rl_coach.agents.agent.Agent.train">
|
||||
<code class="descname">train</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → float<a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.train"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.train" title="Permalink to this definition">¶</a></dt>
|
||||
<code class="sig-name descname">train</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → float<a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.train"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.train" title="Permalink to this definition">¶</a></dt>
|
||||
<dd><p>Check if a training phase should be done as configured by num_consecutive_playing_steps.
|
||||
If it should, then do several training steps as configured by num_consecutive_training_steps.
|
||||
A single training iteration: Sample a batch, train on it and update target networks.</p>
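<p>As a rough illustration of that flow (a minimal sketch in the spirit of Coach’s control flow, not the exact implementation; the helper names are illustrative):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>
# Sketch of the training cadence described above. The batch-sampling and
# target-update helpers are assumed to follow Coach's conventions.
def train(self) -> float:
    total_loss = 0.0
    # called once every `num_consecutive_playing_steps` acting steps
    for _ in range(self.ap.algorithm.num_consecutive_training_steps):
        batch = self.memory.sample(self.ap.network_wrappers['main'].batch_size)
        total_loss += self.learn_from_batch(batch)     # one gradient step
        self.networks['main'].update_target_network()  # soft/periodic target update
    return total_loss
</pre></div></div>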

@@ -681,7 +712,7 @@ A single training iteration: Sample a batch, train on it and update target netwo

<dl class="method">
<dt id="rl_coach.agents.agent.Agent.update_log">
<code class="descname">update_log</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → None<a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.update_log"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.update_log" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">update_log</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → None<a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.update_log"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.update_log" title="Permalink to this definition">¶</a></dt>
<dd><p>Updates the episodic log file with all the signal values from the most recent episode.
Additional signals for logging can be set by creating a new signal using self.register_signal,
and then updating it with some internal agent values.</p>

@@ -694,7 +725,7 @@ and then updating it with some internal agent values.</p>

<dl class="method">
<dt id="rl_coach.agents.agent.Agent.update_step_in_episode_log">
<code class="descname">update_step_in_episode_log</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → None<a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.update_step_in_episode_log"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.update_step_in_episode_log" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">update_step_in_episode_log</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → None<a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.update_step_in_episode_log"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.update_step_in_episode_log" title="Permalink to this definition">¶</a></dt>
<dd><p>Updates the in-episode log file with all the signal values from the most recent step.</p>
<dl class="field-list simple">
<dt class="field-odd">Returns</dt>

@@ -705,7 +736,7 @@ and then updating it with some internal agent values.</p>

<dl class="method">
<dt id="rl_coach.agents.agent.Agent.update_transition_before_adding_to_replay_buffer">
<code class="descname">update_transition_before_adding_to_replay_buffer</code><span class="sig-paren">(</span><em>transition: rl_coach.core_types.Transition</em><span class="sig-paren">)</span> → rl_coach.core_types.Transition<a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.update_transition_before_adding_to_replay_buffer"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.update_transition_before_adding_to_replay_buffer" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">update_transition_before_adding_to_replay_buffer</code><span class="sig-paren">(</span><em class="sig-param">transition: rl_coach.core_types.Transition</em><span class="sig-paren">)</span> → rl_coach.core_types.Transition<a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.update_transition_before_adding_to_replay_buffer"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.update_transition_before_adding_to_replay_buffer" title="Permalink to this definition">¶</a></dt>
<dd><p>Allows agents to update the transition just before adding it to the replay buffer.
Can be useful for agents that want to tweak the reward, termination signal, etc.</p>
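<p>For example, an agent subclass might clip rewards here before they are stored (a hypothetical override, shown only to illustrate the hook):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>
from rl_coach.agents.agent import Agent
from rl_coach.core_types import Transition

class RewardClippingAgent(Agent):
    # hypothetical subclass: clamp rewards to [-1, 1] before storage
    def update_transition_before_adding_to_replay_buffer(self, transition: Transition) -> Transition:
        transition.reward = max(-1.0, min(1.0, transition.reward))
        return transition
</pre></div></div>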
<dl class="field-list simple">

@@ -117,6 +117,7 @@

<li class="toctree-l2"><a class="reference internal" href="../imitation/cil.html">Conditional Imitation Learning</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/cppo.html">Clipped Proximal Policy Optimization</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/ddpg.html">Deep Deterministic Policy Gradient</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/td3.html">Twin Delayed Deep Deterministic Policy Gradient</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/sac.html">Soft Actor-Critic</a></li>
<li class="toctree-l2 current"><a class="current reference internal" href="#">Direct Future Prediction</a><ul>
<li class="toctree-l3"><a class="reference internal" href="#network-structure">Network Structure</a></li>

@@ -249,7 +250,7 @@ measurements that were seen in time-steps <span class="math notranslate nohighli

For the actions that were not taken, the targets are the current values.</p>
<dl class="class">
<dt id="rl_coach.agents.dfp_agent.DFPAlgorithmParameters">
<em class="property">class </em><code class="descclassname">rl_coach.agents.dfp_agent.</code><code class="descname">DFPAlgorithmParameters</code><a class="reference internal" href="../../../_modules/rl_coach/agents/dfp_agent.html#DFPAlgorithmParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.dfp_agent.DFPAlgorithmParameters" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.agents.dfp_agent.</code><code class="sig-name descname">DFPAlgorithmParameters</code><a class="reference internal" href="../../../_modules/rl_coach/agents/dfp_agent.html#DFPAlgorithmParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.dfp_agent.DFPAlgorithmParameters" title="Permalink to this definition">¶</a></dt>
<dd><dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">

@@ -125,6 +125,7 @@

<li class="toctree-l2"><a class="reference internal" href="../imitation/cil.html">Conditional Imitation Learning</a></li>
<li class="toctree-l2"><a class="reference internal" href="cppo.html">Clipped Proximal Policy Optimization</a></li>
<li class="toctree-l2"><a class="reference internal" href="ddpg.html">Deep Deterministic Policy Gradient</a></li>
<li class="toctree-l2"><a class="reference internal" href="td3.html">Twin Delayed Deep Deterministic Policy Gradient</a></li>
<li class="toctree-l2"><a class="reference internal" href="sac.html">Soft Actor-Critic</a></li>
<li class="toctree-l2"><a class="reference internal" href="../other/dfp.html">Direct Future Prediction</a></li>
<li class="toctree-l2"><a class="reference internal" href="../value_optimization/double_dqn.html">Double DQN</a></li>
@@ -247,7 +248,7 @@ where <span class="math notranslate nohighlight">\(k\)</span> is <span class="ma
<span class="math notranslate nohighlight">\(L = -\mathop{\mathbb{E}} [log (\pi) \cdot A]\)</span></p>
<dl class="class">
<dt id="rl_coach.agents.actor_critic_agent.ActorCriticAlgorithmParameters">
<em class="property">class </em><code class="descclassname">rl_coach.agents.actor_critic_agent.</code><code class="descname">ActorCriticAlgorithmParameters</code><a class="reference internal" href="../../../_modules/rl_coach/agents/actor_critic_agent.html#ActorCriticAlgorithmParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.actor_critic_agent.ActorCriticAlgorithmParameters" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.agents.actor_critic_agent.</code><code class="sig-name descname">ActorCriticAlgorithmParameters</code><a class="reference internal" href="../../../_modules/rl_coach/agents/actor_critic_agent.html#ActorCriticAlgorithmParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.actor_critic_agent.ActorCriticAlgorithmParameters" title="Permalink to this definition">¶</a></dt>
<dd><dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">

@@ -125,6 +125,7 @@

<li class="toctree-l2"><a class="reference internal" href="../imitation/cil.html">Conditional Imitation Learning</a></li>
<li class="toctree-l2"><a class="reference internal" href="cppo.html">Clipped Proximal Policy Optimization</a></li>
<li class="toctree-l2"><a class="reference internal" href="ddpg.html">Deep Deterministic Policy Gradient</a></li>
<li class="toctree-l2"><a class="reference internal" href="td3.html">Twin Delayed Deep Deterministic Policy Gradient</a></li>
<li class="toctree-l2"><a class="reference internal" href="sac.html">Soft Actor-Critic</a></li>
<li class="toctree-l2"><a class="reference internal" href="../other/dfp.html">Direct Future Prediction</a></li>
<li class="toctree-l2"><a class="reference internal" href="../value_optimization/double_dqn.html">Double DQN</a></li>

@@ -279,7 +280,7 @@ The goal of the trust region update is to the difference between the updated pol

</ol>
<dl class="class">
<dt id="rl_coach.agents.acer_agent.ACERAlgorithmParameters">
<em class="property">class </em><code class="descclassname">rl_coach.agents.acer_agent.</code><code class="descname">ACERAlgorithmParameters</code><a class="reference internal" href="../../../_modules/rl_coach/agents/acer_agent.html#ACERAlgorithmParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.acer_agent.ACERAlgorithmParameters" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.agents.acer_agent.</code><code class="sig-name descname">ACERAlgorithmParameters</code><a class="reference internal" href="../../../_modules/rl_coach/agents/acer_agent.html#ACERAlgorithmParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.acer_agent.ACERAlgorithmParameters" title="Permalink to this definition">¶</a></dt>
<dd><dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
@@ -125,6 +125,7 @@
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="ddpg.html">Deep Deterministic Policy Gradient</a></li>
<li class="toctree-l2"><a class="reference internal" href="td3.html">Twin Delayed Deep Deterministic Policy Gradient</a></li>
<li class="toctree-l2"><a class="reference internal" href="sac.html">Soft Actor-Critic</a></li>
<li class="toctree-l2"><a class="reference internal" href="../other/dfp.html">Direct Future Prediction</a></li>
<li class="toctree-l2"><a class="reference internal" href="../value_optimization/double_dqn.html">Double DQN</a></li>

@@ -252,7 +253,7 @@ clipped surrogate loss:</p>

</ol>
<dl class="class">
<dt id="rl_coach.agents.clipped_ppo_agent.ClippedPPOAlgorithmParameters">
<em class="property">class </em><code class="descclassname">rl_coach.agents.clipped_ppo_agent.</code><code class="descname">ClippedPPOAlgorithmParameters</code><a class="reference internal" href="../../../_modules/rl_coach/agents/clipped_ppo_agent.html#ClippedPPOAlgorithmParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.clipped_ppo_agent.ClippedPPOAlgorithmParameters" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.agents.clipped_ppo_agent.</code><code class="sig-name descname">ClippedPPOAlgorithmParameters</code><a class="reference internal" href="../../../_modules/rl_coach/agents/clipped_ppo_agent.html#ClippedPPOAlgorithmParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.clipped_ppo_agent.ClippedPPOAlgorithmParameters" title="Permalink to this definition">¶</a></dt>
<dd><dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
@@ -37,7 +37,7 @@
<link rel="stylesheet" href="../../../_static/css/custom.css" type="text/css" />
<link rel="index" title="Index" href="../../../genindex.html" />
<link rel="search" title="Search" href="../../../search.html" />
<link rel="next" title="Soft Actor-Critic" href="sac.html" />
<link rel="next" title="Twin Delayed Deep Deterministic Policy Gradient" href="td3.html" />
<link rel="prev" title="Clipped Proximal Policy Optimization" href="cppo.html" />
<link href="../../../_static/css/custom.css" rel="stylesheet" type="text/css">

@@ -125,6 +125,7 @@

</li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="td3.html">Twin Delayed Deep Deterministic Policy Gradient</a></li>
<li class="toctree-l2"><a class="reference internal" href="sac.html">Soft Actor-Critic</a></li>
<li class="toctree-l2"><a class="reference internal" href="../other/dfp.html">Direct Future Prediction</a></li>
<li class="toctree-l2"><a class="reference internal" href="../value_optimization/double_dqn.html">Double DQN</a></li>
@@ -257,7 +258,7 @@ given <span class="math notranslate nohighlight">\(\nabla_a Q(s,a)\)</span>. Fin
<p>After every training step, do a soft update of the critic and actor target networks’ weights from the online networks.</p>
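<p>Here “soft update” refers to the usual Polyak averaging, with the mixing rate set by <code class="docutils literal notranslate"><span class="pre">rate_for_copying_weights_to_target</span></code> (denoted <span class="math notranslate nohighlight">\(\tau\)</span>): <span class="math notranslate nohighlight">\(\theta^{target} \leftarrow \tau \theta^{online} + (1-\tau)\theta^{target}\)</span>.</p>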
<dl class="class">
<dt id="rl_coach.agents.ddpg_agent.DDPGAlgorithmParameters">
<em class="property">class </em><code class="descclassname">rl_coach.agents.ddpg_agent.</code><code class="descname">DDPGAlgorithmParameters</code><a class="reference internal" href="../../../_modules/rl_coach/agents/ddpg_agent.html#DDPGAlgorithmParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.ddpg_agent.DDPGAlgorithmParameters" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.agents.ddpg_agent.</code><code class="sig-name descname">DDPGAlgorithmParameters</code><a class="reference internal" href="../../../_modules/rl_coach/agents/ddpg_agent.html#DDPGAlgorithmParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.ddpg_agent.DDPGAlgorithmParameters" title="Permalink to this definition">¶</a></dt>
<dd><dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
@@ -297,7 +298,7 @@ values. If set to False, the terminal states reward will be taken as the target

<div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
<a href="sac.html" class="btn btn-neutral float-right" title="Soft Actor-Critic" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right"></span></a>
<a href="td3.html" class="btn btn-neutral float-right" title="Twin Delayed Deep Deterministic Policy Gradient" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right"></span></a>
<a href="cppo.html" class="btn btn-neutral float-left" title="Clipped Proximal Policy Optimization" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left"></span> Previous</a>
@@ -117,6 +117,7 @@
<li class="toctree-l2"><a class="reference internal" href="../imitation/cil.html">Conditional Imitation Learning</a></li>
<li class="toctree-l2"><a class="reference internal" href="cppo.html">Clipped Proximal Policy Optimization</a></li>
<li class="toctree-l2"><a class="reference internal" href="ddpg.html">Deep Deterministic Policy Gradient</a></li>
<li class="toctree-l2"><a class="reference internal" href="td3.html">Twin Delayed Deep Deterministic Policy Gradient</a></li>
<li class="toctree-l2"><a class="reference internal" href="sac.html">Soft Actor-Critic</a></li>
<li class="toctree-l2"><a class="reference internal" href="../other/dfp.html">Direct Future Prediction</a></li>
<li class="toctree-l2"><a class="reference internal" href="../value_optimization/double_dqn.html">Double DQN</a></li>

@@ -251,7 +252,7 @@ serves the same purpose - reducing the update variance. After accumulating gradi

the gradients are then applied to the network.</p>
<dl class="class">
<dt id="rl_coach.agents.policy_gradients_agent.PolicyGradientAlgorithmParameters">
<em class="property">class </em><code class="descclassname">rl_coach.agents.policy_gradients_agent.</code><code class="descname">PolicyGradientAlgorithmParameters</code><a class="reference internal" href="../../../_modules/rl_coach/agents/policy_gradients_agent.html#PolicyGradientAlgorithmParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.policy_gradients_agent.PolicyGradientAlgorithmParameters" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.agents.policy_gradients_agent.</code><code class="sig-name descname">PolicyGradientAlgorithmParameters</code><a class="reference internal" href="../../../_modules/rl_coach/agents/policy_gradients_agent.html#PolicyGradientAlgorithmParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.policy_gradients_agent.PolicyGradientAlgorithmParameters" title="Permalink to this definition">¶</a></dt>
<dd><dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
@@ -117,6 +117,7 @@
<li class="toctree-l2"><a class="reference internal" href="../imitation/cil.html">Conditional Imitation Learning</a></li>
<li class="toctree-l2"><a class="reference internal" href="cppo.html">Clipped Proximal Policy Optimization</a></li>
<li class="toctree-l2"><a class="reference internal" href="ddpg.html">Deep Deterministic Policy Gradient</a></li>
<li class="toctree-l2"><a class="reference internal" href="td3.html">Twin Delayed Deep Deterministic Policy Gradient</a></li>
<li class="toctree-l2"><a class="reference internal" href="sac.html">Soft Actor-Critic</a></li>
<li class="toctree-l2"><a class="reference internal" href="../other/dfp.html">Direct Future Prediction</a></li>
<li class="toctree-l2"><a class="reference internal" href="../value_optimization/double_dqn.html">Double DQN</a></li>

@@ -253,7 +254,7 @@ increase the penalty, if it went too low, reduce it. Otherwise, leave it unchang

</ol>
<dl class="class">
<dt id="rl_coach.agents.ppo_agent.PPOAlgorithmParameters">
<em class="property">class </em><code class="descclassname">rl_coach.agents.ppo_agent.</code><code class="descname">PPOAlgorithmParameters</code><a class="reference internal" href="../../../_modules/rl_coach/agents/ppo_agent.html#PPOAlgorithmParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.ppo_agent.PPOAlgorithmParameters" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.agents.ppo_agent.</code><code class="sig-name descname">PPOAlgorithmParameters</code><a class="reference internal" href="../../../_modules/rl_coach/agents/ppo_agent.html#PPOAlgorithmParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.ppo_agent.PPOAlgorithmParameters" title="Permalink to this definition">¶</a></dt>
<dd><dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
@@ -38,7 +38,7 @@
<link rel="index" title="Index" href="../../../genindex.html" />
<link rel="search" title="Search" href="../../../search.html" />
<link rel="next" title="Direct Future Prediction" href="../other/dfp.html" />
<link rel="prev" title="Deep Deterministic Policy Gradient" href="ddpg.html" />
<link rel="prev" title="Twin Delayed Deep Deterministic Policy Gradient" href="td3.html" />
<link href="../../../_static/css/custom.css" rel="stylesheet" type="text/css">
</head>
@@ -117,6 +117,7 @@
<li class="toctree-l2"><a class="reference internal" href="../imitation/cil.html">Conditional Imitation Learning</a></li>
<li class="toctree-l2"><a class="reference internal" href="cppo.html">Clipped Proximal Policy Optimization</a></li>
<li class="toctree-l2"><a class="reference internal" href="ddpg.html">Deep Deterministic Policy Gradient</a></li>
<li class="toctree-l2"><a class="reference internal" href="td3.html">Twin Delayed Deep Deterministic Policy Gradient</a></li>
<li class="toctree-l2 current"><a class="current reference internal" href="#">Soft Actor-Critic</a><ul>
<li class="toctree-l3"><a class="reference internal" href="#network-structure">Network Structure</a></li>
<li class="toctree-l3"><a class="reference internal" href="#algorithm-description">Algorithm Description</a><ul>
@@ -258,7 +259,7 @@ from the current policy.</p>
<p>After every training step, do a soft update of the V target network’s weights from the online networks.</p>
<dl class="class">
<dt id="rl_coach.agents.soft_actor_critic_agent.SoftActorCriticAlgorithmParameters">
<em class="property">class </em><code class="descclassname">rl_coach.agents.soft_actor_critic_agent.</code><code class="descname">SoftActorCriticAlgorithmParameters</code><a class="reference internal" href="../../../_modules/rl_coach/agents/soft_actor_critic_agent.html#SoftActorCriticAlgorithmParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.soft_actor_critic_agent.SoftActorCriticAlgorithmParameters" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.agents.soft_actor_critic_agent.</code><code class="sig-name descname">SoftActorCriticAlgorithmParameters</code><a class="reference internal" href="../../../_modules/rl_coach/agents/soft_actor_critic_agent.html#SoftActorCriticAlgorithmParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.soft_actor_critic_agent.SoftActorCriticAlgorithmParameters" title="Permalink to this definition">¶</a></dt>
<dd><dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
@@ -290,7 +291,7 @@ and not sampled from the policy distribution.</p></li>
<a href="../other/dfp.html" class="btn btn-neutral float-right" title="Direct Future Prediction" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right"></span></a>
<a href="ddpg.html" class="btn btn-neutral float-left" title="Deep Deterministic Policy Gradient" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left"></span> Previous</a>
<a href="td3.html" class="btn btn-neutral float-left" title="Twin Delayed Deep Deterministic Policy Gradient" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left"></span> Previous</a>
</div>
docs/components/agents/policy_optimization/td3.html (new file, 347 lines)
@@ -0,0 +1,347 @@
<!DOCTYPE html>
<html class="no-js" lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Twin Delayed Deep Deterministic Policy Gradient — Reinforcement Learning Coach 0.12.0 documentation</title>
<!-- standard theme stylesheets and scripts omitted -->
<link rel="index" title="Index" href="../../../genindex.html" />
<link rel="search" title="Search" href="../../../search.html" />
<link rel="next" title="Soft Actor-Critic" href="sac.html" />
<link rel="prev" title="Deep Deterministic Policy Gradient" href="ddpg.html" />
</head>
<body class="wy-body-for-nav">
<!-- standard Read the Docs navigation sidebar, search form, and breadcrumbs omitted -->
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<div class="section" id="twin-delayed-deep-deterministic-policy-gradient">
<h1>Twin Delayed Deep Deterministic Policy Gradient<a class="headerlink" href="#twin-delayed-deep-deterministic-policy-gradient" title="Permalink to this headline">¶</a></h1>
<p><strong>Actions space:</strong> Continuous</p>
<p><strong>References:</strong> <a class="reference external" href="https://arxiv.org/pdf/1802.09477">Addressing Function Approximation Error in Actor-Critic Methods</a></p>
<div class="section" id="network-structure">
<h2>Network Structure<a class="headerlink" href="#network-structure" title="Permalink to this headline">¶</a></h2>
<img alt="../../../_images/td3.png" class="align-center" src="../../../_images/td3.png" />
</div>
<div class="section" id="algorithm-description">
<h2>Algorithm Description<a class="headerlink" href="#algorithm-description" title="Permalink to this headline">¶</a></h2>
<div class="section" id="choosing-an-action">
<h3>Choosing an action<a class="headerlink" href="#choosing-an-action" title="Permalink to this headline">¶</a></h3>
<p>Pass the current states through the actor network, and get an action mean vector <span class="math notranslate nohighlight">\(\mu\)</span>.
During the training phase, use a continuous exploration policy, such as small zero-mean Gaussian noise,
to add exploration noise to the action. When testing, use the mean vector <span class="math notranslate nohighlight">\(\mu\)</span> as-is.</p>
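<p>In code, action selection amounts to something like the following (a minimal sketch assuming a generic actor network and Gaussian exploration parameters; the names are illustrative, not Coach’s exact API):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>
import numpy as np

def choose_action(actor, state, phase, sigma, action_low, action_high):
    mu = actor.predict(state)                             # action mean vector
    if phase == 'TRAIN':
        mu = mu + np.random.normal(0.0, sigma, mu.shape)  # exploration noise
    return np.clip(mu, action_low, action_high)           # keep action in bounds
</pre></div></div>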
</div>
<div class="section" id="training-the-network">
<h3>Training the network<a class="headerlink" href="#training-the-network" title="Permalink to this headline">¶</a></h3>
<p>Start by sampling a batch of transitions from the experience replay.</p>
<ul>
<li><p>To train the two <strong>critic networks</strong>, use the following targets:</p>
<p><span class="math notranslate nohighlight">\(y_t=r(s_t,a_t )+\gamma \cdot \min_{i=1,2} Q_{i}(s_{t+1},\mu(s_{t+1} )+[\mathcal{N}(0,\,\sigma^{2})]^{MAX\_NOISE}_{MIN\_NOISE})\)</span></p>
<p>First run the actor target network, using the next states as the inputs, and get <span class="math notranslate nohighlight">\(\mu (s_{t+1} )\)</span>. Then, add
clipped Gaussian noise to these actions, and clip the resulting actions to the action space.
Next, run the critic target networks using the next states and <span class="math notranslate nohighlight">\(\mu (s_{t+1} )+[\mathcal{N}(0,\,\sigma^{2})]^{MAX\_NOISE}_{MIN\_NOISE}\)</span>,
and use the minimum of the two critic networks’ predictions to calculate <span class="math notranslate nohighlight">\(y_t\)</span> according to the
equation above. To train the networks, use the current states and actions as the inputs, and <span class="math notranslate nohighlight">\(y_t\)</span>
as the targets.</p>
</li>
<li><p>To train the <strong>actor network</strong>, use the following equation:</p>
<p><span class="math notranslate nohighlight">\(\nabla_{\theta^\mu } J \approx E_{s_t \tilde{} \rho^\beta } [\nabla_a Q_{1}(s,a)|_{s=s_t,a=\mu (s_t ) } \cdot \nabla_{\theta^\mu} \mu(s)|_{s=s_t} ]\)</span></p>
<p>Use the actor’s online network to get the action mean values using the current states as the inputs.
Then, use the first critic’s online network to get the gradients of the critic output with respect to the
action mean values <span class="math notranslate nohighlight">\(\nabla _a Q_{1}(s,a)|_{s=s_t,a=\mu(s_t ) }\)</span>.
Using the chain rule, calculate the gradients of the actor’s output with respect to the actor weights,
given <span class="math notranslate nohighlight">\(\nabla_a Q(s,a)\)</span>. Finally, apply those gradients to the actor network.</p>
<p>The actor is trained at a slower cadence than the critics, so that the critics can better fit the
current policy before the critic is exercised to train the actor.
Following the same delayed cadence, do a soft update of the critic and actor target networks’ weights
from the online networks. Both updates are sketched in code after this list.</p>
</li>
</ul>
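<p>The following sketch ties both updates together (illustrative NumPy-style pseudocode under the equations above; the network objects and their <code class="docutils literal notranslate"><span class="pre">predict</span></code>/<code class="docutils literal notranslate"><span class="pre">train</span></code> methods are assumptions, not Coach’s exact internals):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>
import numpy as np

def td3_training_step(batch, step, gamma, sigma, noise_clip, d, tau,
                      actor, actor_target, critics, critic_targets):
    s, a, r, s_next = batch
    # target action with clipped Gaussian smoothing noise, clipped to the action space
    noise = np.clip(np.random.normal(0.0, sigma, a.shape), -noise_clip, noise_clip)
    a_next = np.clip(actor_target.predict(s_next) + noise, -1.0, 1.0)
    # clipped double-Q target: minimum over the two target critics
    q_next = np.minimum(*[c.predict(s_next, a_next) for c in critic_targets])
    y = r + gamma * q_next                 # (terminal-state handling omitted)
    for c in critics:
        c.train(s, a, y)                   # regress both critics towards y
    if step % d == 0:                      # delayed policy update
        # hypothetical helper applying the deterministic policy gradient on Q_1
        actor.train_on_dpg_gradient(critics[0], s)
        for online, target in zip([actor] + critics, [actor_target] + critic_targets):
            target.soft_update_from(online, tau)  # Polyak averaging (hypothetical helper)
</pre></div></div>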
<dl class="class">
<dt id="rl_coach.agents.td3_agent.TD3AlgorithmParameters">
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.agents.td3_agent.</code><code class="sig-name descname">TD3AlgorithmParameters</code><a class="reference internal" href="../../../_modules/rl_coach/agents/td3_agent.html#TD3AlgorithmParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.td3_agent.TD3AlgorithmParameters" title="Permalink to this definition">¶</a></dt>
<dd><dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>num_steps_between_copying_online_weights_to_target</strong> – (StepMethod)
The number of steps between copying the online network weights to the target network weights.</p></li>
<li><p><strong>rate_for_copying_weights_to_target</strong> – (float)
When copying the online network weights to the target network weights, a soft update will be used, which
weights the new online network weights by rate_for_copying_weights_to_target</p></li>
<li><p><strong>num_consecutive_playing_steps</strong> – (StepMethod)
The number of consecutive steps to act between every two training iterations</p></li>
<li><p><strong>use_target_network_for_evaluation</strong> – (bool)
If set to True, the target network will be used for predicting the actions when choosing actions to act.
Since the target network weights change more slowly, the predicted actions will be more consistent.</p></li>
<li><p><strong>action_penalty</strong> – (float)
The amount by which to penalize the network on high action feature (pre-activation) values.
This can prevent the action features from saturating the tanh activation function, and therefore prevent the
gradients from becoming very low.</p></li>
<li><p><strong>clip_critic_targets</strong> – (Tuple[float, float] or None)
The range to clip the critic target to in order to prevent overestimation of the action values.</p></li>
<li><p><strong>use_non_zero_discount_for_terminal_states</strong> – (bool)
If set to True, the discount factor will be used for terminal states to bootstrap the next predicted state
values. If set to False, the terminal states reward will be taken as the target return for the network.</p></li>
</ul>
</dd>
</dl>
</dd></dl>
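<p>In a preset, these values are typically tweaked on the agent’s parameter container, e.g. (a sketch assuming a <code class="docutils literal notranslate"><span class="pre">TD3AgentParameters</span></code> container following Coach’s usual agent-parameters pattern):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>
from rl_coach.agents.td3_agent import TD3AgentParameters  # assumed container class
from rl_coach.core_types import EnvironmentSteps

agent_params = TD3AgentParameters()
# act one environment step between training iterations
agent_params.algorithm.num_consecutive_playing_steps = EnvironmentSteps(1)
agent_params.algorithm.rate_for_copying_weights_to_target = 0.005  # tau
agent_params.algorithm.action_penalty = 0.0
</pre></div></div>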
</div>
</div>
</div>
</div>
</div>
<footer>
<!-- standard Read the Docs footer navigation, copyright notice, and theme scripts omitted -->
</footer>
</body>
</html>
@@ -126,6 +126,7 @@
<li class="toctree-l2"><a class="reference internal" href="../imitation/cil.html">Conditional Imitation Learning</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/cppo.html">Clipped Proximal Policy Optimization</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/ddpg.html">Deep Deterministic Policy Gradient</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/td3.html">Twin Delayed Deep Deterministic Policy Gradient</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/sac.html">Soft Actor-Critic</a></li>
<li class="toctree-l2"><a class="reference internal" href="../other/dfp.html">Direct Future Prediction</a></li>
<li class="toctree-l2"><a class="reference internal" href="double_dqn.html">Double DQN</a></li>
||||
@@ -124,6 +124,7 @@
<li class="toctree-l2"><a class="reference internal" href="../imitation/cil.html">Conditional Imitation Learning</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/cppo.html">Clipped Proximal Policy Optimization</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/ddpg.html">Deep Deterministic Policy Gradient</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/td3.html">Twin Delayed Deep Deterministic Policy Gradient</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/sac.html">Soft Actor-Critic</a></li>
<li class="toctree-l2"><a class="reference internal" href="../other/dfp.html">Direct Future Prediction</a></li>
<li class="toctree-l2"><a class="reference internal" href="double_dqn.html">Double DQN</a></li>
@@ -244,7 +245,7 @@ probability distribution. Only the target of the actions that were actually ta
</ol>
<dl class="class">
<dt id="rl_coach.agents.categorical_dqn_agent.CategoricalDQNAlgorithmParameters">
<em class="property">class </em><code class="descclassname">rl_coach.agents.categorical_dqn_agent.</code><code class="descname">CategoricalDQNAlgorithmParameters</code><a class="reference internal" href="../../../_modules/rl_coach/agents/categorical_dqn_agent.html#CategoricalDQNAlgorithmParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.categorical_dqn_agent.CategoricalDQNAlgorithmParameters" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.agents.categorical_dqn_agent.</code><code class="sig-name descname">CategoricalDQNAlgorithmParameters</code><a class="reference internal" href="../../../_modules/rl_coach/agents/categorical_dqn_agent.html#CategoricalDQNAlgorithmParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.categorical_dqn_agent.CategoricalDQNAlgorithmParameters" title="Permalink to this definition">¶</a></dt>
<dd><dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
@@ -117,6 +117,7 @@
<li class="toctree-l2"><a class="reference internal" href="../imitation/cil.html">Conditional Imitation Learning</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/cppo.html">Clipped Proximal Policy Optimization</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/ddpg.html">Deep Deterministic Policy Gradient</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/td3.html">Twin Delayed Deep Deterministic Policy Gradient</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/sac.html">Soft Actor-Critic</a></li>
<li class="toctree-l2"><a class="reference internal" href="../other/dfp.html">Direct Future Prediction</a></li>
<li class="toctree-l2 current"><a class="current reference internal" href="#">Double DQN</a><ul>
@@ -117,6 +117,7 @@
<li class="toctree-l2"><a class="reference internal" href="../imitation/cil.html">Conditional Imitation Learning</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/cppo.html">Clipped Proximal Policy Optimization</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/ddpg.html">Deep Deterministic Policy Gradient</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/td3.html">Twin Delayed Deep Deterministic Policy Gradient</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/sac.html">Soft Actor-Critic</a></li>
<li class="toctree-l2"><a class="reference internal" href="../other/dfp.html">Direct Future Prediction</a></li>
<li class="toctree-l2"><a class="reference internal" href="double_dqn.html">Double DQN</a></li>
@@ -243,7 +244,7 @@ Set those values as the targets for the actions that were not actually played.</
</ol>
<dl class="class">
<dt id="rl_coach.agents.dqn_agent.DQNAlgorithmParameters">
<em class="property">class </em><code class="descclassname">rl_coach.agents.dqn_agent.</code><code class="descname">DQNAlgorithmParameters</code><a class="reference internal" href="../../../_modules/rl_coach/agents/dqn_agent.html#DQNAlgorithmParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.dqn_agent.DQNAlgorithmParameters" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.agents.dqn_agent.</code><code class="sig-name descname">DQNAlgorithmParameters</code><a class="reference internal" href="../../../_modules/rl_coach/agents/dqn_agent.html#DQNAlgorithmParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.dqn_agent.DQNAlgorithmParameters" title="Permalink to this definition">¶</a></dt>
<dd></dd></dl>

</div>
@@ -117,6 +117,7 @@
<li class="toctree-l2"><a class="reference internal" href="../imitation/cil.html">Conditional Imitation Learning</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/cppo.html">Clipped Proximal Policy Optimization</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/ddpg.html">Deep Deterministic Policy Gradient</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/td3.html">Twin Delayed Deep Deterministic Policy Gradient</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/sac.html">Soft Actor-Critic</a></li>
<li class="toctree-l2"><a class="reference internal" href="../other/dfp.html">Direct Future Prediction</a></li>
<li class="toctree-l2"><a class="reference internal" href="double_dqn.html">Double DQN</a></li>
@@ -117,6 +117,7 @@
<li class="toctree-l2"><a class="reference internal" href="../imitation/cil.html">Conditional Imitation Learning</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/cppo.html">Clipped Proximal Policy Optimization</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/ddpg.html">Deep Deterministic Policy Gradient</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/td3.html">Twin Delayed Deep Deterministic Policy Gradient</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/sac.html">Soft Actor-Critic</a></li>
<li class="toctree-l2"><a class="reference internal" href="../other/dfp.html">Direct Future Prediction</a></li>
<li class="toctree-l2"><a class="reference internal" href="double_dqn.html">Double DQN</a></li>
@@ -240,7 +241,7 @@
Once in every few thousand steps, copy the weights from the online network to the target network.</p>
<dl class="class">
<dt id="rl_coach.agents.mmc_agent.MixedMonteCarloAlgorithmParameters">
<em class="property">class </em><code class="descclassname">rl_coach.agents.mmc_agent.</code><code class="descname">MixedMonteCarloAlgorithmParameters</code><a class="reference internal" href="../../../_modules/rl_coach/agents/mmc_agent.html#MixedMonteCarloAlgorithmParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.mmc_agent.MixedMonteCarloAlgorithmParameters" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.agents.mmc_agent.</code><code class="sig-name descname">MixedMonteCarloAlgorithmParameters</code><a class="reference internal" href="../../../_modules/rl_coach/agents/mmc_agent.html#MixedMonteCarloAlgorithmParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.mmc_agent.MixedMonteCarloAlgorithmParameters" title="Permalink to this definition">¶</a></dt>
<dd><dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><p><strong>monte_carlo_mixing_rate</strong> – (float)
@@ -117,6 +117,7 @@
<li class="toctree-l2"><a class="reference internal" href="../imitation/cil.html">Conditional Imitation Learning</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/cppo.html">Clipped Proximal Policy Optimization</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/ddpg.html">Deep Deterministic Policy Gradient</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/td3.html">Twin Delayed Deep Deterministic Policy Gradient</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/sac.html">Soft Actor-Critic</a></li>
<li class="toctree-l2"><a class="reference internal" href="../other/dfp.html">Direct Future Prediction</a></li>
<li class="toctree-l2"><a class="reference internal" href="double_dqn.html">Double DQN</a></li>
@@ -242,7 +243,7 @@ where <span class="math notranslate nohighlight">\(k\)</span> is <span class="ma
</ol>
<dl class="class">
<dt id="rl_coach.agents.n_step_q_agent.NStepQAlgorithmParameters">
<em class="property">class </em><code class="descclassname">rl_coach.agents.n_step_q_agent.</code><code class="descname">NStepQAlgorithmParameters</code><a class="reference internal" href="../../../_modules/rl_coach/agents/n_step_q_agent.html#NStepQAlgorithmParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.n_step_q_agent.NStepQAlgorithmParameters" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.agents.n_step_q_agent.</code><code class="sig-name descname">NStepQAlgorithmParameters</code><a class="reference internal" href="../../../_modules/rl_coach/agents/n_step_q_agent.html#NStepQAlgorithmParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.n_step_q_agent.NStepQAlgorithmParameters" title="Permalink to this definition">¶</a></dt>
<dd><dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
@@ -117,6 +117,7 @@
|
||||
<li class="toctree-l2"><a class="reference internal" href="../imitation/cil.html">Conditional Imitation Learning</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/cppo.html">Clipped Proximal Policy Optimization</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/ddpg.html">Deep Deterministic Policy Gradient</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/td3.html">Twin Delayed Deep Deterministic Policy Gradient</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/sac.html">Soft Actor-Critic</a></li>
<li class="toctree-l2"><a class="reference internal" href="../other/dfp.html">Direct Future Prediction</a></li>
<li class="toctree-l2"><a class="reference internal" href="double_dqn.html">Double DQN</a></li>
@@ -243,7 +244,7 @@ and <span class="math notranslate nohighlight">\(y_t\)</span> as the targets.
After every training step, use a soft update in order to copy the weights from the online network to the target network.</p>
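<p>A minimal sketch of the soft (Polyak-averaging) update described above, assuming plain numpy weight lists and an illustrative mixing rate tau:</p>
<pre>
def soft_update(online_weights, target_weights, tau=0.001):
    # Per weight tensor: target = tau * online + (1 - tau) * target
    for i, w in enumerate(online_weights):
        target_weights[i] = tau * w + (1.0 - tau) * target_weights[i]
</pre>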
<dl class="class">
<dt id="rl_coach.agents.naf_agent.NAFAlgorithmParameters">
<em class="property">class </em><code class="descclassname">rl_coach.agents.naf_agent.</code><code class="descname">NAFAlgorithmParameters</code><a class="reference internal" href="../../../_modules/rl_coach/agents/naf_agent.html#NAFAlgorithmParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.naf_agent.NAFAlgorithmParameters" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.agents.naf_agent.</code><code class="sig-name descname">NAFAlgorithmParameters</code><a class="reference internal" href="../../../_modules/rl_coach/agents/naf_agent.html#NAFAlgorithmParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.naf_agent.NAFAlgorithmParameters" title="Permalink to this definition">¶</a></dt>
<dd></dd></dl>

</div>

@@ -117,6 +117,7 @@
<li class="toctree-l2"><a class="reference internal" href="../imitation/cil.html">Conditional Imitation Learning</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/cppo.html">Clipped Proximal Policy Optimization</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/ddpg.html">Deep Deterministic Policy Gradient</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/td3.html">Twin Delayed Deep Deterministic Policy Gradient</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/sac.html">Soft Actor-Critic</a></li>
<li class="toctree-l2"><a class="reference internal" href="../other/dfp.html">Direct Future Prediction</a></li>
<li class="toctree-l2"><a class="reference internal" href="double_dqn.html">Double DQN</a></li>
@@ -258,7 +259,7 @@ the network if necessary:
<span class="math notranslate nohighlight">\(y_t=\sum_{j=0}^{N-1}\gamma^j r(s_{t+j},a_{t+j} ) +\gamma^N max_a Q(s_{t+N},a)\)</span></p>
<dl class="class">
<dt id="rl_coach.agents.nec_agent.NECAlgorithmParameters">
<em class="property">class </em><code class="descclassname">rl_coach.agents.nec_agent.</code><code class="descname">NECAlgorithmParameters</code><a class="reference internal" href="../../../_modules/rl_coach/agents/nec_agent.html#NECAlgorithmParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.nec_agent.NECAlgorithmParameters" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.agents.nec_agent.</code><code class="sig-name descname">NECAlgorithmParameters</code><a class="reference internal" href="../../../_modules/rl_coach/agents/nec_agent.html#NECAlgorithmParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.nec_agent.NECAlgorithmParameters" title="Permalink to this definition">¶</a></dt>
<dd><dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">

@@ -117,6 +117,7 @@
<li class="toctree-l2"><a class="reference internal" href="../imitation/cil.html">Conditional Imitation Learning</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/cppo.html">Clipped Proximal Policy Optimization</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/ddpg.html">Deep Deterministic Policy Gradient</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/td3.html">Twin Delayed Deep Deterministic Policy Gradient</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/sac.html">Soft Actor-Critic</a></li>
<li class="toctree-l2"><a class="reference internal" href="../other/dfp.html">Direct Future Prediction</a></li>
<li class="toctree-l2"><a class="reference internal" href="double_dqn.html">Double DQN</a></li>
@@ -251,7 +252,7 @@ has the highest predicted <span class="math notranslate nohighlight">\(Q\)</span
</ol>
<dl class="class">
<dt id="rl_coach.agents.pal_agent.PALAlgorithmParameters">
<em class="property">class </em><code class="descclassname">rl_coach.agents.pal_agent.</code><code class="descname">PALAlgorithmParameters</code><a class="reference internal" href="../../../_modules/rl_coach/agents/pal_agent.html#PALAlgorithmParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.pal_agent.PALAlgorithmParameters" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.agents.pal_agent.</code><code class="sig-name descname">PALAlgorithmParameters</code><a class="reference internal" href="../../../_modules/rl_coach/agents/pal_agent.html#PALAlgorithmParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.pal_agent.PALAlgorithmParameters" title="Permalink to this definition">¶</a></dt>
<dd><dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">

@@ -117,6 +117,7 @@
<li class="toctree-l2"><a class="reference internal" href="../imitation/cil.html">Conditional Imitation Learning</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/cppo.html">Clipped Proximal Policy Optimization</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/ddpg.html">Deep Deterministic Policy Gradient</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/td3.html">Twin Delayed Deep Deterministic Policy Gradient</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/sac.html">Soft Actor-Critic</a></li>
<li class="toctree-l2"><a class="reference internal" href="../other/dfp.html">Direct Future Prediction</a></li>
<li class="toctree-l2"><a class="reference internal" href="double_dqn.html">Double DQN</a></li>
@@ -241,7 +242,7 @@ quantile locations. Only the targets of the actions that were actually taken are
</ol>
<dl class="class">
<dt id="rl_coach.agents.qr_dqn_agent.QuantileRegressionDQNAlgorithmParameters">
<em class="property">class </em><code class="descclassname">rl_coach.agents.qr_dqn_agent.</code><code class="descname">QuantileRegressionDQNAlgorithmParameters</code><a class="reference internal" href="../../../_modules/rl_coach/agents/qr_dqn_agent.html#QuantileRegressionDQNAlgorithmParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.qr_dqn_agent.QuantileRegressionDQNAlgorithmParameters" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.agents.qr_dqn_agent.</code><code class="sig-name descname">QuantileRegressionDQNAlgorithmParameters</code><a class="reference internal" href="../../../_modules/rl_coach/agents/qr_dqn_agent.html#QuantileRegressionDQNAlgorithmParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.qr_dqn_agent.QuantileRegressionDQNAlgorithmParameters" title="Permalink to this definition">¶</a></dt>
<dd><dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">

@@ -117,6 +117,7 @@
<li class="toctree-l2"><a class="reference internal" href="../imitation/cil.html">Conditional Imitation Learning</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/cppo.html">Clipped Proximal Policy Optimization</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/ddpg.html">Deep Deterministic Policy Gradient</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/td3.html">Twin Delayed Deep Deterministic Policy Gradient</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/sac.html">Soft Actor-Critic</a></li>
<li class="toctree-l2"><a class="reference internal" href="../other/dfp.html">Direct Future Prediction</a></li>
<li class="toctree-l2"><a class="reference internal" href="double_dqn.html">Double DQN</a></li>
@@ -256,7 +257,7 @@ using the KL divergence loss that is returned from the network.</p></li>
</ol>
<dl class="class">
<dt id="rl_coach.agents.rainbow_dqn_agent.RainbowDQNAlgorithmParameters">
<em class="property">class </em><code class="descclassname">rl_coach.agents.rainbow_dqn_agent.</code><code class="descname">RainbowDQNAlgorithmParameters</code><a class="reference internal" href="../../../_modules/rl_coach/agents/rainbow_dqn_agent.html#RainbowDQNAlgorithmParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.rainbow_dqn_agent.RainbowDQNAlgorithmParameters" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.agents.rainbow_dqn_agent.</code><code class="sig-name descname">RainbowDQNAlgorithmParameters</code><a class="reference internal" href="../../../_modules/rl_coach/agents/rainbow_dqn_agent.html#RainbowDQNAlgorithmParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.rainbow_dqn_agent.RainbowDQNAlgorithmParameters" title="Permalink to this definition">¶</a></dt>
<dd><dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">

@@ -196,7 +196,7 @@ own components under a dedicated directory. For example, tensorflow components w
parts that are implemented using TensorFlow.</p>
<dl class="class">
<dt id="rl_coach.base_parameters.NetworkParameters">
<em class="property">class </em><code class="descclassname">rl_coach.base_parameters.</code><code class="descname">NetworkParameters</code><span class="sig-paren">(</span><em>force_cpu=False</em>, <em>async_training=False</em>, <em>shared_optimizer=True</em>, <em>scale_down_gradients_by_number_of_workers_for_sync_training=True</em>, <em>clip_gradients=None</em>, <em>gradients_clipping_method=<GradientClippingMethod.ClipByGlobalNorm: 0></em>, <em>l2_regularization=0</em>, <em>learning_rate=0.00025</em>, <em>learning_rate_decay_rate=0</em>, <em>learning_rate_decay_steps=0</em>, <em>input_embedders_parameters={}</em>, <em>embedding_merger_type=<EmbeddingMergerType.Concat: 0></em>, <em>middleware_parameters=None</em>, <em>heads_parameters=[]</em>, <em>use_separate_networks_per_head=False</em>, <em>optimizer_type='Adam'</em>, <em>optimizer_epsilon=0.0001</em>, <em>adam_optimizer_beta1=0.9</em>, <em>adam_optimizer_beta2=0.99</em>, <em>rms_prop_optimizer_decay=0.9</em>, <em>batch_size=32</em>, <em>replace_mse_with_huber_loss=False</em>, <em>create_target_network=False</em>, <em>tensorflow_support=True</em>, <em>softmax_temperature=1</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/base_parameters.html#NetworkParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.base_parameters.NetworkParameters" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.base_parameters.</code><code class="sig-name descname">NetworkParameters</code><span class="sig-paren">(</span><em class="sig-param">force_cpu=False</em>, <em class="sig-param">async_training=False</em>, <em class="sig-param">shared_optimizer=True</em>, <em class="sig-param">scale_down_gradients_by_number_of_workers_for_sync_training=True</em>, <em class="sig-param">clip_gradients=None</em>, <em class="sig-param">gradients_clipping_method=<GradientClippingMethod.ClipByGlobalNorm: 0></em>, <em class="sig-param">l2_regularization=0</em>, <em class="sig-param">learning_rate=0.00025</em>, <em class="sig-param">learning_rate_decay_rate=0</em>, <em class="sig-param">learning_rate_decay_steps=0</em>, <em class="sig-param">input_embedders_parameters={}</em>, <em class="sig-param">embedding_merger_type=<EmbeddingMergerType.Concat: 0></em>, <em class="sig-param">middleware_parameters=None</em>, <em class="sig-param">heads_parameters=[]</em>, <em class="sig-param">use_separate_networks_per_head=False</em>, <em class="sig-param">optimizer_type='Adam'</em>, <em class="sig-param">optimizer_epsilon=0.0001</em>, <em class="sig-param">adam_optimizer_beta1=0.9</em>, <em class="sig-param">adam_optimizer_beta2=0.99</em>, <em class="sig-param">rms_prop_optimizer_decay=0.9</em>, <em class="sig-param">batch_size=32</em>, <em class="sig-param">replace_mse_with_huber_loss=False</em>, <em class="sig-param">create_target_network=False</em>, <em class="sig-param">tensorflow_support=True</em>, <em class="sig-param">softmax_temperature=1</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/base_parameters.html#NetworkParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.base_parameters.NetworkParameters" title="Permalink to this definition">¶</a></dt>
<dd><dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
@@ -268,7 +268,7 @@ online network at will.</p></li>
<h2>Architecture<a class="headerlink" href="#architecture" title="Permalink to this headline">¶</a></h2>
<dl class="class">
<dt id="rl_coach.architectures.architecture.Architecture">
<em class="property">class </em><code class="descclassname">rl_coach.architectures.architecture.</code><code class="descname">Architecture</code><span class="sig-paren">(</span><em>agent_parameters: rl_coach.base_parameters.AgentParameters</em>, <em>spaces: rl_coach.spaces.SpacesDefinition</em>, <em>name: str = ''</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/architectures/architecture.html#Architecture"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.architecture.Architecture" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.architectures.architecture.</code><code class="sig-name descname">Architecture</code><span class="sig-paren">(</span><em class="sig-param">agent_parameters: rl_coach.base_parameters.AgentParameters</em>, <em class="sig-param">spaces: rl_coach.spaces.SpacesDefinition</em>, <em class="sig-param">name: str = ''</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/architectures/architecture.html#Architecture"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.architecture.Architecture" title="Permalink to this definition">¶</a></dt>
<dd><p>Creates a neural network ‘architecture’ that can be trained and used for inference.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
@@ -281,7 +281,7 @@ online network at will.</p></li>
</dl>
<dl class="method">
<dt id="rl_coach.architectures.architecture.Architecture.accumulate_gradients">
<code class="descname">accumulate_gradients</code><span class="sig-paren">(</span><em>inputs: Dict[str, numpy.ndarray], targets: List[numpy.ndarray], additional_fetches: list = None, importance_weights: numpy.ndarray = None, no_accumulation: bool = False</em><span class="sig-paren">)</span> → Tuple[float, List[float], float, list]<a class="reference internal" href="../../_modules/rl_coach/architectures/architecture.html#Architecture.accumulate_gradients"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.architecture.Architecture.accumulate_gradients" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">accumulate_gradients</code><span class="sig-paren">(</span><em class="sig-param">inputs: Dict[str, numpy.ndarray], targets: List[numpy.ndarray], additional_fetches: list = None, importance_weights: numpy.ndarray = None, no_accumulation: bool = False</em><span class="sig-paren">)</span> → Tuple[float, List[float], float, list]<a class="reference internal" href="../../_modules/rl_coach/architectures/architecture.html#Architecture.accumulate_gradients"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.architecture.Architecture.accumulate_gradients" title="Permalink to this definition">¶</a></dt>
<dd><p>Given a batch of inputs (i.e. states) and targets (e.g. discounted rewards), computes and accumulates the
gradients for model parameters. Will run forward and backward pass to compute gradients, clip the gradient
values if required and then accumulate gradients from all learners. It does not update the model weights,
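<p>A hypothetical call against this method, following the signature and return tuple documented above; the input key and target array are illustrative assumptions:</p>
<pre>
# Accumulates gradients without touching the weights.
total_loss, per_head_losses, unclipped_grad_norm, fetched = network.accumulate_gradients(
    inputs={'observation': states_batch},   # Dict[str, np.ndarray]
    targets=[discounted_rewards],           # List[np.ndarray]
)
# Weights change only once apply_gradients / apply_and_reset_gradients is called.
</pre>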
@@ -324,7 +324,7 @@ fetched_tensors: all values for additional_fetches</p>

<dl class="method">
<dt id="rl_coach.architectures.architecture.Architecture.apply_and_reset_gradients">
<code class="descname">apply_and_reset_gradients</code><span class="sig-paren">(</span><em>gradients: List[numpy.ndarray], scaler: float = 1.0</em><span class="sig-paren">)</span> → None<a class="reference internal" href="../../_modules/rl_coach/architectures/architecture.html#Architecture.apply_and_reset_gradients"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.architecture.Architecture.apply_and_reset_gradients" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">apply_and_reset_gradients</code><span class="sig-paren">(</span><em class="sig-param">gradients: List[numpy.ndarray], scaler: float = 1.0</em><span class="sig-paren">)</span> → None<a class="reference internal" href="../../_modules/rl_coach/architectures/architecture.html#Architecture.apply_and_reset_gradients"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.architecture.Architecture.apply_and_reset_gradients" title="Permalink to this definition">¶</a></dt>
<dd><p>Applies the given gradients to the network weights and resets the gradient accumulations.
Has the same impact as calling <cite>apply_gradients</cite>, then <cite>reset_accumulated_gradients</cite>.</p>
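<p>Per the description above, the call is equivalent to this two-step sequence (a sketch, assuming a network object exposing the documented API):</p>
<pre>
network.apply_gradients(network.accumulated_gradients, scaler=1.0)
network.reset_accumulated_gradients()
# ...is what network.apply_and_reset_gradients(network.accumulated_gradients) performs in one call.
</pre>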
<dl class="field-list simple">
@@ -340,7 +340,7 @@ of an identical network (either self or another identical network)</p></li>

<dl class="method">
<dt id="rl_coach.architectures.architecture.Architecture.apply_gradients">
<code class="descname">apply_gradients</code><span class="sig-paren">(</span><em>gradients: List[numpy.ndarray], scaler: float = 1.0</em><span class="sig-paren">)</span> → None<a class="reference internal" href="../../_modules/rl_coach/architectures/architecture.html#Architecture.apply_gradients"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.architecture.Architecture.apply_gradients" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">apply_gradients</code><span class="sig-paren">(</span><em class="sig-param">gradients: List[numpy.ndarray], scaler: float = 1.0</em><span class="sig-paren">)</span> → None<a class="reference internal" href="../../_modules/rl_coach/architectures/architecture.html#Architecture.apply_gradients"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.architecture.Architecture.apply_gradients" title="Permalink to this definition">¶</a></dt>
<dd><p>Applies the given gradients to the network weights.
Will be performed sync or async depending on <cite>network_parameters.async_training</cite></p>
<dl class="field-list simple">
@@ -356,7 +356,7 @@ of an identical network (either self or another identical network)</p></li>

<dl class="method">
<dt id="rl_coach.architectures.architecture.Architecture.collect_savers">
<code class="descname">collect_savers</code><span class="sig-paren">(</span><em>parent_path_suffix: str</em><span class="sig-paren">)</span> → rl_coach.saver.SaverCollection<a class="reference internal" href="../../_modules/rl_coach/architectures/architecture.html#Architecture.collect_savers"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.architecture.Architecture.collect_savers" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">collect_savers</code><span class="sig-paren">(</span><em class="sig-param">parent_path_suffix: str</em><span class="sig-paren">)</span> → rl_coach.saver.SaverCollection<a class="reference internal" href="../../_modules/rl_coach/architectures/architecture.html#Architecture.collect_savers"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.architecture.Architecture.collect_savers" title="Permalink to this definition">¶</a></dt>
<dd><p>Collection of all savers for the network (typically only one saver for network and one for ONNX export)
:param parent_path_suffix: path suffix of the parent of the network</p>
<blockquote>
@@ -369,9 +369,9 @@ of an identical network (either self or another identical network)</p></li>
</dl>
</dd></dl>

<dl class="staticmethod">
<dl class="method">
<dt id="rl_coach.architectures.architecture.Architecture.construct">
<em class="property">static </em><code class="descname">construct</code><span class="sig-paren">(</span><em>variable_scope: str, devices: List[str], *args, **kwargs</em><span class="sig-paren">)</span> → rl_coach.architectures.architecture.Architecture<a class="reference internal" href="../../_modules/rl_coach/architectures/architecture.html#Architecture.construct"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.architecture.Architecture.construct" title="Permalink to this definition">¶</a></dt>
<em class="property">static </em><code class="sig-name descname">construct</code><span class="sig-paren">(</span><em class="sig-param">variable_scope: str, devices: List[str], *args, **kwargs</em><span class="sig-paren">)</span> → rl_coach.architectures.architecture.Architecture<a class="reference internal" href="../../_modules/rl_coach/architectures/architecture.html#Architecture.construct"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.architecture.Architecture.construct" title="Permalink to this definition">¶</a></dt>
<dd><p>Construct a network class using the provided variable scope and on the requested devices
:param variable_scope: string specifying variable scope under which to create network variables
:param devices: list of devices (can be list of Device objects, or string for TF distributed)
@@ -382,7 +382,7 @@ of an identical network (either self or another identical network)</p></li>

<dl class="method">
<dt id="rl_coach.architectures.architecture.Architecture.get_variable_value">
<code class="descname">get_variable_value</code><span class="sig-paren">(</span><em>variable: Any</em><span class="sig-paren">)</span> → numpy.ndarray<a class="reference internal" href="../../_modules/rl_coach/architectures/architecture.html#Architecture.get_variable_value"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.architecture.Architecture.get_variable_value" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">get_variable_value</code><span class="sig-paren">(</span><em class="sig-param">variable: Any</em><span class="sig-paren">)</span> → numpy.ndarray<a class="reference internal" href="../../_modules/rl_coach/architectures/architecture.html#Architecture.get_variable_value"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.architecture.Architecture.get_variable_value" title="Permalink to this definition">¶</a></dt>
<dd><p>Gets value of a specified variable. Type of variable is dependent on the framework.
Example of a variable is head.kl_coefficient, which could be a symbol for evaluation
or could be a string representing the value.</p>
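<p>A hypothetical usage built on the docstring’s own example; the assign op and placeholder handles are assumptions for illustration:</p>
<pre>
# Read the current KL coefficient of a head, then assign a new value.
current = network.get_variable_value(head.kl_coefficient)
network.set_variable_value(head.assign_kl_coefficient, 0.01, head.kl_coefficient_ph)
</pre>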
@@ -398,7 +398,7 @@ or could be a string representing the value.</p>

<dl class="method">
<dt id="rl_coach.architectures.architecture.Architecture.get_weights">
<code class="descname">get_weights</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → List[numpy.ndarray]<a class="reference internal" href="../../_modules/rl_coach/architectures/architecture.html#Architecture.get_weights"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.architecture.Architecture.get_weights" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">get_weights</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → List[numpy.ndarray]<a class="reference internal" href="../../_modules/rl_coach/architectures/architecture.html#Architecture.get_weights"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.architecture.Architecture.get_weights" title="Permalink to this definition">¶</a></dt>
<dd><p>Gets model weights as a list of ndarrays. It is used for synchronizing weights between two identical networks.</p>
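<p>A short sketch of the synchronization pattern this enables; the two network objects are assumptions:</p>
<pre>
weights = source_network.get_weights()    # List[np.ndarray]
destination_network.set_weights(weights)  # exact copy; rate defaults to 1.0
</pre>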
<dl class="field-list simple">
<dt class="field-odd">Returns</dt>
@@ -407,9 +407,9 @@ or could be a string representing the value.</p>
</dl>
</dd></dl>

<dl class="staticmethod">
<dl class="method">
<dt id="rl_coach.architectures.architecture.Architecture.parallel_predict">
<em class="property">static </em><code class="descname">parallel_predict</code><span class="sig-paren">(</span><em>sess: Any, network_input_tuples: List[Tuple[Architecture, Dict[str, numpy.ndarray]]]</em><span class="sig-paren">)</span> → Tuple[numpy.ndarray, ...]<a class="reference internal" href="../../_modules/rl_coach/architectures/architecture.html#Architecture.parallel_predict"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.architecture.Architecture.parallel_predict" title="Permalink to this definition">¶</a></dt>
<em class="property">static </em><code class="sig-name descname">parallel_predict</code><span class="sig-paren">(</span><em class="sig-param">sess: Any, network_input_tuples: List[Tuple[Architecture, Dict[str, numpy.ndarray]]]</em><span class="sig-paren">)</span> → Tuple[numpy.ndarray, ...]<a class="reference internal" href="../../_modules/rl_coach/architectures/architecture.html#Architecture.parallel_predict"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.architecture.Architecture.parallel_predict" title="Permalink to this definition">¶</a></dt>
<dd><dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
@@ -425,7 +425,7 @@ or could be a string representing the value.</p>

<dl class="method">
<dt id="rl_coach.architectures.architecture.Architecture.predict">
<code class="descname">predict</code><span class="sig-paren">(</span><em>inputs: Dict[str, numpy.ndarray], outputs: List[Any] = None, squeeze_output: bool = True, initial_feed_dict: Dict[Any, numpy.ndarray] = None</em><span class="sig-paren">)</span> → Tuple[numpy.ndarray, ...]<a class="reference internal" href="../../_modules/rl_coach/architectures/architecture.html#Architecture.predict"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.architecture.Architecture.predict" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">predict</code><span class="sig-paren">(</span><em class="sig-param">inputs: Dict[str, numpy.ndarray], outputs: List[Any] = None, squeeze_output: bool = True, initial_feed_dict: Dict[Any, numpy.ndarray] = None</em><span class="sig-paren">)</span> → Tuple[numpy.ndarray, ...]<a class="reference internal" href="../../_modules/rl_coach/architectures/architecture.html#Architecture.predict"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.architecture.Architecture.predict" title="Permalink to this definition">¶</a></dt>
<dd><p>Given input observations, use the model to make predictions (e.g. action or value).</p>
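<p>A hypothetical single-state prediction; the 'observation' input key depends on the agent’s input embedders and is an assumption here:</p>
<pre>
import numpy as np

outputs = network.predict({'observation': np.expand_dims(state, 0)})
</pre>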
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
@@ -446,7 +446,7 @@ depends on the framework backend.</p></li>

<dl class="method">
<dt id="rl_coach.architectures.architecture.Architecture.reset_accumulated_gradients">
<code class="descname">reset_accumulated_gradients</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → None<a class="reference internal" href="../../_modules/rl_coach/architectures/architecture.html#Architecture.reset_accumulated_gradients"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.architecture.Architecture.reset_accumulated_gradients" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">reset_accumulated_gradients</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → None<a class="reference internal" href="../../_modules/rl_coach/architectures/architecture.html#Architecture.reset_accumulated_gradients"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.architecture.Architecture.reset_accumulated_gradients" title="Permalink to this definition">¶</a></dt>
<dd><p>Sets gradient of all parameters to 0.</p>
<p>Once gradients are reset, they must be accessible via the <cite>accumulated_gradients</cite> property of this class,
which must return a list of numpy ndarrays. Child class must ensure that <cite>accumulated_gradients</cite> is set.</p>
@@ -454,7 +454,7 @@ which must return a list of numpy ndarrays. Child class must ensure that <cite>a

<dl class="method">
<dt id="rl_coach.architectures.architecture.Architecture.set_variable_value">
<code class="descname">set_variable_value</code><span class="sig-paren">(</span><em>assign_op: Any</em>, <em>value: numpy.ndarray</em>, <em>placeholder: Any</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/architectures/architecture.html#Architecture.set_variable_value"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.architecture.Architecture.set_variable_value" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">set_variable_value</code><span class="sig-paren">(</span><em class="sig-param">assign_op: Any</em>, <em class="sig-param">value: numpy.ndarray</em>, <em class="sig-param">placeholder: Any</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/architectures/architecture.html#Architecture.set_variable_value"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.architecture.Architecture.set_variable_value" title="Permalink to this definition">¶</a></dt>
<dd><p>Updates the value of a specified variable. Type of assign_op is dependent on the framework
and is a unique identifier for assigning value to a variable. For example an agent may use
head.assign_kl_coefficient. There is a one to one mapping between assign_op and placeholder
@@ -472,7 +472,7 @@ head.assign_kl_coefficient. There is a one to one mapping between assign_op and

<dl class="method">
<dt id="rl_coach.architectures.architecture.Architecture.set_weights">
<code class="descname">set_weights</code><span class="sig-paren">(</span><em>weights: List[numpy.ndarray], rate: float = 1.0</em><span class="sig-paren">)</span> → None<a class="reference internal" href="../../_modules/rl_coach/architectures/architecture.html#Architecture.set_weights"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.architecture.Architecture.set_weights" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">set_weights</code><span class="sig-paren">(</span><em class="sig-param">weights: List[numpy.ndarray], rate: float = 1.0</em><span class="sig-paren">)</span> → None<a class="reference internal" href="../../_modules/rl_coach/architectures/architecture.html#Architecture.set_weights"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.architecture.Architecture.set_weights" title="Permalink to this definition">¶</a></dt>
<dd><p>Sets model weights for provided layer parameters.</p>
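<p>Since the rate parameter noted below blends rather than overwrites (new_weight = rate * given_weight + (1 - rate) * old_weight), both hard copies and soft target updates can be expressed with the same call. A sketch, assuming two identical networks:</p>
<pre>
target.set_weights(online.get_weights(), rate=1.0)    # hard copy
target.set_weights(online.get_weights(), rate=0.001)  # Polyak-style soft update
</pre>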
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
@@ -490,7 +490,7 @@ i.e. new_weight = rate * given_weight + (1 - rate) * old_weight</p></li>

<dl class="method">
<dt id="rl_coach.architectures.architecture.Architecture.train_on_batch">
<code class="descname">train_on_batch</code><span class="sig-paren">(</span><em>inputs: Dict[str, numpy.ndarray], targets: List[numpy.ndarray], scaler: float = 1.0, additional_fetches: list = None, importance_weights: numpy.ndarray = None</em><span class="sig-paren">)</span> → Tuple[float, List[float], float, list]<a class="reference internal" href="../../_modules/rl_coach/architectures/architecture.html#Architecture.train_on_batch"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.architecture.Architecture.train_on_batch" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">train_on_batch</code><span class="sig-paren">(</span><em class="sig-param">inputs: Dict[str, numpy.ndarray], targets: List[numpy.ndarray], scaler: float = 1.0, additional_fetches: list = None, importance_weights: numpy.ndarray = None</em><span class="sig-paren">)</span> → Tuple[float, List[float], float, list]<a class="reference internal" href="../../_modules/rl_coach/architectures/architecture.html#Architecture.train_on_batch"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.architecture.Architecture.train_on_batch" title="Permalink to this definition">¶</a></dt>
<dd><p>Given a batch of inputs (e.g. states) and targets (e.g. discounted rewards), takes a training step: i.e. runs a
forward pass and backward pass of the network, accumulates the gradients and applies an optimization step to
update the weights.
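<p>A hypothetical single training step using the documented signature; the array names are illustrative assumptions:</p>
<pre>
loss, per_head_losses, grad_norm, fetched = network.train_on_batch(
    inputs={'observation': states_batch},
    targets=[td_targets],
    scaler=1.0,
)
</pre>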
@@ -535,7 +535,7 @@ fetched_tensors: all values for additional_fetches</p>
<a class="reference internal image-reference" href="../../_images/distributed.png"><img alt="../../_images/distributed.png" class="align-center" src="../../_images/distributed.png" style="width: 600px;" /></a>
<dl class="class">
<dt id="rl_coach.architectures.network_wrapper.NetworkWrapper">
<em class="property">class </em><code class="descclassname">rl_coach.architectures.network_wrapper.</code><code class="descname">NetworkWrapper</code><span class="sig-paren">(</span><em>agent_parameters: rl_coach.base_parameters.AgentParameters</em>, <em>has_target: bool</em>, <em>has_global: bool</em>, <em>name: str</em>, <em>spaces: rl_coach.spaces.SpacesDefinition</em>, <em>replicated_device=None</em>, <em>worker_device=None</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/architectures/network_wrapper.html#NetworkWrapper"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.network_wrapper.NetworkWrapper" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.architectures.network_wrapper.</code><code class="sig-name descname">NetworkWrapper</code><span class="sig-paren">(</span><em class="sig-param">agent_parameters: rl_coach.base_parameters.AgentParameters</em>, <em class="sig-param">has_target: bool</em>, <em class="sig-param">has_global: bool</em>, <em class="sig-param">name: str</em>, <em class="sig-param">spaces: rl_coach.spaces.SpacesDefinition</em>, <em class="sig-param">replicated_device=None</em>, <em class="sig-param">worker_device=None</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/architectures/network_wrapper.html#NetworkWrapper"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.network_wrapper.NetworkWrapper" title="Permalink to this definition">¶</a></dt>
<dd><p>The network wrapper contains multiple copies of the same network, each one with a different set of weights which is
updated on a different time scale. The network wrapper will always contain an online network.
It will contain an additional slow-updating target network if it was requested by the user,
@@ -544,7 +544,7 @@ multi-process distributed mode. The network wrapper contains functionality for m
between them.</p>
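<p>Conceptually, the wrapper can be pictured as holding up to three synchronized copies; the attribute names below follow the description and are assumptions, not a verified API listing:</p>
<pre>
# wrapper.online_network  - updated on every training step
# wrapper.target_network  - slow-updating copy, present only if requested
# wrapper.global_network  - shared copy for multi-process training, if requested
</pre>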
<dl class="method">
<dt id="rl_coach.architectures.network_wrapper.NetworkWrapper.apply_gradients_and_sync_networks">
<code class="descname">apply_gradients_and_sync_networks</code><span class="sig-paren">(</span><em>reset_gradients=True</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/architectures/network_wrapper.html#NetworkWrapper.apply_gradients_and_sync_networks"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.network_wrapper.NetworkWrapper.apply_gradients_and_sync_networks" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">apply_gradients_and_sync_networks</code><span class="sig-paren">(</span><em class="sig-param">reset_gradients=True</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/architectures/network_wrapper.html#NetworkWrapper.apply_gradients_and_sync_networks"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.network_wrapper.NetworkWrapper.apply_gradients_and_sync_networks" title="Permalink to this definition">¶</a></dt>
<dd><p>Applies the gradients accumulated in the online network to the global network or to itself and syncs the
networks if necessary</p>
<dl class="field-list simple">
@@ -559,7 +559,7 @@ complexity for this function by around 10%</p>

<dl class="method">
<dt id="rl_coach.architectures.network_wrapper.NetworkWrapper.apply_gradients_to_global_network">
<code class="descname">apply_gradients_to_global_network</code><span class="sig-paren">(</span><em>gradients=None</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/architectures/network_wrapper.html#NetworkWrapper.apply_gradients_to_global_network"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.network_wrapper.NetworkWrapper.apply_gradients_to_global_network" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">apply_gradients_to_global_network</code><span class="sig-paren">(</span><em class="sig-param">gradients=None</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/architectures/network_wrapper.html#NetworkWrapper.apply_gradients_to_global_network"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.network_wrapper.NetworkWrapper.apply_gradients_to_global_network" title="Permalink to this definition">¶</a></dt>
<dd><p>Apply gradients from the online network on the global network</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
@@ -573,7 +573,7 @@ complexity for this function by around 10%</p>

<dl class="method">
<dt id="rl_coach.architectures.network_wrapper.NetworkWrapper.apply_gradients_to_online_network">
<code class="descname">apply_gradients_to_online_network</code><span class="sig-paren">(</span><em>gradients=None</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/architectures/network_wrapper.html#NetworkWrapper.apply_gradients_to_online_network"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.network_wrapper.NetworkWrapper.apply_gradients_to_online_network" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">apply_gradients_to_online_network</code><span class="sig-paren">(</span><em class="sig-param">gradients=None</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/architectures/network_wrapper.html#NetworkWrapper.apply_gradients_to_online_network"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.network_wrapper.NetworkWrapper.apply_gradients_to_online_network" title="Permalink to this definition">¶</a></dt>
<dd><p>Apply gradients from the online network on itself</p>
<dl class="field-list simple">
<dt class="field-odd">Returns</dt>
@@ -584,7 +584,7 @@ complexity for this function by around 10%</p>

<dl class="method">
<dt id="rl_coach.architectures.network_wrapper.NetworkWrapper.collect_savers">
<code class="descname">collect_savers</code><span class="sig-paren">(</span><em>parent_path_suffix: str</em><span class="sig-paren">)</span> → rl_coach.saver.SaverCollection<a class="reference internal" href="../../_modules/rl_coach/architectures/network_wrapper.html#NetworkWrapper.collect_savers"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.network_wrapper.NetworkWrapper.collect_savers" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">collect_savers</code><span class="sig-paren">(</span><em class="sig-param">parent_path_suffix: str</em><span class="sig-paren">)</span> → rl_coach.saver.SaverCollection<a class="reference internal" href="../../_modules/rl_coach/architectures/network_wrapper.html#NetworkWrapper.collect_savers"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.network_wrapper.NetworkWrapper.collect_savers" title="Permalink to this definition">¶</a></dt>
<dd><p>Collect all of network’s savers for global or online network
Note: global, online, and target network are all copies of the same network with parameters that are</p>
<blockquote>
@@ -610,7 +610,7 @@ for saving.</p>

<dl class="method">
<dt id="rl_coach.architectures.network_wrapper.NetworkWrapper.parallel_prediction">
<code class="descname">parallel_prediction</code><span class="sig-paren">(</span><em>network_input_tuples: List[Tuple]</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/architectures/network_wrapper.html#NetworkWrapper.parallel_prediction"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.network_wrapper.NetworkWrapper.parallel_prediction" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">parallel_prediction</code><span class="sig-paren">(</span><em class="sig-param">network_input_tuples: List[Tuple]</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/architectures/network_wrapper.html#NetworkWrapper.parallel_prediction"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.network_wrapper.NetworkWrapper.parallel_prediction" title="Permalink to this definition">¶</a></dt>
<dd><p>Run several network predictions in parallel. Currently this only supports running each of the networks once.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
@@ -625,7 +625,7 @@ target_network or global_network) and the second element is the inputs</p>

<dl class="method">
<dt id="rl_coach.architectures.network_wrapper.NetworkWrapper.set_is_training">
<code class="descname">set_is_training</code><span class="sig-paren">(</span><em>state: bool</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/architectures/network_wrapper.html#NetworkWrapper.set_is_training"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.network_wrapper.NetworkWrapper.set_is_training" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">set_is_training</code><span class="sig-paren">(</span><em class="sig-param">state: bool</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/architectures/network_wrapper.html#NetworkWrapper.set_is_training"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.network_wrapper.NetworkWrapper.set_is_training" title="Permalink to this definition">¶</a></dt>
<dd><p>Set the phase of the network between training and testing</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
@@ -639,7 +639,7 @@ target_network or global_network) and the second element is the inputs</p>

<dl class="method">
<dt id="rl_coach.architectures.network_wrapper.NetworkWrapper.sync">
<code class="descname">sync</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/architectures/network_wrapper.html#NetworkWrapper.sync"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.network_wrapper.NetworkWrapper.sync" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">sync</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/architectures/network_wrapper.html#NetworkWrapper.sync"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.network_wrapper.NetworkWrapper.sync" title="Permalink to this definition">¶</a></dt>
<dd><p>Initializes the weights of the networks to match each other</p>
<dl class="field-list simple">
<dt class="field-odd">Returns</dt>
@@ -650,7 +650,7 @@ target_network or global_network) and the second element is the inputs</p>

<dl class="method">
<dt id="rl_coach.architectures.network_wrapper.NetworkWrapper.train_and_sync_networks">
<code class="descname">train_and_sync_networks</code><span class="sig-paren">(</span><em>inputs</em>, <em>targets</em>, <em>additional_fetches=[]</em>, <em>importance_weights=None</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/architectures/network_wrapper.html#NetworkWrapper.train_and_sync_networks"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.network_wrapper.NetworkWrapper.train_and_sync_networks" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">train_and_sync_networks</code><span class="sig-paren">(</span><em class="sig-param">inputs</em>, <em class="sig-param">targets</em>, <em class="sig-param">additional_fetches=[]</em>, <em class="sig-param">importance_weights=None</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/architectures/network_wrapper.html#NetworkWrapper.train_and_sync_networks"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.network_wrapper.NetworkWrapper.train_and_sync_networks" title="Permalink to this definition">¶</a></dt>
<dd><p>A generic training function that enables multi-threaded training using a global network if necessary.</p>
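<p>A hypothetical call, mirroring the single-network train_on_batch example above:</p>
<pre>
result = network_wrapper.train_and_sync_networks(
    inputs={'observation': states_batch},
    targets=[td_targets],
)
</pre>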
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
@@ -670,7 +670,7 @@ error of this sample. If it is not given, the samples losses won’t be scaled</

<dl class="method">
<dt id="rl_coach.architectures.network_wrapper.NetworkWrapper.update_online_network">
<code class="descname">update_online_network</code><span class="sig-paren">(</span><em>rate=1.0</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/architectures/network_wrapper.html#NetworkWrapper.update_online_network"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.network_wrapper.NetworkWrapper.update_online_network" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">update_online_network</code><span class="sig-paren">(</span><em class="sig-param">rate=1.0</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/architectures/network_wrapper.html#NetworkWrapper.update_online_network"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.network_wrapper.NetworkWrapper.update_online_network" title="Permalink to this definition">¶</a></dt>
<dd><p>Copy weights: global network >>> online network</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
@@ -681,7 +681,7 @@ error of this sample. If it is not given, the samples losses won’t be scaled</

<dl class="method">
<dt id="rl_coach.architectures.network_wrapper.NetworkWrapper.update_target_network">
<code class="descname">update_target_network</code><span class="sig-paren">(</span><em>rate=1.0</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/architectures/network_wrapper.html#NetworkWrapper.update_target_network"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.network_wrapper.NetworkWrapper.update_target_network" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">update_target_network</code><span class="sig-paren">(</span><em class="sig-param">rate=1.0</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/architectures/network_wrapper.html#NetworkWrapper.update_target_network"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.architectures.network_wrapper.NetworkWrapper.update_target_network" title="Permalink to this definition">¶</a></dt>
<dd><p>Copy weights: online network >>> target network</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>

@@ -197,7 +197,7 @@
<h2>ActionInfo<a class="headerlink" href="#actioninfo" title="Permalink to this headline">¶</a></h2>
<dl class="class">
<dt id="rl_coach.core_types.ActionInfo">
<em class="property">class </em><code class="descclassname">rl_coach.core_types.</code><code class="descname">ActionInfo</code><span class="sig-paren">(</span><em>action: Union[int, float, numpy.ndarray, List], all_action_probabilities: float = 0, action_value: float = 0.0, state_value: float = 0.0, max_action_value: float = None</em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/rl_coach/core_types.html#ActionInfo"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.core_types.ActionInfo" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.core_types.</code><code class="sig-name descname">ActionInfo</code><span class="sig-paren">(</span><em class="sig-param">action: Union[int, float, numpy.ndarray, List], all_action_probabilities: float = 0, action_value: float = 0.0, state_value: float = 0.0, max_action_value: float = None</em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/rl_coach/core_types.html#ActionInfo"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.core_types.ActionInfo" title="Permalink to this definition">¶</a></dt>
<dd><p>Action info is a class that holds an action and various additional details about it</p>
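<p>A small construction example using the documented keyword arguments; the values are illustrative only:</p>
<pre>
from rl_coach.core_types import ActionInfo

info = ActionInfo(action=2, action_value=1.37, max_action_value=1.37)
</pre>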
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
@@ -219,7 +219,7 @@ action with the maximum value</p></li>
<h2>Batch<a class="headerlink" href="#batch" title="Permalink to this headline">¶</a></h2>
<dl class="class">
<dt id="rl_coach.core_types.Batch">
<em class="property">class </em><code class="descclassname">rl_coach.core_types.</code><code class="descname">Batch</code><span class="sig-paren">(</span><em>transitions: List[rl_coach.core_types.Transition]</em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/rl_coach/core_types.html#Batch"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.core_types.Batch" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.core_types.</code><code class="sig-name descname">Batch</code><span class="sig-paren">(</span><em class="sig-param">transitions: List[rl_coach.core_types.Transition]</em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/rl_coach/core_types.html#Batch"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.core_types.Batch" title="Permalink to this definition">¶</a></dt>
<dd><p>A wrapper around a list of transitions that helps with extracting batches of parameters from it.
For example, one can extract a list of states corresponding to the list of transitions.
The class uses lazy evaluation in order to return each of the available parameters.</p>
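<p>A sketch of the intended usage; the replay memory object is an assumption for illustration:</p>
<pre>
from rl_coach.core_types import Batch

batch = Batch(memory.sample(32))
actions = batch.actions()        # lazily batched on first access
game_overs = batch.game_overs()
</pre>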
@@ -230,7 +230,7 @@ The class uses lazy evaluation in order to return each of the available paramete
</dl>
<dl class="method">
<dt id="rl_coach.core_types.Batch.actions">
<code class="descname">actions</code><span class="sig-paren">(</span><em>expand_dims=False</em><span class="sig-paren">)</span> → numpy.ndarray<a class="reference internal" href="../_modules/rl_coach/core_types.html#Batch.actions"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.core_types.Batch.actions" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">actions</code><span class="sig-paren">(</span><em class="sig-param">expand_dims=False</em><span class="sig-paren">)</span> → numpy.ndarray<a class="reference internal" href="../_modules/rl_coach/core_types.html#Batch.actions"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.core_types.Batch.actions" title="Permalink to this definition">¶</a></dt>
<dd><p>If the actions were not converted to a batch before, extract them into a batch and then return the batch</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
@@ -244,7 +244,7 @@ The class uses lazy evaluation in order to return each of the available paramete

<dl class="method">
<dt id="rl_coach.core_types.Batch.game_overs">
<code class="descname">game_overs</code><span class="sig-paren">(</span><em>expand_dims=False</em><span class="sig-paren">)</span> → numpy.ndarray<a class="reference internal" href="../_modules/rl_coach/core_types.html#Batch.game_overs"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.core_types.Batch.game_overs" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">game_overs</code><span class="sig-paren">(</span><em class="sig-param">expand_dims=False</em><span class="sig-paren">)</span> → numpy.ndarray<a class="reference internal" href="../_modules/rl_coach/core_types.html#Batch.game_overs"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.core_types.Batch.game_overs" title="Permalink to this definition">¶</a></dt>
<dd><p>If the game_overs were not converted to a batch before, extract them into a batch and then return the batch</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
@@ -258,7 +258,7 @@ The class uses lazy evaluation in order to return each of the available paramete

<dl class="method">
<dt id="rl_coach.core_types.Batch.goals">
<code class="descname">goals</code><span class="sig-paren">(</span><em>expand_dims=False</em><span class="sig-paren">)</span> → numpy.ndarray<a class="reference internal" href="../_modules/rl_coach/core_types.html#Batch.goals"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.core_types.Batch.goals" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">goals</code><span class="sig-paren">(</span><em class="sig-param">expand_dims=False</em><span class="sig-paren">)</span> → numpy.ndarray<a class="reference internal" href="../_modules/rl_coach/core_types.html#Batch.goals"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.core_types.Batch.goals" title="Permalink to this definition">¶</a></dt>
<dd><p>If the goals were not converted to a batch before, extract them into a batch and then return the batch.
If the goal was not filled, this will raise an exception</p>
<dl class="field-list simple">
@@ -273,7 +273,7 @@ if the goal was not filled, this will raise an exception</p>

<dl class="method">
<dt id="rl_coach.core_types.Batch.info">
<code class="descname">info</code><span class="sig-paren">(</span><em>key</em>, <em>expand_dims=False</em><span class="sig-paren">)</span> → numpy.ndarray<a class="reference internal" href="../_modules/rl_coach/core_types.html#Batch.info"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.core_types.Batch.info" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">info</code><span class="sig-paren">(</span><em class="sig-param">key</em>, <em class="sig-param">expand_dims=False</em><span class="sig-paren">)</span> → numpy.ndarray<a class="reference internal" href="../_modules/rl_coach/core_types.html#Batch.info"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.core_types.Batch.info" title="Permalink to this definition">¶</a></dt>
<dd><p>If the given info dictionary key was not converted to a batch before, extract it into a batch and then return the
batch. If the key is not part of the keys in the info dictionary, this will raise an exception</p>
<dl class="field-list simple">
@@ -288,7 +288,7 @@ batch. if the key is not part of the keys in the info dictionary, this will rais

<dl class="method">
<dt id="rl_coach.core_types.Batch.info_as_list">
<code class="descname">info_as_list</code><span class="sig-paren">(</span><em>key</em><span class="sig-paren">)</span> → list<a class="reference internal" href="../_modules/rl_coach/core_types.html#Batch.info_as_list"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.core_types.Batch.info_as_list" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">info_as_list</code><span class="sig-paren">(</span><em class="sig-param">key</em><span class="sig-paren">)</span> → list<a class="reference internal" href="../_modules/rl_coach/core_types.html#Batch.info_as_list"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.core_types.Batch.info_as_list" title="Permalink to this definition">¶</a></dt>
<dd><p>Get the info values for the given key and store them internally as a list, if they were not stored before; then return them as a list.
:param key: the info dictionary key to extract from each transition
:return: a list containing all the info values of the batch corresponding to the given key</p>
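<p>A short sketch of the difference between <code>info</code> and <code>info_as_list</code>, assuming every transition in the batch carries an <code>action_probability</code> entry in its info dictionary (the key is illustrative):</p>
<div class="highlight"><pre>
probs_array = batch.info('action_probability')          # stacked numpy array
probs_list = batch.info_as_list('action_probability')   # plain python list
</pre></div>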
@@ -296,7 +296,7 @@ batch. if the key is not part of the keys in the info dictionary, this will rais

<dl class="method">
<dt id="rl_coach.core_types.Batch.n_step_discounted_rewards">
<code class="descname">n_step_discounted_rewards</code><span class="sig-paren">(</span><em>expand_dims=False</em><span class="sig-paren">)</span> → numpy.ndarray<a class="reference internal" href="../_modules/rl_coach/core_types.html#Batch.n_step_discounted_rewards"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.core_types.Batch.n_step_discounted_rewards" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">n_step_discounted_rewards</code><span class="sig-paren">(</span><em class="sig-param">expand_dims=False</em><span class="sig-paren">)</span> → numpy.ndarray<a class="reference internal" href="../_modules/rl_coach/core_types.html#Batch.n_step_discounted_rewards"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.core_types.Batch.n_step_discounted_rewards" title="Permalink to this definition">¶</a></dt>
<dd><p>If the n_step_discounted_rewards were not converted to a batch before, extract them into a batch and then return the batch</p>
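<p>A one-line sketch, assuming the transitions came from an <code>Episode</code> whose discounted returns were already computed (e.g. via <code>update_discounted_rewards()</code>):</p>
<div class="highlight"><pre>
returns = batch.n_step_discounted_rewards(expand_dims=True)  # array of shape (batch_size, 1)
</pre></div>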
@@ -308,7 +308,7 @@ batch. if the key is not part of the keys in the info dictionary, this will rais

<dl class="method">
<dt id="rl_coach.core_types.Batch.next_states">
<code class="descname">next_states</code><span class="sig-paren">(</span><em>fetches: List[str], expand_dims=False</em><span class="sig-paren">)</span> → Dict[str, numpy.ndarray]<a class="reference internal" href="../_modules/rl_coach/core_types.html#Batch.next_states"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.core_types.Batch.next_states" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">next_states</code><span class="sig-paren">(</span><em class="sig-param">fetches: List[str], expand_dims=False</em><span class="sig-paren">)</span> → Dict[str, numpy.ndarray]<a class="reference internal" href="../_modules/rl_coach/core_types.html#Batch.next_states"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.core_types.Batch.next_states" title="Permalink to this definition">¶</a></dt>
<dd><p>Follow the keys in fetches to extract the corresponding items from the next states in the batch,
if these keys were not already extracted before. Return only the values corresponding to those keys</p>
<dl class="field-list simple">
@@ -326,7 +326,7 @@ if these keys were not already extracted before. return only the values correspo

<dl class="method">
<dt id="rl_coach.core_types.Batch.rewards">
<code class="descname">rewards</code><span class="sig-paren">(</span><em>expand_dims=False</em><span class="sig-paren">)</span> → numpy.ndarray<a class="reference internal" href="../_modules/rl_coach/core_types.html#Batch.rewards"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.core_types.Batch.rewards" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">rewards</code><span class="sig-paren">(</span><em class="sig-param">expand_dims=False</em><span class="sig-paren">)</span> → numpy.ndarray<a class="reference internal" href="../_modules/rl_coach/core_types.html#Batch.rewards"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.core_types.Batch.rewards" title="Permalink to this definition">¶</a></dt>
<dd><p>If the rewards were not converted to a batch before, extract them into a batch and then return the batch</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
@@ -340,7 +340,7 @@ if these keys were not already extracted before. return only the values correspo

<dl class="method">
<dt id="rl_coach.core_types.Batch.shuffle">
<code class="descname">shuffle</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → None<a class="reference internal" href="../_modules/rl_coach/core_types.html#Batch.shuffle"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.core_types.Batch.shuffle" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">shuffle</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → None<a class="reference internal" href="../_modules/rl_coach/core_types.html#Batch.shuffle"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.core_types.Batch.shuffle" title="Permalink to this definition">¶</a></dt>
<dd><p>Shuffle all the transitions in the batch</p>
<dl class="field-list simple">
<dt class="field-odd">Returns</dt>
@@ -349,9 +349,9 @@ if these keys were not already extracted before. return only the values correspo
</dl>
</dd></dl>

<dl class="attribute">
<dl class="method">
<dt id="rl_coach.core_types.Batch.size">
<code class="descname">size</code><a class="headerlink" href="#rl_coach.core_types.Batch.size" title="Permalink to this definition">¶</a></dt>
<em class="property">property </em><code class="sig-name descname">size</code><a class="headerlink" href="#rl_coach.core_types.Batch.size" title="Permalink to this definition">¶</a></dt>
<dd><dl class="field-list simple">
<dt class="field-odd">Returns</dt>
<dd class="field-odd"><p>the size of the batch</p>
@@ -361,7 +361,7 @@ if these keys were not already extracted before. return only the values correspo

<dl class="method">
<dt id="rl_coach.core_types.Batch.slice">
<code class="descname">slice</code><span class="sig-paren">(</span><em>start</em>, <em>end</em><span class="sig-paren">)</span> → None<a class="reference internal" href="../_modules/rl_coach/core_types.html#Batch.slice"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.core_types.Batch.slice" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">slice</code><span class="sig-paren">(</span><em class="sig-param">start</em>, <em class="sig-param">end</em><span class="sig-paren">)</span> → None<a class="reference internal" href="../_modules/rl_coach/core_types.html#Batch.slice"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.core_types.Batch.slice" title="Permalink to this definition">¶</a></dt>
<dd><p>Keep a slice from the batch and discard the rest of the batch</p>
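<p>Shuffling and slicing combine naturally for drawing minibatches in place; a rough sketch with arbitrary sizes:</p>
<div class="highlight"><pre>
batch.shuffle()     # randomize the order of the transitions in place
batch.slice(0, 16)  # keep only the first 16 transitions and discard the rest
print(batch.size)   # 16
</pre></div>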
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
@@ -378,7 +378,7 @@ if these keys were not already extracted before. return only the values correspo

<dl class="method">
<dt id="rl_coach.core_types.Batch.states">
<code class="descname">states</code><span class="sig-paren">(</span><em>fetches: List[str], expand_dims=False</em><span class="sig-paren">)</span> → Dict[str, numpy.ndarray]<a class="reference internal" href="../_modules/rl_coach/core_types.html#Batch.states"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.core_types.Batch.states" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">states</code><span class="sig-paren">(</span><em class="sig-param">fetches: List[str], expand_dims=False</em><span class="sig-paren">)</span> → Dict[str, numpy.ndarray]<a class="reference internal" href="../_modules/rl_coach/core_types.html#Batch.states"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.core_types.Batch.states" title="Permalink to this definition">¶</a></dt>
<dd><p>Follow the keys in fetches to extract the corresponding items from the states in the batch,
if these keys were not already extracted before. Return only the values corresponding to those keys</p>
<dl class="field-list simple">
@@ -401,7 +401,7 @@ if these keys were not already extracted before. return only the values correspo
<h2>EnvResponse<a class="headerlink" href="#envresponse" title="Permalink to this headline">¶</a></h2>
<dl class="class">
<dt id="rl_coach.core_types.EnvResponse">
<em class="property">class </em><code class="descclassname">rl_coach.core_types.</code><code class="descname">EnvResponse</code><span class="sig-paren">(</span><em>next_state: Dict[str, numpy.ndarray], reward: Union[int, float, numpy.ndarray], game_over: bool, info: Dict = None, goal: numpy.ndarray = None</em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/rl_coach/core_types.html#EnvResponse"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.core_types.EnvResponse" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.core_types.</code><code class="sig-name descname">EnvResponse</code><span class="sig-paren">(</span><em class="sig-param">next_state: Dict[str, numpy.ndarray], reward: Union[int, float, numpy.ndarray], game_over: bool, info: Dict = None, goal: numpy.ndarray = None</em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/rl_coach/core_types.html#EnvResponse"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.core_types.EnvResponse" title="Permalink to this definition">¶</a></dt>
<dd><p>An env response is a collection containing the information returned from the environment after a single action
has been performed on it.</p>
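<p>A minimal construction sketch; the observation array and reward are placeholders:</p>
<div class="highlight"><pre>
import numpy as np
from rl_coach.core_types import EnvResponse

response = EnvResponse(next_state={'observation': np.zeros(4)},
                       reward=0.0,
                       game_over=False)
</pre></div>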
<dl class="field-list simple">
@@ -424,7 +424,7 @@ the execution of the action.</p></li>
<h2>Episode<a class="headerlink" href="#episode" title="Permalink to this headline">¶</a></h2>
<dl class="class">
<dt id="rl_coach.core_types.Episode">
<em class="property">class </em><code class="descclassname">rl_coach.core_types.</code><code class="descname">Episode</code><span class="sig-paren">(</span><em>discount: float = 0.99</em>, <em>bootstrap_total_return_from_old_policy: bool = False</em>, <em>n_step: int = -1</em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/rl_coach/core_types.html#Episode"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.core_types.Episode" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.core_types.</code><code class="sig-name descname">Episode</code><span class="sig-paren">(</span><em class="sig-param">discount: float = 0.99</em>, <em class="sig-param">bootstrap_total_return_from_old_policy: bool = False</em>, <em class="sig-param">n_step: int = -1</em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/rl_coach/core_types.html#Episode"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.core_types.Episode" title="Permalink to this definition">¶</a></dt>
<dd><p>An Episode represents a set of sequential transitions that ends with a terminal state.</p>
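<p>A sketch of accumulating transitions into an <code>Episode</code> and computing the discounted returns (the transition contents are placeholders):</p>
<div class="highlight"><pre>
import numpy as np
from rl_coach.core_types import Episode, Transition

episode = Episode(discount=0.99)
episode.insert(Transition(state={'observation': np.zeros(4)}, action=1, reward=0.5,
                          next_state={'observation': np.ones(4)}, game_over=True))
episode.update_discounted_rewards()                    # fill in per-transition returns
rewards = episode.get_transitions_attribute('reward')  # [0.5]
</pre></div>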
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
@@ -438,7 +438,7 @@ memory</p></li>
</dl>
<dl class="method">
<dt id="rl_coach.core_types.Episode.get_first_transition">
<code class="descname">get_first_transition</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → rl_coach.core_types.Transition<a class="reference internal" href="../_modules/rl_coach/core_types.html#Episode.get_first_transition"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.core_types.Episode.get_first_transition" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">get_first_transition</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → rl_coach.core_types.Transition<a class="reference internal" href="../_modules/rl_coach/core_types.html#Episode.get_first_transition"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.core_types.Episode.get_first_transition" title="Permalink to this definition">¶</a></dt>
<dd><p>Get the first transition in the episode, or None if there are no transitions available</p>
<dl class="field-list simple">
<dt class="field-odd">Returns</dt>
@@ -449,7 +449,7 @@ memory</p></li>

<dl class="method">
<dt id="rl_coach.core_types.Episode.get_last_transition">
<code class="descname">get_last_transition</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → rl_coach.core_types.Transition<a class="reference internal" href="../_modules/rl_coach/core_types.html#Episode.get_last_transition"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.core_types.Episode.get_last_transition" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">get_last_transition</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → rl_coach.core_types.Transition<a class="reference internal" href="../_modules/rl_coach/core_types.html#Episode.get_last_transition"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.core_types.Episode.get_last_transition" title="Permalink to this definition">¶</a></dt>
<dd><p>Get the last transition in the episode, or None if there are no transitions available</p>
<dl class="field-list simple">
<dt class="field-odd">Returns</dt>
@@ -460,7 +460,7 @@ memory</p></li>

<dl class="method">
<dt id="rl_coach.core_types.Episode.get_transition">
<code class="descname">get_transition</code><span class="sig-paren">(</span><em>transition_idx: int</em><span class="sig-paren">)</span> → rl_coach.core_types.Transition<a class="reference internal" href="../_modules/rl_coach/core_types.html#Episode.get_transition"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.core_types.Episode.get_transition" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">get_transition</code><span class="sig-paren">(</span><em class="sig-param">transition_idx: int</em><span class="sig-paren">)</span> → rl_coach.core_types.Transition<a class="reference internal" href="../_modules/rl_coach/core_types.html#Episode.get_transition"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.core_types.Episode.get_transition" title="Permalink to this definition">¶</a></dt>
<dd><p>Get a specific transition by its index.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
@@ -474,7 +474,7 @@ memory</p></li>

<dl class="method">
<dt id="rl_coach.core_types.Episode.get_transitions_attribute">
<code class="descname">get_transitions_attribute</code><span class="sig-paren">(</span><em>attribute_name: str</em><span class="sig-paren">)</span> → List[Any]<a class="reference internal" href="../_modules/rl_coach/core_types.html#Episode.get_transitions_attribute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.core_types.Episode.get_transitions_attribute" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">get_transitions_attribute</code><span class="sig-paren">(</span><em class="sig-param">attribute_name: str</em><span class="sig-paren">)</span> → List[Any]<a class="reference internal" href="../_modules/rl_coach/core_types.html#Episode.get_transitions_attribute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.core_types.Episode.get_transitions_attribute" title="Permalink to this definition">¶</a></dt>
<dd><p>Get the values for some transition attribute from all the transitions in the episode.
For example, this allows getting the rewards for all the transitions as a list by calling
get_transitions_attribute(‘reward’)</p>
@@ -490,7 +490,7 @@ get_transitions_attribute(‘reward’)</p>

<dl class="method">
<dt id="rl_coach.core_types.Episode.insert">
<code class="descname">insert</code><span class="sig-paren">(</span><em>transition: rl_coach.core_types.Transition</em><span class="sig-paren">)</span> → None<a class="reference internal" href="../_modules/rl_coach/core_types.html#Episode.insert"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.core_types.Episode.insert" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">insert</code><span class="sig-paren">(</span><em class="sig-param">transition: rl_coach.core_types.Transition</em><span class="sig-paren">)</span> → None<a class="reference internal" href="../_modules/rl_coach/core_types.html#Episode.insert"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.core_types.Episode.insert" title="Permalink to this definition">¶</a></dt>
<dd><p>Insert a new transition into the episode. If the game_over flag in the transition is set to True,
the episode will be marked as complete.</p>
<dl class="field-list simple">
@@ -505,7 +505,7 @@ the episode will be marked as complete.</p>

<dl class="method">
<dt id="rl_coach.core_types.Episode.is_empty">
<code class="descname">is_empty</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → bool<a class="reference internal" href="../_modules/rl_coach/core_types.html#Episode.is_empty"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.core_types.Episode.is_empty" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">is_empty</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → bool<a class="reference internal" href="../_modules/rl_coach/core_types.html#Episode.is_empty"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.core_types.Episode.is_empty" title="Permalink to this definition">¶</a></dt>
<dd><p>Check if the episode is empty</p>
<dl class="field-list simple">
<dt class="field-odd">Returns</dt>
@@ -516,7 +516,7 @@ the episode will be marked as complete.</p>

<dl class="method">
<dt id="rl_coach.core_types.Episode.length">
<code class="descname">length</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → int<a class="reference internal" href="../_modules/rl_coach/core_types.html#Episode.length"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.core_types.Episode.length" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">length</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → int<a class="reference internal" href="../_modules/rl_coach/core_types.html#Episode.length"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.core_types.Episode.length" title="Permalink to this definition">¶</a></dt>
<dd><p>Return the length of the episode, which is the number of transitions it holds.</p>
<dl class="field-list simple">
<dt class="field-odd">Returns</dt>
@@ -527,7 +527,7 @@ the episode will be marked as complete.</p>

<dl class="method">
<dt id="rl_coach.core_types.Episode.update_discounted_rewards">
<code class="descname">update_discounted_rewards</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="../_modules/rl_coach/core_types.html#Episode.update_discounted_rewards"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.core_types.Episode.update_discounted_rewards" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">update_discounted_rewards</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="../_modules/rl_coach/core_types.html#Episode.update_discounted_rewards"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.core_types.Episode.update_discounted_rewards" title="Permalink to this definition">¶</a></dt>
<dd><p>Update the discounted returns for all the transitions in the episode.
The returns will be calculated according to the rewards of each transition, together with the number of steps
to bootstrap from and the discount factor, as defined by n_step and discount respectively when initializing
@@ -546,7 +546,7 @@ the episode.</p>
<h2>Transition<a class="headerlink" href="#transition" title="Permalink to this headline">¶</a></h2>
<dl class="class">
<dt id="rl_coach.core_types.Transition">
<em class="property">class </em><code class="descclassname">rl_coach.core_types.</code><code class="descname">Transition</code><span class="sig-paren">(</span><em>state: Dict[str</em>, <em>numpy.ndarray] = None</em>, <em>action: Union[int</em>, <em>float</em>, <em>numpy.ndarray</em>, <em>List] = None</em>, <em>reward: Union[int</em>, <em>float</em>, <em>numpy.ndarray] = None</em>, <em>next_state: Dict[str</em>, <em>numpy.ndarray] = None</em>, <em>game_over: bool = None</em>, <em>info: Dict = None</em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/rl_coach/core_types.html#Transition"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.core_types.Transition" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.core_types.</code><code class="sig-name descname">Transition</code><span class="sig-paren">(</span><em class="sig-param">state: Dict[str</em>, <em class="sig-param">numpy.ndarray] = None</em>, <em class="sig-param">action: Union[int</em>, <em class="sig-param">float</em>, <em class="sig-param">numpy.ndarray</em>, <em class="sig-param">List] = None</em>, <em class="sig-param">reward: Union[int</em>, <em class="sig-param">float</em>, <em class="sig-param">numpy.ndarray] = None</em>, <em class="sig-param">next_state: Dict[str</em>, <em class="sig-param">numpy.ndarray] = None</em>, <em class="sig-param">game_over: bool = None</em>, <em class="sig-param">info: Dict = None</em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/rl_coach/core_types.html#Transition"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.core_types.Transition" title="Permalink to this definition">¶</a></dt>
<dd><p>A transition is a tuple containing the information of a single step of interaction
between the agent and the environment. The most basic version should contain the following values:
(current state, action, reward, next state, game over)
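<p>As a sketch, the most basic transition can be built directly from those values (the arrays are placeholders):</p>
<div class="highlight"><pre>
import numpy as np
from rl_coach.core_types import Transition

transition = Transition(state={'observation': np.zeros(4)},
                        action=0,
                        reward=1.0,
                        next_state={'observation': np.ones(4)},
                        game_over=False)
</pre></div>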

@@ -194,7 +194,7 @@
<h2>S3DataStore<a class="headerlink" href="#s3datastore" title="Permalink to this headline">¶</a></h2>
<dl class="class">
<dt id="rl_coach.data_stores.s3_data_store.S3DataStore">
<em class="property">class </em><code class="descclassname">rl_coach.data_stores.s3_data_store.</code><code class="descname">S3DataStore</code><span class="sig-paren">(</span><em>params: rl_coach.data_stores.s3_data_store.S3DataStoreParameters</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/data_stores/s3_data_store.html#S3DataStore"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.data_stores.s3_data_store.S3DataStore" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.data_stores.s3_data_store.</code><code class="sig-name descname">S3DataStore</code><span class="sig-paren">(</span><em class="sig-param">params: rl_coach.data_stores.s3_data_store.S3DataStoreParameters</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/data_stores/s3_data_store.html#S3DataStore"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.data_stores.s3_data_store.S3DataStore" title="Permalink to this definition">¶</a></dt>
<dd><p>An implementation of the data store using S3 for storing policy checkpoints when using Coach in distributed mode.
The policy checkpoints are written by the trainer and read by the rollout worker.</p>
<dl class="field-list simple">
@@ -209,7 +209,7 @@ The policy checkpoints are written by the trainer and read by the rollout worker
<h2>NFSDataStore<a class="headerlink" href="#nfsdatastore" title="Permalink to this headline">¶</a></h2>
<dl class="class">
<dt id="rl_coach.data_stores.nfs_data_store.NFSDataStore">
<em class="property">class </em><code class="descclassname">rl_coach.data_stores.nfs_data_store.</code><code class="descname">NFSDataStore</code><span class="sig-paren">(</span><em>params: rl_coach.data_stores.nfs_data_store.NFSDataStoreParameters</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/data_stores/nfs_data_store.html#NFSDataStore"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.data_stores.nfs_data_store.NFSDataStore" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.data_stores.nfs_data_store.</code><code class="sig-name descname">NFSDataStore</code><span class="sig-paren">(</span><em class="sig-param">params: rl_coach.data_stores.nfs_data_store.NFSDataStoreParameters</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/data_stores/nfs_data_store.html#NFSDataStore"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.data_stores.nfs_data_store.NFSDataStore" title="Permalink to this definition">¶</a></dt>
<dd><p>An implementation of a data store which uses NFS for storing policy checkpoints when using Coach in distributed mode.
The policy checkpoints are written by the trainer and read by the rollout worker.</p>
<dl class="field-list simple">

@@ -195,7 +195,7 @@
<h1>Environments<a class="headerlink" href="#environments" title="Permalink to this headline">¶</a></h1>
<dl class="class">
<dt id="rl_coach.environments.environment.Environment">
<em class="property">class </em><code class="descclassname">rl_coach.environments.environment.</code><code class="descname">Environment</code><span class="sig-paren">(</span><em>level: rl_coach.environments.environment.LevelSelection, seed: int, frame_skip: int, human_control: bool, custom_reward_threshold: Union[int, float], visualization_parameters: rl_coach.base_parameters.VisualizationParameters, target_success_rate: float = 1.0, **kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/environments/environment.html#Environment"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.environments.environment.Environment" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.environments.environment.</code><code class="sig-name descname">Environment</code><span class="sig-paren">(</span><em class="sig-param">level: rl_coach.environments.environment.LevelSelection, seed: int, frame_skip: int, human_control: bool, custom_reward_threshold: Union[int, float], visualization_parameters: rl_coach.base_parameters.VisualizationParameters, target_success_rate: float = 1.0, **kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/environments/environment.html#Environment"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.environments.environment.Environment" title="Permalink to this definition">¶</a></dt>
<dd><dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
@@ -210,9 +210,9 @@ additional arguments which will be ignored by this class, but might be used by o
</ul>
</dd>
</dl>
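<p>Putting the methods below together, a rough interaction-loop sketch; it assumes <code>env</code> is an already-instantiated <code>Environment</code> subclass (the constructor arguments normally come from a preset):</p>
<div class="highlight"><pre>
response = env.reset_internal_state(force_environment_reset=True)
while not response.game_over:
    action = env.get_random_action()  # sampled uniformly from env.action_space
    response = env.step(action)       # returns an EnvResponse
env.handle_episode_ended()
env.close()
</pre></div>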
<dl class="attribute">
<dl class="method">
<dt id="rl_coach.environments.environment.Environment.action_space">
<code class="descname">action_space</code><a class="headerlink" href="#rl_coach.environments.environment.Environment.action_space" title="Permalink to this definition">¶</a></dt>
<em class="property">property </em><code class="sig-name descname">action_space</code><a class="headerlink" href="#rl_coach.environments.environment.Environment.action_space" title="Permalink to this definition">¶</a></dt>
<dd><p>Get the action space of the environment</p>
<dl class="field-list simple">
<dt class="field-odd">Returns</dt>
@@ -223,7 +223,7 @@ additional arguments which will be ignored by this class, but might be used by o

<dl class="method">
<dt id="rl_coach.environments.environment.Environment.close">
<code class="descname">close</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → None<a class="reference internal" href="../../_modules/rl_coach/environments/environment.html#Environment.close"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.environments.environment.Environment.close" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">close</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → None<a class="reference internal" href="../../_modules/rl_coach/environments/environment.html#Environment.close"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.environments.environment.Environment.close" title="Permalink to this definition">¶</a></dt>
<dd><p>Clean up steps.</p>
<dl class="field-list simple">
<dt class="field-odd">Returns</dt>
@@ -234,7 +234,7 @@ additional arguments which will be ignored by this class, but might be used by o

<dl class="method">
<dt id="rl_coach.environments.environment.Environment.get_action_from_user">
<code class="descname">get_action_from_user</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → Union[int, float, numpy.ndarray, List]<a class="reference internal" href="../../_modules/rl_coach/environments/environment.html#Environment.get_action_from_user"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.environments.environment.Environment.get_action_from_user" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">get_action_from_user</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → Union[int, float, numpy.ndarray, List]<a class="reference internal" href="../../_modules/rl_coach/environments/environment.html#Environment.get_action_from_user"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.environments.environment.Environment.get_action_from_user" title="Permalink to this definition">¶</a></dt>
<dd><p>Get an action from the user keyboard</p>
<dl class="field-list simple">
<dt class="field-odd">Returns</dt>
@@ -245,7 +245,7 @@ additional arguments which will be ignored by this class, but might be used by o

<dl class="method">
<dt id="rl_coach.environments.environment.Environment.get_available_keys">
<code class="descname">get_available_keys</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → List[Tuple[str, Union[int, float, numpy.ndarray, List]]]<a class="reference internal" href="../../_modules/rl_coach/environments/environment.html#Environment.get_available_keys"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.environments.environment.Environment.get_available_keys" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">get_available_keys</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → List[Tuple[str, Union[int, float, numpy.ndarray, List]]]<a class="reference internal" href="../../_modules/rl_coach/environments/environment.html#Environment.get_available_keys"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.environments.environment.Environment.get_available_keys" title="Permalink to this definition">¶</a></dt>
<dd><p>Return a list of tuples mapping between action names and the keyboard key that triggers them</p>
<dl class="field-list simple">
<dt class="field-odd">Returns</dt>
@@ -256,7 +256,7 @@ additional arguments which will be ignored by this class, but might be used by o

<dl class="method">
<dt id="rl_coach.environments.environment.Environment.get_goal">
<code class="descname">get_goal</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → Union[None, numpy.ndarray]<a class="reference internal" href="../../_modules/rl_coach/environments/environment.html#Environment.get_goal"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.environments.environment.Environment.get_goal" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">get_goal</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → Union[None, numpy.ndarray]<a class="reference internal" href="../../_modules/rl_coach/environments/environment.html#Environment.get_goal"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.environments.environment.Environment.get_goal" title="Permalink to this definition">¶</a></dt>
<dd><p>Get the current goal that the agent needs to achieve in the environment</p>
<dl class="field-list simple">
<dt class="field-odd">Returns</dt>
@@ -267,7 +267,7 @@ additional arguments which will be ignored by this class, but might be used by o

<dl class="method">
<dt id="rl_coach.environments.environment.Environment.get_random_action">
<code class="descname">get_random_action</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → Union[int, float, numpy.ndarray, List]<a class="reference internal" href="../../_modules/rl_coach/environments/environment.html#Environment.get_random_action"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.environments.environment.Environment.get_random_action" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">get_random_action</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → Union[int, float, numpy.ndarray, List]<a class="reference internal" href="../../_modules/rl_coach/environments/environment.html#Environment.get_random_action"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.environments.environment.Environment.get_random_action" title="Permalink to this definition">¶</a></dt>
<dd><p>Returns an action picked uniformly from the available actions</p>
<dl class="field-list simple">
<dt class="field-odd">Returns</dt>
@@ -278,7 +278,7 @@ additional arguments which will be ignored by this class, but might be used by o

<dl class="method">
<dt id="rl_coach.environments.environment.Environment.get_rendered_image">
<code class="descname">get_rendered_image</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → numpy.ndarray<a class="reference internal" href="../../_modules/rl_coach/environments/environment.html#Environment.get_rendered_image"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.environments.environment.Environment.get_rendered_image" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">get_rendered_image</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → numpy.ndarray<a class="reference internal" href="../../_modules/rl_coach/environments/environment.html#Environment.get_rendered_image"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.environments.environment.Environment.get_rendered_image" title="Permalink to this definition">¶</a></dt>
<dd><p>Return a numpy array containing the image that will be rendered to the screen.
This can be different from the observation. For example, mujoco’s observation is a measurements vector.</p>
<dl class="field-list simple">
@@ -288,9 +288,9 @@ This can be different from the observation. For example, mujoco’s observation
</dl>
</dd></dl>

<dl class="attribute">
<dl class="method">
<dt id="rl_coach.environments.environment.Environment.goal_space">
<code class="descname">goal_space</code><a class="headerlink" href="#rl_coach.environments.environment.Environment.goal_space" title="Permalink to this definition">¶</a></dt>
<em class="property">property </em><code class="sig-name descname">goal_space</code><a class="headerlink" href="#rl_coach.environments.environment.Environment.goal_space" title="Permalink to this definition">¶</a></dt>
<dd><p>Get the goal space of the environment</p>
<dl class="field-list simple">
<dt class="field-odd">Returns</dt>
@@ -301,7 +301,7 @@ This can be different from the observation. For example, mujoco’s observation

<dl class="method">
<dt id="rl_coach.environments.environment.Environment.handle_episode_ended">
<code class="descname">handle_episode_ended</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → None<a class="reference internal" href="../../_modules/rl_coach/environments/environment.html#Environment.handle_episode_ended"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.environments.environment.Environment.handle_episode_ended" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">handle_episode_ended</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → None<a class="reference internal" href="../../_modules/rl_coach/environments/environment.html#Environment.handle_episode_ended"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.environments.environment.Environment.handle_episode_ended" title="Permalink to this definition">¶</a></dt>
<dd><p>End an episode</p>
<dl class="field-list simple">
<dt class="field-odd">Returns</dt>
@@ -310,9 +310,9 @@ This can be different from the observation. For example, mujoco’s observation
</dl>
</dd></dl>

<dl class="attribute">
<dl class="method">
<dt id="rl_coach.environments.environment.Environment.last_env_response">
<code class="descname">last_env_response</code><a class="headerlink" href="#rl_coach.environments.environment.Environment.last_env_response" title="Permalink to this definition">¶</a></dt>
<em class="property">property </em><code class="sig-name descname">last_env_response</code><a class="headerlink" href="#rl_coach.environments.environment.Environment.last_env_response" title="Permalink to this definition">¶</a></dt>
<dd><p>Get the last environment response</p>
<dl class="field-list simple">
<dt class="field-odd">Returns</dt>
@@ -321,16 +321,16 @@ This can be different from the observation. For example, mujoco’s observation
</dl>
</dd></dl>

<dl class="attribute">
<dl class="method">
<dt id="rl_coach.environments.environment.Environment.phase">
<code class="descname">phase</code><a class="headerlink" href="#rl_coach.environments.environment.Environment.phase" title="Permalink to this definition">¶</a></dt>
<em class="property">property </em><code class="sig-name descname">phase</code><a class="headerlink" href="#rl_coach.environments.environment.Environment.phase" title="Permalink to this definition">¶</a></dt>
<dd><p>Get the phase of the environment</p>
<dl class="field-list simple">
<dt class="field-odd">Returns</dt>
<dd class="field-odd"><p>the current phase</p>
</dd>
</dl>
</dd></dl>

<dl class="method">
<dt id="rl_coach.environments.environment.Environment.render">
<code class="descname">render</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → None<a class="reference internal" href="../../_modules/rl_coach/environments/environment.html#Environment.render"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.environments.environment.Environment.render" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">render</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → None<a class="reference internal" href="../../_modules/rl_coach/environments/environment.html#Environment.render"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.environments.environment.Environment.render" title="Permalink to this definition">¶</a></dt>
<dd><p>Call the environment function for rendering to the screen</p>
<dl class="field-list simple">
<dt class="field-odd">Returns</dt>
@@ -341,7 +341,7 @@ This can be different from the observation. For example, mujoco’s observation

<dl class="method">
<dt id="rl_coach.environments.environment.Environment.reset_internal_state">
<code class="descname">reset_internal_state</code><span class="sig-paren">(</span><em>force_environment_reset=False</em><span class="sig-paren">)</span> → rl_coach.core_types.EnvResponse<a class="reference internal" href="../../_modules/rl_coach/environments/environment.html#Environment.reset_internal_state"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.environments.environment.Environment.reset_internal_state" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">reset_internal_state</code><span class="sig-paren">(</span><em class="sig-param">force_environment_reset=False</em><span class="sig-paren">)</span> → rl_coach.core_types.EnvResponse<a class="reference internal" href="../../_modules/rl_coach/environments/environment.html#Environment.reset_internal_state"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.environments.environment.Environment.reset_internal_state" title="Permalink to this definition">¶</a></dt>
<dd><p>Reset the environment and all the variables of the wrapper</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
@@ -355,7 +355,7 @@ This can be different from the observation. For example, mujoco’s observation

<dl class="method">
<dt id="rl_coach.environments.environment.Environment.set_goal">
<code class="descname">set_goal</code><span class="sig-paren">(</span><em>goal: Union[None, numpy.ndarray]</em><span class="sig-paren">)</span> → None<a class="reference internal" href="../../_modules/rl_coach/environments/environment.html#Environment.set_goal"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.environments.environment.Environment.set_goal" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">set_goal</code><span class="sig-paren">(</span><em class="sig-param">goal: Union[None, numpy.ndarray]</em><span class="sig-paren">)</span> → None<a class="reference internal" href="../../_modules/rl_coach/environments/environment.html#Environment.set_goal"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.environments.environment.Environment.set_goal" title="Permalink to this definition">¶</a></dt>
<dd><p>Set the current goal that the agent needs to achieve in the environment</p>
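<p>For goal-based environments the setter pairs with <code>get_goal()</code> above; a small sketch with an illustrative goal vector:</p>
<div class="highlight"><pre>
import numpy as np

env.set_goal(np.array([0.5, 0.1]))  # the goal the agent should reach
current_goal = env.get_goal()       # np.array([0.5, 0.1])
</pre></div>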
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
@@ -367,9 +367,9 @@ This can be different from the observation. For example, mujoco’s observation
</dl>
</dd></dl>

<dl class="attribute">
<dl class="method">
<dt id="rl_coach.environments.environment.Environment.state_space">
<code class="descname">state_space</code><a class="headerlink" href="#rl_coach.environments.environment.Environment.state_space" title="Permalink to this definition">¶</a></dt>
<em class="property">property </em><code class="sig-name descname">state_space</code><a class="headerlink" href="#rl_coach.environments.environment.Environment.state_space" title="Permalink to this definition">¶</a></dt>
<dd><p>Get the state space of the environment</p>
<dl class="field-list simple">
<dt class="field-odd">Returns</dt>
@@ -380,7 +380,7 @@ This can be different from the observation. For example, mujoco’s observation

<dl class="method">
<dt id="rl_coach.environments.environment.Environment.step">
<code class="descname">step</code><span class="sig-paren">(</span><em>action: Union[int, float, numpy.ndarray, List]</em><span class="sig-paren">)</span> → rl_coach.core_types.EnvResponse<a class="reference internal" href="../../_modules/rl_coach/environments/environment.html#Environment.step"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.environments.environment.Environment.step" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">step</code><span class="sig-paren">(</span><em class="sig-param">action: Union[int, float, numpy.ndarray, List]</em><span class="sig-paren">)</span> → rl_coach.core_types.EnvResponse<a class="reference internal" href="../../_modules/rl_coach/environments/environment.html#Environment.step"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.environments.environment.Environment.step" title="Permalink to this definition">¶</a></dt>
<dd><p>Make a single step in the environment using the given action</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
@@ -400,7 +400,7 @@ This can be different from the observation. For example, mujoco’s observation
<p>Website: <a class="reference external" href="https://github.com/deepmind/dm_control">DeepMind Control Suite</a></p>
<dl class="class">
<dt id="rl_coach.environments.control_suite_environment.ControlSuiteEnvironment">
<em class="property">class </em><code class="descclassname">rl_coach.environments.control_suite_environment.</code><code class="descname">ControlSuiteEnvironment</code><span class="sig-paren">(</span><em>level: rl_coach.environments.environment.LevelSelection</em>, <em>frame_skip: int</em>, <em>visualization_parameters: rl_coach.base_parameters.VisualizationParameters</em>, <em>target_success_rate: float = 1.0</em>, <em>seed: Union[None</em>, <em>int] = None</em>, <em>human_control: bool = False</em>, <em>observation_type: rl_coach.environments.control_suite_environment.ObservationType = <ObservationType.Measurements: 1></em>, <em>custom_reward_threshold: Union[int</em>, <em>float] = None</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/environments/control_suite_environment.html#ControlSuiteEnvironment"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.environments.control_suite_environment.ControlSuiteEnvironment" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.environments.control_suite_environment.</code><code class="sig-name descname">ControlSuiteEnvironment</code><span class="sig-paren">(</span><em class="sig-param">level: rl_coach.environments.environment.LevelSelection</em>, <em class="sig-param">frame_skip: int</em>, <em class="sig-param">visualization_parameters: rl_coach.base_parameters.VisualizationParameters</em>, <em class="sig-param">target_success_rate: float = 1.0</em>, <em class="sig-param">seed: Union[None</em>, <em class="sig-param">int] = None</em>, <em class="sig-param">human_control: bool = False</em>, <em class="sig-param">observation_type: rl_coach.environments.control_suite_environment.ObservationType = <ObservationType.Measurements: 1></em>, <em class="sig-param">custom_reward_threshold: Union[int</em>, <em class="sig-param">float] = None</em>, <em class="sig-param">**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/environments/control_suite_environment.html#ControlSuiteEnvironment"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.environments.control_suite_environment.ControlSuiteEnvironment" title="Permalink to this definition">¶</a></dt>
<dd><dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
@@ -438,7 +438,7 @@ Allows defining a custom reward that will be used to decide when the agent succe
<p>Website: <a class="reference external" href="https://github.com/deepmind/pysc2">Blizzard Starcraft II</a></p>
<dl class="class">
<dt id="rl_coach.environments.starcraft2_environment.StarCraft2Environment">
<em class="property">class </em><code class="descclassname">rl_coach.environments.starcraft2_environment.</code><code class="descname">StarCraft2Environment</code><span class="sig-paren">(</span><em>level: rl_coach.environments.environment.LevelSelection</em>, <em>frame_skip: int</em>, <em>visualization_parameters: rl_coach.base_parameters.VisualizationParameters</em>, <em>target_success_rate: float = 1.0</em>, <em>seed: Union[None</em>, <em>int] = None</em>, <em>human_control: bool = False</em>, <em>custom_reward_threshold: Union[int</em>, <em>float] = None</em>, <em>screen_size: int = 84</em>, <em>minimap_size: int = 64</em>, <em>feature_minimap_maps_to_use: List = range(0</em>, <em>7)</em>, <em>feature_screen_maps_to_use: List = range(0</em>, <em>17)</em>, <em>observation_type: rl_coach.environments.starcraft2_environment.StarcraftObservationType = <StarcraftObservationType.Features: 0></em>, <em>disable_fog: bool = False</em>, <em>auto_select_all_army: bool = True</em>, <em>use_full_action_space: bool = False</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/environments/starcraft2_environment.html#StarCraft2Environment"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.environments.starcraft2_environment.StarCraft2Environment" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.environments.starcraft2_environment.</code><code class="sig-name descname">StarCraft2Environment</code><span class="sig-paren">(</span><em class="sig-param">level: rl_coach.environments.environment.LevelSelection</em>, <em class="sig-param">frame_skip: int</em>, <em class="sig-param">visualization_parameters: rl_coach.base_parameters.VisualizationParameters</em>, <em class="sig-param">target_success_rate: float = 1.0</em>, <em class="sig-param">seed: Union[None</em>, <em class="sig-param">int] = None</em>, <em class="sig-param">human_control: bool = False</em>, <em class="sig-param">custom_reward_threshold: Union[int</em>, <em class="sig-param">float] = None</em>, <em class="sig-param">screen_size: int = 84</em>, <em class="sig-param">minimap_size: int = 64</em>, <em class="sig-param">feature_minimap_maps_to_use: List = range(0</em>, <em class="sig-param">7)</em>, <em class="sig-param">feature_screen_maps_to_use: List = range(0</em>, <em class="sig-param">17)</em>, <em class="sig-param">observation_type: rl_coach.environments.starcraft2_environment.StarcraftObservationType = <StarcraftObservationType.Features: 0></em>, <em class="sig-param">disable_fog: bool = False</em>, <em class="sig-param">auto_select_all_army: bool = True</em>, <em class="sig-param">use_full_action_space: bool = False</em>, <em class="sig-param">**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/environments/starcraft2_environment.html#StarCraft2Environment"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.environments.starcraft2_environment.StarCraft2Environment" title="Permalink to this definition">¶</a></dt>
<dd></dd></dl>
</div>
@@ -448,7 +448,7 @@ Allows defining a custom reward that will be used to decide when the agent succe
<p>Website: <a class="reference external" href="http://vizdoom.cs.put.edu.pl/">ViZDoom</a></p>
<dl class="class">
<dt id="rl_coach.environments.doom_environment.DoomEnvironment">
<em class="property">class </em><code class="descclassname">rl_coach.environments.doom_environment.</code><code class="descname">DoomEnvironment</code><span class="sig-paren">(</span><em>level: rl_coach.environments.environment.LevelSelection, seed: int, frame_skip: int, human_control: bool, custom_reward_threshold: Union[int, float], visualization_parameters: rl_coach.base_parameters.VisualizationParameters, cameras: List[rl_coach.environments.doom_environment.DoomEnvironment.CameraTypes], target_success_rate: float = 1.0, **kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/environments/doom_environment.html#DoomEnvironment"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.environments.doom_environment.DoomEnvironment" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.environments.doom_environment.</code><code class="sig-name descname">DoomEnvironment</code><span class="sig-paren">(</span><em class="sig-param">level: rl_coach.environments.environment.LevelSelection, seed: int, frame_skip: int, human_control: bool, custom_reward_threshold: Union[int, float], visualization_parameters: rl_coach.base_parameters.VisualizationParameters, cameras: List[rl_coach.environments.doom_environment.DoomEnvironment.CameraTypes], target_success_rate: float = 1.0, **kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/environments/doom_environment.html#DoomEnvironment"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.environments.doom_environment.DoomEnvironment" title="Permalink to this definition">¶</a></dt>
<dd><dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
@@ -491,7 +491,7 @@ Stop experiment if given target success rate was achieved.</p>
<p>Website: <a class="reference external" href="https://github.com/carla-simulator/carla">CARLA</a></p>
<dl class="class">
<dt id="rl_coach.environments.carla_environment.CarlaEnvironment">
<em class="property">class </em><code class="descclassname">rl_coach.environments.carla_environment.</code><code class="descname">CarlaEnvironment</code><span class="sig-paren">(</span><em>level: rl_coach.environments.environment.LevelSelection, seed: int, frame_skip: int, human_control: bool, custom_reward_threshold: Union[int, float], visualization_parameters: rl_coach.base_parameters.VisualizationParameters, server_height: int, server_width: int, camera_height: int, camera_width: int, verbose: bool, experiment_suite: carla.driving_benchmark.experiment_suites.experiment_suite.ExperimentSuite, config: str, episode_max_time: int, allow_braking: bool, quality: rl_coach.environments.carla_environment.CarlaEnvironmentParameters.Quality, cameras: List[rl_coach.environments.carla_environment.CameraTypes], weather_id: List[int], experiment_path: str, separate_actions_for_throttle_and_brake: bool, num_speedup_steps: int, max_speed: float, target_success_rate: float = 1.0, **kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/environments/carla_environment.html#CarlaEnvironment"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.environments.carla_environment.CarlaEnvironment" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.environments.carla_environment.</code><code class="sig-name descname">CarlaEnvironment</code><span class="sig-paren">(</span><em class="sig-param">level: rl_coach.environments.environment.LevelSelection, seed: int, frame_skip: int, human_control: bool, custom_reward_threshold: Union[int, float], visualization_parameters: rl_coach.base_parameters.VisualizationParameters, server_height: int, server_width: int, camera_height: int, camera_width: int, verbose: bool, experiment_suite: carla.driving_benchmark.experiment_suites.experiment_suite.ExperimentSuite, config: str, episode_max_time: int, allow_braking: bool, quality: rl_coach.environments.carla_environment.CarlaEnvironmentParameters.Quality, cameras: List[rl_coach.environments.carla_environment.CameraTypes], weather_id: List[int], experiment_path: str, separate_actions_for_throttle_and_brake: bool, num_speedup_steps: int, max_speed: float, target_success_rate: float = 1.0, **kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/environments/carla_environment.html#CarlaEnvironment"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.environments.carla_environment.CarlaEnvironment" title="Permalink to this definition">¶</a></dt>
<dd></dd></dl>
</div>
@@ -511,7 +511,7 @@ includes a set of robotics environments.</p></li>
</ul>
<dl class="class">
<dt id="rl_coach.environments.gym_environment.GymEnvironment">
<em class="property">class </em><code class="descclassname">rl_coach.environments.gym_environment.</code><code class="descname">GymEnvironment</code><span class="sig-paren">(</span><em>level: rl_coach.environments.environment.LevelSelection</em>, <em>frame_skip: int</em>, <em>visualization_parameters: rl_coach.base_parameters.VisualizationParameters</em>, <em>target_success_rate: float = 1.0</em>, <em>additional_simulator_parameters: Dict[str</em>, <em>Any] = {}</em>, <em>seed: Union[None</em>, <em>int] = None</em>, <em>human_control: bool = False</em>, <em>custom_reward_threshold: Union[int</em>, <em>float] = None</em>, <em>random_initialization_steps: int = 1</em>, <em>max_over_num_frames: int = 1</em>, <em>observation_space_type: rl_coach.environments.gym_environment.ObservationSpaceType = None</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/environments/gym_environment.html#GymEnvironment"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.environments.gym_environment.GymEnvironment" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.environments.gym_environment.</code><code class="sig-name descname">GymEnvironment</code><span class="sig-paren">(</span><em class="sig-param">level: rl_coach.environments.environment.LevelSelection</em>, <em class="sig-param">frame_skip: int</em>, <em class="sig-param">visualization_parameters: rl_coach.base_parameters.VisualizationParameters</em>, <em class="sig-param">target_success_rate: float = 1.0</em>, <em class="sig-param">additional_simulator_parameters: Dict[str</em>, <em class="sig-param">Any] = {}</em>, <em class="sig-param">seed: Union[None</em>, <em class="sig-param">int] = None</em>, <em class="sig-param">human_control: bool = False</em>, <em class="sig-param">custom_reward_threshold: Union[int</em>, <em class="sig-param">float] = None</em>, <em class="sig-param">random_initialization_steps: int = 1</em>, <em class="sig-param">max_over_num_frames: int = 1</em>, <em class="sig-param">observation_space_type: rl_coach.environments.gym_environment.ObservationSpaceType = None</em>, <em class="sig-param">**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/environments/gym_environment.html#GymEnvironment"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.environments.gym_environment.GymEnvironment" title="Permalink to this definition">¶</a></dt>
<dd><dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
@@ -205,7 +205,7 @@ predefined policy. This is one of the most important aspects of reinforcement le
tuning to get it right. Coach supports several pre-defined exploration policies, and it can be easily extended with
custom policies. Note that not all exploration policies are expected to work for both discrete and continuous action
spaces.</p>
<table class="docutils align-center">
<table class="docutils align-default">
<colgroup>
<col style="width: 35%" />
<col style="width: 37%" />
@@ -268,7 +268,7 @@ spaces.</p>
<h2>ExplorationPolicy<a class="headerlink" href="#explorationpolicy" title="Permalink to this headline">¶</a></h2>
<dl class="class">
<dt id="rl_coach.exploration_policies.exploration_policy.ExplorationPolicy">
<em class="property">class </em><code class="descclassname">rl_coach.exploration_policies.exploration_policy.</code><code class="descname">ExplorationPolicy</code><span class="sig-paren">(</span><em>action_space: rl_coach.spaces.ActionSpace</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/exploration_policies/exploration_policy.html#ExplorationPolicy"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.exploration_policies.exploration_policy.ExplorationPolicy" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.exploration_policies.exploration_policy.</code><code class="sig-name descname">ExplorationPolicy</code><span class="sig-paren">(</span><em class="sig-param">action_space: rl_coach.spaces.ActionSpace</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/exploration_policies/exploration_policy.html#ExplorationPolicy"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.exploration_policies.exploration_policy.ExplorationPolicy" title="Permalink to this definition">¶</a></dt>
<dd><p>An exploration policy takes the predicted actions or action values from the agent, and selects the action to
actually apply to the environment using some predefined algorithm.</p>
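<p>To make this interface concrete, here is a minimal, self-contained sketch of a policy object that mirrors the documented methods (get_action, change_phase, requires_action_values, reset). The class and its tie-breaking rule are illustrative only, not part of rl_coach:</p>
<pre>
import numpy as np

class RandomTieBreakPolicy:  # hypothetical example, not an rl_coach class
    """Pick the highest-valued discrete action, breaking ties at random."""
    def __init__(self, num_actions):
        self.num_actions = num_actions
        self.phase = None

    def change_phase(self, phase):
        self.phase = phase  # e.g. switch between heatup / train / evaluation

    def requires_action_values(self):
        return True  # this policy always needs the per-action values

    def get_action(self, action_values):
        values = np.asarray(action_values)
        best = np.flatnonzero(values == values.max())
        action = int(np.random.choice(best))
        return action, 1.0 / len(best)  # the action and its selection probability

    def reset(self):
        pass  # no internal state to reset in this sketch
</pre>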
<dl class="field-list simple">
@@ -278,7 +278,7 @@ actually apply to the environment using some predefined algorithm.</p>
</dl>
<dl class="method">
<dt id="rl_coach.exploration_policies.exploration_policy.ExplorationPolicy.change_phase">
<code class="descname">change_phase</code><span class="sig-paren">(</span><em>phase</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/exploration_policies/exploration_policy.html#ExplorationPolicy.change_phase"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.exploration_policies.exploration_policy.ExplorationPolicy.change_phase" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">change_phase</code><span class="sig-paren">(</span><em class="sig-param">phase</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/exploration_policies/exploration_policy.html#ExplorationPolicy.change_phase"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.exploration_policies.exploration_policy.ExplorationPolicy.change_phase" title="Permalink to this definition">¶</a></dt>
<dd><p>Change between running phases of the algorithm
:param phase: Either Heatup or Train
:return: None</p>
@@ -286,16 +286,19 @@ actually apply to the environment using some predefined algorithm.</p>
<dl class="method">
<dt id="rl_coach.exploration_policies.exploration_policy.ExplorationPolicy.get_action">
<code class="descname">get_action</code><span class="sig-paren">(</span><em>action_values: List[Union[int, float, numpy.ndarray, List]]</em><span class="sig-paren">)</span> → Union[int, float, numpy.ndarray, List]<a class="reference internal" href="../../_modules/rl_coach/exploration_policies/exploration_policy.html#ExplorationPolicy.get_action"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.exploration_policies.exploration_policy.ExplorationPolicy.get_action" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">get_action</code><span class="sig-paren">(</span><em class="sig-param">action_values: List[Union[int, float, numpy.ndarray, List]]</em><span class="sig-paren">)</span> → Union[int, float, numpy.ndarray, List]<a class="reference internal" href="../../_modules/rl_coach/exploration_policies/exploration_policy.html#ExplorationPolicy.get_action"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.exploration_policies.exploration_policy.ExplorationPolicy.get_action" title="Permalink to this definition">¶</a></dt>
<dd><p>Given a list of values corresponding to each action,
chooses one action according to the exploration policy
:param action_values: A list of action values
:return: The chosen action</p>
:return: The chosen action,</p>
<blockquote>
<div><p>The probability of the action (if available, otherwise 1 for absolute certainty in the action)</p>
</div></blockquote>
</dd></dl>
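<p>A hypothetical call to the sketch policy above, illustrating the documented return of the chosen action together with its probability:</p>
<pre>
policy = RandomTieBreakPolicy(num_actions=4)
action, probability = policy.get_action([0.1, 0.5, 0.5, -0.2])
# two actions tie for the maximum, so probability == 0.5 and action is 1 or 2
</pre>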
<dl class="method">
<dt id="rl_coach.exploration_policies.exploration_policy.ExplorationPolicy.requires_action_values">
<code class="descname">requires_action_values</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → bool<a class="reference internal" href="../../_modules/rl_coach/exploration_policies/exploration_policy.html#ExplorationPolicy.requires_action_values"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.exploration_policies.exploration_policy.ExplorationPolicy.requires_action_values" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">requires_action_values</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → bool<a class="reference internal" href="../../_modules/rl_coach/exploration_policies/exploration_policy.html#ExplorationPolicy.requires_action_values"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.exploration_policies.exploration_policy.ExplorationPolicy.requires_action_values" title="Permalink to this definition">¶</a></dt>
<dd><p>Allows exploration policies to define if they require the action values for the current step.
This can save a lot of computation. For example in e-greedy, if the random value generated is smaller
than epsilon, the action is completely random, and the action values don’t need to be calculated
@@ -304,7 +307,7 @@ than epsilon, the action is completely random, and the action values don’t nee
<dl class="method">
<dt id="rl_coach.exploration_policies.exploration_policy.ExplorationPolicy.reset">
<code class="descname">reset</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/exploration_policies/exploration_policy.html#ExplorationPolicy.reset"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.exploration_policies.exploration_policy.ExplorationPolicy.reset" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">reset</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/exploration_policies/exploration_policy.html#ExplorationPolicy.reset"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.exploration_policies.exploration_policy.ExplorationPolicy.reset" title="Permalink to this definition">¶</a></dt>
<dd><p>Used for resetting the exploration policy parameters when needed
:return: None</p>
</dd></dl>
@@ -316,7 +319,7 @@ than epsilon, the action is completely random, and the action values don’t nee
<h2>AdditiveNoise<a class="headerlink" href="#additivenoise" title="Permalink to this headline">¶</a></h2>
<dl class="class">
<dt id="rl_coach.exploration_policies.additive_noise.AdditiveNoise">
<em class="property">class </em><code class="descclassname">rl_coach.exploration_policies.additive_noise.</code><code class="descname">AdditiveNoise</code><span class="sig-paren">(</span><em>action_space: rl_coach.spaces.ActionSpace</em>, <em>noise_percentage_schedule: rl_coach.schedules.Schedule</em>, <em>evaluation_noise_percentage: float</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/exploration_policies/additive_noise.html#AdditiveNoise"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.exploration_policies.additive_noise.AdditiveNoise" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.exploration_policies.additive_noise.</code><code class="sig-name descname">AdditiveNoise</code><span class="sig-paren">(</span><em class="sig-param">action_space: rl_coach.spaces.ActionSpace</em>, <em class="sig-param">noise_schedule: rl_coach.schedules.Schedule</em>, <em class="sig-param">evaluation_noise: float</em>, <em class="sig-param">noise_as_percentage_from_action_space: bool = True</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/exploration_policies/additive_noise.html#AdditiveNoise"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.exploration_policies.additive_noise.AdditiveNoise" title="Permalink to this definition">¶</a></dt>
<dd><p>AdditiveNoise is an exploration policy intended for continuous action spaces. It takes the action from the agent
and adds Gaussian-distributed noise to it. The amount of noise added to the action can be given in two different ways:
@@ -327,9 +330,10 @@ be the mean of the action, and 2nd is assumed to be its standard deviation.</p>
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>action_space</strong> – the action space used by the environment</p></li>
<li><p><strong>noise_percentage_schedule</strong> – the schedule for the noise variance percentage relative to the absolute range
of the action space</p></li>
<li><p><strong>evaluation_noise_percentage</strong> – the noise variance percentage that will be used during evaluation phases</p></li>
<li><p><strong>noise_schedule</strong> – the schedule for the noise</p></li>
<li><p><strong>evaluation_noise</strong> – the noise variance that will be used during evaluation phases</p></li>
<li><p><strong>noise_as_percentage_from_action_space</strong> – a bool deciding whether the noise is absolute or as a percentage
from the action space</p></li>
</ul>
</dd>
</dl>
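<p>The following is a minimal numpy sketch of the mechanism described above, not the library's implementation. It assumes the scheduled noise value acts as the standard deviation of the added Gaussian, scaled by the action space range when noise_as_percentage_from_action_space is set; the helper name and the final clipping to the action bounds are assumptions of the sketch:</p>
<pre>
import numpy as np

def additive_noise_action(agent_action, noise, low, high, as_percentage=True):
    # interpret the scheduled noise relative to the action space range if requested
    sigma = noise * (high - low) if as_percentage else noise
    noisy = agent_action + np.random.normal(0.0, sigma)
    return np.clip(noisy, low, high)  # keep the action inside the space bounds

action = additive_noise_action(agent_action=0.3, noise=0.05, low=-1.0, high=1.0)
</pre>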
@@ -340,7 +344,7 @@ of the action space</p></li>
<h2>Boltzmann<a class="headerlink" href="#boltzmann" title="Permalink to this headline">¶</a></h2>
<dl class="class">
<dt id="rl_coach.exploration_policies.boltzmann.Boltzmann">
<em class="property">class </em><code class="descclassname">rl_coach.exploration_policies.boltzmann.</code><code class="descname">Boltzmann</code><span class="sig-paren">(</span><em>action_space: rl_coach.spaces.ActionSpace</em>, <em>temperature_schedule: rl_coach.schedules.Schedule</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/exploration_policies/boltzmann.html#Boltzmann"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.exploration_policies.boltzmann.Boltzmann" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.exploration_policies.boltzmann.</code><code class="sig-name descname">Boltzmann</code><span class="sig-paren">(</span><em class="sig-param">action_space: rl_coach.spaces.ActionSpace</em>, <em class="sig-param">temperature_schedule: rl_coach.schedules.Schedule</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/exploration_policies/boltzmann.html#Boltzmann"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.exploration_policies.boltzmann.Boltzmann" title="Permalink to this definition">¶</a></dt>
<dd><p>The Boltzmann exploration policy is intended for discrete action spaces. It assumes that each of the possible
actions has some value assigned to it (such as the Q value), and uses a softmax function to convert these values
into a distribution over the actions. It then samples the action for playing out of the calculated distribution.
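<p>A minimal numpy sketch of this softmax sampling, assuming the temperature divides the action values as in a standard Boltzmann distribution (the function name is illustrative):</p>
<pre>
import numpy as np

def boltzmann_action(action_values, temperature):
    logits = np.asarray(action_values, dtype=np.float64) / temperature
    logits -= logits.max()  # subtract the max for numerical stability
    probs = np.exp(logits) / np.exp(logits).sum()
    return int(np.random.choice(len(probs), p=probs)), probs

action, probs = boltzmann_action([1.0, 2.0, 3.0], temperature=0.5)
# lower temperatures concentrate the distribution on the highest-valued action
</pre>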
@@ -360,7 +364,7 @@ An additional temperature schedule can be given by the user, and will control th
<h2>Bootstrapped<a class="headerlink" href="#bootstrapped" title="Permalink to this headline">¶</a></h2>
<dl class="class">
<dt id="rl_coach.exploration_policies.bootstrapped.Bootstrapped">
<em class="property">class </em><code class="descclassname">rl_coach.exploration_policies.bootstrapped.</code><code class="descname">Bootstrapped</code><span class="sig-paren">(</span><em>action_space: rl_coach.spaces.ActionSpace</em>, <em>epsilon_schedule: rl_coach.schedules.Schedule</em>, <em>evaluation_epsilon: float</em>, <em>architecture_num_q_heads: int</em>, <em>continuous_exploration_policy_parameters: rl_coach.exploration_policies.exploration_policy.ExplorationParameters = <rl_coach.exploration_policies.additive_noise.AdditiveNoiseParameters object></em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/exploration_policies/bootstrapped.html#Bootstrapped"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.exploration_policies.bootstrapped.Bootstrapped" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.exploration_policies.bootstrapped.</code><code class="sig-name descname">Bootstrapped</code><span class="sig-paren">(</span><em class="sig-param">action_space: rl_coach.spaces.ActionSpace</em>, <em class="sig-param">epsilon_schedule: rl_coach.schedules.Schedule</em>, <em class="sig-param">evaluation_epsilon: float</em>, <em class="sig-param">architecture_num_q_heads: int</em>, <em class="sig-param">continuous_exploration_policy_parameters: rl_coach.exploration_policies.exploration_policy.ExplorationParameters = <rl_coach.exploration_policies.additive_noise.AdditiveNoiseParameters object></em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/exploration_policies/bootstrapped.html#Bootstrapped"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.exploration_policies.bootstrapped.Bootstrapped" title="Permalink to this definition">¶</a></dt>
<dd><p>Bootstrapped exploration policy is currently only used for discrete action spaces along with the
Bootstrapped DQN agent. It assumes that there is an ensemble of network heads, where each one predicts the
values for all the possible actions. For each episode, a single head is selected to lead the agent, according
@@ -390,7 +394,7 @@ if the e-greedy is used for a continuous policy</p></li>
<h2>Categorical<a class="headerlink" href="#categorical" title="Permalink to this headline">¶</a></h2>
<dl class="class">
<dt id="rl_coach.exploration_policies.categorical.Categorical">
<em class="property">class </em><code class="descclassname">rl_coach.exploration_policies.categorical.</code><code class="descname">Categorical</code><span class="sig-paren">(</span><em>action_space: rl_coach.spaces.ActionSpace</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/exploration_policies/categorical.html#Categorical"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.exploration_policies.categorical.Categorical" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.exploration_policies.categorical.</code><code class="sig-name descname">Categorical</code><span class="sig-paren">(</span><em class="sig-param">action_space: rl_coach.spaces.ActionSpace</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/exploration_policies/categorical.html#Categorical"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.exploration_policies.categorical.Categorical" title="Permalink to this definition">¶</a></dt>
<dd><p>Categorical exploration policy is intended for discrete action spaces. It expects the action values to
represent a probability distribution over the actions, from which a single action will be sampled.
In evaluation, the action that has the highest probability will be selected. This is particularly useful for
@@ -407,7 +411,7 @@ actor-critic schemes, where the actors output is a probability distribution over
<h2>ContinuousEntropy<a class="headerlink" href="#continuousentropy" title="Permalink to this headline">¶</a></h2>
<dl class="class">
<dt id="rl_coach.exploration_policies.continuous_entropy.ContinuousEntropy">
<em class="property">class </em><code class="descclassname">rl_coach.exploration_policies.continuous_entropy.</code><code class="descname">ContinuousEntropy</code><span class="sig-paren">(</span><em>action_space: rl_coach.spaces.ActionSpace</em>, <em>noise_percentage_schedule: rl_coach.schedules.Schedule</em>, <em>evaluation_noise_percentage: float</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/exploration_policies/continuous_entropy.html#ContinuousEntropy"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.exploration_policies.continuous_entropy.ContinuousEntropy" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.exploration_policies.continuous_entropy.</code><code class="sig-name descname">ContinuousEntropy</code><span class="sig-paren">(</span><em class="sig-param">action_space: rl_coach.spaces.ActionSpace</em>, <em class="sig-param">noise_schedule: rl_coach.schedules.Schedule</em>, <em class="sig-param">evaluation_noise: float</em>, <em class="sig-param">noise_as_percentage_from_action_space: bool = True</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/exploration_policies/continuous_entropy.html#ContinuousEntropy"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.exploration_policies.continuous_entropy.ContinuousEntropy" title="Permalink to this definition">¶</a></dt>
<dd><p>Continuous entropy is an exploration policy that is actually implemented as part of the network.
The exploration policy class is only a placeholder for choosing this policy. The exploration policy is
implemented by adding a regularization factor to the network loss, which regularizes the entropy of the action.
@@ -422,9 +426,10 @@ There are only a few heads that actually are relevant and implement the entropy
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>action_space</strong> – the action space used by the environment</p></li>
<li><p><strong>noise_percentage_schedule</strong> – the schedule for the noise variance percentage relative to the absolute range
of the action space</p></li>
<li><p><strong>evaluation_noise_percentage</strong> – the noise variance percentage that will be used during evaluation phases</p></li>
<li><p><strong>noise_schedule</strong> – the schedule for the noise</p></li>
<li><p><strong>evaluation_noise</strong> – the noise variance that will be used during evaluation phases</p></li>
<li><p><strong>noise_as_percentage_from_action_space</strong> – a bool deciding whether the noise is absolute or as a percentage
from the action space</p></li>
</ul>
</dd>
</dl>
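<p>As a rough illustration of the idea, an entropy regularization term for a Gaussian policy head might look like the sketch below; the coefficient and function names are assumptions, and the actual term lives inside the network loss rather than in the exploration policy class:</p>
<pre>
import numpy as np

def gaussian_entropy(std):
    # differential entropy of a diagonal Gaussian: 0.5 * sum(log(2*pi*e*std^2))
    return 0.5 * np.sum(np.log(2.0 * np.pi * np.e * std ** 2))

def regularized_loss(policy_loss, std, entropy_coefficient=0.01):
    # subtracting the entropy term rewards keeping the action distribution wide
    return policy_loss - entropy_coefficient * gaussian_entropy(std)

loss = regularized_loss(policy_loss=1.25, std=np.array([0.3, 0.3]))
</pre>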
@@ -435,7 +440,7 @@ of the action space</p></li>
<h2>EGreedy<a class="headerlink" href="#egreedy" title="Permalink to this headline">¶</a></h2>
<dl class="class">
<dt id="rl_coach.exploration_policies.e_greedy.EGreedy">
<em class="property">class </em><code class="descclassname">rl_coach.exploration_policies.e_greedy.</code><code class="descname">EGreedy</code><span class="sig-paren">(</span><em>action_space: rl_coach.spaces.ActionSpace</em>, <em>epsilon_schedule: rl_coach.schedules.Schedule</em>, <em>evaluation_epsilon: float</em>, <em>continuous_exploration_policy_parameters: rl_coach.exploration_policies.exploration_policy.ExplorationParameters = <rl_coach.exploration_policies.additive_noise.AdditiveNoiseParameters object></em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/exploration_policies/e_greedy.html#EGreedy"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.exploration_policies.e_greedy.EGreedy" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.exploration_policies.e_greedy.</code><code class="sig-name descname">EGreedy</code><span class="sig-paren">(</span><em class="sig-param">action_space: rl_coach.spaces.ActionSpace</em>, <em class="sig-param">epsilon_schedule: rl_coach.schedules.Schedule</em>, <em class="sig-param">evaluation_epsilon: float</em>, <em class="sig-param">continuous_exploration_policy_parameters: rl_coach.exploration_policies.exploration_policy.ExplorationParameters = <rl_coach.exploration_policies.additive_noise.AdditiveNoiseParameters object></em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/exploration_policies/e_greedy.html#EGreedy"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.exploration_policies.e_greedy.EGreedy" title="Permalink to this definition">¶</a></dt>
<dd><p>e-greedy is an exploration policy that is intended for both discrete and continuous action spaces.</p>
<p>For discrete action spaces, it assumes that each action is assigned a value, and it selects the action with the
highest value with probability 1 - epsilon. Otherwise, it selects an action sampled uniformly out of all the
@@ -463,7 +468,7 @@ if the e-greedy is used for a continuous policy</p></li>
<h2>Greedy<a class="headerlink" href="#greedy" title="Permalink to this headline">¶</a></h2>
<dl class="class">
<dt id="rl_coach.exploration_policies.greedy.Greedy">
<em class="property">class </em><code class="descclassname">rl_coach.exploration_policies.greedy.</code><code class="descname">Greedy</code><span class="sig-paren">(</span><em>action_space: rl_coach.spaces.ActionSpace</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/exploration_policies/greedy.html#Greedy"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.exploration_policies.greedy.Greedy" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.exploration_policies.greedy.</code><code class="sig-name descname">Greedy</code><span class="sig-paren">(</span><em class="sig-param">action_space: rl_coach.spaces.ActionSpace</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/exploration_policies/greedy.html#Greedy"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.exploration_policies.greedy.Greedy" title="Permalink to this definition">¶</a></dt>
<dd><p>The Greedy exploration policy is intended for both discrete and continuous action spaces.
For discrete action spaces, it always selects the action with the maximum value, as given by the agent.
For continuous action spaces, it always returns the exact action, as it was given by the agent.</p>
@@ -479,7 +484,7 @@ For continuous action spaces, it always return the exact action, as it was given
<h2>OUProcess<a class="headerlink" href="#ouprocess" title="Permalink to this headline">¶</a></h2>
<dl class="class">
<dt id="rl_coach.exploration_policies.ou_process.OUProcess">
<em class="property">class </em><code class="descclassname">rl_coach.exploration_policies.ou_process.</code><code class="descname">OUProcess</code><span class="sig-paren">(</span><em>action_space: rl_coach.spaces.ActionSpace</em>, <em>mu: float = 0</em>, <em>theta: float = 0.15</em>, <em>sigma: float = 0.2</em>, <em>dt: float = 0.01</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/exploration_policies/ou_process.html#OUProcess"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.exploration_policies.ou_process.OUProcess" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.exploration_policies.ou_process.</code><code class="sig-name descname">OUProcess</code><span class="sig-paren">(</span><em class="sig-param">action_space: rl_coach.spaces.ActionSpace</em>, <em class="sig-param">mu: float = 0</em>, <em class="sig-param">theta: float = 0.15</em>, <em class="sig-param">sigma: float = 0.2</em>, <em class="sig-param">dt: float = 0.01</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/exploration_policies/ou_process.html#OUProcess"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.exploration_policies.ou_process.OUProcess" title="Permalink to this definition">¶</a></dt>
<dd><p>OUProcess exploration policy is intended for continuous action spaces, and selects the action according to
an Ornstein-Uhlenbeck process. The Ornstein-Uhlenbeck process implements the action as a Gaussian process, where
the samples are correlated between consecutive time steps.</p>
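<p>A minimal sketch of one Euler step of the process, using the parameter names from the signature above; adding the resulting noise to the agent's action is an assumption of the sketch:</p>
<pre>
import numpy as np

def ou_step(x, mu=0.0, theta=0.15, sigma=0.2, dt=0.01):
    # drift pulls the noise back towards mu; diffusion adds fresh Gaussian noise
    return x + theta * (mu - x) * dt + sigma * np.sqrt(dt) * np.random.normal(size=np.shape(x))

noise = np.zeros(2)         # one noise value per action dimension
for _ in range(3):
    noise = ou_step(noise)  # successive samples are correlated
</pre>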
@@ -495,7 +500,7 @@ the samples are correlated between consequent time steps.</p>
<h2>ParameterNoise<a class="headerlink" href="#parameternoise" title="Permalink to this headline">¶</a></h2>
<dl class="class">
<dt id="rl_coach.exploration_policies.parameter_noise.ParameterNoise">
<em class="property">class </em><code class="descclassname">rl_coach.exploration_policies.parameter_noise.</code><code class="descname">ParameterNoise</code><span class="sig-paren">(</span><em>network_params: Dict[str, rl_coach.base_parameters.NetworkParameters], action_space: rl_coach.spaces.ActionSpace</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/exploration_policies/parameter_noise.html#ParameterNoise"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.exploration_policies.parameter_noise.ParameterNoise" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.exploration_policies.parameter_noise.</code><code class="sig-name descname">ParameterNoise</code><span class="sig-paren">(</span><em class="sig-param">network_params: Dict[str, rl_coach.base_parameters.NetworkParameters], action_space: rl_coach.spaces.ActionSpace</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/exploration_policies/parameter_noise.html#ParameterNoise"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.exploration_policies.parameter_noise.ParameterNoise" title="Permalink to this definition">¶</a></dt>
<dd><p>The ParameterNoise exploration policy is intended for both discrete and continuous action spaces.
It applies the exploration policy by replacing all the dense network layers with noisy layers.
The noisy layers have both weight means and weight standard deviations, and for each forward pass of the network
@@ -514,7 +519,7 @@ values.</p>
<h2>TruncatedNormal<a class="headerlink" href="#truncatednormal" title="Permalink to this headline">¶</a></h2>
<dl class="class">
<dt id="rl_coach.exploration_policies.truncated_normal.TruncatedNormal">
<em class="property">class </em><code class="descclassname">rl_coach.exploration_policies.truncated_normal.</code><code class="descname">TruncatedNormal</code><span class="sig-paren">(</span><em>action_space: rl_coach.spaces.ActionSpace</em>, <em>noise_percentage_schedule: rl_coach.schedules.Schedule</em>, <em>evaluation_noise_percentage: float</em>, <em>clip_low: float</em>, <em>clip_high: float</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/exploration_policies/truncated_normal.html#TruncatedNormal"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.exploration_policies.truncated_normal.TruncatedNormal" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.exploration_policies.truncated_normal.</code><code class="sig-name descname">TruncatedNormal</code><span class="sig-paren">(</span><em class="sig-param">action_space: rl_coach.spaces.ActionSpace</em>, <em class="sig-param">noise_schedule: rl_coach.schedules.Schedule</em>, <em class="sig-param">evaluation_noise: float</em>, <em class="sig-param">clip_low: float</em>, <em class="sig-param">clip_high: float</em>, <em class="sig-param">noise_as_percentage_from_action_space: bool = True</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/exploration_policies/truncated_normal.html#TruncatedNormal"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.exploration_policies.truncated_normal.TruncatedNormal" title="Permalink to this definition">¶</a></dt>
<dd><p>The TruncatedNormal exploration policy is intended for continuous action spaces. It samples the action from a
normal distribution, where the mean action is given by the agent, and the standard deviation can be given in
two different ways:
@@ -527,9 +532,10 @@ is within the bounds.</p>
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>action_space</strong> – the action space used by the environment</p></li>
<li><p><strong>noise_percentage_schedule</strong> – the schedule for the noise variance percentage relative to the absolute range
of the action space</p></li>
<li><p><strong>evaluation_noise_percentage</strong> – the noise variance percentage that will be used during evaluation phases</p></li>
<li><p><strong>noise_schedule</strong> – the schedule for the noise variance</p></li>
<li><p><strong>evaluation_noise</strong> – the noise variance that will be used during evaluation phases</p></li>
<li><p><strong>noise_as_percentage_from_action_space</strong> – whether to consider the noise as a percentage of the action space
or absolute value</p></li>
</ul>
</dd>
</dl>
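<p>A minimal rejection-sampling sketch of the described behavior (resampling until the value falls inside the bounds); the library may instead draw from a closed-form truncated normal, and the cap on resampling attempts is an assumption of the sketch:</p>
<pre>
import numpy as np

def truncated_normal_action(mean, std, clip_low, clip_high, max_resamples=100):
    for _ in range(max_resamples):
        sample = np.random.normal(mean, std)
        if clip_low <= sample <= clip_high:
            return float(sample)
    # fall back to clipping if no in-bounds sample was drawn
    return float(np.clip(np.random.normal(mean, std), clip_low, clip_high))

action = truncated_normal_action(mean=0.2, std=0.1, clip_low=-1.0, clip_high=1.0)
</pre>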
@@ -540,7 +546,7 @@ of the action space</p></li>
<h2>UCB<a class="headerlink" href="#ucb" title="Permalink to this headline">¶</a></h2>
<dl class="class">
<dt id="rl_coach.exploration_policies.ucb.UCB">
<em class="property">class </em><code class="descclassname">rl_coach.exploration_policies.ucb.</code><code class="descname">UCB</code><span class="sig-paren">(</span><em>action_space: rl_coach.spaces.ActionSpace</em>, <em>epsilon_schedule: rl_coach.schedules.Schedule</em>, <em>evaluation_epsilon: float</em>, <em>architecture_num_q_heads: int</em>, <em>lamb: int</em>, <em>continuous_exploration_policy_parameters: rl_coach.exploration_policies.exploration_policy.ExplorationParameters = <rl_coach.exploration_policies.additive_noise.AdditiveNoiseParameters object></em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/exploration_policies/ucb.html#UCB"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.exploration_policies.ucb.UCB" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.exploration_policies.ucb.</code><code class="sig-name descname">UCB</code><span class="sig-paren">(</span><em class="sig-param">action_space: rl_coach.spaces.ActionSpace</em>, <em class="sig-param">epsilon_schedule: rl_coach.schedules.Schedule</em>, <em class="sig-param">evaluation_epsilon: float</em>, <em class="sig-param">architecture_num_q_heads: int</em>, <em class="sig-param">lamb: int</em>, <em class="sig-param">continuous_exploration_policy_parameters: rl_coach.exploration_policies.exploration_policy.ExplorationParameters = <rl_coach.exploration_policies.additive_noise.AdditiveNoiseParameters object></em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/exploration_policies/ucb.html#UCB"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.exploration_policies.ucb.UCB" title="Permalink to this definition">¶</a></dt>
<dd><p>UCB exploration policy follows the upper confidence bound heuristic to sample actions in discrete action spaces.
It assumes that there are multiple network heads that are predicting action values, and that the standard deviation
between the heads’ predictions represents the uncertainty of the agent in each of the actions.
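<p>One common reading of this heuristic is to pick the action maximizing the mean plus lamb times the standard deviation across the heads; the exact combination used by rl_coach may differ, so treat this as a sketch:</p>
<pre>
import numpy as np

def ucb_action(head_values, lamb=0.1):
    # head_values has shape (num_heads, num_actions)
    mean = head_values.mean(axis=0)
    std = head_values.std(axis=0)  # disagreement between heads ~ uncertainty
    return int(np.argmax(mean + lamb * std))

heads = np.array([[1.0, 0.5], [0.8, 1.1], [1.2, 0.4]])
action = ucb_action(heads)
</pre>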
@@ -221,7 +221,7 @@
<h3>ObservationClippingFilter<a class="headerlink" href="#observationclippingfilter" title="Permalink to this headline">¶</a></h3>
<dl class="class">
<dt id="rl_coach.filters.observation.ObservationClippingFilter">
<em class="property">class </em><code class="descclassname">rl_coach.filters.observation.</code><code class="descname">ObservationClippingFilter</code><span class="sig-paren">(</span><em>clipping_low: float = -inf</em>, <em>clipping_high: float = inf</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/filters/observation/observation_clipping_filter.html#ObservationClippingFilter"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.filters.observation.ObservationClippingFilter" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.filters.observation.</code><code class="sig-name descname">ObservationClippingFilter</code><span class="sig-paren">(</span><em class="sig-param">clipping_low: float = -inf</em>, <em class="sig-param">clipping_high: float = inf</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/filters/observation/observation_clipping_filter.html#ObservationClippingFilter"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.filters.observation.ObservationClippingFilter" title="Permalink to this definition">¶</a></dt>
<dd><p>Clips the observation values to a given range of values.
For example, if the observation consists of measurements in an arbitrary range,
and we want to control the minimum and maximum values of these observations,
@@ -241,7 +241,7 @@ we can define a range and clip the values of the measurements.</p>
<h3>ObservationCropFilter<a class="headerlink" href="#observationcropfilter" title="Permalink to this headline">¶</a></h3>
<dl class="class">
<dt id="rl_coach.filters.observation.ObservationCropFilter">
<em class="property">class </em><code class="descclassname">rl_coach.filters.observation.</code><code class="descname">ObservationCropFilter</code><span class="sig-paren">(</span><em>crop_low: numpy.ndarray = None</em>, <em>crop_high: numpy.ndarray = None</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/filters/observation/observation_crop_filter.html#ObservationCropFilter"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.filters.observation.ObservationCropFilter" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.filters.observation.</code><code class="sig-name descname">ObservationCropFilter</code><span class="sig-paren">(</span><em class="sig-param">crop_low: numpy.ndarray = None</em>, <em class="sig-param">crop_high: numpy.ndarray = None</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/filters/observation/observation_crop_filter.html#ObservationCropFilter"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.filters.observation.ObservationCropFilter" title="Permalink to this definition">¶</a></dt>
<dd><p>Crops the size of the observation to a given crop window. For example, in Atari, the
observations are images with a shape of 210x160. Usually, we will want to crop the size of the observation to a
square of 160x160 before rescaling them.</p>
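<p>A minimal numpy sketch of the cropping operation; the crop offsets for the Atari example are illustrative:</p>
<pre>
import numpy as np

def crop_observation(observation, crop_low, crop_high):
    # build one slice per axis from the low/high crop coordinates
    slices = tuple(slice(lo, hi) for lo, hi in zip(crop_low, crop_high))
    return observation[slices]

atari_frame = np.zeros((210, 160, 3), dtype=np.uint8)
square = crop_observation(atari_frame, crop_low=(30, 0, 0), crop_high=(190, 160, 3))
# square.shape == (160, 160, 3)
</pre>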
@@ -262,7 +262,7 @@ corresponding dimension. a negative value of -1 will be mapped to the max size</
<h3>ObservationMoveAxisFilter<a class="headerlink" href="#observationmoveaxisfilter" title="Permalink to this headline">¶</a></h3>
<dl class="class">
<dt id="rl_coach.filters.observation.ObservationMoveAxisFilter">
<em class="property">class </em><code class="descclassname">rl_coach.filters.observation.</code><code class="descname">ObservationMoveAxisFilter</code><span class="sig-paren">(</span><em>axis_origin: int = None</em>, <em>axis_target: int = None</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/filters/observation/observation_move_axis_filter.html#ObservationMoveAxisFilter"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.filters.observation.ObservationMoveAxisFilter" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.filters.observation.</code><code class="sig-name descname">ObservationMoveAxisFilter</code><span class="sig-paren">(</span><em class="sig-param">axis_origin: int = None</em>, <em class="sig-param">axis_target: int = None</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/filters/observation/observation_move_axis_filter.html#ObservationMoveAxisFilter"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.filters.observation.ObservationMoveAxisFilter" title="Permalink to this definition">¶</a></dt>
<dd><p>Reorders the axes of the observation. This can be useful when the observation is an
image, and we want to move the channel axis to be the last axis instead of the first axis.</p>
<dl class="field-list simple">
@@ -280,7 +280,7 @@ image, and we want to move the channel axis to be the last axis instead of the f
<h3>ObservationNormalizationFilter<a class="headerlink" href="#observationnormalizationfilter" title="Permalink to this headline">¶</a></h3>
<dl class="class">
<dt id="rl_coach.filters.observation.ObservationNormalizationFilter">
<em class="property">class </em><code class="descclassname">rl_coach.filters.observation.</code><code class="descname">ObservationNormalizationFilter</code><span class="sig-paren">(</span><em>clip_min: float = -5.0</em>, <em>clip_max: float = 5.0</em>, <em>name='observation_stats'</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/filters/observation/observation_normalization_filter.html#ObservationNormalizationFilter"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.filters.observation.ObservationNormalizationFilter" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.filters.observation.</code><code class="sig-name descname">ObservationNormalizationFilter</code><span class="sig-paren">(</span><em class="sig-param">clip_min: float = -5.0</em>, <em class="sig-param">clip_max: float = 5.0</em>, <em class="sig-param">name='observation_stats'</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/filters/observation/observation_normalization_filter.html#ObservationNormalizationFilter"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.filters.observation.ObservationNormalizationFilter" title="Permalink to this definition">¶</a></dt>
<dd><p>Normalizes the observation values with a running mean and standard deviation of
all the observations seen so far. The normalization is performed element-wise. Additionally, when working with
multiple workers, the statistics used for the normalization operation are accumulated over all the workers.</p>
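<p>A minimal single-worker sketch of such a filter, keeping streaming estimates of the mean and variance and clipping the result as described by the clip_min/clip_max parameters; the class name and the exact update rule are assumptions of the sketch:</p>
<pre>
import numpy as np

class RunningNormalizer:  # illustrative, not the rl_coach class
    def __init__(self, shape, clip_min=-5.0, clip_max=5.0):
        self.count = 1e-4
        self.mean = np.zeros(shape)
        self.var = np.ones(shape)
        self.clip_min, self.clip_max = clip_min, clip_max

    def observe(self, obs):
        # streaming update of the element-wise mean and variance
        self.count += 1
        delta = obs - self.mean
        self.mean += delta / self.count
        self.var += (delta * (obs - self.mean) - self.var) / self.count

    def filter(self, obs):
        normalized = (obs - self.mean) / np.sqrt(self.var + 1e-8)
        return np.clip(normalized, self.clip_min, self.clip_max)
</pre>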
@@ -299,7 +299,7 @@ multiple workers, the statistics used for the normalization operation are accumu
<h3>ObservationReductionBySubPartsNameFilter<a class="headerlink" href="#observationreductionbysubpartsnamefilter" title="Permalink to this headline">¶</a></h3>
<dl class="class">
<dt id="rl_coach.filters.observation.ObservationReductionBySubPartsNameFilter">
<em class="property">class </em><code class="descclassname">rl_coach.filters.observation.</code><code class="descname">ObservationReductionBySubPartsNameFilter</code><span class="sig-paren">(</span><em>part_names: List[str], reduction_method: rl_coach.filters.observation.observation_reduction_by_sub_parts_name_filter.ObservationReductionBySubPartsNameFilter.ReductionMethod</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/filters/observation/observation_reduction_by_sub_parts_name_filter.html#ObservationReductionBySubPartsNameFilter"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.filters.observation.ObservationReductionBySubPartsNameFilter" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.filters.observation.</code><code class="sig-name descname">ObservationReductionBySubPartsNameFilter</code><span class="sig-paren">(</span><em class="sig-param">part_names: List[str], reduction_method: rl_coach.filters.observation.observation_reduction_by_sub_parts_name_filter.ObservationReductionBySubPartsNameFilter.ReductionMethod</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/filters/observation/observation_reduction_by_sub_parts_name_filter.html#ObservationReductionBySubPartsNameFilter"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.filters.observation.ObservationReductionBySubPartsNameFilter" title="Permalink to this definition">¶</a></dt>
<dd><p>Allows keeping only parts of the observation, by specifying their
names. This is useful when the environment has a measurements vector as observation which includes several different
measurements, but you want the agent to only see some of the measurements and not all.
@@ -321,7 +321,7 @@ This will currently work only for VectorObservationSpace observations</p>
<h3>ObservationRescaleSizeByFactorFilter<a class="headerlink" href="#observationrescalesizebyfactorfilter" title="Permalink to this headline">¶</a></h3>
<dl class="class">
<dt id="rl_coach.filters.observation.ObservationRescaleSizeByFactorFilter">
<em class="property">class </em><code class="descclassname">rl_coach.filters.observation.</code><code class="descname">ObservationRescaleSizeByFactorFilter</code><span class="sig-paren">(</span><em>rescale_factor: float</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/filters/observation/observation_rescale_size_by_factor_filter.html#ObservationRescaleSizeByFactorFilter"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.filters.observation.ObservationRescaleSizeByFactorFilter" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.filters.observation.</code><code class="sig-name descname">ObservationRescaleSizeByFactorFilter</code><span class="sig-paren">(</span><em class="sig-param">rescale_factor: float</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/filters/observation/observation_rescale_size_by_factor_filter.html#ObservationRescaleSizeByFactorFilter"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.filters.observation.ObservationRescaleSizeByFactorFilter" title="Permalink to this definition">¶</a></dt>
<dd><p>Rescales an image observation by some factor. For example, the image size
can be reduced by a factor of 2.</p>
<dl class="field-list simple">
@@ -336,7 +336,7 @@ can be reduced by a factor of 2.</p>
<h3>ObservationRescaleToSizeFilter<a class="headerlink" href="#observationrescaletosizefilter" title="Permalink to this headline">¶</a></h3>
<dl class="class">
<dt id="rl_coach.filters.observation.ObservationRescaleToSizeFilter">
<em class="property">class </em><code class="descclassname">rl_coach.filters.observation.</code><code class="descname">ObservationRescaleToSizeFilter</code><span class="sig-paren">(</span><em>output_observation_space: rl_coach.spaces.PlanarMapsObservationSpace</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/filters/observation/observation_rescale_to_size_filter.html#ObservationRescaleToSizeFilter"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.filters.observation.ObservationRescaleToSizeFilter" title="Permalink to this definition">¶</a></dt>
|
||||
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.filters.observation.</code><code class="sig-name descname">ObservationRescaleToSizeFilter</code><span class="sig-paren">(</span><em class="sig-param">output_observation_space: rl_coach.spaces.PlanarMapsObservationSpace</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/filters/observation/observation_rescale_to_size_filter.html#ObservationRescaleToSizeFilter"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.filters.observation.ObservationRescaleToSizeFilter" title="Permalink to this definition">¶</a></dt>
|
||||
<dd><p>Rescales an image observation to a given size. The target size does not
|
||||
necessarily keep the aspect ratio of the original observation.
|
||||
Warning: this requires the input observation to be of type uint8 due to scipy requirements!</p>
|
||||
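<p>A sketch of rescaling incoming frames to a fixed 84x84 RGB size, using the ImageObservationSpace
documented below (note the uint8 requirement on the input):</p>
<pre>
import numpy as np

from rl_coach.filters.observation.observation_rescale_to_size_filter import \
    ObservationRescaleToSizeFilter
from rl_coach.spaces import ImageObservationSpace

# Output frames will be 84x84x3, with pixel values in [0, 255]
rescale = ObservationRescaleToSizeFilter(
    output_observation_space=ImageObservationSpace(np.array([84, 84, 3]), high=255))
</pre>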
@@ -352,7 +352,7 @@ Warning: this requires the input observation to be of type uint8 due to scipy re
<h3>ObservationRGBToYFilter<a class="headerlink" href="#observationrgbtoyfilter" title="Permalink to this headline">¶</a></h3>
<dl class="class">
<dt id="rl_coach.filters.observation.ObservationRGBToYFilter">
<em class="property">class </em><code class="descclassname">rl_coach.filters.observation.</code><code class="descname">ObservationRGBToYFilter</code><a class="reference internal" href="../../_modules/rl_coach/filters/observation/observation_rgb_to_y_filter.html#ObservationRGBToYFilter"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.filters.observation.ObservationRGBToYFilter" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.filters.observation.</code><code class="sig-name descname">ObservationRGBToYFilter</code><a class="reference internal" href="../../_modules/rl_coach/filters/observation/observation_rgb_to_y_filter.html#ObservationRGBToYFilter"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.filters.observation.ObservationRGBToYFilter" title="Permalink to this definition">¶</a></dt>
<dd><p>Converts a color image observation specified using the RGB encoding into a grayscale
image observation, by keeping only the luminance (Y) channel of the YUV encoding. This can be useful if the colors
in the original image are not relevant for solving the task at hand.
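<p>The filter takes no arguments; a typical use is dropping color before stacking frames:</p>
<pre>
from rl_coach.filters.observation.observation_rgb_to_y_filter import ObservationRGBToYFilter

# HxWx3 RGB observations become HxW grayscale (luminance) observations
to_gray = ObservationRGBToYFilter()
</pre>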
@@ -364,7 +364,7 @@ The channels axis is assumed to be the last axis</p>
<h3>ObservationSqueezeFilter<a class="headerlink" href="#observationsqueezefilter" title="Permalink to this headline">¶</a></h3>
<dl class="class">
<dt id="rl_coach.filters.observation.ObservationSqueezeFilter">
<em class="property">class </em><code class="descclassname">rl_coach.filters.observation.</code><code class="descname">ObservationSqueezeFilter</code><span class="sig-paren">(</span><em>axis: int = None</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/filters/observation/observation_squeeze_filter.html#ObservationSqueezeFilter"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.filters.observation.ObservationSqueezeFilter" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.filters.observation.</code><code class="sig-name descname">ObservationSqueezeFilter</code><span class="sig-paren">(</span><em class="sig-param">axis: int = None</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/filters/observation/observation_squeeze_filter.html#ObservationSqueezeFilter"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.filters.observation.ObservationSqueezeFilter" title="Permalink to this definition">¶</a></dt>
<dd><p>Removes redundant axes from the observation, which are axes with a dimension of 1.</p>
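<p>For example, removing a leading singleton axis (a sketch; with the default
<code>axis=None</code>, presumably all singleton axes are removed, following numpy.squeeze semantics):</p>
<pre>
from rl_coach.filters.observation.observation_squeeze_filter import ObservationSqueezeFilter

# An observation shaped (1, 84, 84) becomes (84, 84)
squeeze = ObservationSqueezeFilter(axis=0)
</pre>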
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
@@ -378,7 +378,7 @@ The channels axis is assumed to be the last axis</p>
<h3>ObservationStackingFilter<a class="headerlink" href="#observationstackingfilter" title="Permalink to this headline">¶</a></h3>
<dl class="class">
<dt id="rl_coach.filters.observation.ObservationStackingFilter">
<em class="property">class </em><code class="descclassname">rl_coach.filters.observation.</code><code class="descname">ObservationStackingFilter</code><span class="sig-paren">(</span><em>stack_size: int</em>, <em>stacking_axis: int = -1</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/filters/observation/observation_stacking_filter.html#ObservationStackingFilter"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.filters.observation.ObservationStackingFilter" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.filters.observation.</code><code class="sig-name descname">ObservationStackingFilter</code><span class="sig-paren">(</span><em class="sig-param">stack_size: int</em>, <em class="sig-param">stacking_axis: int = -1</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/filters/observation/observation_stacking_filter.html#ObservationStackingFilter"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.filters.observation.ObservationStackingFilter" title="Permalink to this definition">¶</a></dt>
<dd><p>Stacks several observations on top of each other. For image observations this will
create a 3D blob. The stacking is done in a lazy manner in order to reduce memory consumption. To achieve this,
a LazyStack object is used to wrap the observations in the stack. For this reason, the
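<p>The classic DQN setup stacks the last 4 frames along the last axis (a sketch based on the signature above):</p>
<pre>
from rl_coach.filters.observation.observation_stacking_filter import ObservationStackingFilter

# Four 84x84 frames are presented to the agent as a single 84x84x4 blob
stack = ObservationStackingFilter(stack_size=4, stacking_axis=-1)
</pre>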
@@ -403,7 +403,7 @@ and increase the memory footprint.</p>
<h3>ObservationToUInt8Filter<a class="headerlink" href="#observationtouint8filter" title="Permalink to this headline">¶</a></h3>
<dl class="class">
<dt id="rl_coach.filters.observation.ObservationToUInt8Filter">
<em class="property">class </em><code class="descclassname">rl_coach.filters.observation.</code><code class="descname">ObservationToUInt8Filter</code><span class="sig-paren">(</span><em>input_low: float</em>, <em>input_high: float</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/filters/observation/observation_to_uint8_filter.html#ObservationToUInt8Filter"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.filters.observation.ObservationToUInt8Filter" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.filters.observation.</code><code class="sig-name descname">ObservationToUInt8Filter</code><span class="sig-paren">(</span><em class="sig-param">input_low: float</em>, <em class="sig-param">input_high: float</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/filters/observation/observation_to_uint8_filter.html#ObservationToUInt8Filter"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.filters.observation.ObservationToUInt8Filter" title="Permalink to this definition">¶</a></dt>
<dd><p>Converts a floating point observation into an unsigned 8-bit integer observation. This is
mostly useful for reducing memory consumption and is usually used for image observations. The filter will first
spread the observation values over the range 0-255 and then discretize them into integer values.</p>
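<p>The observation filters on this page are typically chained inside an <code>InputFilter</code>
container. A sketch of an Atari-style pipeline, assuming the <code>InputFilter</code> API from
<code>rl_coach.filters.filter</code> (the filter names such as 'rescale' are arbitrary keys):</p>
<pre>
import numpy as np

from rl_coach.filters.filter import InputFilter
from rl_coach.filters.observation.observation_rescale_to_size_filter import \
    ObservationRescaleToSizeFilter
from rl_coach.filters.observation.observation_rgb_to_y_filter import ObservationRGBToYFilter
from rl_coach.filters.observation.observation_to_uint8_filter import ObservationToUInt8Filter
from rl_coach.spaces import ImageObservationSpace

input_filter = InputFilter()
input_filter.add_observation_filter(
    'observation', 'rescale',
    ObservationRescaleToSizeFilter(ImageObservationSpace(np.array([84, 84, 3]), high=255)))
input_filter.add_observation_filter('observation', 'to_gray', ObservationRGBToYFilter())
input_filter.add_observation_filter(
    'observation', 'to_uint8', ObservationToUInt8Filter(input_low=0, input_high=255))
</pre>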
@@ -425,7 +425,7 @@ spread the observation values over the range 0-255 and then discretize them into
<h3>RewardClippingFilter<a class="headerlink" href="#rewardclippingfilter" title="Permalink to this headline">¶</a></h3>
<dl class="class">
<dt id="rl_coach.filters.reward.RewardClippingFilter">
<em class="property">class </em><code class="descclassname">rl_coach.filters.reward.</code><code class="descname">RewardClippingFilter</code><span class="sig-paren">(</span><em>clipping_low: float = -inf</em>, <em>clipping_high: float = inf</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/filters/reward/reward_clipping_filter.html#RewardClippingFilter"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.filters.reward.RewardClippingFilter" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.filters.reward.</code><code class="sig-name descname">RewardClippingFilter</code><span class="sig-paren">(</span><em class="sig-param">clipping_low: float = -inf</em>, <em class="sig-param">clipping_high: float = inf</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/filters/reward/reward_clipping_filter.html#RewardClippingFilter"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.filters.reward.RewardClippingFilter" title="Permalink to this definition">¶</a></dt>
<dd><p>Clips the reward values to a given range. For example, in DQN, the Atari rewards are
clipped to the range [-1, 1] in order to control the scale of the returns.</p>
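<p>A sketch reproducing the DQN-style Atari clipping described above (again assuming the
<code>InputFilter</code> container):</p>
<pre>
from rl_coach.filters.filter import InputFilter
from rl_coach.filters.reward.reward_clipping_filter import RewardClippingFilter

input_filter = InputFilter()
input_filter.add_reward_filter(
    'clipping', RewardClippingFilter(clipping_low=-1.0, clipping_high=1.0))
</pre>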
<dl class="field-list simple">
@@ -443,7 +443,7 @@ clipped into the range -1 and 1 in order to control the scale of the returns.</p
<h3>RewardNormalizationFilter<a class="headerlink" href="#rewardnormalizationfilter" title="Permalink to this headline">¶</a></h3>
<dl class="class">
<dt id="rl_coach.filters.reward.RewardNormalizationFilter">
<em class="property">class </em><code class="descclassname">rl_coach.filters.reward.</code><code class="descname">RewardNormalizationFilter</code><span class="sig-paren">(</span><em>clip_min: float = -5.0</em>, <em>clip_max: float = 5.0</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/filters/reward/reward_normalization_filter.html#RewardNormalizationFilter"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.filters.reward.RewardNormalizationFilter" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.filters.reward.</code><code class="sig-name descname">RewardNormalizationFilter</code><span class="sig-paren">(</span><em class="sig-param">clip_min: float = -5.0</em>, <em class="sig-param">clip_max: float = 5.0</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/filters/reward/reward_normalization_filter.html#RewardNormalizationFilter"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.filters.reward.RewardNormalizationFilter" title="Permalink to this definition">¶</a></dt>
<dd><p>Normalizes the reward values with a running mean and standard deviation of
all the rewards seen so far. When working with multiple workers, the statistics used for the normalization operation
are accumulated over all the workers.</p>
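<p>A construction sketch with the default clipping range from the signature above:</p>
<pre>
from rl_coach.filters.reward.reward_normalization_filter import RewardNormalizationFilter

# Rewards are standardized by running statistics, then clipped to [-5, 5]
normalize = RewardNormalizationFilter(clip_min=-5.0, clip_max=5.0)
</pre>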
@@ -462,7 +462,7 @@ are accumulated over all the workers.</p>
<h3>RewardRescaleFilter<a class="headerlink" href="#rewardrescalefilter" title="Permalink to this headline">¶</a></h3>
<dl class="class">
<dt id="rl_coach.filters.reward.RewardRescaleFilter">
<em class="property">class </em><code class="descclassname">rl_coach.filters.reward.</code><code class="descname">RewardRescaleFilter</code><span class="sig-paren">(</span><em>rescale_factor: float</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/filters/reward/reward_rescale_filter.html#RewardRescaleFilter"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.filters.reward.RewardRescaleFilter" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.filters.reward.</code><code class="sig-name descname">RewardRescaleFilter</code><span class="sig-paren">(</span><em class="sig-param">rescale_factor: float</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/filters/reward/reward_rescale_filter.html#RewardRescaleFilter"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.filters.reward.RewardRescaleFilter" title="Permalink to this definition">¶</a></dt>
<dd><p>Rescales the reward by a given factor. Rescaling the rewards of the environment has been
observed to have a large effect (negative or positive) on the behavior of the learning process.</p>
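<p>A construction sketch (check the source for the exact direction in which the factor is applied):</p>
<pre>
from rl_coach.filters.reward.reward_rescale_filter import RewardRescaleFilter

# Rescale rewards by a factor of 100 to control their magnitude
rescale_reward = RewardRescaleFilter(rescale_factor=100.)
</pre>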
<dl class="field-list simple">
@@ -200,7 +200,7 @@
<h2>Action Filters<a class="headerlink" href="#action-filters" title="Permalink to this headline">¶</a></h2>
<dl class="class">
<dt id="rl_coach.filters.action.AttentionDiscretization">
<em class="property">class </em><code class="descclassname">rl_coach.filters.action.</code><code class="descname">AttentionDiscretization</code><span class="sig-paren">(</span><em>num_bins_per_dimension: Union[int, List[int]], force_int_bins=False</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/filters/action/attention_discretization.html#AttentionDiscretization"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.filters.action.AttentionDiscretization" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.filters.action.</code><code class="sig-name descname">AttentionDiscretization</code><span class="sig-paren">(</span><em class="sig-param">num_bins_per_dimension: Union[int, List[int]], force_int_bins=False</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/filters/action/attention_discretization.html#AttentionDiscretization"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.filters.action.AttentionDiscretization" title="Permalink to this definition">¶</a></dt>
<dd><p>Discretizes an <strong>AttentionActionSpace</strong>. The attention action space defines the actions
as choosing sub-boxes in a given box. For example, consider an image of size 100x100, where the action is choosing
a crop window of size 20x20 to attend to in the image. AttentionDiscretization allows discretizing the possible crop
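<p>A construction sketch discretizing the possible crop windows into 2 bins per dimension:</p>
<pre>
from rl_coach.filters.action.attention_discretization import AttentionDiscretization

discretize_attention = AttentionDiscretization(num_bins_per_dimension=2)
</pre>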
@@ -219,7 +219,7 @@ windows to choose into a finite number of options, and map a discrete action spa
<img alt="../../_images/attention_discretization.png" class="align-center" src="../../_images/attention_discretization.png" />
<dl class="class">
<dt id="rl_coach.filters.action.BoxDiscretization">
<em class="property">class </em><code class="descclassname">rl_coach.filters.action.</code><code class="descname">BoxDiscretization</code><span class="sig-paren">(</span><em>num_bins_per_dimension: Union[int, List[int]], force_int_bins=False</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/filters/action/box_discretization.html#BoxDiscretization"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.filters.action.BoxDiscretization" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.filters.action.</code><code class="sig-name descname">BoxDiscretization</code><span class="sig-paren">(</span><em class="sig-param">num_bins_per_dimension: Union[int, List[int]], force_int_bins=False</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/filters/action/box_discretization.html#BoxDiscretization"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.filters.action.BoxDiscretization" title="Permalink to this definition">¶</a></dt>
<dd><p>Discretizes a continuous action space into a discrete action space, allowing the usage of
agents such as DQN for continuous environments such as MuJoCo. Given the number of bins to discretize into, the
original continuous action space is uniformly separated into the given number of bins, each mapped to a discrete
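<p>A sketch based on the example above: for a box action space spanning [0, 10], 5 bins per
dimension yield the discrete actions 0, 2.5, 5, 7.5 and 10:</p>
<pre>
from rl_coach.filters.action.box_discretization import BoxDiscretization

discretize = BoxDiscretization(num_bins_per_dimension=5)

# force_int_bins=True would snap the bin values to integers instead
</pre>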
@@ -242,7 +242,7 @@ instead of 0, 2.5, 5, 7.5, 10.</p></li>
<img alt="../../_images/box_discretization.png" class="align-center" src="../../_images/box_discretization.png" />
<dl class="class">
<dt id="rl_coach.filters.action.BoxMasking">
<em class="property">class </em><code class="descclassname">rl_coach.filters.action.</code><code class="descname">BoxMasking</code><span class="sig-paren">(</span><em>masked_target_space_low: Union[None, int, float, numpy.ndarray], masked_target_space_high: Union[None, int, float, numpy.ndarray]</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/filters/action/box_masking.html#BoxMasking"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.filters.action.BoxMasking" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.filters.action.</code><code class="sig-name descname">BoxMasking</code><span class="sig-paren">(</span><em class="sig-param">masked_target_space_low: Union[None, int, float, numpy.ndarray], masked_target_space_high: Union[None, int, float, numpy.ndarray]</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/filters/action/box_masking.html#BoxMasking"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.filters.action.BoxMasking" title="Permalink to this definition">¶</a></dt>
<dd><p>Masks part of the action space to force the agent to work in a defined sub-space. For example,
if the original action space is between -1 and 1, then this filter can be used to constrain the agent's actions
to the range 0 and 1 instead. This essentially masks the range -1 to 0 from the agent.
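<p>A sketch of the example above, constraining the agent to the sub-range [0, 1] of a [-1, 1] action space:</p>
<pre>
from rl_coach.filters.action.box_masking import BoxMasking

mask = BoxMasking(masked_target_space_low=0, masked_target_space_high=1)
</pre>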
@@ -260,7 +260,7 @@ The resulting action space will be shifted and will always start from 0 and have
<img alt="../../_images/box_masking.png" class="align-center" src="../../_images/box_masking.png" />
<dl class="class">
<dt id="rl_coach.filters.action.PartialDiscreteActionSpaceMap">
<em class="property">class </em><code class="descclassname">rl_coach.filters.action.</code><code class="descname">PartialDiscreteActionSpaceMap</code><span class="sig-paren">(</span><em>target_actions: List[Union[int</em>, <em>float</em>, <em>numpy.ndarray</em>, <em>List]] = None</em>, <em>descriptions: List[str] = None</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/filters/action/partial_discrete_action_space_map.html#PartialDiscreteActionSpaceMap"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.filters.action.PartialDiscreteActionSpaceMap" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.filters.action.</code><code class="sig-name descname">PartialDiscreteActionSpaceMap</code><span class="sig-paren">(</span><em class="sig-param">target_actions: List[Union[int</em>, <em class="sig-param">float</em>, <em class="sig-param">numpy.ndarray</em>, <em class="sig-param">List]] = None</em>, <em class="sig-param">descriptions: List[str] = None</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/filters/action/partial_discrete_action_space_map.html#PartialDiscreteActionSpaceMap"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.filters.action.PartialDiscreteActionSpaceMap" title="Permalink to this definition">¶</a></dt>
<dd><p>A partial mapping between two countable action spaces. For example, consider an environment
with a MultiSelect action space (select multiple actions at the same time, such as jump and go right), with 8 actual
MultiSelect actions. If we want the agent to be able to select only 5 of those actions by their index (0-4), we can
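<p>A sketch of the example above, exposing only 5 of the 8 underlying actions (the chosen
indices here are illustrative):</p>
<pre>
from rl_coach.filters.action.partial_discrete_action_space_map import \
    PartialDiscreteActionSpaceMap

# Agent actions 0-4 map to these 5 target actions; the remaining 3 are masked
partial_map = PartialDiscreteActionSpaceMap(target_actions=[0, 1, 2, 4, 7])
</pre>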
@@ -279,7 +279,7 @@ use regular discrete actions, and mask 3 of the actions from the agent.</p>
<img alt="../../_images/partial_discrete_action_space_map.png" class="align-center" src="../../_images/partial_discrete_action_space_map.png" />
<dl class="class">
<dt id="rl_coach.filters.action.FullDiscreteActionSpaceMap">
<em class="property">class </em><code class="descclassname">rl_coach.filters.action.</code><code class="descname">FullDiscreteActionSpaceMap</code><a class="reference internal" href="../../_modules/rl_coach/filters/action/full_discrete_action_space_map.html#FullDiscreteActionSpaceMap"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.filters.action.FullDiscreteActionSpaceMap" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.filters.action.</code><code class="sig-name descname">FullDiscreteActionSpaceMap</code><a class="reference internal" href="../../_modules/rl_coach/filters/action/full_discrete_action_space_map.html#FullDiscreteActionSpaceMap"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.filters.action.FullDiscreteActionSpaceMap" title="Permalink to this definition">¶</a></dt>
<dd><p>A full mapping between two countable action spaces. This works similarly to the
PartialDiscreteActionSpaceMap, but maps the entire source action space into the entire target action space, without
masking any actions.
@@ -290,7 +290,7 @@ multiselect actions.</p>
<img alt="../../_images/full_discrete_action_space_map.png" class="align-center" src="../../_images/full_discrete_action_space_map.png" />
<dl class="class">
<dt id="rl_coach.filters.action.LinearBoxToBoxMap">
<em class="property">class </em><code class="descclassname">rl_coach.filters.action.</code><code class="descname">LinearBoxToBoxMap</code><span class="sig-paren">(</span><em>input_space_low: Union[None, int, float, numpy.ndarray], input_space_high: Union[None, int, float, numpy.ndarray]</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/filters/action/linear_box_to_box_map.html#LinearBoxToBoxMap"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.filters.action.LinearBoxToBoxMap" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.filters.action.</code><code class="sig-name descname">LinearBoxToBoxMap</code><span class="sig-paren">(</span><em class="sig-param">input_space_low: Union[None, int, float, numpy.ndarray], input_space_high: Union[None, int, float, numpy.ndarray]</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/filters/action/linear_box_to_box_map.html#LinearBoxToBoxMap"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.filters.action.LinearBoxToBoxMap" title="Permalink to this definition">¶</a></dt>
<dd><p>A linear mapping between two box action spaces. For example, if the action space of the
environment consists of continuous actions between 0 and 1, and we want the agent to choose actions between -1 and 1,
the LinearBoxToBoxMap can be used to linearly map the range -1 and 1 to the range 0 and 1. This means that the
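<p>A sketch of the example above, where the agent acts in [-1, 1] and the environment expects [0, 1]:</p>
<pre>
from rl_coach.filters.action.linear_box_to_box_map import LinearBoxToBoxMap

# The agent's [-1, 1] actions are linearly mapped onto the environment's [0, 1] range
linear_map = LinearBoxToBoxMap(input_space_low=-1, input_space_high=1)
</pre>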
@@ -209,7 +209,7 @@
<h3>EpisodicExperienceReplay<a class="headerlink" href="#episodicexperiencereplay" title="Permalink to this headline">¶</a></h3>
<dl class="class">
<dt id="rl_coach.memories.episodic.EpisodicExperienceReplay">
<em class="property">class </em><code class="descclassname">rl_coach.memories.episodic.</code><code class="descname">EpisodicExperienceReplay</code><span class="sig-paren">(</span><em>max_size: Tuple[rl_coach.memories.memory.MemoryGranularity</em>, <em>int] = (<MemoryGranularity.Transitions: 0></em>, <em>1000000)</em>, <em>n_step=-1</em>, <em>train_to_eval_ratio: int = 1</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/memories/episodic/episodic_experience_replay.html#EpisodicExperienceReplay"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.memories.episodic.EpisodicExperienceReplay" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.memories.episodic.</code><code class="sig-name descname">EpisodicExperienceReplay</code><span class="sig-paren">(</span><em class="sig-param">max_size: Tuple[rl_coach.memories.memory.MemoryGranularity</em>, <em class="sig-param">int] = (<MemoryGranularity.Transitions: 0></em>, <em class="sig-param">1000000)</em>, <em class="sig-param">n_step=-1</em>, <em class="sig-param">train_to_eval_ratio: int = 1</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/memories/episodic/episodic_experience_replay.html#EpisodicExperienceReplay"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.memories.episodic.EpisodicExperienceReplay" title="Permalink to this definition">¶</a></dt>
<dd><p>A replay buffer that stores episodes of transitions. The additional structure allows performing various
calculations of total return and other values that depend on the sequential behavior of the transitions
in the episode.</p>
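<p>A construction sketch using the default capacity from the signature above:</p>
<pre>
from rl_coach.memories.episodic import EpisodicExperienceReplay
from rl_coach.memories.memory import MemoryGranularity

# Capacity is counted in transitions here; MemoryGranularity.Episodes is the alternative
memory = EpisodicExperienceReplay(max_size=(MemoryGranularity.Transitions, 1000000))
</pre>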
@@ -225,7 +225,7 @@ in the episode.</p>
<h3>EpisodicHindsightExperienceReplay<a class="headerlink" href="#episodichindsightexperiencereplay" title="Permalink to this headline">¶</a></h3>
<dl class="class">
<dt id="rl_coach.memories.episodic.EpisodicHindsightExperienceReplay">
<em class="property">class </em><code class="descclassname">rl_coach.memories.episodic.</code><code class="descname">EpisodicHindsightExperienceReplay</code><span class="sig-paren">(</span><em>max_size: Tuple[rl_coach.memories.memory.MemoryGranularity, int], hindsight_transitions_per_regular_transition: int, hindsight_goal_selection_method: rl_coach.memories.episodic.episodic_hindsight_experience_replay.HindsightGoalSelectionMethod, goals_space: rl_coach.spaces.GoalsSpace</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/memories/episodic/episodic_hindsight_experience_replay.html#EpisodicHindsightExperienceReplay"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.memories.episodic.EpisodicHindsightExperienceReplay" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.memories.episodic.</code><code class="sig-name descname">EpisodicHindsightExperienceReplay</code><span class="sig-paren">(</span><em class="sig-param">max_size: Tuple[rl_coach.memories.memory.MemoryGranularity, int], hindsight_transitions_per_regular_transition: int, hindsight_goal_selection_method: rl_coach.memories.episodic.episodic_hindsight_experience_replay.HindsightGoalSelectionMethod, goals_space: rl_coach.spaces.GoalsSpace</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/memories/episodic/episodic_hindsight_experience_replay.html#EpisodicHindsightExperienceReplay"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.memories.episodic.EpisodicHindsightExperienceReplay" title="Permalink to this definition">¶</a></dt>
<dd><p>Implements Hindsight Experience Replay as described in the following paper: <a class="reference external" href="https://arxiv.org/pdf/1707.01495.pdf">https://arxiv.org/pdf/1707.01495.pdf</a></p>
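<p>A construction sketch; the goal selection member (<code>Future</code>) and the pre-built
<code>my_goals_space</code> are assumptions here:</p>
<pre>
from rl_coach.memories.episodic import EpisodicHindsightExperienceReplay
from rl_coach.memories.episodic.episodic_hindsight_experience_replay import \
    HindsightGoalSelectionMethod
from rl_coach.memories.memory import MemoryGranularity

her_memory = EpisodicHindsightExperienceReplay(
    max_size=(MemoryGranularity.Transitions, 1000000),
    hindsight_transitions_per_regular_transition=4,
    hindsight_goal_selection_method=HindsightGoalSelectionMethod.Future,
    goals_space=my_goals_space)  # my_goals_space: a pre-built rl_coach.spaces.GoalsSpace
</pre>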
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
@@ -246,7 +246,7 @@ hindsight transitions. Should be one of HindsightGoalSelectionMethod</p></li>
<h3>EpisodicHRLHindsightExperienceReplay<a class="headerlink" href="#episodichrlhindsightexperiencereplay" title="Permalink to this headline">¶</a></h3>
<dl class="class">
<dt id="rl_coach.memories.episodic.EpisodicHRLHindsightExperienceReplay">
<em class="property">class </em><code class="descclassname">rl_coach.memories.episodic.</code><code class="descname">EpisodicHRLHindsightExperienceReplay</code><span class="sig-paren">(</span><em>max_size: Tuple[rl_coach.memories.memory.MemoryGranularity, int], hindsight_transitions_per_regular_transition: int, hindsight_goal_selection_method: rl_coach.memories.episodic.episodic_hindsight_experience_replay.HindsightGoalSelectionMethod, goals_space: rl_coach.spaces.GoalsSpace</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/memories/episodic/episodic_hrl_hindsight_experience_replay.html#EpisodicHRLHindsightExperienceReplay"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.memories.episodic.EpisodicHRLHindsightExperienceReplay" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.memories.episodic.</code><code class="sig-name descname">EpisodicHRLHindsightExperienceReplay</code><span class="sig-paren">(</span><em class="sig-param">max_size: Tuple[rl_coach.memories.memory.MemoryGranularity, int], hindsight_transitions_per_regular_transition: int, hindsight_goal_selection_method: rl_coach.memories.episodic.episodic_hindsight_experience_replay.HindsightGoalSelectionMethod, goals_space: rl_coach.spaces.GoalsSpace</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/memories/episodic/episodic_hrl_hindsight_experience_replay.html#EpisodicHRLHindsightExperienceReplay"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.memories.episodic.EpisodicHRLHindsightExperienceReplay" title="Permalink to this definition">¶</a></dt>
<dd><p>Implements HRL Hindsight Experience Replay as described in the following paper: <a class="reference external" href="https://arxiv.org/abs/1805.08180">https://arxiv.org/abs/1805.08180</a></p>
<p>This is the memory you should use if you want a shared hindsight experience replay buffer between multiple workers.</p>
<dl class="field-list simple">
@@ -269,7 +269,7 @@ hindsight transitions. Should be one of HindsightGoalSelectionMethod</p></li>
<h3>SingleEpisodeBuffer<a class="headerlink" href="#singleepisodebuffer" title="Permalink to this headline">¶</a></h3>
<dl class="class">
<dt id="rl_coach.memories.episodic.SingleEpisodeBuffer">
<em class="property">class </em><code class="descclassname">rl_coach.memories.episodic.</code><code class="descname">SingleEpisodeBuffer</code><a class="reference internal" href="../../_modules/rl_coach/memories/episodic/single_episode_buffer.html#SingleEpisodeBuffer"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.memories.episodic.SingleEpisodeBuffer" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.memories.episodic.</code><code class="sig-name descname">SingleEpisodeBuffer</code><a class="reference internal" href="../../_modules/rl_coach/memories/episodic/single_episode_buffer.html#SingleEpisodeBuffer"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.memories.episodic.SingleEpisodeBuffer" title="Permalink to this definition">¶</a></dt>
<dd></dd></dl>

</div>
@@ -280,7 +280,7 @@ hindsight transitions. Should be one of HindsightGoalSelectionMethod</p></li>
<h3>BalancedExperienceReplay<a class="headerlink" href="#balancedexperiencereplay" title="Permalink to this headline">¶</a></h3>
<dl class="class">
<dt id="rl_coach.memories.non_episodic.BalancedExperienceReplay">
<em class="property">class </em><code class="descclassname">rl_coach.memories.non_episodic.</code><code class="descname">BalancedExperienceReplay</code><span class="sig-paren">(</span><em>max_size: Tuple[rl_coach.memories.memory.MemoryGranularity, int], allow_duplicates_in_batch_sampling: bool = True, num_classes: int = 0, state_key_with_the_class_index: Any = 'class'</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/memories/non_episodic/balanced_experience_replay.html#BalancedExperienceReplay"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.memories.non_episodic.BalancedExperienceReplay" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.memories.non_episodic.</code><code class="sig-name descname">BalancedExperienceReplay</code><span class="sig-paren">(</span><em class="sig-param">max_size: Tuple[rl_coach.memories.memory.MemoryGranularity, int], allow_duplicates_in_batch_sampling: bool = True, num_classes: int = 0, state_key_with_the_class_index: Any = 'class'</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/memories/non_episodic/balanced_experience_replay.html#BalancedExperienceReplay"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.memories.non_episodic.BalancedExperienceReplay" title="Permalink to this definition">¶</a></dt>
<dd><dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
@@ -299,7 +299,7 @@ this parameter determines the key to retrieve the class index value</p></li>
<h3>QDND<a class="headerlink" href="#qdnd" title="Permalink to this headline">¶</a></h3>
<dl class="class">
<dt id="rl_coach.memories.non_episodic.QDND">
<em class="property">class </em><code class="descclassname">rl_coach.memories.non_episodic.</code><code class="descname">QDND</code><span class="sig-paren">(</span><em>dict_size</em>, <em>key_width</em>, <em>num_actions</em>, <em>new_value_shift_coefficient=0.1</em>, <em>key_error_threshold=0.01</em>, <em>learning_rate=0.01</em>, <em>num_neighbors=50</em>, <em>return_additional_data=False</em>, <em>override_existing_keys=False</em>, <em>rebuild_on_every_update=False</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/memories/non_episodic/differentiable_neural_dictionary.html#QDND"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.memories.non_episodic.QDND" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.memories.non_episodic.</code><code class="sig-name descname">QDND</code><span class="sig-paren">(</span><em class="sig-param">dict_size</em>, <em class="sig-param">key_width</em>, <em class="sig-param">num_actions</em>, <em class="sig-param">new_value_shift_coefficient=0.1</em>, <em class="sig-param">key_error_threshold=0.01</em>, <em class="sig-param">learning_rate=0.01</em>, <em class="sig-param">num_neighbors=50</em>, <em class="sig-param">return_additional_data=False</em>, <em class="sig-param">override_existing_keys=False</em>, <em class="sig-param">rebuild_on_every_update=False</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/memories/non_episodic/differentiable_neural_dictionary.html#QDND"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.memories.non_episodic.QDND" title="Permalink to this definition">¶</a></dt>
<dd></dd></dl>

</div>
@@ -307,7 +307,7 @@ this parameter determines the key to retrieve the class index value</p></li>
<h3>ExperienceReplay<a class="headerlink" href="#experiencereplay" title="Permalink to this headline">¶</a></h3>
<dl class="class">
<dt id="rl_coach.memories.non_episodic.ExperienceReplay">
<em class="property">class </em><code class="descclassname">rl_coach.memories.non_episodic.</code><code class="descname">ExperienceReplay</code><span class="sig-paren">(</span><em>max_size: Tuple[rl_coach.memories.memory.MemoryGranularity, int], allow_duplicates_in_batch_sampling: bool = True</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/memories/non_episodic/experience_replay.html#ExperienceReplay"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.memories.non_episodic.ExperienceReplay" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.memories.non_episodic.</code><code class="sig-name descname">ExperienceReplay</code><span class="sig-paren">(</span><em class="sig-param">max_size: Tuple[rl_coach.memories.memory.MemoryGranularity, int], allow_duplicates_in_batch_sampling: bool = True</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/memories/non_episodic/experience_replay.html#ExperienceReplay"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.memories.non_episodic.ExperienceReplay" title="Permalink to this definition">¶</a></dt>
<dd><p>A regular replay buffer which stores transitions without any additional structure.</p>
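<p>A construction sketch for a transition-level buffer of one million entries:</p>
<pre>
from rl_coach.memories.non_episodic import ExperienceReplay
from rl_coach.memories.memory import MemoryGranularity

memory = ExperienceReplay(max_size=(MemoryGranularity.Transitions, 1000000))
</pre>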
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
@@ -324,7 +324,7 @@ this parameter determines the key to retrieve the class index value</p></li>
<h3>PrioritizedExperienceReplay<a class="headerlink" href="#prioritizedexperiencereplay" title="Permalink to this headline">¶</a></h3>
<dl class="class">
<dt id="rl_coach.memories.non_episodic.PrioritizedExperienceReplay">
<em class="property">class </em><code class="descclassname">rl_coach.memories.non_episodic.</code><code class="descname">PrioritizedExperienceReplay</code><span class="sig-paren">(</span><em>max_size: Tuple[rl_coach.memories.memory.MemoryGranularity, int], alpha: float = 0.6, beta: rl_coach.schedules.Schedule = <rl_coach.schedules.ConstantSchedule object>, epsilon: float = 1e-06, allow_duplicates_in_batch_sampling: bool = True</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/memories/non_episodic/prioritized_experience_replay.html#PrioritizedExperienceReplay"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.memories.non_episodic.PrioritizedExperienceReplay" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.memories.non_episodic.</code><code class="sig-name descname">PrioritizedExperienceReplay</code><span class="sig-paren">(</span><em class="sig-param">max_size: Tuple[rl_coach.memories.memory.MemoryGranularity, int], alpha: float = 0.6, beta: rl_coach.schedules.Schedule = <rl_coach.schedules.ConstantSchedule object>, epsilon: float = 1e-06, allow_duplicates_in_batch_sampling: bool = True</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/memories/non_episodic/prioritized_experience_replay.html#PrioritizedExperienceReplay"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.memories.non_episodic.PrioritizedExperienceReplay" title="Permalink to this definition">¶</a></dt>
<dd><p>This is the proportional sampling variant of the prioritized experience replay as described
in <a class="reference external" href="https://arxiv.org/pdf/1511.05952.pdf">https://arxiv.org/pdf/1511.05952.pdf</a>.</p>
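<p>A construction sketch using the default proportional-prioritization hyperparameters from
the signature above:</p>
<pre>
from rl_coach.memories.non_episodic import PrioritizedExperienceReplay
from rl_coach.memories.memory import MemoryGranularity

per_memory = PrioritizedExperienceReplay(
    max_size=(MemoryGranularity.Transitions, 1000000),
    alpha=0.6,      # how strongly TD-error priorities skew the sampling
    epsilon=1e-06)  # keeps every transition's priority strictly positive
</pre>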
<dl class="field-list simple">
@@ -345,7 +345,7 @@ in <a class="reference external" href="https://arxiv.org/pdf/1511.05952.pdf">htt
<h3>TransitionCollection<a class="headerlink" href="#transitioncollection" title="Permalink to this headline">¶</a></h3>
<dl class="class">
<dt id="rl_coach.memories.non_episodic.TransitionCollection">
<em class="property">class </em><code class="descclassname">rl_coach.memories.non_episodic.</code><code class="descname">TransitionCollection</code><a class="reference internal" href="../../_modules/rl_coach/memories/non_episodic/transition_collection.html#TransitionCollection"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.memories.non_episodic.TransitionCollection" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.memories.non_episodic.</code><code class="sig-name descname">TransitionCollection</code><a class="reference internal" href="../../_modules/rl_coach/memories/non_episodic/transition_collection.html#TransitionCollection"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.memories.non_episodic.TransitionCollection" title="Permalink to this definition">¶</a></dt>
<dd><p>A simple Python implementation of the transition collection that non-episodic memories
are constructed on top of.</p>
</dd></dl>

@@ -193,7 +193,7 @@
<h2>RedisPubSubBackend<a class="headerlink" href="#redispubsubbackend" title="Permalink to this headline">¶</a></h2>
<dl class="class">
<dt id="rl_coach.memories.backend.redis.RedisPubSubBackend">
<em class="property">class </em><code class="descclassname">rl_coach.memories.backend.redis.</code><code class="descname">RedisPubSubBackend</code><span class="sig-paren">(</span><em>params: rl_coach.memories.backend.redis.RedisPubSubMemoryBackendParameters</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/memories/backend/redis.html#RedisPubSubBackend"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.memories.backend.redis.RedisPubSubBackend" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.memories.backend.redis.</code><code class="sig-name descname">RedisPubSubBackend</code><span class="sig-paren">(</span><em class="sig-param">params: rl_coach.memories.backend.redis.RedisPubSubMemoryBackendParameters</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/memories/backend/redis.html#RedisPubSubBackend"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.memories.backend.redis.RedisPubSubBackend" title="Permalink to this definition">¶</a></dt>
<dd><p>A memory backend which transfers experiences from the rollout workers to the training worker using Redis Pub/Sub in
Coach when distributed mode is used.</p>
<dl class="field-list simple">

@@ -193,7 +193,7 @@
<h2>Kubernetes<a class="headerlink" href="#kubernetes" title="Permalink to this headline">¶</a></h2>
<dl class="class">
<dt id="rl_coach.orchestrators.kubernetes_orchestrator.Kubernetes">
<em class="property">class </em><code class="descclassname">rl_coach.orchestrators.kubernetes_orchestrator.</code><code class="descname">Kubernetes</code><span class="sig-paren">(</span><em>params: rl_coach.orchestrators.kubernetes_orchestrator.KubernetesParameters</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/orchestrators/kubernetes_orchestrator.html#Kubernetes"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.orchestrators.kubernetes_orchestrator.Kubernetes" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.orchestrators.kubernetes_orchestrator.</code><code class="sig-name descname">Kubernetes</code><span class="sig-paren">(</span><em class="sig-param">params: rl_coach.orchestrators.kubernetes_orchestrator.KubernetesParameters</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/orchestrators/kubernetes_orchestrator.html#Kubernetes"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.orchestrators.kubernetes_orchestrator.Kubernetes" title="Permalink to this definition">¶</a></dt>
<dd><p>An orchestrator implementation which uses Kubernetes to deploy components such as the training and rollout workers
and Redis Pub/Sub in Coach when used in distributed mode.</p>
<dl class="field-list simple">

@@ -208,7 +208,7 @@
<h2>Space<a class="headerlink" href="#space" title="Permalink to this headline">¶</a></h2>
<dl class="class">
<dt id="rl_coach.spaces.Space">
<em class="property">class </em><code class="descclassname">rl_coach.spaces.</code><code class="descname">Space</code><span class="sig-paren">(</span><em>shape: Union[int, tuple, list, numpy.ndarray], low: Union[None, int, float, numpy.ndarray] = -inf, high: Union[None, int, float, numpy.ndarray] = inf</em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/rl_coach/spaces.html#Space"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.spaces.Space" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.spaces.</code><code class="sig-name descname">Space</code><span class="sig-paren">(</span><em class="sig-param">shape: Union[int, tuple, list, numpy.ndarray], low: Union[None, int, float, numpy.ndarray] = -inf, high: Union[None, int, float, numpy.ndarray] = inf</em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/rl_coach/spaces.html#Space"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.spaces.Space" title="Permalink to this definition">¶</a></dt>
<dd><p>A space defines a set of valid values.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
@@ -223,7 +223,7 @@ or a single value defining the general highest values</p></li>
</dl>
<dl class="method">
<dt id="rl_coach.spaces.Space.contains">
<code class="descname">contains</code><span class="sig-paren">(</span><em>val: Union[int, float, numpy.ndarray]</em><span class="sig-paren">)</span> → bool<a class="reference internal" href="../_modules/rl_coach/spaces.html#Space.contains"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.spaces.Space.contains" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">contains</code><span class="sig-paren">(</span><em class="sig-param">val: Union[int, float, numpy.ndarray]</em><span class="sig-paren">)</span> → bool<a class="reference internal" href="../_modules/rl_coach/spaces.html#Space.contains"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.spaces.Space.contains" title="Permalink to this definition">¶</a></dt>
<dd><p>Checks if a value is contained in this space. The shape must match and
all of the values must be within the low and high bounds.</p>
<dl class="field-list simple">
@@ -238,7 +238,7 @@ all of the values must be within the low and high bounds.</p>

<dl class="method">
<dt id="rl_coach.spaces.Space.is_valid_index">
<code class="descname">is_valid_index</code><span class="sig-paren">(</span><em>index: numpy.ndarray</em><span class="sig-paren">)</span> → bool<a class="reference internal" href="../_modules/rl_coach/spaces.html#Space.is_valid_index"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.spaces.Space.is_valid_index" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">is_valid_index</code><span class="sig-paren">(</span><em class="sig-param">index: numpy.ndarray</em><span class="sig-paren">)</span> → bool<a class="reference internal" href="../_modules/rl_coach/spaces.html#Space.is_valid_index"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.spaces.Space.is_valid_index" title="Permalink to this definition">¶</a></dt>
<dd><p>Checks if a given multidimensional index is within the bounds of the shape of the space</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
@@ -252,7 +252,7 @@ all of the values must be within the low and high bounds.</p>

<dl class="method">
<dt id="rl_coach.spaces.Space.sample">
<code class="descname">sample</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → numpy.ndarray<a class="reference internal" href="../_modules/rl_coach/spaces.html#Space.sample"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.spaces.Space.sample" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">sample</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → numpy.ndarray<a class="reference internal" href="../_modules/rl_coach/spaces.html#Space.sample"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.spaces.Space.sample" title="Permalink to this definition">¶</a></dt>
<dd><p>Samples the defined space, either uniformly if the space bounds are defined, or from a normal distribution if no
bounds are defined.</p>
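<p>A sketch of how the three methods behave, using the concrete ObservationSpace subclass
documented below:</p>
<pre>
import numpy as np

from rl_coach.spaces import ObservationSpace

space = ObservationSpace(shape=np.array([2]), low=0, high=1)

space.contains(np.array([0.2, 0.7]))   # True: shape and bounds match
space.contains(np.array([0.2, 1.5]))   # False: 1.5 exceeds the high bound
space.is_valid_index(np.array([1]))    # True: index 1 is within shape (2,)
space.is_valid_index(np.array([5]))    # False: out of bounds
space.sample()                         # uniform sample, since bounds are finite
</pre>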
<dl class="field-list simple">
@@ -269,10 +269,10 @@ bounds are defined</p>
<h2>Observation Spaces<a class="headerlink" href="#observation-spaces" title="Permalink to this headline">¶</a></h2>
<dl class="class">
<dt id="rl_coach.spaces.ObservationSpace">
<em class="property">class </em><code class="descclassname">rl_coach.spaces.</code><code class="descname">ObservationSpace</code><span class="sig-paren">(</span><em>shape: Union[int, numpy.ndarray], low: Union[None, int, float, numpy.ndarray] = -inf, high: Union[None, int, float, numpy.ndarray] = inf</em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/rl_coach/spaces.html#ObservationSpace"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.spaces.ObservationSpace" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.spaces.</code><code class="sig-name descname">ObservationSpace</code><span class="sig-paren">(</span><em class="sig-param">shape: Union[int, numpy.ndarray], low: Union[None, int, float, numpy.ndarray] = -inf, high: Union[None, int, float, numpy.ndarray] = inf</em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/rl_coach/spaces.html#ObservationSpace"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.spaces.ObservationSpace" title="Permalink to this definition">¶</a></dt>
<dd><dl class="method">
<dt id="rl_coach.spaces.ObservationSpace.contains">
<code class="descname">contains</code><span class="sig-paren">(</span><em>val: Union[int, float, numpy.ndarray]</em><span class="sig-paren">)</span> → bool<a class="headerlink" href="#rl_coach.spaces.ObservationSpace.contains" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">contains</code><span class="sig-paren">(</span><em class="sig-param">val: Union[int, float, numpy.ndarray]</em><span class="sig-paren">)</span> → bool<a class="headerlink" href="#rl_coach.spaces.ObservationSpace.contains" title="Permalink to this definition">¶</a></dt>
<dd><p>Checks if a value is contained in this space. The shape must match and
all of the values must be within the low and high bounds.</p>
<dl class="field-list simple">
@@ -287,7 +287,7 @@ all of the values must be within the low and high bounds.</p>

<dl class="method">
<dt id="rl_coach.spaces.ObservationSpace.is_valid_index">
<code class="descname">is_valid_index</code><span class="sig-paren">(</span><em>index: numpy.ndarray</em><span class="sig-paren">)</span> → bool<a class="headerlink" href="#rl_coach.spaces.ObservationSpace.is_valid_index" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">is_valid_index</code><span class="sig-paren">(</span><em class="sig-param">index: numpy.ndarray</em><span class="sig-paren">)</span> → bool<a class="headerlink" href="#rl_coach.spaces.ObservationSpace.is_valid_index" title="Permalink to this definition">¶</a></dt>
<dd><p>Checks if a given multidimensional index is within the bounds of the shape of the space</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
@@ -301,7 +301,7 @@ all of the values must be within the low and high bounds.</p>

<dl class="method">
<dt id="rl_coach.spaces.ObservationSpace.sample">
<code class="descname">sample</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → numpy.ndarray<a class="headerlink" href="#rl_coach.spaces.ObservationSpace.sample" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">sample</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → numpy.ndarray<a class="headerlink" href="#rl_coach.spaces.ObservationSpace.sample" title="Permalink to this definition">¶</a></dt>
<dd><p>Samples the defined space, either uniformly if the space bounds are defined, or from a normal distribution if no
bounds are defined.</p>
<dl class="field-list simple">
@@ -317,7 +317,7 @@ bounds are defined</p>
<h3>VectorObservationSpace<a class="headerlink" href="#vectorobservationspace" title="Permalink to this headline">¶</a></h3>
<h3>VectorObservationSpace<a class="headerlink" href="#vectorobservationspace" title="Permalink to this headline">¶</a></h3>
<dl class="class">
<dt id="rl_coach.spaces.VectorObservationSpace">
<em class="property">class </em><code class="descclassname">rl_coach.spaces.</code><code class="descname">VectorObservationSpace</code><span class="sig-paren">(</span><em>shape: int</em>, <em>low: Union[None</em>, <em>int</em>, <em>float</em>, <em>numpy.ndarray] = -inf</em>, <em>high: Union[None</em>, <em>int</em>, <em>float</em>, <em>numpy.ndarray] = inf</em>, <em>measurements_names: List[str] = None</em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/rl_coach/spaces.html#VectorObservationSpace"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.spaces.VectorObservationSpace" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.spaces.</code><code class="sig-name descname">VectorObservationSpace</code><span class="sig-paren">(</span><em class="sig-param">shape: int</em>, <em class="sig-param">low: Union[None</em>, <em class="sig-param">int</em>, <em class="sig-param">float</em>, <em class="sig-param">numpy.ndarray] = -inf</em>, <em class="sig-param">high: Union[None</em>, <em class="sig-param">int</em>, <em class="sig-param">float</em>, <em class="sig-param">numpy.ndarray] = inf</em>, <em class="sig-param">measurements_names: List[str] = None</em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/rl_coach/spaces.html#VectorObservationSpace"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.spaces.VectorObservationSpace" title="Permalink to this definition">¶</a></dt>
<dd><p>An observation space which is defined as a vector of elements. This can be particularly useful for environments
which return measurements, such as in robotic environments.</p>
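<p>A construction sketch for a 4-element measurements vector (the measurement names are hypothetical):</p>
<pre>
from rl_coach.spaces import VectorObservationSpace

measurements_space = VectorObservationSpace(
    shape=4, measurements_names=['x', 'y', 'vx', 'vy'])
</pre>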
</dd></dl>
@@ -327,7 +327,7 @@ which return measurements, such as in robotic environments.</p>
<h3>PlanarMapsObservationSpace<a class="headerlink" href="#planarmapsobservationspace" title="Permalink to this headline">¶</a></h3>
<dl class="class">
<dt id="rl_coach.spaces.PlanarMapsObservationSpace">
<em class="property">class </em><code class="descclassname">rl_coach.spaces.</code><code class="descname">PlanarMapsObservationSpace</code><span class="sig-paren">(</span><em>shape: numpy.ndarray</em>, <em>low: int</em>, <em>high: int</em>, <em>channels_axis: int = -1</em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/rl_coach/spaces.html#PlanarMapsObservationSpace"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.spaces.PlanarMapsObservationSpace" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.spaces.</code><code class="sig-name descname">PlanarMapsObservationSpace</code><span class="sig-paren">(</span><em class="sig-param">shape: numpy.ndarray</em>, <em class="sig-param">low: int</em>, <em class="sig-param">high: int</em>, <em class="sig-param">channels_axis: int = -1</em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/rl_coach/spaces.html#PlanarMapsObservationSpace"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.spaces.PlanarMapsObservationSpace" title="Permalink to this definition">¶</a></dt>
<dd><p>An observation space which defines a stack of 2D observations. For example, an environment which returns
a stack of segmentation maps, as in Starcraft.</p>
</dd></dl>
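<p>A minimal sketch with illustrative dimensions: four 84x84 integer-valued maps stacked along the last
axis, e.g. segmentation labels in the range [0, 16]:</p>
<div class="highlight"><pre>import numpy as np
from rl_coach.spaces import PlanarMapsObservationSpace

# four 84x84 planar maps, channels on the last axis
space = PlanarMapsObservationSpace(shape=np.array([84, 84, 4]),
                                   low=0, high=16, channels_axis=-1)</pre></div>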
@@ -337,7 +337,7 @@ a stack of segmentation maps like in Starcraft.</p>
<h3>ImageObservationSpace<a class="headerlink" href="#imageobservationspace" title="Permalink to this headline">¶</a></h3>
<dl class="class">
<dt id="rl_coach.spaces.ImageObservationSpace">
<em class="property">class </em><code class="descclassname">rl_coach.spaces.</code><code class="descname">ImageObservationSpace</code><span class="sig-paren">(</span><em>shape: numpy.ndarray</em>, <em>high: int</em>, <em>channels_axis: int = -1</em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/rl_coach/spaces.html#ImageObservationSpace"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.spaces.ImageObservationSpace" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.spaces.</code><code class="sig-name descname">ImageObservationSpace</code><span class="sig-paren">(</span><em class="sig-param">shape: numpy.ndarray</em>, <em class="sig-param">high: int</em>, <em class="sig-param">channels_axis: int = -1</em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/rl_coach/spaces.html#ImageObservationSpace"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.spaces.ImageObservationSpace" title="Permalink to this definition">¶</a></dt>
<dd><p>An observation space which is a special case of PlanarMapsObservationSpace, where the stack of 2D observations
represents an RGB image or a grayscale image.</p>
</dd></dl>
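<p>Note that the signature takes only the shape and the high bound; pixel intensities implicitly start at 0.
A 210x160 RGB frame, for example:</p>
<div class="highlight"><pre>import numpy as np
from rl_coach.spaces import ImageObservationSpace

# an RGB image with pixel values in [0, 255]
space = ImageObservationSpace(shape=np.array([210, 160, 3]), high=255)</pre></div>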
@@ -348,10 +348,10 @@ represent a RGB image, or a grayscale image.</p>
<h2>Action Spaces<a class="headerlink" href="#action-spaces" title="Permalink to this headline">¶</a></h2>
<dl class="class">
<dt id="rl_coach.spaces.ActionSpace">
<em class="property">class </em><code class="descclassname">rl_coach.spaces.</code><code class="descname">ActionSpace</code><span class="sig-paren">(</span><em>shape: Union[int, numpy.ndarray], low: Union[None, int, float, numpy.ndarray] = -inf, high: Union[None, int, float, numpy.ndarray] = inf, descriptions: Union[None, List, Dict] = None, default_action: Union[int, float, numpy.ndarray, List] = None</em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/rl_coach/spaces.html#ActionSpace"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.spaces.ActionSpace" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.spaces.</code><code class="sig-name descname">ActionSpace</code><span class="sig-paren">(</span><em class="sig-param">shape: Union[int, numpy.ndarray], low: Union[None, int, float, numpy.ndarray] = -inf, high: Union[None, int, float, numpy.ndarray] = inf, descriptions: Union[None, List, Dict] = None, default_action: Union[int, float, numpy.ndarray, List] = None</em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/rl_coach/spaces.html#ActionSpace"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.spaces.ActionSpace" title="Permalink to this definition">¶</a></dt>
<dd><dl class="method">
<dt id="rl_coach.spaces.ActionSpace.clip_action_to_space">
<code class="descname">clip_action_to_space</code><span class="sig-paren">(</span><em>action: Union[int, float, numpy.ndarray, List]</em><span class="sig-paren">)</span> → Union[int, float, numpy.ndarray, List]<a class="reference internal" href="../_modules/rl_coach/spaces.html#ActionSpace.clip_action_to_space"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.spaces.ActionSpace.clip_action_to_space" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">clip_action_to_space</code><span class="sig-paren">(</span><em class="sig-param">action: Union[int, float, numpy.ndarray, List]</em><span class="sig-paren">)</span> → Union[int, float, numpy.ndarray, List]<a class="reference internal" href="../_modules/rl_coach/spaces.html#ActionSpace.clip_action_to_space"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.spaces.ActionSpace.clip_action_to_space" title="Permalink to this definition">¶</a></dt>
<dd><p>Given an action, clip its values to fit the action space ranges</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
@@ -365,7 +365,7 @@ represent a RGB image, or a grayscale image.</p>
<dl class="method">
<dt id="rl_coach.spaces.ActionSpace.contains">
<code class="descname">contains</code><span class="sig-paren">(</span><em>val: Union[int, float, numpy.ndarray]</em><span class="sig-paren">)</span> → bool<a class="headerlink" href="#rl_coach.spaces.ActionSpace.contains" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">contains</code><span class="sig-paren">(</span><em class="sig-param">val: Union[int, float, numpy.ndarray]</em><span class="sig-paren">)</span> → bool<a class="headerlink" href="#rl_coach.spaces.ActionSpace.contains" title="Permalink to this definition">¶</a></dt>
<dd><p>Checks if value is contained by this space. The shape must match and
all of the values must be within the low and high bounds.</p>
<dl class="field-list simple">
@@ -380,7 +380,7 @@ all of the values must be within the low and high bounds.</p>
<dl class="method">
<dt id="rl_coach.spaces.ActionSpace.is_valid_index">
<code class="descname">is_valid_index</code><span class="sig-paren">(</span><em>index: numpy.ndarray</em><span class="sig-paren">)</span> → bool<a class="headerlink" href="#rl_coach.spaces.ActionSpace.is_valid_index" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">is_valid_index</code><span class="sig-paren">(</span><em class="sig-param">index: numpy.ndarray</em><span class="sig-paren">)</span> → bool<a class="headerlink" href="#rl_coach.spaces.ActionSpace.is_valid_index" title="Permalink to this definition">¶</a></dt>
<dd><p>Checks if a given multidimensional index is within the bounds of the shape of the space</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
@@ -394,7 +394,7 @@ all of the values must be within the low and high bounds.</p>
<dl class="method">
<dt id="rl_coach.spaces.ActionSpace.sample">
<code class="descname">sample</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → numpy.ndarray<a class="headerlink" href="#rl_coach.spaces.ActionSpace.sample" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">sample</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → numpy.ndarray<a class="headerlink" href="#rl_coach.spaces.ActionSpace.sample" title="Permalink to this definition">¶</a></dt>
<dd><p>Sample the defined space, either uniformly, if space bounds are defined, or normally distributed if no
bounds are defined</p>
<dl class="field-list simple">
@@ -406,7 +406,7 @@ bounds are defined</p>
<dl class="method">
<dt id="rl_coach.spaces.ActionSpace.sample_with_info">
<code class="descname">sample_with_info</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → rl_coach.core_types.ActionInfo<a class="reference internal" href="../_modules/rl_coach/spaces.html#ActionSpace.sample_with_info"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.spaces.ActionSpace.sample_with_info" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">sample_with_info</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → rl_coach.core_types.ActionInfo<a class="reference internal" href="../_modules/rl_coach/spaces.html#ActionSpace.sample_with_info"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.spaces.ActionSpace.sample_with_info" title="Permalink to this definition">¶</a></dt>
<dd><p>Get a random action with additional “fake” info</p>
<dl class="field-list simple">
<dt class="field-odd">Returns</dt>
@@ -421,7 +421,7 @@ bounds are defined</p>
<h3>AttentionActionSpace<a class="headerlink" href="#attentionactionspace" title="Permalink to this headline">¶</a></h3>
<dl class="class">
<dt id="rl_coach.spaces.AttentionActionSpace">
<em class="property">class </em><code class="descclassname">rl_coach.spaces.</code><code class="descname">AttentionActionSpace</code><span class="sig-paren">(</span><em>shape: int</em>, <em>low: Union[None</em>, <em>int</em>, <em>float</em>, <em>numpy.ndarray] = -inf</em>, <em>high: Union[None</em>, <em>int</em>, <em>float</em>, <em>numpy.ndarray] = inf</em>, <em>descriptions: Union[None</em>, <em>List</em>, <em>Dict] = None</em>, <em>default_action: numpy.ndarray = None</em>, <em>forced_attention_size: Union[None</em>, <em>int</em>, <em>float</em>, <em>numpy.ndarray] = None</em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/rl_coach/spaces.html#AttentionActionSpace"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.spaces.AttentionActionSpace" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.spaces.</code><code class="sig-name descname">AttentionActionSpace</code><span class="sig-paren">(</span><em class="sig-param">shape: int</em>, <em class="sig-param">low: Union[None</em>, <em class="sig-param">int</em>, <em class="sig-param">float</em>, <em class="sig-param">numpy.ndarray] = -inf</em>, <em class="sig-param">high: Union[None</em>, <em class="sig-param">int</em>, <em class="sig-param">float</em>, <em class="sig-param">numpy.ndarray] = inf</em>, <em class="sig-param">descriptions: Union[None</em>, <em class="sig-param">List</em>, <em class="sig-param">Dict] = None</em>, <em class="sig-param">default_action: numpy.ndarray = None</em>, <em class="sig-param">forced_attention_size: Union[None</em>, <em class="sig-param">int</em>, <em class="sig-param">float</em>, <em class="sig-param">numpy.ndarray] = None</em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/rl_coach/spaces.html#AttentionActionSpace"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.spaces.AttentionActionSpace" title="Permalink to this definition">¶</a></dt>
<dd><p>A box selection continuous action space, meaning that the actions are defined as selecting a multidimensional box
from a given range.
The actions will be in the form:
@@ -433,7 +433,7 @@ The actions will be in the form:
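<p>A rough sketch of constructing such a space, under the assumption that an action selects a box inside
a 2D plane; all values here are illustrative rather than taken from a preset:</p>
<div class="highlight"><pre>from rl_coach.spaces import AttentionActionSpace

# select a box inside an 84x84 plane; with forced_attention_size set,
# the box size is fixed and only its position is chosen
action_space = AttentionActionSpace(shape=2, low=0, high=84, forced_attention_size=21)</pre></div>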
<h3>BoxActionSpace<a class="headerlink" href="#boxactionspace" title="Permalink to this headline">¶</a></h3>
<dl class="class">
<dt id="rl_coach.spaces.BoxActionSpace">
<em class="property">class </em><code class="descclassname">rl_coach.spaces.</code><code class="descname">BoxActionSpace</code><span class="sig-paren">(</span><em>shape: Union[int, numpy.ndarray], low: Union[None, int, float, numpy.ndarray] = -inf, high: Union[None, int, float, numpy.ndarray] = inf, descriptions: Union[None, List, Dict] = None, default_action: numpy.ndarray = None</em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/rl_coach/spaces.html#BoxActionSpace"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.spaces.BoxActionSpace" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.spaces.</code><code class="sig-name descname">BoxActionSpace</code><span class="sig-paren">(</span><em class="sig-param">shape: Union[int, numpy.ndarray], low: Union[None, int, float, numpy.ndarray] = -inf, high: Union[None, int, float, numpy.ndarray] = inf, descriptions: Union[None, List, Dict] = None, default_action: numpy.ndarray = None</em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/rl_coach/spaces.html#BoxActionSpace"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.spaces.BoxActionSpace" title="Permalink to this definition">¶</a></dt>
<dd><p>A multidimensional bounded or unbounded continuous action space</p>
</dd></dl>
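<p>This is the usual choice for continuous-control agents such as DDPG or TD3. A short sketch that also
exercises the generic ActionSpace methods documented above; the outputs shown in comments are illustrative:</p>
<div class="highlight"><pre>import numpy as np
from rl_coach.spaces import BoxActionSpace

# two continuous actions (e.g. steering and throttle), each bounded in [-1, 1]
action_space = BoxActionSpace(shape=2, low=-1, high=1)

action = action_space.sample()                            # uniform, since bounds are set
action_space.contains(np.array([0.5, -0.5]))              # True
action_space.clip_action_to_space(np.array([1.5, -3.0]))  # array([ 1., -1.])</pre></div>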
@@ -442,7 +442,7 @@ The actions will be in the form:
<h3>DiscreteActionSpace<a class="headerlink" href="#discreteactionspace" title="Permalink to this headline">¶</a></h3>
<dl class="class">
<dt id="rl_coach.spaces.DiscreteActionSpace">
<em class="property">class </em><code class="descclassname">rl_coach.spaces.</code><code class="descname">DiscreteActionSpace</code><span class="sig-paren">(</span><em>num_actions: int</em>, <em>descriptions: Union[None</em>, <em>List</em>, <em>Dict] = None</em>, <em>default_action: numpy.ndarray = None</em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/rl_coach/spaces.html#DiscreteActionSpace"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.spaces.DiscreteActionSpace" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.spaces.</code><code class="sig-name descname">DiscreteActionSpace</code><span class="sig-paren">(</span><em class="sig-param">num_actions: int</em>, <em class="sig-param">descriptions: Union[None</em>, <em class="sig-param">List</em>, <em class="sig-param">Dict] = None</em>, <em class="sig-param">default_action: numpy.ndarray = None</em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/rl_coach/spaces.html#DiscreteActionSpace"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.spaces.DiscreteActionSpace" title="Permalink to this definition">¶</a></dt>
<dd><p>A discrete action space with action indices as actions</p>
</dd></dl>
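<p>For example, a three-action environment with human-readable descriptions (the action names are illustrative):</p>
<div class="highlight"><pre>from rl_coach.spaces import DiscreteActionSpace

action_space = DiscreteActionSpace(num_actions=3,
                                   descriptions=['left', 'right', 'no-op'])
action = action_space.sample()   # an index in {0, 1, 2}</pre></div>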
@@ -451,7 +451,7 @@ The actions will be in the form:
<h3>MultiSelectActionSpace<a class="headerlink" href="#multiselectactionspace" title="Permalink to this headline">¶</a></h3>
<dl class="class">
<dt id="rl_coach.spaces.MultiSelectActionSpace">
<em class="property">class </em><code class="descclassname">rl_coach.spaces.</code><code class="descname">MultiSelectActionSpace</code><span class="sig-paren">(</span><em>size: int</em>, <em>max_simultaneous_selected_actions: int = 1</em>, <em>descriptions: Union[None</em>, <em>List</em>, <em>Dict] = None</em>, <em>default_action: numpy.ndarray = None</em>, <em>allow_no_action_to_be_selected=True</em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/rl_coach/spaces.html#MultiSelectActionSpace"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.spaces.MultiSelectActionSpace" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.spaces.</code><code class="sig-name descname">MultiSelectActionSpace</code><span class="sig-paren">(</span><em class="sig-param">size: int</em>, <em class="sig-param">max_simultaneous_selected_actions: int = 1</em>, <em class="sig-param">descriptions: Union[None</em>, <em class="sig-param">List</em>, <em class="sig-param">Dict] = None</em>, <em class="sig-param">default_action: numpy.ndarray = None</em>, <em class="sig-param">allow_no_action_to_be_selected=True</em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/rl_coach/spaces.html#MultiSelectActionSpace"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.spaces.MultiSelectActionSpace" title="Permalink to this definition">¶</a></dt>
<dd><p>A discrete action space where multiple actions can be selected at once. The actions are encoded as multi-hot vectors.</p>
</dd></dl>
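<p>A sketch with illustrative sizes: four selectable actions of which at most two may be active at once,
so a valid action is a multi-hot vector such as <code>[1, 0, 1, 0]</code>:</p>
<div class="highlight"><pre>from rl_coach.spaces import MultiSelectActionSpace

action_space = MultiSelectActionSpace(size=4,
                                      max_simultaneous_selected_actions=2,
                                      allow_no_action_to_be_selected=True)</pre></div>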
@@ -460,7 +460,7 @@ The actions will be in the form:
<h3>CompoundActionSpace<a class="headerlink" href="#compoundactionspace" title="Permalink to this headline">¶</a></h3>
<dl class="class">
<dt id="rl_coach.spaces.CompoundActionSpace">
<em class="property">class </em><code class="descclassname">rl_coach.spaces.</code><code class="descname">CompoundActionSpace</code><span class="sig-paren">(</span><em>sub_spaces: List[rl_coach.spaces.ActionSpace]</em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/rl_coach/spaces.html#CompoundActionSpace"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.spaces.CompoundActionSpace" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.spaces.</code><code class="sig-name descname">CompoundActionSpace</code><span class="sig-paren">(</span><em class="sig-param">sub_spaces: List[rl_coach.spaces.ActionSpace]</em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/rl_coach/spaces.html#CompoundActionSpace"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.spaces.CompoundActionSpace" title="Permalink to this definition">¶</a></dt>
<dd><p>An action space which consists of multiple sub-action spaces.
For example, in Starcraft the agent should choose an action identifier from ~550 options (Discrete(550)),
but it also needs to choose 13 different arguments for the selected action identifier, where each argument is
@@ -473,7 +473,7 @@ by itself an action space. In Starcraft, the arguments are Discrete action space
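<p>A much smaller sketch of the same idea: one discrete choice combined with a continuous argument,
grouped into a single composite action space; the sub-space sizes are illustrative:</p>
<div class="highlight"><pre>from rl_coach.spaces import BoxActionSpace, CompoundActionSpace, DiscreteActionSpace

action_space = CompoundActionSpace([
    DiscreteActionSpace(num_actions=5),       # which command to issue
    BoxActionSpace(shape=2, low=-1, high=1),  # a continuous argument for it
])</pre></div>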
<h2>Goal Spaces<a class="headerlink" href="#goal-spaces" title="Permalink to this headline">¶</a></h2>
<dl class="class">
<dt id="rl_coach.spaces.GoalsSpace">
<em class="property">class </em><code class="descclassname">rl_coach.spaces.</code><code class="descname">GoalsSpace</code><span class="sig-paren">(</span><em>goal_name: str, reward_type: rl_coach.spaces.GoalToRewardConversion, distance_metric: Union[rl_coach.spaces.GoalsSpace.DistanceMetric, Callable]</em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/rl_coach/spaces.html#GoalsSpace"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.spaces.GoalsSpace" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.spaces.</code><code class="sig-name descname">GoalsSpace</code><span class="sig-paren">(</span><em class="sig-param">goal_name: str, reward_type: rl_coach.spaces.GoalToRewardConversion, distance_metric: Union[rl_coach.spaces.GoalsSpace.DistanceMetric, Callable]</em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/rl_coach/spaces.html#GoalsSpace"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.spaces.GoalsSpace" title="Permalink to this definition">¶</a></dt>
<dd><p>A multidimensional space with a goal type definition. It also behaves as an action space, so that hierarchical
agents can use it as an output action space.
The class acts as a wrapper to the target space. So after setting the target space, all the values of the class
@@ -491,13 +491,13 @@ returns the distance between them</p></li>
</dl>
<dl class="class">
<dt id="rl_coach.spaces.GoalsSpace.DistanceMetric">
<em class="property">class </em><code class="descname">DistanceMetric</code><a class="reference internal" href="../_modules/rl_coach/spaces.html#GoalsSpace.DistanceMetric"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.spaces.GoalsSpace.DistanceMetric" title="Permalink to this definition">¶</a></dt>
<em class="property">class </em><code class="sig-name descname">DistanceMetric</code><a class="reference internal" href="../_modules/rl_coach/spaces.html#GoalsSpace.DistanceMetric"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.spaces.GoalsSpace.DistanceMetric" title="Permalink to this definition">¶</a></dt>
<dd><p>An enumeration of the supported distance metrics.</p>
</dd></dl>
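<p>Putting the pieces together, a goals space along the lines of the HAC presets might look as follows.
<code>ReachingGoal</code> is a GoalToRewardConversion shipped in rl_coach.spaces, and the threshold,
reward values and state contents below are illustrative:</p>
<div class="highlight"><pre>import numpy as np
from rl_coach.spaces import GoalsSpace, ReachingGoal

# goals are extracted from the 'achieved_goal' entry of the state; reaching within
# a Euclidean distance of 0.1 yields reward 0, otherwise -1
goals_space = GoalsSpace(goal_name='achieved_goal',
                         reward_type=ReachingGoal(distance_from_goal_threshold=0.1,
                                                  goal_reaching_reward=0,
                                                  default_reward=-1),
                         distance_metric=GoalsSpace.DistanceMetric.Euclidean)

# once the target space is set, states can be scored against a goal
state = {'achieved_goal': np.array([0.0, 0.0, 0.05])}
goal = np.array([0.0, 0.0, 0.0])
reward, goal_reached = goals_space.get_reward_for_goal_and_state(goal, state)</pre></div>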
<dl class="method">
<dt id="rl_coach.spaces.GoalsSpace.clip_action_to_space">
<code class="descname">clip_action_to_space</code><span class="sig-paren">(</span><em>action: Union[int, float, numpy.ndarray, List]</em><span class="sig-paren">)</span> → Union[int, float, numpy.ndarray, List]<a class="headerlink" href="#rl_coach.spaces.GoalsSpace.clip_action_to_space" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">clip_action_to_space</code><span class="sig-paren">(</span><em class="sig-param">action: Union[int, float, numpy.ndarray, List]</em><span class="sig-paren">)</span> → Union[int, float, numpy.ndarray, List]<a class="headerlink" href="#rl_coach.spaces.GoalsSpace.clip_action_to_space" title="Permalink to this definition">¶</a></dt>
<dd><p>Given an action, clip its values to fit the action space ranges</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
@@ -511,7 +511,7 @@ returns the distance between them</p></li>
<dl class="method">
<dt id="rl_coach.spaces.GoalsSpace.contains">
<code class="descname">contains</code><span class="sig-paren">(</span><em>val: Union[int, float, numpy.ndarray]</em><span class="sig-paren">)</span> → bool<a class="headerlink" href="#rl_coach.spaces.GoalsSpace.contains" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">contains</code><span class="sig-paren">(</span><em class="sig-param">val: Union[int, float, numpy.ndarray]</em><span class="sig-paren">)</span> → bool<a class="headerlink" href="#rl_coach.spaces.GoalsSpace.contains" title="Permalink to this definition">¶</a></dt>
<dd><p>Checks if value is contained by this space. The shape must match and
all of the values must be within the low and high bounds.</p>
<dl class="field-list simple">
@@ -526,7 +526,7 @@ all of the values must be within the low and high bounds.</p>
<dl class="method">
<dt id="rl_coach.spaces.GoalsSpace.distance_from_goal">
<code class="descname">distance_from_goal</code><span class="sig-paren">(</span><em>goal: numpy.ndarray</em>, <em>state: dict</em><span class="sig-paren">)</span> → float<a class="reference internal" href="../_modules/rl_coach/spaces.html#GoalsSpace.distance_from_goal"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.spaces.GoalsSpace.distance_from_goal" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">distance_from_goal</code><span class="sig-paren">(</span><em class="sig-param">goal: numpy.ndarray</em>, <em class="sig-param">state: dict</em><span class="sig-paren">)</span> → float<a class="reference internal" href="../_modules/rl_coach/spaces.html#GoalsSpace.distance_from_goal"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.spaces.GoalsSpace.distance_from_goal" title="Permalink to this definition">¶</a></dt>
<dd><p>Given a state, check its distance from the goal</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
@@ -543,7 +543,7 @@ all of the values must be within the low and high bounds.</p>
<dl class="method">
<dt id="rl_coach.spaces.GoalsSpace.get_reward_for_goal_and_state">
<code class="descname">get_reward_for_goal_and_state</code><span class="sig-paren">(</span><em>goal: numpy.ndarray</em>, <em>state: dict</em><span class="sig-paren">)</span> → Tuple[float, bool]<a class="reference internal" href="../_modules/rl_coach/spaces.html#GoalsSpace.get_reward_for_goal_and_state"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.spaces.GoalsSpace.get_reward_for_goal_and_state" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">get_reward_for_goal_and_state</code><span class="sig-paren">(</span><em class="sig-param">goal: numpy.ndarray</em>, <em class="sig-param">state: dict</em><span class="sig-paren">)</span> → Tuple[float, bool]<a class="reference internal" href="../_modules/rl_coach/spaces.html#GoalsSpace.get_reward_for_goal_and_state"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.spaces.GoalsSpace.get_reward_for_goal_and_state" title="Permalink to this definition">¶</a></dt>
<dd><p>Given a state, check if the goal was reached and return a reward accordingly</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
@@ -560,7 +560,7 @@ all of the values must be within the low and high bounds.</p>
<dl class="method">
<dt id="rl_coach.spaces.GoalsSpace.goal_from_state">
<code class="descname">goal_from_state</code><span class="sig-paren">(</span><em>state: Dict</em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/rl_coach/spaces.html#GoalsSpace.goal_from_state"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.spaces.GoalsSpace.goal_from_state" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">goal_from_state</code><span class="sig-paren">(</span><em class="sig-param">state: Dict</em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/rl_coach/spaces.html#GoalsSpace.goal_from_state"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.spaces.GoalsSpace.goal_from_state" title="Permalink to this definition">¶</a></dt>
<dd><p>Given a state, extract an observation according to the goal_name</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
@@ -574,7 +574,7 @@ all of the values must be within the low and high bounds.</p>
<dl class="method">
<dt id="rl_coach.spaces.GoalsSpace.is_valid_index">
<code class="descname">is_valid_index</code><span class="sig-paren">(</span><em>index: numpy.ndarray</em><span class="sig-paren">)</span> → bool<a class="headerlink" href="#rl_coach.spaces.GoalsSpace.is_valid_index" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">is_valid_index</code><span class="sig-paren">(</span><em class="sig-param">index: numpy.ndarray</em><span class="sig-paren">)</span> → bool<a class="headerlink" href="#rl_coach.spaces.GoalsSpace.is_valid_index" title="Permalink to this definition">¶</a></dt>
<dd><p>Checks if a given multidimensional index is within the bounds of the shape of the space</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
@@ -588,7 +588,7 @@ all of the values must be within the low and high bounds.</p>
<dl class="method">
<dt id="rl_coach.spaces.GoalsSpace.sample">
<code class="descname">sample</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → numpy.ndarray<a class="headerlink" href="#rl_coach.spaces.GoalsSpace.sample" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">sample</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → numpy.ndarray<a class="headerlink" href="#rl_coach.spaces.GoalsSpace.sample" title="Permalink to this definition">¶</a></dt>
<dd><p>Sample the defined space, either uniformly, if space bounds are defined, or normally distributed if no
bounds are defined</p>
<dl class="field-list simple">
@@ -600,7 +600,7 @@ bounds are defined</p>
<dl class="method">
<dt id="rl_coach.spaces.GoalsSpace.sample_with_info">
<code class="descname">sample_with_info</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → rl_coach.core_types.ActionInfo<a class="headerlink" href="#rl_coach.spaces.GoalsSpace.sample_with_info" title="Permalink to this definition">¶</a></dt>
<code class="sig-name descname">sample_with_info</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → rl_coach.core_types.ActionInfo<a class="headerlink" href="#rl_coach.spaces.GoalsSpace.sample_with_info" title="Permalink to this definition">¶</a></dt>
<dd><p>Get a random action with additional “fake” info</p>
<dl class="field-list simple">
<dt class="field-odd">Returns</dt>