1
0
mirror of https://github.com/gryf/coach.git synced 2026-04-21 07:51:28 +02:00

Add documentation on distributed Coach. (#158)

* Added documentation on distributed Coach.
This commit is contained in:
Balaji Subramaniam
2018-11-27 02:26:15 -08:00
committed by Gal Novik
parent e3ecf445e2
commit d06197f663
151 changed files with 5302 additions and 643 deletions
Binary file not shown.

After

Width:  |  Height:  |  Size: 62 KiB

+9
View File
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../components/additional_parameters.html">Additional Parameters</a></li>
@@ -194,6 +199,8 @@
<li><a href="rl_coach/architectures/network_wrapper.html">rl_coach.architectures.network_wrapper</a></li>
<li><a href="rl_coach/base_parameters.html">rl_coach.base_parameters</a></li>
<li><a href="rl_coach/core_types.html">rl_coach.core_types</a></li>
<li><a href="rl_coach/data_stores/nfs_data_store.html">rl_coach.data_stores.nfs_data_store</a></li>
<li><a href="rl_coach/data_stores/s3_data_store.html">rl_coach.data_stores.s3_data_store</a></li>
<li><a href="rl_coach/environments/carla_environment.html">rl_coach.environments.carla_environment</a></li>
<li><a href="rl_coach/environments/control_suite_environment.html">rl_coach.environments.control_suite_environment</a></li>
<li><a href="rl_coach/environments/doom_environment.html">rl_coach.environments.doom_environment</a></li>
@@ -232,6 +239,7 @@
<li><a href="rl_coach/filters/reward/reward_clipping_filter.html">rl_coach.filters.reward.reward_clipping_filter</a></li>
<li><a href="rl_coach/filters/reward/reward_normalization_filter.html">rl_coach.filters.reward.reward_normalization_filter</a></li>
<li><a href="rl_coach/filters/reward/reward_rescale_filter.html">rl_coach.filters.reward.reward_rescale_filter</a></li>
<li><a href="rl_coach/memories/backend/redis.html">rl_coach.memories.backend.redis</a></li>
<li><a href="rl_coach/memories/episodic/episodic_experience_replay.html">rl_coach.memories.episodic.episodic_experience_replay</a></li>
<li><a href="rl_coach/memories/episodic/episodic_hindsight_experience_replay.html">rl_coach.memories.episodic.episodic_hindsight_experience_replay</a></li>
<li><a href="rl_coach/memories/episodic/episodic_hrl_hindsight_experience_replay.html">rl_coach.memories.episodic.episodic_hrl_hindsight_experience_replay</a></li>
@@ -241,6 +249,7 @@
<li><a href="rl_coach/memories/non_episodic/experience_replay.html">rl_coach.memories.non_episodic.experience_replay</a></li>
<li><a href="rl_coach/memories/non_episodic/prioritized_experience_replay.html">rl_coach.memories.non_episodic.prioritized_experience_replay</a></li>
<li><a href="rl_coach/memories/non_episodic/transition_collection.html">rl_coach.memories.non_episodic.transition_collection</a></li>
<li><a href="rl_coach/orchestrators/kubernetes_orchestrator.html">rl_coach.orchestrators.kubernetes_orchestrator</a></li>
<li><a href="rl_coach/spaces.html">rl_coach.spaces</a></li>
</ul>
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/additional_parameters.html">Additional Parameters</a></li>
+146 -26
View File
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/additional_parameters.html">Additional Parameters</a></li>
@@ -190,6 +195,7 @@
<span class="c1">#</span>
<span class="kn">import</span> <span class="nn">copy</span>
<span class="kn">import</span> <span class="nn">os</span>
<span class="kn">import</span> <span class="nn">random</span>
<span class="kn">from</span> <span class="nn">collections</span> <span class="k">import</span> <span class="n">OrderedDict</span>
<span class="kn">from</span> <span class="nn">typing</span> <span class="k">import</span> <span class="n">Dict</span><span class="p">,</span> <span class="n">List</span><span class="p">,</span> <span class="n">Union</span><span class="p">,</span> <span class="n">Tuple</span>
@@ -200,11 +206,12 @@
<span class="kn">from</span> <span class="nn">rl_coach.agents.agent_interface</span> <span class="k">import</span> <span class="n">AgentInterface</span>
<span class="kn">from</span> <span class="nn">rl_coach.architectures.network_wrapper</span> <span class="k">import</span> <span class="n">NetworkWrapper</span>
<span class="kn">from</span> <span class="nn">rl_coach.base_parameters</span> <span class="k">import</span> <span class="n">AgentParameters</span><span class="p">,</span> <span class="n">DistributedTaskParameters</span>
<span class="kn">from</span> <span class="nn">rl_coach.base_parameters</span> <span class="k">import</span> <span class="n">AgentParameters</span><span class="p">,</span> <span class="n">Device</span><span class="p">,</span> <span class="n">DeviceType</span><span class="p">,</span> <span class="n">DistributedTaskParameters</span><span class="p">,</span> <span class="n">Frameworks</span>
<span class="kn">from</span> <span class="nn">rl_coach.core_types</span> <span class="k">import</span> <span class="n">RunPhase</span><span class="p">,</span> <span class="n">PredictionType</span><span class="p">,</span> <span class="n">EnvironmentEpisodes</span><span class="p">,</span> <span class="n">ActionType</span><span class="p">,</span> <span class="n">Batch</span><span class="p">,</span> <span class="n">Episode</span><span class="p">,</span> <span class="n">StateType</span>
<span class="kn">from</span> <span class="nn">rl_coach.core_types</span> <span class="k">import</span> <span class="n">Transition</span><span class="p">,</span> <span class="n">ActionInfo</span><span class="p">,</span> <span class="n">TrainingSteps</span><span class="p">,</span> <span class="n">EnvironmentSteps</span><span class="p">,</span> <span class="n">EnvResponse</span>
<span class="kn">from</span> <span class="nn">rl_coach.logger</span> <span class="k">import</span> <span class="n">screen</span><span class="p">,</span> <span class="n">Logger</span><span class="p">,</span> <span class="n">EpisodeLogger</span>
<span class="kn">from</span> <span class="nn">rl_coach.memories.episodic.episodic_experience_replay</span> <span class="k">import</span> <span class="n">EpisodicExperienceReplay</span>
<span class="kn">from</span> <span class="nn">rl_coach.saver</span> <span class="k">import</span> <span class="n">SaverCollection</span>
<span class="kn">from</span> <span class="nn">rl_coach.spaces</span> <span class="k">import</span> <span class="n">SpacesDefinition</span><span class="p">,</span> <span class="n">VectorObservationSpace</span><span class="p">,</span> <span class="n">GoalsSpace</span><span class="p">,</span> <span class="n">AttentionActionSpace</span>
<span class="kn">from</span> <span class="nn">rl_coach.utils</span> <span class="k">import</span> <span class="n">Signal</span><span class="p">,</span> <span class="n">force_list</span>
<span class="kn">from</span> <span class="nn">rl_coach.utils</span> <span class="k">import</span> <span class="n">dynamic_import_and_instantiate_module_from_params</span>
@@ -255,9 +262,7 @@
<span class="k">if</span> <span class="nb">hasattr</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">ap</span><span class="o">.</span><span class="n">memory</span><span class="p">,</span> <span class="s1">&#39;memory_backend_params&#39;</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">memory_backend</span> <span class="o">=</span> <span class="n">get_memory_backend</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">ap</span><span class="o">.</span><span class="n">memory</span><span class="o">.</span><span class="n">memory_backend_params</span><span class="p">)</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">ap</span><span class="o">.</span><span class="n">memory</span><span class="o">.</span><span class="n">memory_backend_params</span><span class="o">.</span><span class="n">run_type</span> <span class="o">==</span> <span class="s1">&#39;trainer&#39;</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">memory_backend</span><span class="o">.</span><span class="n">subscribe</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">ap</span><span class="o">.</span><span class="n">memory</span><span class="o">.</span><span class="n">memory_backend_params</span><span class="o">.</span><span class="n">run_type</span> <span class="o">!=</span> <span class="s1">&#39;trainer&#39;</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">memory</span><span class="o">.</span><span class="n">set_memory_backend</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">memory_backend</span><span class="p">)</span>
<span class="k">if</span> <span class="n">agent_parameters</span><span class="o">.</span><span class="n">memory</span><span class="o">.</span><span class="n">load_memory_from_file_path</span><span class="p">:</span>
@@ -273,28 +278,49 @@
<span class="bp">self</span><span class="o">.</span><span class="n">has_global</span> <span class="o">=</span> <span class="kc">True</span>
<span class="bp">self</span><span class="o">.</span><span class="n">replicated_device</span> <span class="o">=</span> <span class="n">agent_parameters</span><span class="o">.</span><span class="n">task_parameters</span><span class="o">.</span><span class="n">device</span>
<span class="bp">self</span><span class="o">.</span><span class="n">worker_device</span> <span class="o">=</span> <span class="s2">&quot;/job:worker/task:</span><span class="si">{}</span><span class="s2">&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">task_id</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">has_global</span> <span class="o">=</span> <span class="kc">False</span>
<span class="bp">self</span><span class="o">.</span><span class="n">replicated_device</span> <span class="o">=</span> <span class="kc">None</span>
<span class="bp">self</span><span class="o">.</span><span class="n">worker_device</span> <span class="o">=</span> <span class="s2">&quot;&quot;</span>
<span class="k">if</span> <span class="n">agent_parameters</span><span class="o">.</span><span class="n">task_parameters</span><span class="o">.</span><span class="n">use_cpu</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">worker_device</span> <span class="o">+=</span> <span class="s2">&quot;/cpu:0&quot;</span>
<span class="k">else</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">worker_device</span> <span class="o">+=</span> <span class="s2">&quot;/device:GPU:0&quot;</span>
<span class="k">else</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">has_global</span> <span class="o">=</span> <span class="kc">False</span>
<span class="bp">self</span><span class="o">.</span><span class="n">replicated_device</span> <span class="o">=</span> <span class="kc">None</span>
<span class="k">if</span> <span class="n">agent_parameters</span><span class="o">.</span><span class="n">task_parameters</span><span class="o">.</span><span class="n">use_cpu</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">worker_device</span> <span class="o">=</span> <span class="n">Device</span><span class="p">(</span><span class="n">DeviceType</span><span class="o">.</span><span class="n">CPU</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">worker_device</span> <span class="o">=</span> <span class="p">[</span><span class="n">Device</span><span class="p">(</span><span class="n">DeviceType</span><span class="o">.</span><span class="n">GPU</span><span class="p">,</span> <span class="n">i</span><span class="p">)</span>
<span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">agent_parameters</span><span class="o">.</span><span class="n">task_parameters</span><span class="o">.</span><span class="n">num_gpu</span><span class="p">)]</span>
<span class="c1"># filters</span>
<span class="bp">self</span><span class="o">.</span><span class="n">input_filter</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">ap</span><span class="o">.</span><span class="n">input_filter</span>
<span class="bp">self</span><span class="o">.</span><span class="n">input_filter</span><span class="o">.</span><span class="n">set_name</span><span class="p">(</span><span class="s1">&#39;input_filter&#39;</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">output_filter</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">ap</span><span class="o">.</span><span class="n">output_filter</span>
<span class="bp">self</span><span class="o">.</span><span class="n">output_filter</span><span class="o">.</span><span class="n">set_name</span><span class="p">(</span><span class="s1">&#39;output_filter&#39;</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">pre_network_filter</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">ap</span><span class="o">.</span><span class="n">pre_network_filter</span>
<span class="bp">self</span><span class="o">.</span><span class="n">pre_network_filter</span><span class="o">.</span><span class="n">set_name</span><span class="p">(</span><span class="s1">&#39;pre_network_filter&#39;</span><span class="p">)</span>
<span class="n">device</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">replicated_device</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">replicated_device</span> <span class="k">else</span> <span class="bp">self</span><span class="o">.</span><span class="n">worker_device</span>
<span class="c1"># TODO-REMOVE This is a temporary flow dividing to 3 modes. To be converged to a single flow once distributed tf</span>
<span class="c1"># is removed, and Redis is used for sharing data between local workers.</span>
<span class="c1"># Filters MoW will be split between different configurations</span>
<span class="c1"># 1. Distributed coach synchrnization type (=distributed across multiple nodes) - Redis based data sharing + numpy arithmetic backend</span>
<span class="c1"># 2. Distributed TF (=distributed on a single node, using distributed TF) - TF for both data sharing and arithmetic backend</span>
<span class="c1"># 3. Single worker (=both TF and Mxnet) - no data sharing needed + numpy arithmetic backend</span>
<span class="k">if</span> <span class="nb">hasattr</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">ap</span><span class="o">.</span><span class="n">memory</span><span class="p">,</span> <span class="s1">&#39;memory_backend_params&#39;</span><span class="p">)</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">ap</span><span class="o">.</span><span class="n">algorithm</span><span class="o">.</span><span class="n">distributed_coach_synchronization_type</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">input_filter</span><span class="o">.</span><span class="n">set_device</span><span class="p">(</span><span class="n">device</span><span class="p">,</span> <span class="n">memory_backend_params</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">ap</span><span class="o">.</span><span class="n">memory</span><span class="o">.</span><span class="n">memory_backend_params</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">output_filter</span><span class="o">.</span><span class="n">set_device</span><span class="p">(</span><span class="n">device</span><span class="p">,</span> <span class="n">memory_backend_params</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">ap</span><span class="o">.</span><span class="n">memory</span><span class="o">.</span><span class="n">memory_backend_params</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">pre_network_filter</span><span class="o">.</span><span class="n">set_device</span><span class="p">(</span><span class="n">device</span><span class="p">,</span> <span class="n">memory_backend_params</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">ap</span><span class="o">.</span><span class="n">memory</span><span class="o">.</span><span class="n">memory_backend_params</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">input_filter</span><span class="o">.</span><span class="n">set_device</span><span class="p">(</span><span class="n">device</span><span class="p">,</span> <span class="n">memory_backend_params</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">ap</span><span class="o">.</span><span class="n">memory</span><span class="o">.</span><span class="n">memory_backend_params</span><span class="p">,</span> <span class="n">mode</span><span class="o">=</span><span class="s1">&#39;numpy&#39;</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">output_filter</span><span class="o">.</span><span class="n">set_device</span><span class="p">(</span><span class="n">device</span><span class="p">,</span> <span class="n">memory_backend_params</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">ap</span><span class="o">.</span><span class="n">memory</span><span class="o">.</span><span class="n">memory_backend_params</span><span class="p">,</span> <span class="n">mode</span><span class="o">=</span><span class="s1">&#39;numpy&#39;</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">pre_network_filter</span><span class="o">.</span><span class="n">set_device</span><span class="p">(</span><span class="n">device</span><span class="p">,</span> <span class="n">memory_backend_params</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">ap</span><span class="o">.</span><span class="n">memory</span><span class="o">.</span><span class="n">memory_backend_params</span><span class="p">,</span> <span class="n">mode</span><span class="o">=</span><span class="s1">&#39;numpy&#39;</span><span class="p">)</span>
<span class="k">elif</span> <span class="p">(</span><span class="nb">type</span><span class="p">(</span><span class="n">agent_parameters</span><span class="o">.</span><span class="n">task_parameters</span><span class="p">)</span> <span class="o">==</span> <span class="n">DistributedTaskParameters</span> <span class="ow">and</span>
<span class="n">agent_parameters</span><span class="o">.</span><span class="n">task_parameters</span><span class="o">.</span><span class="n">framework_type</span> <span class="o">==</span> <span class="n">Frameworks</span><span class="o">.</span><span class="n">tensorflow</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">input_filter</span><span class="o">.</span><span class="n">set_device</span><span class="p">(</span><span class="n">device</span><span class="p">,</span> <span class="n">mode</span><span class="o">=</span><span class="s1">&#39;tf&#39;</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">output_filter</span><span class="o">.</span><span class="n">set_device</span><span class="p">(</span><span class="n">device</span><span class="p">,</span> <span class="n">mode</span><span class="o">=</span><span class="s1">&#39;tf&#39;</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">pre_network_filter</span><span class="o">.</span><span class="n">set_device</span><span class="p">(</span><span class="n">device</span><span class="p">,</span> <span class="n">mode</span><span class="o">=</span><span class="s1">&#39;tf&#39;</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">input_filter</span><span class="o">.</span><span class="n">set_device</span><span class="p">(</span><span class="n">device</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">output_filter</span><span class="o">.</span><span class="n">set_device</span><span class="p">(</span><span class="n">device</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">pre_network_filter</span><span class="o">.</span><span class="n">set_device</span><span class="p">(</span><span class="n">device</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">input_filter</span><span class="o">.</span><span class="n">set_device</span><span class="p">(</span><span class="n">device</span><span class="p">,</span> <span class="n">mode</span><span class="o">=</span><span class="s1">&#39;numpy&#39;</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">output_filter</span><span class="o">.</span><span class="n">set_device</span><span class="p">(</span><span class="n">device</span><span class="p">,</span> <span class="n">mode</span><span class="o">=</span><span class="s1">&#39;numpy&#39;</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">pre_network_filter</span><span class="o">.</span><span class="n">set_device</span><span class="p">(</span><span class="n">device</span><span class="p">,</span> <span class="n">mode</span><span class="o">=</span><span class="s1">&#39;numpy&#39;</span><span class="p">)</span>
<span class="c1"># initialize all internal variables</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_phase</span> <span class="o">=</span> <span class="n">RunPhase</span><span class="o">.</span><span class="n">HEATUP</span>
@@ -544,9 +570,9 @@
<span class="c1"># we write to the next episode, because it could be that the current episode was already written</span>
<span class="c1"># to disk and then we won&#39;t write it again</span>
<span class="bp">self</span><span class="o">.</span><span class="n">agent_logger</span><span class="o">.</span><span class="n">set_current_time</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">current_episode</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span>
<span class="n">evaluation_reward</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">accumulated_rewards_across_evaluation_episodes</span> <span class="o">/</span> <span class="bp">self</span><span class="o">.</span><span class="n">num_evaluation_episodes_completed</span>
<span class="bp">self</span><span class="o">.</span><span class="n">agent_logger</span><span class="o">.</span><span class="n">create_signal_value</span><span class="p">(</span>
<span class="s1">&#39;Evaluation Reward&#39;</span><span class="p">,</span>
<span class="bp">self</span><span class="o">.</span><span class="n">accumulated_rewards_across_evaluation_episodes</span> <span class="o">/</span> <span class="bp">self</span><span class="o">.</span><span class="n">num_evaluation_episodes_completed</span><span class="p">)</span>
<span class="s1">&#39;Evaluation Reward&#39;</span><span class="p">,</span> <span class="n">evaluation_reward</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">agent_logger</span><span class="o">.</span><span class="n">create_signal_value</span><span class="p">(</span>
<span class="s1">&#39;Shaped Evaluation Reward&#39;</span><span class="p">,</span>
<span class="bp">self</span><span class="o">.</span><span class="n">accumulated_shaped_rewards_across_evaluation_episodes</span> <span class="o">/</span> <span class="bp">self</span><span class="o">.</span><span class="n">num_evaluation_episodes_completed</span><span class="p">)</span>
@@ -556,8 +582,8 @@
<span class="n">success_rate</span>
<span class="p">)</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">ap</span><span class="o">.</span><span class="n">is_a_highest_level_agent</span> <span class="ow">or</span> <span class="bp">self</span><span class="o">.</span><span class="n">ap</span><span class="o">.</span><span class="n">task_parameters</span><span class="o">.</span><span class="n">verbosity</span> <span class="o">==</span> <span class="s2">&quot;high&quot;</span><span class="p">:</span>
<span class="n">screen</span><span class="o">.</span><span class="n">log_title</span><span class="p">(</span><span class="s2">&quot;</span><span class="si">{}</span><span class="s2">: Finished evaluation phase. Success rate = </span><span class="si">{}</span><span class="s2">&quot;</span>
<span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">round</span><span class="p">(</span><span class="n">success_rate</span><span class="p">,</span> <span class="mi">2</span><span class="p">)))</span></div>
<span class="n">screen</span><span class="o">.</span><span class="n">log_title</span><span class="p">(</span><span class="s2">&quot;</span><span class="si">{}</span><span class="s2">: Finished evaluation phase. Success rate = </span><span class="si">{}</span><span class="s2">, Avg Total Reward = </span><span class="si">{}</span><span class="s2">&quot;</span>
<span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">round</span><span class="p">(</span><span class="n">success_rate</span><span class="p">,</span> <span class="mi">2</span><span class="p">),</span> <span class="n">np</span><span class="o">.</span><span class="n">round</span><span class="p">(</span><span class="n">evaluation_reward</span><span class="p">,</span> <span class="mi">2</span><span class="p">)))</span></div>
<div class="viewcode-block" id="Agent.call_memory"><a class="viewcode-back" href="../../../components/agents/index.html#rl_coach.agents.agent.Agent.call_memory">[docs]</a> <span class="k">def</span> <span class="nf">call_memory</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">func</span><span class="p">,</span> <span class="n">args</span><span class="o">=</span><span class="p">()):</span>
<span class="sd">&quot;&quot;&quot;</span>
@@ -758,14 +784,14 @@
<span class="s2">&quot;EnvironmentSteps or TrainingSteps. Instead it is </span><span class="si">{}</span><span class="s2">&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">step_method</span><span class="o">.</span><span class="vm">__class__</span><span class="p">))</span>
<span class="k">return</span> <span class="n">should_update</span>
<span class="k">def</span> <span class="nf">_should_train</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">wait_for_full_episode</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">bool</span><span class="p">:</span>
<span class="k">def</span> <span class="nf">_should_train</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Determine if we should start a training phase according to the number of steps passed since the last training</span>
<span class="sd"> :return: boolean: True if we should start a training phase</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">should_update</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_should_train_helper</span><span class="p">(</span><span class="n">wait_for_full_episode</span><span class="o">=</span><span class="n">wait_for_full_episode</span><span class="p">)</span>
<span class="n">should_update</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_should_train_helper</span><span class="p">()</span>
<span class="n">step_method</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">ap</span><span class="o">.</span><span class="n">algorithm</span><span class="o">.</span><span class="n">num_consecutive_playing_steps</span>
@@ -777,8 +803,8 @@
<span class="k">return</span> <span class="n">should_update</span>
<span class="k">def</span> <span class="nf">_should_train_helper</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">wait_for_full_episode</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
<span class="k">def</span> <span class="nf">_should_train_helper</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="n">wait_for_full_episode</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">ap</span><span class="o">.</span><span class="n">algorithm</span><span class="o">.</span><span class="n">act_for_full_episodes</span>
<span class="n">step_method</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">ap</span><span class="o">.</span><span class="n">algorithm</span><span class="o">.</span><span class="n">num_consecutive_playing_steps</span>
<span class="k">if</span> <span class="n">step_method</span><span class="o">.</span><span class="vm">__class__</span> <span class="o">==</span> <span class="n">EnvironmentEpisodes</span><span class="p">:</span>
@@ -1079,14 +1105,34 @@
<span class="bp">self</span><span class="o">.</span><span class="n">input_filter</span><span class="o">.</span><span class="n">observation_filters</span><span class="p">[</span><span class="s1">&#39;attention&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">crop_high</span> <span class="o">=</span> <span class="n">action</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
<span class="bp">self</span><span class="o">.</span><span class="n">output_filter</span><span class="o">.</span><span class="n">action_filters</span><span class="p">[</span><span class="s1">&#39;masking&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">set_masking</span><span class="p">(</span><span class="n">action</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">action</span><span class="p">[</span><span class="mi">1</span><span class="p">])</span></div>
<div class="viewcode-block" id="Agent.save_checkpoint"><a class="viewcode-back" href="../../../components/agents/index.html#rl_coach.agents.agent.Agent.save_checkpoint">[docs]</a> <span class="k">def</span> <span class="nf">save_checkpoint</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">checkpoint_id</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<div class="viewcode-block" id="Agent.save_checkpoint"><a class="viewcode-back" href="../../../components/agents/index.html#rl_coach.agents.agent.Agent.save_checkpoint">[docs]</a> <span class="k">def</span> <span class="nf">save_checkpoint</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">checkpoint_prefix</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Allows agents to store additional information when saving checkpoints.</span>
<span class="sd"> :param checkpoint_id: the id of the checkpoint</span>
<span class="sd"> :param checkpoint_prefix: The prefix of the checkpoint file to save</span>
<span class="sd"> :return: None</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">pass</span></div>
<span class="n">checkpoint_dir</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">ap</span><span class="o">.</span><span class="n">task_parameters</span><span class="o">.</span><span class="n">checkpoint_save_dir</span>
<span class="n">checkpoint_prefix</span> <span class="o">=</span> <span class="s1">&#39;.&#39;</span><span class="o">.</span><span class="n">join</span><span class="p">([</span><span class="n">checkpoint_prefix</span><span class="p">]</span> <span class="o">+</span> <span class="bp">self</span><span class="o">.</span><span class="n">full_name_id</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">&#39;/&#39;</span><span class="p">))</span> <span class="c1"># adds both level name and agent name</span>
<span class="bp">self</span><span class="o">.</span><span class="n">input_filter</span><span class="o">.</span><span class="n">save_state_to_checkpoint</span><span class="p">(</span><span class="n">checkpoint_dir</span><span class="p">,</span> <span class="n">checkpoint_prefix</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">output_filter</span><span class="o">.</span><span class="n">save_state_to_checkpoint</span><span class="p">(</span><span class="n">checkpoint_dir</span><span class="p">,</span> <span class="n">checkpoint_prefix</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">pre_network_filter</span><span class="o">.</span><span class="n">save_state_to_checkpoint</span><span class="p">(</span><span class="n">checkpoint_dir</span><span class="p">,</span> <span class="n">checkpoint_prefix</span><span class="p">)</span></div>
<div class="viewcode-block" id="Agent.restore_checkpoint"><a class="viewcode-back" href="../../../components/agents/index.html#rl_coach.agents.agent.Agent.restore_checkpoint">[docs]</a> <span class="k">def</span> <span class="nf">restore_checkpoint</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">checkpoint_dir</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Allows agents to store additional information when saving checkpoints.</span>
<span class="sd"> :param checkpoint_dir: The checkpoint dir to restore from</span>
<span class="sd"> :return: None</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">checkpoint_prefix</span> <span class="o">=</span> <span class="s1">&#39;.&#39;</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">full_name_id</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">&#39;/&#39;</span><span class="p">))</span> <span class="c1"># adds both level name and agent name</span>
<span class="bp">self</span><span class="o">.</span><span class="n">input_filter</span><span class="o">.</span><span class="n">restore_state_from_checkpoint</span><span class="p">(</span><span class="n">checkpoint_dir</span><span class="p">,</span> <span class="n">checkpoint_prefix</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">pre_network_filter</span><span class="o">.</span><span class="n">restore_state_from_checkpoint</span><span class="p">(</span><span class="n">checkpoint_dir</span><span class="p">,</span> <span class="n">checkpoint_prefix</span><span class="p">)</span></div>
<span class="c1"># no output filters currently have an internal state to restore</span>
<span class="c1"># self.output_filter.restore_state_from_checkpoint(checkpoint_dir)</span>
<div class="viewcode-block" id="Agent.sync"><a class="viewcode-back" href="../../../components/agents/index.html#rl_coach.agents.agent.Agent.sync">[docs]</a> <span class="k">def</span> <span class="nf">sync</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
@@ -1097,8 +1143,82 @@
<span class="k">for</span> <span class="n">network</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">networks</span><span class="o">.</span><span class="n">values</span><span class="p">():</span>
<span class="n">network</span><span class="o">.</span><span class="n">sync</span><span class="p">()</span></div>
<span class="c1"># TODO-remove - this is a temporary flow, used by the trainer worker, duplicated from observe() - need to create</span>
<span class="c1"># an external trainer flow reusing the existing flow and methods [e.g. observe(), step(), act()]</span>
<div class="viewcode-block" id="Agent.emulate_observe_on_trainer"><a class="viewcode-back" href="../../../components/agents/index.html#rl_coach.agents.agent.Agent.emulate_observe_on_trainer">[docs]</a> <span class="k">def</span> <span class="nf">emulate_observe_on_trainer</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">transition</span><span class="p">:</span> <span class="n">Transition</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">bool</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> This emulates the observe using the transition obtained from the rollout worker on the training worker</span>
<span class="sd"> in case of distributed training.</span>
<span class="sd"> Given a response from the environment, distill the observation from it and store it for later use.</span>
<span class="sd"> The response should be a dictionary containing the performed action, the new observation and measurements,</span>
<span class="sd"> the reward, a game over flag and any additional information necessary.</span>
<span class="sd"> :return:</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="c1"># if we are in the first step in the episode, then we don&#39;t have a a next state and a reward and thus no</span>
<span class="c1"># transition yet, and therefore we don&#39;t need to store anything in the memory.</span>
<span class="c1"># also we did not reach the goal yet.</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">current_episode_steps_counter</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
<span class="c1"># initialize the current state</span>
<span class="k">return</span> <span class="n">transition</span><span class="o">.</span><span class="n">game_over</span>
<span class="k">else</span><span class="p">:</span>
<span class="c1"># sum up the total shaped reward</span>
<span class="bp">self</span><span class="o">.</span><span class="n">total_shaped_reward_in_current_episode</span> <span class="o">+=</span> <span class="n">transition</span><span class="o">.</span><span class="n">reward</span>
<span class="bp">self</span><span class="o">.</span><span class="n">total_reward_in_current_episode</span> <span class="o">+=</span> <span class="n">transition</span><span class="o">.</span><span class="n">reward</span>
<span class="bp">self</span><span class="o">.</span><span class="n">shaped_reward</span><span class="o">.</span><span class="n">add_sample</span><span class="p">(</span><span class="n">transition</span><span class="o">.</span><span class="n">reward</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">reward</span><span class="o">.</span><span class="n">add_sample</span><span class="p">(</span><span class="n">transition</span><span class="o">.</span><span class="n">reward</span><span class="p">)</span>
<span class="c1"># create and store the transition</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">phase</span> <span class="ow">in</span> <span class="p">[</span><span class="n">RunPhase</span><span class="o">.</span><span class="n">TRAIN</span><span class="p">,</span> <span class="n">RunPhase</span><span class="o">.</span><span class="n">HEATUP</span><span class="p">]:</span>
<span class="c1"># for episodic memories we keep the transitions in a local buffer until the episode is ended.</span>
<span class="c1"># for regular memories we insert the transitions directly to the memory</span>
<span class="bp">self</span><span class="o">.</span><span class="n">current_episode_buffer</span><span class="o">.</span><span class="n">insert</span><span class="p">(</span><span class="n">transition</span><span class="p">)</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">memory</span><span class="p">,</span> <span class="n">EpisodicExperienceReplay</span><span class="p">)</span> \
<span class="ow">and</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">ap</span><span class="o">.</span><span class="n">algorithm</span><span class="o">.</span><span class="n">store_transitions_only_when_episodes_are_terminated</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">call_memory</span><span class="p">(</span><span class="s1">&#39;store&#39;</span><span class="p">,</span> <span class="n">transition</span><span class="p">)</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">ap</span><span class="o">.</span><span class="n">visualization</span><span class="o">.</span><span class="n">dump_in_episode_signals</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">update_step_in_episode_log</span><span class="p">()</span>
<span class="k">return</span> <span class="n">transition</span><span class="o">.</span><span class="n">game_over</span></div>
<span class="c1"># TODO-remove - this is a temporary flow, used by the trainer worker, duplicated from observe() - need to create</span>
<span class="c1"># an external trainer flow reusing the existing flow and methods [e.g. observe(), step(), act()]</span>
<div class="viewcode-block" id="Agent.emulate_act_on_trainer"><a class="viewcode-back" href="../../../components/agents/index.html#rl_coach.agents.agent.Agent.emulate_act_on_trainer">[docs]</a> <span class="k">def</span> <span class="nf">emulate_act_on_trainer</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">transition</span><span class="p">:</span> <span class="n">Transition</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">ActionInfo</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> This emulates the act using the transition obtained from the rollout worker on the training worker</span>
<span class="sd"> in case of distributed training.</span>
<span class="sd"> Given the agents current knowledge, decide on the next action to apply to the environment</span>
<span class="sd"> :return: an action and a dictionary containing any additional info from the action decision process</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">phase</span> <span class="o">==</span> <span class="n">RunPhase</span><span class="o">.</span><span class="n">TRAIN</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">ap</span><span class="o">.</span><span class="n">algorithm</span><span class="o">.</span><span class="n">num_consecutive_playing_steps</span><span class="o">.</span><span class="n">num_steps</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
<span class="c1"># This agent never plays while training (e.g. behavioral cloning)</span>
<span class="k">return</span> <span class="kc">None</span>
<span class="c1"># count steps (only when training or if we are in the evaluation worker)</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">phase</span> <span class="o">!=</span> <span class="n">RunPhase</span><span class="o">.</span><span class="n">TEST</span> <span class="ow">or</span> <span class="bp">self</span><span class="o">.</span><span class="n">ap</span><span class="o">.</span><span class="n">task_parameters</span><span class="o">.</span><span class="n">evaluate_only</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">total_steps_counter</span> <span class="o">+=</span> <span class="mi">1</span>
<span class="bp">self</span><span class="o">.</span><span class="n">current_episode_steps_counter</span> <span class="o">+=</span> <span class="mi">1</span>
<span class="bp">self</span><span class="o">.</span><span class="n">last_action_info</span> <span class="o">=</span> <span class="n">transition</span><span class="o">.</span><span class="n">action</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">last_action_info</span></div>
<span class="k">def</span> <span class="nf">get_success_rate</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">float</span><span class="p">:</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">num_successes_across_evaluation_episodes</span> <span class="o">/</span> <span class="bp">self</span><span class="o">.</span><span class="n">num_evaluation_episodes_completed</span></div>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">num_successes_across_evaluation_episodes</span> <span class="o">/</span> <span class="bp">self</span><span class="o">.</span><span class="n">num_evaluation_episodes_completed</span>
<div class="viewcode-block" id="Agent.collect_savers"><a class="viewcode-back" href="../../../components/agents/index.html#rl_coach.agents.agent.Agent.collect_savers">[docs]</a> <span class="k">def</span> <span class="nf">collect_savers</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">parent_path_suffix</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">SaverCollection</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Collect all of agent&#39;s network savers</span>
<span class="sd"> :param parent_path_suffix: path suffix of the parent of the agent</span>
<span class="sd"> (could be name of level manager or composite agent)</span>
<span class="sd"> :return: collection of all agent savers</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">parent_path_suffix</span> <span class="o">=</span> <span class="s2">&quot;</span><span class="si">{}</span><span class="s2">.</span><span class="si">{}</span><span class="s2">&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">parent_path_suffix</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">)</span>
<span class="n">savers</span> <span class="o">=</span> <span class="n">SaverCollection</span><span class="p">()</span>
<span class="k">for</span> <span class="n">network</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">networks</span><span class="o">.</span><span class="n">values</span><span class="p">():</span>
<span class="n">savers</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">network</span><span class="o">.</span><span class="n">collect_savers</span><span class="p">(</span><span class="n">parent_path_suffix</span><span class="p">))</span>
<span class="k">return</span> <span class="n">savers</span></div></div>
</pre></div>
</div>
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/additional_parameters.html">Additional Parameters</a></li>
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/additional_parameters.html">Additional Parameters</a></li>
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/additional_parameters.html">Additional Parameters</a></li>
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/additional_parameters.html">Additional Parameters</a></li>
@@ -286,7 +291,8 @@
<span class="bp">self</span><span class="o">.</span><span class="n">num_consecutive_playing_steps</span> <span class="o">=</span> <span class="n">EnvironmentSteps</span><span class="p">(</span><span class="mi">2048</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">optimization_epochs</span> <span class="o">=</span> <span class="mi">10</span>
<span class="bp">self</span><span class="o">.</span><span class="n">normalization_stats</span> <span class="o">=</span> <span class="kc">None</span>
<span class="bp">self</span><span class="o">.</span><span class="n">clipping_decay_schedule</span> <span class="o">=</span> <span class="n">ConstantSchedule</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span></div>
<span class="bp">self</span><span class="o">.</span><span class="n">clipping_decay_schedule</span> <span class="o">=</span> <span class="n">ConstantSchedule</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">act_for_full_episodes</span> <span class="o">=</span> <span class="kc">True</span></div>
<span class="k">class</span> <span class="nc">ClippedPPOAgentParameters</span><span class="p">(</span><span class="n">AgentParameters</span><span class="p">):</span>
@@ -469,11 +475,8 @@
<span class="c1"># clean memory</span>
<span class="bp">self</span><span class="o">.</span><span class="n">call_memory</span><span class="p">(</span><span class="s1">&#39;clean&#39;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">_should_train_helper</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">wait_for_full_episode</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">_should_train_helper</span><span class="p">(</span><span class="kc">True</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">train</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_should_train</span><span class="p">(</span><span class="n">wait_for_full_episode</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_should_train</span><span class="p">():</span>
<span class="k">for</span> <span class="n">network</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">networks</span><span class="o">.</span><span class="n">values</span><span class="p">():</span>
<span class="n">network</span><span class="o">.</span><span class="n">set_is_training</span><span class="p">(</span><span class="kc">True</span><span class="p">)</span>
@@ -509,6 +512,7 @@
<span class="k">def</span> <span class="nf">choose_action</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">curr_state</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">ap</span><span class="o">.</span><span class="n">algorithm</span><span class="o">.</span><span class="n">clipping_decay_schedule</span><span class="o">.</span><span class="n">step</span><span class="p">()</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">choose_action</span><span class="p">(</span><span class="n">curr_state</span><span class="p">)</span>
</pre></div>
</div>
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/additional_parameters.html">Additional Parameters</a></li>
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/additional_parameters.html">Additional Parameters</a></li>
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/additional_parameters.html">Additional Parameters</a></li>
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/additional_parameters.html">Additional Parameters</a></li>
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/additional_parameters.html">Additional Parameters</a></li>
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/additional_parameters.html">Additional Parameters</a></li>
+8 -2
View File
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/additional_parameters.html">Additional Parameters</a></li>
@@ -378,8 +383,9 @@
<span class="bp">self</span><span class="o">.</span><span class="n">networks</span><span class="p">[</span><span class="s1">&#39;main&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">online_network</span><span class="o">.</span><span class="n">output_heads</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">DND</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">current_episode_state_embeddings</span><span class="p">,</span>
<span class="n">actions</span><span class="p">,</span> <span class="n">discounted_rewards</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">save_checkpoint</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">checkpoint_id</span><span class="p">):</span>
<span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">ap</span><span class="o">.</span><span class="n">task_parameters</span><span class="o">.</span><span class="n">checkpoint_save_dir</span><span class="p">,</span> <span class="nb">str</span><span class="p">(</span><span class="n">checkpoint_id</span><span class="p">)</span> <span class="o">+</span> <span class="s1">&#39;.dnd&#39;</span><span class="p">),</span> <span class="s1">&#39;wb&#39;</span><span class="p">)</span> <span class="k">as</span> <span class="n">f</span><span class="p">:</span>
<span class="k">def</span> <span class="nf">save_checkpoint</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">checkpoint_prefix</span><span class="p">):</span>
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">save_checkpoint</span><span class="p">(</span><span class="n">checkpoint_prefix</span><span class="p">)</span>
<span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">ap</span><span class="o">.</span><span class="n">task_parameters</span><span class="o">.</span><span class="n">checkpoint_save_dir</span><span class="p">,</span> <span class="nb">str</span><span class="p">(</span><span class="n">checkpoint_prefix</span><span class="p">)</span> <span class="o">+</span> <span class="s1">&#39;.dnd&#39;</span><span class="p">),</span> <span class="s1">&#39;wb&#39;</span><span class="p">)</span> <span class="k">as</span> <span class="n">f</span><span class="p">:</span>
<span class="n">pickle</span><span class="o">.</span><span class="n">dump</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">networks</span><span class="p">[</span><span class="s1">&#39;main&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">online_network</span><span class="o">.</span><span class="n">output_heads</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">DND</span><span class="p">,</span> <span class="n">f</span><span class="p">,</span> <span class="n">pickle</span><span class="o">.</span><span class="n">HIGHEST_PROTOCOL</span><span class="p">)</span>
</pre></div>
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/additional_parameters.html">Additional Parameters</a></li>
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/additional_parameters.html">Additional Parameters</a></li>
+9 -5
View File
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/additional_parameters.html">Additional Parameters</a></li>
@@ -295,7 +300,8 @@
<span class="bp">self</span><span class="o">.</span><span class="n">estimate_state_value_using_gae</span> <span class="o">=</span> <span class="kc">True</span>
<span class="bp">self</span><span class="o">.</span><span class="n">use_kl_regularization</span> <span class="o">=</span> <span class="kc">True</span>
<span class="bp">self</span><span class="o">.</span><span class="n">beta_entropy</span> <span class="o">=</span> <span class="mf">0.01</span>
<span class="bp">self</span><span class="o">.</span><span class="n">num_consecutive_playing_steps</span> <span class="o">=</span> <span class="n">EnvironmentSteps</span><span class="p">(</span><span class="mi">5000</span><span class="p">)</span></div>
<span class="bp">self</span><span class="o">.</span><span class="n">num_consecutive_playing_steps</span> <span class="o">=</span> <span class="n">EnvironmentSteps</span><span class="p">(</span><span class="mi">5000</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">act_for_full_episodes</span> <span class="o">=</span> <span class="kc">True</span></div>
<span class="k">class</span> <span class="nc">PPOAgentParameters</span><span class="p">(</span><span class="n">AgentParameters</span><span class="p">):</span>
@@ -529,12 +535,9 @@
<span class="c1"># clean memory</span>
<span class="bp">self</span><span class="o">.</span><span class="n">call_memory</span><span class="p">(</span><span class="s1">&#39;clean&#39;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">_should_train_helper</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">wait_for_full_episode</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">_should_train_helper</span><span class="p">(</span><span class="kc">True</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">train</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="n">loss</span> <span class="o">=</span> <span class="mi">0</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_should_train</span><span class="p">(</span><span class="n">wait_for_full_episode</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_should_train</span><span class="p">():</span>
<span class="k">for</span> <span class="n">network</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">networks</span><span class="o">.</span><span class="n">values</span><span class="p">():</span>
<span class="n">network</span><span class="o">.</span><span class="n">set_is_training</span><span class="p">(</span><span class="kc">True</span><span class="p">)</span>
@@ -566,6 +569,7 @@
<span class="k">def</span> <span class="nf">get_prediction</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">states</span><span class="p">):</span>
<span class="n">tf_input_state</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">prepare_batch_for_inference</span><span class="p">(</span><span class="n">states</span><span class="p">,</span> <span class="s2">&quot;actor&quot;</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">networks</span><span class="p">[</span><span class="s1">&#39;actor&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">online_network</span><span class="o">.</span><span class="n">predict</span><span class="p">(</span><span class="n">tf_input_state</span><span class="p">)</span>
</pre></div>
</div>
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/additional_parameters.html">Additional Parameters</a></li>
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/additional_parameters.html">Additional Parameters</a></li>
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/additional_parameters.html">Additional Parameters</a></li>
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/additional_parameters.html">Additional Parameters</a></li>
@@ -194,10 +199,23 @@
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
<span class="kn">from</span> <span class="nn">rl_coach.base_parameters</span> <span class="k">import</span> <span class="n">AgentParameters</span>
<span class="kn">from</span> <span class="nn">rl_coach.saver</span> <span class="k">import</span> <span class="n">SaverCollection</span>
<span class="kn">from</span> <span class="nn">rl_coach.spaces</span> <span class="k">import</span> <span class="n">SpacesDefinition</span>
<div class="viewcode-block" id="Architecture"><a class="viewcode-back" href="../../../components/architectures/index.html#rl_coach.architectures.architecture.Architecture">[docs]</a><span class="k">class</span> <span class="nc">Architecture</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span>
<div class="viewcode-block" id="Architecture.construct"><a class="viewcode-back" href="../../../components/architectures/index.html#rl_coach.architectures.architecture.Architecture.construct">[docs]</a> <span class="nd">@staticmethod</span>
<span class="k">def</span> <span class="nf">construct</span><span class="p">(</span><span class="n">variable_scope</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">devices</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">],</span> <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s1">&#39;Architecture&#39;</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Construct a network class using the provided variable scope and on requested devices</span>
<span class="sd"> :param variable_scope: string specifying variable scope under which to create network variables</span>
<span class="sd"> :param devices: list of devices (can be list of Device objects, or string for TF distributed)</span>
<span class="sd"> :param args: all other arguments for class initializer</span>
<span class="sd"> :param kwargs: all other keyword arguments for class initializer</span>
<span class="sd"> :return: an object which is a child of Architecture</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">raise</span> <span class="ne">NotImplementedError</span></div>
<span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">agent_parameters</span><span class="p">:</span> <span class="n">AgentParameters</span><span class="p">,</span> <span class="n">spaces</span><span class="p">:</span> <span class="n">SpacesDefinition</span><span class="p">,</span> <span class="n">name</span><span class="p">:</span> <span class="nb">str</span><span class="o">=</span> <span class="s2">&quot;&quot;</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Creates a neural network &#39;architecture&#39;, that can be trained and used for inference.</span>
@@ -386,6 +404,15 @@
<span class="sd"> :param assign_op: a parameter representing the operation for assigning value to a specific variable</span>
<span class="sd"> :param value: value of the specified variable used for update</span>
<span class="sd"> :param placeholder: a placeholder for binding the value to assign_op.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">raise</span> <span class="ne">NotImplementedError</span></div>
<div class="viewcode-block" id="Architecture.collect_savers"><a class="viewcode-back" href="../../../components/architectures/index.html#rl_coach.architectures.architecture.Architecture.collect_savers">[docs]</a> <span class="k">def</span> <span class="nf">collect_savers</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">parent_path_suffix</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">SaverCollection</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Collection of all savers for the network (typically only one saver for network and one for ONNX export)</span>
<span class="sd"> :param parent_path_suffix: path suffix of the parent of the network</span>
<span class="sd"> (e.g. could be name of level manager plus name of agent)</span>
<span class="sd"> :return: saver collection for the network</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">raise</span> <span class="ne">NotImplementedError</span></div></div>
</pre></div>
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/additional_parameters.html">Additional Parameters</a></li>
@@ -193,7 +198,9 @@
<span class="kn">from</span> <span class="nn">rl_coach.base_parameters</span> <span class="k">import</span> <span class="n">Frameworks</span><span class="p">,</span> <span class="n">AgentParameters</span>
<span class="kn">from</span> <span class="nn">rl_coach.logger</span> <span class="k">import</span> <span class="n">failed_imports</span>
<span class="kn">from</span> <span class="nn">rl_coach.saver</span> <span class="k">import</span> <span class="n">SaverCollection</span>
<span class="kn">from</span> <span class="nn">rl_coach.spaces</span> <span class="k">import</span> <span class="n">SpacesDefinition</span>
<span class="kn">from</span> <span class="nn">rl_coach.utils</span> <span class="k">import</span> <span class="n">force_list</span>
<span class="k">try</span><span class="p">:</span>
<span class="kn">import</span> <span class="nn">tensorflow</span> <span class="k">as</span> <span class="nn">tf</span>
<span class="kn">from</span> <span class="nn">rl_coach.architectures.tensorflow_components.general_network</span> <span class="k">import</span> <span class="n">GeneralTensorFlowNetwork</span>
@@ -227,26 +234,27 @@
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">network_parameters</span><span class="o">.</span><span class="n">framework</span> <span class="o">==</span> <span class="n">Frameworks</span><span class="o">.</span><span class="n">tensorflow</span><span class="p">:</span>
<span class="k">if</span> <span class="s2">&quot;tensorflow&quot;</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">failed_imports</span><span class="p">:</span>
<span class="n">general_network</span> <span class="o">=</span> <span class="n">GeneralTensorFlowNetwork</span>
<span class="n">general_network</span> <span class="o">=</span> <span class="n">GeneralTensorFlowNetwork</span><span class="o">.</span><span class="n">construct</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">Exception</span><span class="p">(</span><span class="s1">&#39;Install tensorflow before using it as framework&#39;</span><span class="p">)</span>
<span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">network_parameters</span><span class="o">.</span><span class="n">framework</span> <span class="o">==</span> <span class="n">Frameworks</span><span class="o">.</span><span class="n">mxnet</span><span class="p">:</span>
<span class="k">if</span> <span class="s2">&quot;mxnet&quot;</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">failed_imports</span><span class="p">:</span>
<span class="n">general_network</span> <span class="o">=</span> <span class="n">GeneralMxnetNetwork</span>
<span class="n">general_network</span> <span class="o">=</span> <span class="n">GeneralMxnetNetwork</span><span class="o">.</span><span class="n">construct</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">Exception</span><span class="p">(</span><span class="s1">&#39;Install mxnet before using it as framework&#39;</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">Exception</span><span class="p">(</span><span class="s2">&quot;</span><span class="si">{}</span><span class="s2"> Framework is not supported&quot;</span>
<span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">Frameworks</span><span class="p">()</span><span class="o">.</span><span class="n">to_string</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">network_parameters</span><span class="o">.</span><span class="n">framework</span><span class="p">)))</span>
<span class="k">with</span> <span class="n">tf</span><span class="o">.</span><span class="n">variable_scope</span><span class="p">(</span><span class="s2">&quot;</span><span class="si">{}</span><span class="s2">/</span><span class="si">{}</span><span class="s2">&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">ap</span><span class="o">.</span><span class="n">full_name_id</span><span class="p">,</span> <span class="n">name</span><span class="p">)):</span>
<span class="n">variable_scope</span> <span class="o">=</span> <span class="s2">&quot;</span><span class="si">{}</span><span class="s2">/</span><span class="si">{}</span><span class="s2">&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">ap</span><span class="o">.</span><span class="n">full_name_id</span><span class="p">,</span> <span class="n">name</span><span class="p">)</span>
<span class="c1"># Global network - the main network shared between threads</span>
<span class="bp">self</span><span class="o">.</span><span class="n">global_network</span> <span class="o">=</span> <span class="kc">None</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">has_global</span><span class="p">:</span>
<span class="c1"># we assign the parameters of this network on the parameters server</span>
<span class="k">with</span> <span class="n">tf</span><span class="o">.</span><span class="n">device</span><span class="p">(</span><span class="n">replicated_device</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">global_network</span> <span class="o">=</span> <span class="n">general_network</span><span class="p">(</span><span class="n">agent_parameters</span><span class="o">=</span><span class="n">agent_parameters</span><span class="p">,</span>
<span class="bp">self</span><span class="o">.</span><span class="n">global_network</span> <span class="o">=</span> <span class="n">general_network</span><span class="p">(</span><span class="n">variable_scope</span><span class="o">=</span><span class="n">variable_scope</span><span class="p">,</span>
<span class="n">devices</span><span class="o">=</span><span class="n">force_list</span><span class="p">(</span><span class="n">replicated_device</span><span class="p">),</span>
<span class="n">agent_parameters</span><span class="o">=</span><span class="n">agent_parameters</span><span class="p">,</span>
<span class="n">name</span><span class="o">=</span><span class="s1">&#39;</span><span class="si">{}</span><span class="s1">/global&#39;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">name</span><span class="p">),</span>
<span class="n">global_network</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="n">network_is_local</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span>
@@ -255,8 +263,9 @@
<span class="c1"># Online network - local copy of the main network used for playing</span>
<span class="bp">self</span><span class="o">.</span><span class="n">online_network</span> <span class="o">=</span> <span class="kc">None</span>
<span class="k">with</span> <span class="n">tf</span><span class="o">.</span><span class="n">device</span><span class="p">(</span><span class="n">worker_device</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">online_network</span> <span class="o">=</span> <span class="n">general_network</span><span class="p">(</span><span class="n">agent_parameters</span><span class="o">=</span><span class="n">agent_parameters</span><span class="p">,</span>
<span class="bp">self</span><span class="o">.</span><span class="n">online_network</span> <span class="o">=</span> <span class="n">general_network</span><span class="p">(</span><span class="n">variable_scope</span><span class="o">=</span><span class="n">variable_scope</span><span class="p">,</span>
<span class="n">devices</span><span class="o">=</span><span class="n">force_list</span><span class="p">(</span><span class="n">worker_device</span><span class="p">),</span>
<span class="n">agent_parameters</span><span class="o">=</span><span class="n">agent_parameters</span><span class="p">,</span>
<span class="n">name</span><span class="o">=</span><span class="s1">&#39;</span><span class="si">{}</span><span class="s1">/online&#39;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">name</span><span class="p">),</span>
<span class="n">global_network</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">global_network</span><span class="p">,</span>
<span class="n">network_is_local</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span>
@@ -266,8 +275,9 @@
<span class="c1"># Target network - a local, slow updating network used for stabilizing the learning</span>
<span class="bp">self</span><span class="o">.</span><span class="n">target_network</span> <span class="o">=</span> <span class="kc">None</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">has_target</span><span class="p">:</span>
<span class="k">with</span> <span class="n">tf</span><span class="o">.</span><span class="n">device</span><span class="p">(</span><span class="n">worker_device</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">target_network</span> <span class="o">=</span> <span class="n">general_network</span><span class="p">(</span><span class="n">agent_parameters</span><span class="o">=</span><span class="n">agent_parameters</span><span class="p">,</span>
<span class="bp">self</span><span class="o">.</span><span class="n">target_network</span> <span class="o">=</span> <span class="n">general_network</span><span class="p">(</span><span class="n">variable_scope</span><span class="o">=</span><span class="n">variable_scope</span><span class="p">,</span>
<span class="n">devices</span><span class="o">=</span><span class="n">force_list</span><span class="p">(</span><span class="n">worker_device</span><span class="p">),</span>
<span class="n">agent_parameters</span><span class="o">=</span><span class="n">agent_parameters</span><span class="p">,</span>
<span class="n">name</span><span class="o">=</span><span class="s1">&#39;</span><span class="si">{}</span><span class="s1">/target&#39;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">name</span><span class="p">),</span>
<span class="n">global_network</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">global_network</span><span class="p">,</span>
<span class="n">network_is_local</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span>
@@ -372,26 +382,6 @@
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">type</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">online_network</span><span class="p">)</span><span class="o">.</span><span class="n">parallel_predict</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">sess</span><span class="p">,</span> <span class="n">network_input_tuples</span><span class="p">)</span></div>
<div class="viewcode-block" id="NetworkWrapper.get_local_variables"><a class="viewcode-back" href="../../../components/architectures/index.html#rl_coach.architectures.network_wrapper.NetworkWrapper.get_local_variables">[docs]</a> <span class="k">def</span> <span class="nf">get_local_variables</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Get all the variables that are local to the thread</span>
<span class="sd"> :return: a list of all the variables that are local to the thread</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">local_variables</span> <span class="o">=</span> <span class="p">[</span><span class="n">v</span> <span class="k">for</span> <span class="n">v</span> <span class="ow">in</span> <span class="n">tf</span><span class="o">.</span><span class="n">local_variables</span><span class="p">()</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">online_network</span><span class="o">.</span><span class="n">name</span> <span class="ow">in</span> <span class="n">v</span><span class="o">.</span><span class="n">name</span><span class="p">]</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">has_target</span><span class="p">:</span>
<span class="n">local_variables</span> <span class="o">+=</span> <span class="p">[</span><span class="n">v</span> <span class="k">for</span> <span class="n">v</span> <span class="ow">in</span> <span class="n">tf</span><span class="o">.</span><span class="n">local_variables</span><span class="p">()</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">target_network</span><span class="o">.</span><span class="n">name</span> <span class="ow">in</span> <span class="n">v</span><span class="o">.</span><span class="n">name</span><span class="p">]</span>
<span class="k">return</span> <span class="n">local_variables</span></div>
<div class="viewcode-block" id="NetworkWrapper.get_global_variables"><a class="viewcode-back" href="../../../components/architectures/index.html#rl_coach.architectures.network_wrapper.NetworkWrapper.get_global_variables">[docs]</a> <span class="k">def</span> <span class="nf">get_global_variables</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Get all the variables that are shared between threads</span>
<span class="sd"> :return: a list of all the variables that are shared between threads</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">global_variables</span> <span class="o">=</span> <span class="p">[</span><span class="n">v</span> <span class="k">for</span> <span class="n">v</span> <span class="ow">in</span> <span class="n">tf</span><span class="o">.</span><span class="n">global_variables</span><span class="p">()</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">global_network</span><span class="o">.</span><span class="n">name</span> <span class="ow">in</span> <span class="n">v</span><span class="o">.</span><span class="n">name</span><span class="p">]</span>
<span class="k">return</span> <span class="n">global_variables</span></div>
<div class="viewcode-block" id="NetworkWrapper.set_is_training"><a class="viewcode-back" href="../../../components/architectures/index.html#rl_coach.architectures.network_wrapper.NetworkWrapper.set_is_training">[docs]</a> <span class="k">def</span> <span class="nf">set_is_training</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">state</span><span class="p">:</span> <span class="nb">bool</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Set the phase of the network between training and testing</span>
@@ -425,7 +415,29 @@
<span class="n">result</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="s2">&quot;-&quot;</span><span class="o">*</span><span class="nb">len</span><span class="p">(</span><span class="n">result</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]))</span>
<span class="n">result</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="nb">str</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">online_network</span><span class="p">))</span>
<span class="n">result</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="s2">&quot;&quot;</span><span class="p">)</span>
<span class="k">return</span> <span class="s1">&#39;</span><span class="se">\n</span><span class="s1">&#39;</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">result</span><span class="p">)</span></div>
<span class="k">return</span> <span class="s1">&#39;</span><span class="se">\n</span><span class="s1">&#39;</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">result</span><span class="p">)</span>
<div class="viewcode-block" id="NetworkWrapper.collect_savers"><a class="viewcode-back" href="../../../components/architectures/index.html#rl_coach.architectures.network_wrapper.NetworkWrapper.collect_savers">[docs]</a> <span class="k">def</span> <span class="nf">collect_savers</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">parent_path_suffix</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">SaverCollection</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Collect all of network&#39;s savers for global or online network</span>
<span class="sd"> Note: global, online, and target network are all copies fo the same network which parameters that are</span>
<span class="sd"> updated at different rates. So we only need to save one of the networks; the one that holds the most</span>
<span class="sd"> recent parameters. target network is created for some agents and used for stabilizing training by</span>
<span class="sd"> updating parameters from online network at a slower rate. As a result, target network never contains</span>
<span class="sd"> the most recent set of parameters. In single-worker training, no global network is created and online</span>
<span class="sd"> network contains the most recent parameters. In vertical distributed training with more than one worker,</span>
<span class="sd"> global network is updated by all workers and contains the most recent parameters.</span>
<span class="sd"> Therefore preference is given to global network if it exists, otherwise online network is used</span>
<span class="sd"> for saving.</span>
<span class="sd"> :param parent_path_suffix: path suffix of the parent of the network wrapper</span>
<span class="sd"> (e.g. could be name of level manager plus name of agent)</span>
<span class="sd"> :return: collection of all checkpoint objects</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">global_network</span><span class="p">:</span>
<span class="n">savers</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">global_network</span><span class="o">.</span><span class="n">collect_savers</span><span class="p">(</span><span class="n">parent_path_suffix</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">savers</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">online_network</span><span class="o">.</span><span class="n">collect_savers</span><span class="p">(</span><span class="n">parent_path_suffix</span><span class="p">)</span>
<span class="k">return</span> <span class="n">savers</span></div></div>
</pre></div>
</div>
+57 -5
View File
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../components/additional_parameters.html">Additional Parameters</a></li>
@@ -227,6 +232,43 @@
<span class="c1">#ConcatDepthWise = 2</span>
<span class="c1">#Multiply = 3</span>
<span class="k">class</span> <span class="nc">RunType</span><span class="p">(</span><span class="n">Enum</span><span class="p">):</span>
<span class="n">ORCHESTRATOR</span> <span class="o">=</span> <span class="s2">&quot;orchestrator&quot;</span>
<span class="n">TRAINER</span> <span class="o">=</span> <span class="s2">&quot;trainer&quot;</span>
<span class="n">ROLLOUT_WORKER</span> <span class="o">=</span> <span class="s2">&quot;rollout-worker&quot;</span>
<span class="k">def</span> <span class="nf">__str__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">value</span>
<span class="k">class</span> <span class="nc">DeviceType</span><span class="p">(</span><span class="n">Enum</span><span class="p">):</span>
<span class="n">CPU</span> <span class="o">=</span> <span class="s1">&#39;cpu&#39;</span>
<span class="n">GPU</span> <span class="o">=</span> <span class="s1">&#39;gpu&#39;</span>
<span class="k">class</span> <span class="nc">Device</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span>
<span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">device_type</span><span class="p">:</span> <span class="n">DeviceType</span><span class="p">,</span> <span class="n">index</span><span class="p">:</span> <span class="nb">int</span><span class="o">=</span><span class="mi">0</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> :param device_type: type of device (CPU/GPU)</span>
<span class="sd"> :param index: index of device (only used if device type is GPU)</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_device_type</span> <span class="o">=</span> <span class="n">device_type</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_index</span> <span class="o">=</span> <span class="n">index</span>
<span class="nd">@property</span>
<span class="k">def</span> <span class="nf">device_type</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_device_type</span>
<span class="nd">@property</span>
<span class="k">def</span> <span class="nf">index</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_index</span>
<span class="k">def</span> <span class="nf">__str__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="k">return</span> <span class="s2">&quot;</span><span class="si">{}{}</span><span class="s2">&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_device_type</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_index</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">__repr__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="k">return</span> <span class="nb">str</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
<span class="c1"># DistributedCoachSynchronizationType provides the synchronization type for distributed Coach.</span>
<span class="c1"># The default value is None, which means the algorithm or preset cannot be used with distributed Coach.</span>
@@ -346,6 +388,9 @@
<span class="c1"># Distributed Coach params</span>
<span class="bp">self</span><span class="o">.</span><span class="n">distributed_coach_synchronization_type</span> <span class="o">=</span> <span class="kc">None</span>
<span class="c1"># Should the workers wait for full episode</span>
<span class="bp">self</span><span class="o">.</span><span class="n">act_for_full_episodes</span> <span class="o">=</span> <span class="kc">False</span>
<div class="viewcode-block" id="PresetValidationParameters"><a class="viewcode-back" href="../../components/additional_parameters.html#rl_coach.base_parameters.PresetValidationParameters">[docs]</a><span class="k">class</span> <span class="nc">PresetValidationParameters</span><span class="p">(</span><span class="n">Parameters</span><span class="p">):</span>
<span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span>
@@ -670,7 +715,7 @@
<span class="bp">self</span><span class="o">.</span><span class="n">input_filter</span> <span class="o">=</span> <span class="kc">None</span>
<span class="bp">self</span><span class="o">.</span><span class="n">output_filter</span> <span class="o">=</span> <span class="kc">None</span>
<span class="bp">self</span><span class="o">.</span><span class="n">pre_network_filter</span> <span class="o">=</span> <span class="n">NoInputFilter</span><span class="p">()</span>
<span class="bp">self</span><span class="o">.</span><span class="n">full_name_id</span> <span class="o">=</span> <span class="kc">None</span> <span class="c1"># TODO: do we really want to hold this parameter here?</span>
<span class="bp">self</span><span class="o">.</span><span class="n">full_name_id</span> <span class="o">=</span> <span class="kc">None</span>
<span class="bp">self</span><span class="o">.</span><span class="n">name</span> <span class="o">=</span> <span class="kc">None</span>
<span class="bp">self</span><span class="o">.</span><span class="n">is_a_highest_level_agent</span> <span class="o">=</span> <span class="kc">True</span>
<span class="bp">self</span><span class="o">.</span><span class="n">is_a_lowest_level_agent</span> <span class="o">=</span> <span class="kc">True</span>
@@ -684,7 +729,8 @@
<div class="viewcode-block" id="TaskParameters"><a class="viewcode-back" href="../../components/additional_parameters.html#rl_coach.base_parameters.TaskParameters">[docs]</a><span class="k">class</span> <span class="nc">TaskParameters</span><span class="p">(</span><span class="n">Parameters</span><span class="p">):</span>
<span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">framework_type</span><span class="p">:</span> <span class="n">Frameworks</span><span class="o">=</span><span class="n">Frameworks</span><span class="o">.</span><span class="n">tensorflow</span><span class="p">,</span> <span class="n">evaluate_only</span><span class="p">:</span> <span class="nb">bool</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">use_cpu</span><span class="p">:</span> <span class="nb">bool</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span>
<span class="n">experiment_path</span><span class="o">=</span><span class="s1">&#39;/tmp&#39;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">checkpoint_save_secs</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">checkpoint_restore_dir</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="n">checkpoint_save_dir</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">export_onnx_graph</span><span class="p">:</span> <span class="nb">bool</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
<span class="n">checkpoint_save_dir</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">export_onnx_graph</span><span class="p">:</span> <span class="nb">bool</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">apply_stop_condition</span><span class="p">:</span> <span class="nb">bool</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span>
<span class="n">num_gpu</span><span class="p">:</span> <span class="nb">int</span><span class="o">=</span><span class="mi">1</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> :param framework_type: deep learning framework type. currently only tensorflow is supported</span>
<span class="sd"> :param evaluate_only: the task will be used only for evaluating the model</span>
@@ -695,6 +741,8 @@
<span class="sd"> :param checkpoint_restore_dir: the directory to restore the checkpoints from</span>
<span class="sd"> :param checkpoint_save_dir: the directory to store the checkpoints in</span>
<span class="sd"> :param export_onnx_graph: If set to True, this will export an onnx graph each time a checkpoint is saved</span>
<span class="sd"> :param apply_stop_condition: If set to True, this will apply the stop condition defined by reaching a target success rate</span>
<span class="sd"> :param num_gpu: number of GPUs to use</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">framework_type</span> <span class="o">=</span> <span class="n">framework_type</span>
<span class="bp">self</span><span class="o">.</span><span class="n">task_index</span> <span class="o">=</span> <span class="mi">0</span> <span class="c1"># TODO: not really needed</span>
@@ -705,7 +753,9 @@
<span class="bp">self</span><span class="o">.</span><span class="n">checkpoint_restore_dir</span> <span class="o">=</span> <span class="n">checkpoint_restore_dir</span>
<span class="bp">self</span><span class="o">.</span><span class="n">checkpoint_save_dir</span> <span class="o">=</span> <span class="n">checkpoint_save_dir</span>
<span class="bp">self</span><span class="o">.</span><span class="n">seed</span> <span class="o">=</span> <span class="n">seed</span>
<span class="bp">self</span><span class="o">.</span><span class="n">export_onnx_graph</span> <span class="o">=</span> <span class="n">export_onnx_graph</span></div>
<span class="bp">self</span><span class="o">.</span><span class="n">export_onnx_graph</span> <span class="o">=</span> <span class="n">export_onnx_graph</span>
<span class="bp">self</span><span class="o">.</span><span class="n">apply_stop_condition</span> <span class="o">=</span> <span class="n">apply_stop_condition</span>
<span class="bp">self</span><span class="o">.</span><span class="n">num_gpu</span> <span class="o">=</span> <span class="n">num_gpu</span></div>
<div class="viewcode-block" id="DistributedTaskParameters"><a class="viewcode-back" href="../../components/additional_parameters.html#rl_coach.base_parameters.DistributedTaskParameters">[docs]</a><span class="k">class</span> <span class="nc">DistributedTaskParameters</span><span class="p">(</span><span class="n">TaskParameters</span><span class="p">):</span>
@@ -713,7 +763,7 @@
<span class="n">task_index</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span> <span class="n">evaluate_only</span><span class="p">:</span> <span class="nb">bool</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">num_tasks</span><span class="p">:</span> <span class="nb">int</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="n">num_training_tasks</span><span class="p">:</span> <span class="nb">int</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">use_cpu</span><span class="p">:</span> <span class="nb">bool</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">experiment_path</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">dnd</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="n">shared_memory_scratchpad</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">checkpoint_save_secs</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">checkpoint_restore_dir</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="n">checkpoint_save_dir</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">export_onnx_graph</span><span class="p">:</span> <span class="nb">bool</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
<span class="n">checkpoint_save_dir</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">export_onnx_graph</span><span class="p">:</span> <span class="nb">bool</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">apply_stop_condition</span><span class="p">:</span> <span class="nb">bool</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> :param framework_type: deep learning framework type. currently only tensorflow is supported</span>
<span class="sd"> :param evaluate_only: the task will be used only for evaluating the model</span>
@@ -732,11 +782,13 @@
<span class="sd"> :param checkpoint_restore_dir: the directory to restore the checkpoints from</span>
<span class="sd"> :param checkpoint_save_dir: the directory to store the checkpoints in</span>
<span class="sd"> :param export_onnx_graph: If set to True, this will export an onnx graph each time a checkpoint is saved</span>
<span class="sd"> :param apply_stop_condition: If set to True, this will apply the stop condition defined by reaching a target success rate</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">framework_type</span><span class="o">=</span><span class="n">framework_type</span><span class="p">,</span> <span class="n">evaluate_only</span><span class="o">=</span><span class="n">evaluate_only</span><span class="p">,</span> <span class="n">use_cpu</span><span class="o">=</span><span class="n">use_cpu</span><span class="p">,</span>
<span class="n">experiment_path</span><span class="o">=</span><span class="n">experiment_path</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">,</span> <span class="n">checkpoint_save_secs</span><span class="o">=</span><span class="n">checkpoint_save_secs</span><span class="p">,</span>
<span class="n">checkpoint_restore_dir</span><span class="o">=</span><span class="n">checkpoint_restore_dir</span><span class="p">,</span> <span class="n">checkpoint_save_dir</span><span class="o">=</span><span class="n">checkpoint_save_dir</span><span class="p">,</span>
<span class="n">export_onnx_graph</span><span class="o">=</span><span class="n">export_onnx_graph</span><span class="p">)</span>
<span class="n">export_onnx_graph</span><span class="o">=</span><span class="n">export_onnx_graph</span><span class="p">,</span> <span class="n">apply_stop_condition</span><span class="o">=</span><span class="n">apply_stop_condition</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">parameters_server_hosts</span> <span class="o">=</span> <span class="n">parameters_server_hosts</span>
<span class="bp">self</span><span class="o">.</span><span class="n">worker_hosts</span> <span class="o">=</span> <span class="n">worker_hosts</span>
<span class="bp">self</span><span class="o">.</span><span class="n">job_type</span> <span class="o">=</span> <span class="n">job_type</span>
+9
View File
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../components/additional_parameters.html">Additional Parameters</a></li>
@@ -289,6 +294,10 @@
<span class="k">pass</span>
<span class="k">class</span> <span class="nc">InputTensorEmbedding</span><span class="p">(</span><span class="n">InputEmbedding</span><span class="p">):</span>
<span class="k">pass</span>
<span class="k">class</span> <span class="nc">Middleware_FC_Embedding</span><span class="p">(</span><span class="n">MiddlewareEmbedding</span><span class="p">):</span>
<span class="k">pass</span>
@@ -0,0 +1,501 @@
<!DOCTYPE html>
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>rl_coach.data_stores.nfs_data_store &mdash; Reinforcement Learning Coach 0.11.0 documentation</title>
<link rel="stylesheet" href="../../../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../../../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../../../_static/css/custom.css" type="text/css" />
<link rel="index" title="Index" href="../../../genindex.html" />
<link rel="search" title="Search" href="../../../search.html" />
<link href="../../../_static/css/custom.css" rel="stylesheet" type="text/css">
<script src="../../../_static/js/modernizr.min.js"></script>
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search">
<a href="../../../index.html" class="icon icon-home"> Reinforcement Learning Coach
<img src="../../../_static/dark_logo.png" class="logo" alt="Logo"/>
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div>
<div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dashboard.html">Coach Dashboard</a></li>
</ul>
<p class="caption"><span class="caption-text">Design</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../contributing/add_agent.html">Adding a New Agent</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../contributing/add_env.html">Adding a New Environment</a></li>
</ul>
<p class="caption"><span class="caption-text">Components</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/additional_parameters.html">Additional Parameters</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
<nav class="wy-nav-top" aria-label="top navigation">
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../../../index.html">Reinforcement Learning Coach</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="breadcrumbs navigation">
<ul class="wy-breadcrumbs">
<li><a href="../../../index.html">Docs</a> &raquo;</li>
<li><a href="../../index.html">Module code</a> &raquo;</li>
<li>rl_coach.data_stores.nfs_data_store</li>
<li class="wy-breadcrumbs-aside">
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<h1>Source code for rl_coach.data_stores.nfs_data_store</h1><div class="highlight"><pre>
<span></span><span class="kn">import</span> <span class="nn">uuid</span>
<span class="kn">from</span> <span class="nn">rl_coach.data_stores.data_store</span> <span class="k">import</span> <span class="n">DataStore</span><span class="p">,</span> <span class="n">DataStoreParameters</span>
<span class="k">class</span> <span class="nc">NFSDataStoreParameters</span><span class="p">(</span><span class="n">DataStoreParameters</span><span class="p">):</span>
<span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">ds_params</span><span class="p">,</span> <span class="n">deployed</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">server</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">path</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">ds_params</span><span class="o">.</span><span class="n">store_type</span><span class="p">,</span> <span class="n">ds_params</span><span class="o">.</span><span class="n">orchestrator_type</span><span class="p">,</span> <span class="n">ds_params</span><span class="o">.</span><span class="n">orchestrator_params</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">namespace</span> <span class="o">=</span> <span class="s2">&quot;default&quot;</span>
<span class="k">if</span> <span class="s2">&quot;namespace&quot;</span> <span class="ow">in</span> <span class="n">ds_params</span><span class="o">.</span><span class="n">orchestrator_params</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">namespace</span> <span class="o">=</span> <span class="n">ds_params</span><span class="o">.</span><span class="n">orchestrator_params</span><span class="p">[</span><span class="s2">&quot;namespace&quot;</span><span class="p">]</span>
<span class="bp">self</span><span class="o">.</span><span class="n">name</span> <span class="o">=</span> <span class="kc">None</span>
<span class="bp">self</span><span class="o">.</span><span class="n">pvc_name</span> <span class="o">=</span> <span class="kc">None</span>
<span class="bp">self</span><span class="o">.</span><span class="n">pv_name</span> <span class="o">=</span> <span class="kc">None</span>
<span class="bp">self</span><span class="o">.</span><span class="n">svc_name</span> <span class="o">=</span> <span class="kc">None</span>
<span class="bp">self</span><span class="o">.</span><span class="n">server</span> <span class="o">=</span> <span class="kc">None</span>
<span class="bp">self</span><span class="o">.</span><span class="n">path</span> <span class="o">=</span> <span class="s2">&quot;/&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">deployed</span> <span class="o">=</span> <span class="n">deployed</span>
<span class="k">if</span> <span class="n">deployed</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">server</span> <span class="o">=</span> <span class="n">server</span>
<span class="bp">self</span><span class="o">.</span><span class="n">path</span> <span class="o">=</span> <span class="n">path</span>
<div class="viewcode-block" id="NFSDataStore"><a class="viewcode-back" href="../../../components/data_stores/index.html#rl_coach.data_stores.nfs_data_store.NFSDataStore">[docs]</a><span class="k">class</span> <span class="nc">NFSDataStore</span><span class="p">(</span><span class="n">DataStore</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> An implementation of data store which uses NFS for storing policy checkpoints when using Coach in distributed mode.</span>
<span class="sd"> The policy checkpoints are written by the trainer and read by the rollout worker.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">params</span><span class="p">:</span> <span class="n">NFSDataStoreParameters</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> :param params: The parameters required to use the NFS data store.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">params</span> <span class="o">=</span> <span class="n">params</span>
<span class="k">def</span> <span class="nf">deploy</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">bool</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Deploy the NFS server in an orchestrator if/when required.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">orchestrator_type</span> <span class="o">==</span> <span class="s2">&quot;kubernetes&quot;</span><span class="p">:</span>
<span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">deployed</span><span class="p">:</span>
<span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">deploy_k8s_nfs</span><span class="p">():</span>
<span class="k">return</span> <span class="kc">False</span>
<span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">create_k8s_nfs_resources</span><span class="p">():</span>
<span class="k">return</span> <span class="kc">False</span>
<span class="k">return</span> <span class="kc">True</span>
<span class="k">def</span> <span class="nf">get_info</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="kn">from</span> <span class="nn">kubernetes</span> <span class="k">import</span> <span class="n">client</span> <span class="k">as</span> <span class="n">k8sclient</span>
<span class="k">return</span> <span class="n">k8sclient</span><span class="o">.</span><span class="n">V1PersistentVolumeClaimVolumeSource</span><span class="p">(</span>
<span class="n">claim_name</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">pvc_name</span>
<span class="p">)</span>
<span class="k">def</span> <span class="nf">undeploy</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">bool</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Undeploy the NFS server and resources from an orchestrator.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">orchestrator_type</span> <span class="o">==</span> <span class="s2">&quot;kubernetes&quot;</span><span class="p">:</span>
<span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">deployed</span><span class="p">:</span>
<span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">undeploy_k8s_nfs</span><span class="p">():</span>
<span class="k">return</span> <span class="kc">False</span>
<span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">delete_k8s_nfs_resources</span><span class="p">():</span>
<span class="k">return</span> <span class="kc">False</span>
<span class="k">return</span> <span class="kc">True</span>
<span class="k">def</span> <span class="nf">save_to_store</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="k">pass</span>
<span class="k">def</span> <span class="nf">load_from_store</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="k">pass</span>
<span class="k">def</span> <span class="nf">deploy_k8s_nfs</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">bool</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Deploy the NFS server in the Kubernetes orchestrator.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="kn">from</span> <span class="nn">kubernetes</span> <span class="k">import</span> <span class="n">client</span> <span class="k">as</span> <span class="n">k8sclient</span>
<span class="n">name</span> <span class="o">=</span> <span class="s2">&quot;nfs-server-</span><span class="si">{}</span><span class="s2">&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">uuid</span><span class="o">.</span><span class="n">uuid4</span><span class="p">())</span>
<span class="n">container</span> <span class="o">=</span> <span class="n">k8sclient</span><span class="o">.</span><span class="n">V1Container</span><span class="p">(</span>
<span class="n">name</span><span class="o">=</span><span class="n">name</span><span class="p">,</span>
<span class="n">image</span><span class="o">=</span><span class="s2">&quot;k8s.gcr.io/volume-nfs:0.8&quot;</span><span class="p">,</span>
<span class="n">ports</span><span class="o">=</span><span class="p">[</span><span class="n">k8sclient</span><span class="o">.</span><span class="n">V1ContainerPort</span><span class="p">(</span>
<span class="n">name</span><span class="o">=</span><span class="s2">&quot;nfs&quot;</span><span class="p">,</span>
<span class="n">container_port</span><span class="o">=</span><span class="mi">2049</span><span class="p">,</span>
<span class="n">protocol</span><span class="o">=</span><span class="s2">&quot;TCP&quot;</span>
<span class="p">),</span>
<span class="n">k8sclient</span><span class="o">.</span><span class="n">V1ContainerPort</span><span class="p">(</span>
<span class="n">name</span><span class="o">=</span><span class="s2">&quot;rpcbind&quot;</span><span class="p">,</span>
<span class="n">container_port</span><span class="o">=</span><span class="mi">111</span>
<span class="p">),</span>
<span class="n">k8sclient</span><span class="o">.</span><span class="n">V1ContainerPort</span><span class="p">(</span>
<span class="n">name</span><span class="o">=</span><span class="s2">&quot;mountd&quot;</span><span class="p">,</span>
<span class="n">container_port</span><span class="o">=</span><span class="mi">20048</span>
<span class="p">),</span>
<span class="p">],</span>
<span class="n">volume_mounts</span><span class="o">=</span><span class="p">[</span><span class="n">k8sclient</span><span class="o">.</span><span class="n">V1VolumeMount</span><span class="p">(</span>
<span class="n">name</span><span class="o">=</span><span class="s1">&#39;nfs-host-path&#39;</span><span class="p">,</span>
<span class="n">mount_path</span><span class="o">=</span><span class="s1">&#39;/exports&#39;</span>
<span class="p">)],</span>
<span class="n">security_context</span><span class="o">=</span><span class="n">k8sclient</span><span class="o">.</span><span class="n">V1SecurityContext</span><span class="p">(</span><span class="n">privileged</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="p">)</span>
<span class="n">template</span> <span class="o">=</span> <span class="n">k8sclient</span><span class="o">.</span><span class="n">V1PodTemplateSpec</span><span class="p">(</span>
<span class="n">metadata</span><span class="o">=</span><span class="n">k8sclient</span><span class="o">.</span><span class="n">V1ObjectMeta</span><span class="p">(</span><span class="n">labels</span><span class="o">=</span><span class="p">{</span><span class="s1">&#39;app&#39;</span><span class="p">:</span> <span class="n">name</span><span class="p">}),</span>
<span class="n">spec</span><span class="o">=</span><span class="n">k8sclient</span><span class="o">.</span><span class="n">V1PodSpec</span><span class="p">(</span>
<span class="n">containers</span><span class="o">=</span><span class="p">[</span><span class="n">container</span><span class="p">],</span>
<span class="n">volumes</span><span class="o">=</span><span class="p">[</span><span class="n">k8sclient</span><span class="o">.</span><span class="n">V1Volume</span><span class="p">(</span>
<span class="n">name</span><span class="o">=</span><span class="s2">&quot;nfs-host-path&quot;</span><span class="p">,</span>
<span class="n">host_path</span><span class="o">=</span><span class="n">k8sclient</span><span class="o">.</span><span class="n">V1HostPathVolumeSource</span><span class="p">(</span><span class="n">path</span><span class="o">=</span><span class="s1">&#39;/tmp/nfsexports-</span><span class="si">{}</span><span class="s1">&#39;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">uuid</span><span class="o">.</span><span class="n">uuid4</span><span class="p">()))</span>
<span class="p">)]</span>
<span class="p">)</span>
<span class="p">)</span>
<span class="n">deployment_spec</span> <span class="o">=</span> <span class="n">k8sclient</span><span class="o">.</span><span class="n">V1DeploymentSpec</span><span class="p">(</span>
<span class="n">replicas</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
<span class="n">template</span><span class="o">=</span><span class="n">template</span><span class="p">,</span>
<span class="n">selector</span><span class="o">=</span><span class="n">k8sclient</span><span class="o">.</span><span class="n">V1LabelSelector</span><span class="p">(</span>
<span class="n">match_labels</span><span class="o">=</span><span class="p">{</span><span class="s1">&#39;app&#39;</span><span class="p">:</span> <span class="n">name</span><span class="p">}</span>
<span class="p">)</span>
<span class="p">)</span>
<span class="n">deployment</span> <span class="o">=</span> <span class="n">k8sclient</span><span class="o">.</span><span class="n">V1Deployment</span><span class="p">(</span>
<span class="n">api_version</span><span class="o">=</span><span class="s1">&#39;apps/v1&#39;</span><span class="p">,</span>
<span class="n">kind</span><span class="o">=</span><span class="s1">&#39;Deployment&#39;</span><span class="p">,</span>
<span class="n">metadata</span><span class="o">=</span><span class="n">k8sclient</span><span class="o">.</span><span class="n">V1ObjectMeta</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="n">name</span><span class="p">,</span> <span class="n">labels</span><span class="o">=</span><span class="p">{</span><span class="s1">&#39;app&#39;</span><span class="p">:</span> <span class="n">name</span><span class="p">}),</span>
<span class="n">spec</span><span class="o">=</span><span class="n">deployment_spec</span>
<span class="p">)</span>
<span class="n">k8s_apps_v1_api_client</span> <span class="o">=</span> <span class="n">k8sclient</span><span class="o">.</span><span class="n">AppsV1Api</span><span class="p">()</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">k8s_apps_v1_api_client</span><span class="o">.</span><span class="n">create_namespaced_deployment</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">namespace</span><span class="p">,</span> <span class="n">deployment</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">name</span> <span class="o">=</span> <span class="n">name</span>
<span class="k">except</span> <span class="n">k8sclient</span><span class="o">.</span><span class="n">rest</span><span class="o">.</span><span class="n">ApiException</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Got exception: </span><span class="si">%s</span><span class="se">\n</span><span class="s2"> while creating nfs-server&quot;</span><span class="p">,</span> <span class="n">e</span><span class="p">)</span>
<span class="k">return</span> <span class="kc">False</span>
<span class="n">k8s_core_v1_api_client</span> <span class="o">=</span> <span class="n">k8sclient</span><span class="o">.</span><span class="n">CoreV1Api</span><span class="p">()</span>
<span class="n">svc_name</span> <span class="o">=</span> <span class="s2">&quot;nfs-service-</span><span class="si">{}</span><span class="s2">&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">uuid</span><span class="o">.</span><span class="n">uuid4</span><span class="p">())</span>
<span class="n">service</span> <span class="o">=</span> <span class="n">k8sclient</span><span class="o">.</span><span class="n">V1Service</span><span class="p">(</span>
<span class="n">api_version</span><span class="o">=</span><span class="s1">&#39;v1&#39;</span><span class="p">,</span>
<span class="n">kind</span><span class="o">=</span><span class="s1">&#39;Service&#39;</span><span class="p">,</span>
<span class="n">metadata</span><span class="o">=</span><span class="n">k8sclient</span><span class="o">.</span><span class="n">V1ObjectMeta</span><span class="p">(</span>
<span class="n">name</span><span class="o">=</span><span class="n">svc_name</span>
<span class="p">),</span>
<span class="n">spec</span><span class="o">=</span><span class="n">k8sclient</span><span class="o">.</span><span class="n">V1ServiceSpec</span><span class="p">(</span>
<span class="n">selector</span><span class="o">=</span><span class="p">{</span><span class="s1">&#39;app&#39;</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">name</span><span class="p">},</span>
<span class="n">ports</span><span class="o">=</span><span class="p">[</span><span class="n">k8sclient</span><span class="o">.</span><span class="n">V1ServicePort</span><span class="p">(</span>
<span class="n">protocol</span><span class="o">=</span><span class="s1">&#39;TCP&#39;</span><span class="p">,</span>
<span class="n">port</span><span class="o">=</span><span class="mi">2049</span><span class="p">,</span>
<span class="n">target_port</span><span class="o">=</span><span class="mi">2049</span>
<span class="p">)]</span>
<span class="p">)</span>
<span class="p">)</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">svc_response</span> <span class="o">=</span> <span class="n">k8s_core_v1_api_client</span><span class="o">.</span><span class="n">create_namespaced_service</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">namespace</span><span class="p">,</span> <span class="n">service</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">svc_name</span> <span class="o">=</span> <span class="n">svc_name</span>
<span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">server</span> <span class="o">=</span> <span class="n">svc_response</span><span class="o">.</span><span class="n">spec</span><span class="o">.</span><span class="n">cluster_ip</span>
<span class="k">except</span> <span class="n">k8sclient</span><span class="o">.</span><span class="n">rest</span><span class="o">.</span><span class="n">ApiException</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Got exception: </span><span class="si">%s</span><span class="se">\n</span><span class="s2"> while creating a service for nfs-server&quot;</span><span class="p">,</span> <span class="n">e</span><span class="p">)</span>
<span class="k">return</span> <span class="kc">False</span>
<span class="k">return</span> <span class="kc">True</span>
<span class="k">def</span> <span class="nf">create_k8s_nfs_resources</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">bool</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Create NFS resources such as PV and PVC in Kubernetes.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="kn">from</span> <span class="nn">kubernetes</span> <span class="k">import</span> <span class="n">client</span> <span class="k">as</span> <span class="n">k8sclient</span>
<span class="n">pv_name</span> <span class="o">=</span> <span class="s2">&quot;nfs-ckpt-pv-</span><span class="si">{}</span><span class="s2">&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">uuid</span><span class="o">.</span><span class="n">uuid4</span><span class="p">())</span>
<span class="n">persistent_volume</span> <span class="o">=</span> <span class="n">k8sclient</span><span class="o">.</span><span class="n">V1PersistentVolume</span><span class="p">(</span>
<span class="n">api_version</span><span class="o">=</span><span class="s2">&quot;v1&quot;</span><span class="p">,</span>
<span class="n">kind</span><span class="o">=</span><span class="s2">&quot;PersistentVolume&quot;</span><span class="p">,</span>
<span class="n">metadata</span><span class="o">=</span><span class="n">k8sclient</span><span class="o">.</span><span class="n">V1ObjectMeta</span><span class="p">(</span>
<span class="n">name</span><span class="o">=</span><span class="n">pv_name</span><span class="p">,</span>
<span class="n">labels</span><span class="o">=</span><span class="p">{</span><span class="s1">&#39;app&#39;</span><span class="p">:</span> <span class="n">pv_name</span><span class="p">}</span>
<span class="p">),</span>
<span class="n">spec</span><span class="o">=</span><span class="n">k8sclient</span><span class="o">.</span><span class="n">V1PersistentVolumeSpec</span><span class="p">(</span>
<span class="n">access_modes</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;ReadWriteMany&quot;</span><span class="p">],</span>
<span class="n">nfs</span><span class="o">=</span><span class="n">k8sclient</span><span class="o">.</span><span class="n">V1NFSVolumeSource</span><span class="p">(</span>
<span class="n">path</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">path</span><span class="p">,</span>
<span class="n">server</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">server</span>
<span class="p">),</span>
<span class="n">capacity</span><span class="o">=</span><span class="p">{</span><span class="s1">&#39;storage&#39;</span><span class="p">:</span> <span class="s1">&#39;10Gi&#39;</span><span class="p">},</span>
<span class="n">storage_class_name</span><span class="o">=</span><span class="s2">&quot;&quot;</span>
<span class="p">)</span>
<span class="p">)</span>
<span class="n">k8s_api_client</span> <span class="o">=</span> <span class="n">k8sclient</span><span class="o">.</span><span class="n">CoreV1Api</span><span class="p">()</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">k8s_api_client</span><span class="o">.</span><span class="n">create_persistent_volume</span><span class="p">(</span><span class="n">persistent_volume</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">pv_name</span> <span class="o">=</span> <span class="n">pv_name</span>
<span class="k">except</span> <span class="n">k8sclient</span><span class="o">.</span><span class="n">rest</span><span class="o">.</span><span class="n">ApiException</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Got exception: </span><span class="si">%s</span><span class="se">\n</span><span class="s2"> while creating the NFS PV&quot;</span><span class="p">,</span> <span class="n">e</span><span class="p">)</span>
<span class="k">return</span> <span class="kc">False</span>
<span class="n">pvc_name</span> <span class="o">=</span> <span class="s2">&quot;nfs-ckpt-pvc-</span><span class="si">{}</span><span class="s2">&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">uuid</span><span class="o">.</span><span class="n">uuid4</span><span class="p">())</span>
<span class="n">persistent_volume_claim</span> <span class="o">=</span> <span class="n">k8sclient</span><span class="o">.</span><span class="n">V1PersistentVolumeClaim</span><span class="p">(</span>
<span class="n">api_version</span><span class="o">=</span><span class="s2">&quot;v1&quot;</span><span class="p">,</span>
<span class="n">kind</span><span class="o">=</span><span class="s2">&quot;PersistentVolumeClaim&quot;</span><span class="p">,</span>
<span class="n">metadata</span><span class="o">=</span><span class="n">k8sclient</span><span class="o">.</span><span class="n">V1ObjectMeta</span><span class="p">(</span>
<span class="n">name</span><span class="o">=</span><span class="n">pvc_name</span>
<span class="p">),</span>
<span class="n">spec</span><span class="o">=</span><span class="n">k8sclient</span><span class="o">.</span><span class="n">V1PersistentVolumeClaimSpec</span><span class="p">(</span>
<span class="n">access_modes</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;ReadWriteMany&quot;</span><span class="p">],</span>
<span class="n">resources</span><span class="o">=</span><span class="n">k8sclient</span><span class="o">.</span><span class="n">V1ResourceRequirements</span><span class="p">(</span>
<span class="n">requests</span><span class="o">=</span><span class="p">{</span><span class="s1">&#39;storage&#39;</span><span class="p">:</span> <span class="s1">&#39;10Gi&#39;</span><span class="p">}</span>
<span class="p">),</span>
<span class="n">selector</span><span class="o">=</span><span class="n">k8sclient</span><span class="o">.</span><span class="n">V1LabelSelector</span><span class="p">(</span>
<span class="n">match_labels</span><span class="o">=</span><span class="p">{</span><span class="s1">&#39;app&#39;</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">pv_name</span><span class="p">}</span>
<span class="p">),</span>
<span class="n">storage_class_name</span><span class="o">=</span><span class="s2">&quot;&quot;</span>
<span class="p">)</span>
<span class="p">)</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">k8s_api_client</span><span class="o">.</span><span class="n">create_namespaced_persistent_volume_claim</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">namespace</span><span class="p">,</span> <span class="n">persistent_volume_claim</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">pvc_name</span> <span class="o">=</span> <span class="n">pvc_name</span>
<span class="k">except</span> <span class="n">k8sclient</span><span class="o">.</span><span class="n">rest</span><span class="o">.</span><span class="n">ApiException</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Got exception: </span><span class="si">%s</span><span class="se">\n</span><span class="s2"> while creating the NFS PVC&quot;</span><span class="p">,</span> <span class="n">e</span><span class="p">)</span>
<span class="k">return</span> <span class="kc">False</span>
<span class="k">return</span> <span class="kc">True</span>
<span class="k">def</span> <span class="nf">undeploy_k8s_nfs</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">bool</span><span class="p">:</span>
<span class="kn">from</span> <span class="nn">kubernetes</span> <span class="k">import</span> <span class="n">client</span> <span class="k">as</span> <span class="n">k8sclient</span>
<span class="n">del_options</span> <span class="o">=</span> <span class="n">k8sclient</span><span class="o">.</span><span class="n">V1DeleteOptions</span><span class="p">()</span>
<span class="n">k8s_apps_v1_api_client</span> <span class="o">=</span> <span class="n">k8sclient</span><span class="o">.</span><span class="n">AppsV1Api</span><span class="p">()</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">k8s_apps_v1_api_client</span><span class="o">.</span><span class="n">delete_namespaced_deployment</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">name</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">namespace</span><span class="p">,</span> <span class="n">del_options</span><span class="p">)</span>
<span class="k">except</span> <span class="n">k8sclient</span><span class="o">.</span><span class="n">rest</span><span class="o">.</span><span class="n">ApiException</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Got exception: </span><span class="si">%s</span><span class="se">\n</span><span class="s2"> while deleting nfs-server&quot;</span><span class="p">,</span> <span class="n">e</span><span class="p">)</span>
<span class="k">return</span> <span class="kc">False</span>
<span class="n">k8s_core_v1_api_client</span> <span class="o">=</span> <span class="n">k8sclient</span><span class="o">.</span><span class="n">CoreV1Api</span><span class="p">()</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">k8s_core_v1_api_client</span><span class="o">.</span><span class="n">delete_namespaced_service</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">svc_name</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">namespace</span><span class="p">,</span> <span class="n">del_options</span><span class="p">)</span>
<span class="k">except</span> <span class="n">k8sclient</span><span class="o">.</span><span class="n">rest</span><span class="o">.</span><span class="n">ApiException</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Got exception: </span><span class="si">%s</span><span class="se">\n</span><span class="s2"> while deleting the service for nfs-server&quot;</span><span class="p">,</span> <span class="n">e</span><span class="p">)</span>
<span class="k">return</span> <span class="kc">False</span>
<span class="k">return</span> <span class="kc">True</span>
<span class="k">def</span> <span class="nf">delete_k8s_nfs_resources</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">bool</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Delete NFS resources such as PV and PVC from the Kubernetes orchestrator.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="kn">from</span> <span class="nn">kubernetes</span> <span class="k">import</span> <span class="n">client</span> <span class="k">as</span> <span class="n">k8sclient</span>
<span class="n">del_options</span> <span class="o">=</span> <span class="n">k8sclient</span><span class="o">.</span><span class="n">V1DeleteOptions</span><span class="p">()</span>
<span class="n">k8s_api_client</span> <span class="o">=</span> <span class="n">k8sclient</span><span class="o">.</span><span class="n">CoreV1Api</span><span class="p">()</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">k8s_api_client</span><span class="o">.</span><span class="n">delete_persistent_volume</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">pv_name</span><span class="p">,</span> <span class="n">del_options</span><span class="p">)</span>
<span class="k">except</span> <span class="n">k8sclient</span><span class="o">.</span><span class="n">rest</span><span class="o">.</span><span class="n">ApiException</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Got exception: </span><span class="si">%s</span><span class="se">\n</span><span class="s2"> while deleting NFS PV&quot;</span><span class="p">,</span> <span class="n">e</span><span class="p">)</span>
<span class="k">return</span> <span class="kc">False</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">k8s_api_client</span><span class="o">.</span><span class="n">delete_namespaced_persistent_volume_claim</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">pvc_name</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">namespace</span><span class="p">,</span> <span class="n">del_options</span><span class="p">)</span>
<span class="k">except</span> <span class="n">k8sclient</span><span class="o">.</span><span class="n">rest</span><span class="o">.</span><span class="n">ApiException</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Got exception: </span><span class="si">%s</span><span class="se">\n</span><span class="s2"> while deleting NFS PVC&quot;</span><span class="p">,</span> <span class="n">e</span><span class="p">)</span>
<span class="k">return</span> <span class="kc">False</span>
<span class="k">return</span> <span class="kc">True</span></div>
</pre></div>
</div>
</div>
<footer>
<hr/>
<div role="contentinfo">
<p>
&copy; Copyright 2018, Intel AI Lab
</p>
</div>
Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script type="text/javascript" id="documentation_options" data-url_root="../../../" src="../../../_static/documentation_options.js"></script>
<script type="text/javascript" src="../../../_static/jquery.js"></script>
<script type="text/javascript" src="../../../_static/underscore.js"></script>
<script type="text/javascript" src="../../../_static/doctools.js"></script>
<script async="async" type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.1/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
<script type="text/javascript" src="../../../_static/js/theme.js"></script>
<script type="text/javascript">
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>
@@ -0,0 +1,384 @@
<!DOCTYPE html>
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>rl_coach.data_stores.s3_data_store &mdash; Reinforcement Learning Coach 0.11.0 documentation</title>
<link rel="stylesheet" href="../../../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../../../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../../../_static/css/custom.css" type="text/css" />
<link rel="index" title="Index" href="../../../genindex.html" />
<link rel="search" title="Search" href="../../../search.html" />
<link href="../../../_static/css/custom.css" rel="stylesheet" type="text/css">
<script src="../../../_static/js/modernizr.min.js"></script>
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search">
<a href="../../../index.html" class="icon icon-home"> Reinforcement Learning Coach
<img src="../../../_static/dark_logo.png" class="logo" alt="Logo"/>
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div>
<div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dashboard.html">Coach Dashboard</a></li>
</ul>
<p class="caption"><span class="caption-text">Design</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../contributing/add_agent.html">Adding a New Agent</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../contributing/add_env.html">Adding a New Environment</a></li>
</ul>
<p class="caption"><span class="caption-text">Components</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/additional_parameters.html">Additional Parameters</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
<nav class="wy-nav-top" aria-label="top navigation">
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../../../index.html">Reinforcement Learning Coach</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="breadcrumbs navigation">
<ul class="wy-breadcrumbs">
<li><a href="../../../index.html">Docs</a> &raquo;</li>
<li><a href="../../index.html">Module code</a> &raquo;</li>
<li>rl_coach.data_stores.s3_data_store</li>
<li class="wy-breadcrumbs-aside">
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<h1>Source code for rl_coach.data_stores.s3_data_store</h1><div class="highlight"><pre>
<span></span><span class="kn">from</span> <span class="nn">rl_coach.data_stores.data_store</span> <span class="k">import</span> <span class="n">DataStore</span><span class="p">,</span> <span class="n">DataStoreParameters</span>
<span class="kn">from</span> <span class="nn">minio</span> <span class="k">import</span> <span class="n">Minio</span>
<span class="kn">from</span> <span class="nn">minio.error</span> <span class="k">import</span> <span class="n">ResponseError</span>
<span class="kn">from</span> <span class="nn">configparser</span> <span class="k">import</span> <span class="n">ConfigParser</span><span class="p">,</span> <span class="n">Error</span>
<span class="kn">from</span> <span class="nn">rl_coach.checkpoint</span> <span class="k">import</span> <span class="n">CheckpointStateFile</span>
<span class="kn">from</span> <span class="nn">rl_coach.data_stores.data_store</span> <span class="k">import</span> <span class="n">SyncFiles</span>
<span class="kn">import</span> <span class="nn">os</span>
<span class="kn">import</span> <span class="nn">time</span>
<span class="kn">import</span> <span class="nn">io</span>
<span class="k">class</span> <span class="nc">S3DataStoreParameters</span><span class="p">(</span><span class="n">DataStoreParameters</span><span class="p">):</span>
<span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">ds_params</span><span class="p">,</span> <span class="n">creds_file</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">end_point</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">bucket_name</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">checkpoint_dir</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">expt_dir</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="kc">None</span><span class="p">):</span>
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">ds_params</span><span class="o">.</span><span class="n">store_type</span><span class="p">,</span> <span class="n">ds_params</span><span class="o">.</span><span class="n">orchestrator_type</span><span class="p">,</span> <span class="n">ds_params</span><span class="o">.</span><span class="n">orchestrator_params</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">creds_file</span> <span class="o">=</span> <span class="n">creds_file</span>
<span class="bp">self</span><span class="o">.</span><span class="n">end_point</span> <span class="o">=</span> <span class="n">end_point</span>
<span class="bp">self</span><span class="o">.</span><span class="n">bucket_name</span> <span class="o">=</span> <span class="n">bucket_name</span>
<span class="bp">self</span><span class="o">.</span><span class="n">checkpoint_dir</span> <span class="o">=</span> <span class="n">checkpoint_dir</span>
<span class="bp">self</span><span class="o">.</span><span class="n">expt_dir</span> <span class="o">=</span> <span class="n">expt_dir</span>
<div class="viewcode-block" id="S3DataStore"><a class="viewcode-back" href="../../../components/data_stores/index.html#rl_coach.data_stores.s3_data_store.S3DataStore">[docs]</a><span class="k">class</span> <span class="nc">S3DataStore</span><span class="p">(</span><span class="n">DataStore</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> An implementation of the data store using S3 for storing policy checkpoints when using Coach in distributed mode.</span>
<span class="sd"> The policy checkpoints are written by the trainer and read by the rollout worker.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">params</span><span class="p">:</span> <span class="n">S3DataStoreParameters</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> :param params: The parameters required to use the S3 data store.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="nb">super</span><span class="p">(</span><span class="n">S3DataStore</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">params</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">params</span> <span class="o">=</span> <span class="n">params</span>
<span class="n">access_key</span> <span class="o">=</span> <span class="kc">None</span>
<span class="n">secret_key</span> <span class="o">=</span> <span class="kc">None</span>
<span class="k">if</span> <span class="n">params</span><span class="o">.</span><span class="n">creds_file</span><span class="p">:</span>
<span class="n">config</span> <span class="o">=</span> <span class="n">ConfigParser</span><span class="p">()</span>
<span class="n">config</span><span class="o">.</span><span class="n">read</span><span class="p">(</span><span class="n">params</span><span class="o">.</span><span class="n">creds_file</span><span class="p">)</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">access_key</span> <span class="o">=</span> <span class="n">config</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">&#39;default&#39;</span><span class="p">,</span> <span class="s1">&#39;aws_access_key_id&#39;</span><span class="p">)</span>
<span class="n">secret_key</span> <span class="o">=</span> <span class="n">config</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">&#39;default&#39;</span><span class="p">,</span> <span class="s1">&#39;aws_secret_access_key&#39;</span><span class="p">)</span>
<span class="k">except</span> <span class="n">Error</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Error when reading S3 credentials file: </span><span class="si">%s</span><span class="s2">&quot;</span><span class="p">,</span> <span class="n">e</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">access_key</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">environ</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">&#39;ACCESS_KEY_ID&#39;</span><span class="p">)</span>
<span class="n">secret_key</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">environ</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">&#39;SECRET_ACCESS_KEY&#39;</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">mc</span> <span class="o">=</span> <span class="n">Minio</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">end_point</span><span class="p">,</span> <span class="n">access_key</span><span class="o">=</span><span class="n">access_key</span><span class="p">,</span> <span class="n">secret_key</span><span class="o">=</span><span class="n">secret_key</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">deploy</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">bool</span><span class="p">:</span>
<span class="k">return</span> <span class="kc">True</span>
<span class="k">def</span> <span class="nf">get_info</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="k">return</span> <span class="s2">&quot;s3://</span><span class="si">{}</span><span class="s2">/</span><span class="si">{}</span><span class="s2">&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">bucket_name</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">undeploy</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">bool</span><span class="p">:</span>
<span class="k">return</span> <span class="kc">True</span>
<span class="k">def</span> <span class="nf">save_to_store</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> save_to_store() uploads the policy checkpoint, gifs and videos to the S3 data store. It reads the checkpoint state files and</span>
<span class="sd"> uploads only the latest checkpoint files to S3. It is used by the trainer in Coach when used in the distributed mode.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">try</span><span class="p">:</span>
<span class="c1"># remove lock file if it exists</span>
<span class="bp">self</span><span class="o">.</span><span class="n">mc</span><span class="o">.</span><span class="n">remove_object</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">bucket_name</span><span class="p">,</span> <span class="n">SyncFiles</span><span class="o">.</span><span class="n">LOCKFILE</span><span class="o">.</span><span class="n">value</span><span class="p">)</span>
<span class="c1"># Acquire lock</span>
<span class="bp">self</span><span class="o">.</span><span class="n">mc</span><span class="o">.</span><span class="n">put_object</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">bucket_name</span><span class="p">,</span> <span class="n">SyncFiles</span><span class="o">.</span><span class="n">LOCKFILE</span><span class="o">.</span><span class="n">value</span><span class="p">,</span> <span class="n">io</span><span class="o">.</span><span class="n">BytesIO</span><span class="p">(</span><span class="sa">b</span><span class="s1">&#39;&#39;</span><span class="p">),</span> <span class="mi">0</span><span class="p">)</span>
<span class="n">state_file</span> <span class="o">=</span> <span class="n">CheckpointStateFile</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">abspath</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">checkpoint_dir</span><span class="p">))</span>
<span class="k">if</span> <span class="n">state_file</span><span class="o">.</span><span class="n">exists</span><span class="p">():</span>
<span class="n">ckpt_state</span> <span class="o">=</span> <span class="n">state_file</span><span class="o">.</span><span class="n">read</span><span class="p">()</span>
<span class="n">checkpoint_file</span> <span class="o">=</span> <span class="kc">None</span>
<span class="k">for</span> <span class="n">root</span><span class="p">,</span> <span class="n">dirs</span><span class="p">,</span> <span class="n">files</span> <span class="ow">in</span> <span class="n">os</span><span class="o">.</span><span class="n">walk</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">checkpoint_dir</span><span class="p">):</span>
<span class="k">for</span> <span class="n">filename</span> <span class="ow">in</span> <span class="n">files</span><span class="p">:</span>
<span class="k">if</span> <span class="n">filename</span> <span class="o">==</span> <span class="n">CheckpointStateFile</span><span class="o">.</span><span class="n">checkpoint_state_filename</span><span class="p">:</span>
<span class="n">checkpoint_file</span> <span class="o">=</span> <span class="p">(</span><span class="n">root</span><span class="p">,</span> <span class="n">filename</span><span class="p">)</span>
<span class="k">continue</span>
<span class="k">if</span> <span class="n">filename</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="n">ckpt_state</span><span class="o">.</span><span class="n">name</span><span class="p">):</span>
<span class="n">abs_name</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">abspath</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">root</span><span class="p">,</span> <span class="n">filename</span><span class="p">))</span>
<span class="n">rel_name</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">relpath</span><span class="p">(</span><span class="n">abs_name</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">checkpoint_dir</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">mc</span><span class="o">.</span><span class="n">fput_object</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">bucket_name</span><span class="p">,</span> <span class="n">rel_name</span><span class="p">,</span> <span class="n">abs_name</span><span class="p">)</span>
<span class="n">abs_name</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">abspath</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">checkpoint_file</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">checkpoint_file</span><span class="p">[</span><span class="mi">1</span><span class="p">]))</span>
<span class="n">rel_name</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">relpath</span><span class="p">(</span><span class="n">abs_name</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">checkpoint_dir</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">mc</span><span class="o">.</span><span class="n">fput_object</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">bucket_name</span><span class="p">,</span> <span class="n">rel_name</span><span class="p">,</span> <span class="n">abs_name</span><span class="p">)</span>
<span class="c1"># release lock</span>
<span class="bp">self</span><span class="o">.</span><span class="n">mc</span><span class="o">.</span><span class="n">remove_object</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">bucket_name</span><span class="p">,</span> <span class="n">SyncFiles</span><span class="o">.</span><span class="n">LOCKFILE</span><span class="o">.</span><span class="n">value</span><span class="p">)</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">expt_dir</span> <span class="ow">and</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">exists</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">expt_dir</span><span class="p">):</span>
<span class="k">for</span> <span class="n">filename</span> <span class="ow">in</span> <span class="n">os</span><span class="o">.</span><span class="n">listdir</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">expt_dir</span><span class="p">):</span>
<span class="k">if</span> <span class="n">filename</span><span class="o">.</span><span class="n">endswith</span><span class="p">((</span><span class="s2">&quot;.csv&quot;</span><span class="p">,</span> <span class="s2">&quot;.json&quot;</span><span class="p">)):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">mc</span><span class="o">.</span><span class="n">fput_object</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">bucket_name</span><span class="p">,</span> <span class="n">filename</span><span class="p">,</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">expt_dir</span><span class="p">,</span> <span class="n">filename</span><span class="p">))</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">expt_dir</span> <span class="ow">and</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">exists</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">expt_dir</span><span class="p">,</span> <span class="s1">&#39;videos&#39;</span><span class="p">)):</span>
<span class="k">for</span> <span class="n">filename</span> <span class="ow">in</span> <span class="n">os</span><span class="o">.</span><span class="n">listdir</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">expt_dir</span><span class="p">,</span> <span class="s1">&#39;videos&#39;</span><span class="p">)):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">mc</span><span class="o">.</span><span class="n">fput_object</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">bucket_name</span><span class="p">,</span> <span class="n">filename</span><span class="p">,</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">expt_dir</span><span class="p">,</span> <span class="s1">&#39;videos&#39;</span><span class="p">,</span> <span class="n">filename</span><span class="p">))</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">expt_dir</span> <span class="ow">and</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">exists</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">expt_dir</span><span class="p">,</span> <span class="s1">&#39;gifs&#39;</span><span class="p">)):</span>
<span class="k">for</span> <span class="n">filename</span> <span class="ow">in</span> <span class="n">os</span><span class="o">.</span><span class="n">listdir</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">expt_dir</span><span class="p">,</span> <span class="s1">&#39;gifs&#39;</span><span class="p">)):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">mc</span><span class="o">.</span><span class="n">fput_object</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">bucket_name</span><span class="p">,</span> <span class="n">filename</span><span class="p">,</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">expt_dir</span><span class="p">,</span> <span class="s1">&#39;gifs&#39;</span><span class="p">,</span> <span class="n">filename</span><span class="p">))</span>
<span class="k">except</span> <span class="n">ResponseError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Got exception: </span><span class="si">%s</span><span class="se">\n</span><span class="s2"> while saving to S3&quot;</span><span class="p">,</span> <span class="n">e</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">load_from_store</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> load_from_store() downloads a new checkpoint from the S3 data store when it is not available locally. It is used</span>
<span class="sd"> by the rollout workers when using Coach in distributed mode.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">state_file</span> <span class="o">=</span> <span class="n">CheckpointStateFile</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">abspath</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">checkpoint_dir</span><span class="p">))</span>
<span class="c1"># wait until lock is removed</span>
<span class="k">while</span> <span class="kc">True</span><span class="p">:</span>
<span class="n">objects</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">mc</span><span class="o">.</span><span class="n">list_objects_v2</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">bucket_name</span><span class="p">,</span> <span class="n">SyncFiles</span><span class="o">.</span><span class="n">LOCKFILE</span><span class="o">.</span><span class="n">value</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">next</span><span class="p">(</span><span class="n">objects</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">try</span><span class="p">:</span>
<span class="c1"># fetch checkpoint state file from S3</span>
<span class="bp">self</span><span class="o">.</span><span class="n">mc</span><span class="o">.</span><span class="n">fget_object</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">bucket_name</span><span class="p">,</span> <span class="n">state_file</span><span class="o">.</span><span class="n">filename</span><span class="p">,</span> <span class="n">state_file</span><span class="o">.</span><span class="n">path</span><span class="p">)</span>
<span class="k">except</span> <span class="ne">Exception</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<span class="k">continue</span>
<span class="k">break</span>
<span class="n">time</span><span class="o">.</span><span class="n">sleep</span><span class="p">(</span><span class="mi">10</span><span class="p">)</span>
<span class="c1"># Check if there&#39;s a finished file</span>
<span class="n">objects</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">mc</span><span class="o">.</span><span class="n">list_objects_v2</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">bucket_name</span><span class="p">,</span> <span class="n">SyncFiles</span><span class="o">.</span><span class="n">FINISHED</span><span class="o">.</span><span class="n">value</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">next</span><span class="p">(</span><span class="n">objects</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">try</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">mc</span><span class="o">.</span><span class="n">fget_object</span><span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">bucket_name</span><span class="p">,</span> <span class="n">SyncFiles</span><span class="o">.</span><span class="n">FINISHED</span><span class="o">.</span><span class="n">value</span><span class="p">,</span>
<span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">abspath</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">checkpoint_dir</span><span class="p">,</span> <span class="n">SyncFiles</span><span class="o">.</span><span class="n">FINISHED</span><span class="o">.</span><span class="n">value</span><span class="p">))</span>
<span class="p">)</span>
<span class="k">except</span> <span class="ne">Exception</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<span class="k">pass</span>
<span class="n">checkpoint_state</span> <span class="o">=</span> <span class="n">state_file</span><span class="o">.</span><span class="n">read</span><span class="p">()</span>
<span class="k">if</span> <span class="n">checkpoint_state</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">objects</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">mc</span><span class="o">.</span><span class="n">list_objects_v2</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">bucket_name</span><span class="p">,</span> <span class="n">prefix</span><span class="o">=</span><span class="n">checkpoint_state</span><span class="o">.</span><span class="n">name</span><span class="p">,</span> <span class="n">recursive</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="k">for</span> <span class="n">obj</span> <span class="ow">in</span> <span class="n">objects</span><span class="p">:</span>
<span class="n">filename</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">abspath</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">checkpoint_dir</span><span class="p">,</span> <span class="n">obj</span><span class="o">.</span><span class="n">object_name</span><span class="p">))</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">exists</span><span class="p">(</span><span class="n">filename</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">mc</span><span class="o">.</span><span class="n">fget_object</span><span class="p">(</span><span class="n">obj</span><span class="o">.</span><span class="n">bucket_name</span><span class="p">,</span> <span class="n">obj</span><span class="o">.</span><span class="n">object_name</span><span class="p">,</span> <span class="n">filename</span><span class="p">)</span>
<span class="k">except</span> <span class="n">ResponseError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Got exception: </span><span class="si">%s</span><span class="se">\n</span><span class="s2"> while loading from S3&quot;</span><span class="p">,</span> <span class="n">e</span><span class="p">)</span></div>
</pre></div>
</div>
</div>
<footer>
<hr/>
<div role="contentinfo">
<p>
&copy; Copyright 2018, Intel AI Lab
</p>
</div>
Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script type="text/javascript" id="documentation_options" data-url_root="../../../" src="../../../_static/documentation_options.js"></script>
<script type="text/javascript" src="../../../_static/jquery.js"></script>
<script type="text/javascript" src="../../../_static/underscore.js"></script>
<script type="text/javascript" src="../../../_static/doctools.js"></script>
<script async="async" type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.1/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
<script type="text/javascript" src="../../../_static/js/theme.js"></script>
<script type="text/javascript">
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/additional_parameters.html">Additional Parameters</a></li>
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/additional_parameters.html">Additional Parameters</a></li>
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/additional_parameters.html">Additional Parameters</a></li>
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/additional_parameters.html">Additional Parameters</a></li>
@@ -666,7 +671,15 @@
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">transpose</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">state</span><span class="p">[</span><span class="s1">&#39;observation&#39;</span><span class="p">],</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">0</span><span class="p">])</span></div>
<span class="k">def</span> <span class="nf">get_target_success_rate</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">float</span><span class="p">:</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">target_success_rate</span></div>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">target_success_rate</span>
<div class="viewcode-block" id="Environment.close"><a class="viewcode-back" href="../../../components/environments/index.html#rl_coach.environments.environment.Environment.close">[docs]</a> <span class="k">def</span> <span class="nf">close</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Clean up steps.</span>
<span class="sd"> :return: None</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">pass</span></div></div>
</pre></div>
</div>
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/additional_parameters.html">Additional Parameters</a></li>
@@ -191,6 +196,7 @@
<span class="kn">import</span> <span class="nn">gym</span>
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
<span class="kn">from</span> <span class="nn">enum</span> <span class="k">import</span> <span class="n">IntEnum</span>
<span class="kn">import</span> <span class="nn">scipy.ndimage</span>
<span class="kn">from</span> <span class="nn">rl_coach.graph_managers.graph_manager</span> <span class="k">import</span> <span class="n">ScheduleParameters</span>
@@ -219,7 +225,7 @@
<span class="kn">from</span> <span class="nn">rl_coach.core_types</span> <span class="k">import</span> <span class="n">RunPhase</span><span class="p">,</span> <span class="n">EnvironmentSteps</span>
<span class="kn">from</span> <span class="nn">rl_coach.environments.environment</span> <span class="k">import</span> <span class="n">Environment</span><span class="p">,</span> <span class="n">EnvironmentParameters</span><span class="p">,</span> <span class="n">LevelSelection</span>
<span class="kn">from</span> <span class="nn">rl_coach.spaces</span> <span class="k">import</span> <span class="n">DiscreteActionSpace</span><span class="p">,</span> <span class="n">BoxActionSpace</span><span class="p">,</span> <span class="n">ImageObservationSpace</span><span class="p">,</span> <span class="n">VectorObservationSpace</span><span class="p">,</span> \
<span class="n">StateSpace</span><span class="p">,</span> <span class="n">RewardSpace</span>
<span class="n">PlanarMapsObservationSpace</span><span class="p">,</span> <span class="n">TensorObservationSpace</span><span class="p">,</span> <span class="n">StateSpace</span><span class="p">,</span> <span class="n">RewardSpace</span>
<span class="kn">from</span> <span class="nn">rl_coach.filters.filter</span> <span class="k">import</span> <span class="n">NoInputFilter</span><span class="p">,</span> <span class="n">NoOutputFilter</span>
<span class="kn">from</span> <span class="nn">rl_coach.filters.reward.reward_clipping_filter</span> <span class="k">import</span> <span class="n">RewardClippingFilter</span>
<span class="kn">from</span> <span class="nn">rl_coach.filters.observation.observation_rescale_to_size_filter</span> <span class="k">import</span> <span class="n">ObservationRescaleToSizeFilter</span>
@@ -239,6 +245,7 @@
<span class="bp">self</span><span class="o">.</span><span class="n">random_initialization_steps</span> <span class="o">=</span> <span class="mi">0</span>
<span class="bp">self</span><span class="o">.</span><span class="n">max_over_num_frames</span> <span class="o">=</span> <span class="mi">1</span>
<span class="bp">self</span><span class="o">.</span><span class="n">additional_simulator_parameters</span> <span class="o">=</span> <span class="p">{}</span>
<span class="bp">self</span><span class="o">.</span><span class="n">observation_space_type</span> <span class="o">=</span> <span class="kc">None</span>
<span class="nd">@property</span>
<span class="k">def</span> <span class="nf">path</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
@@ -258,7 +265,7 @@
<span class="n">gym_roboschool_envs</span> <span class="o">=</span> <span class="p">[</span><span class="s1">&#39;inverted_pendulum&#39;</span><span class="p">,</span> <span class="s1">&#39;inverted_pendulum_swingup&#39;</span><span class="p">,</span> <span class="s1">&#39;inverted_double_pendulum&#39;</span><span class="p">,</span> <span class="s1">&#39;reacher&#39;</span><span class="p">,</span>
<span class="s1">&#39;hopper&#39;</span><span class="p">,</span> <span class="s1">&#39;walker2d&#39;</span><span class="p">,</span> <span class="s1">&#39;half_cheetah&#39;</span><span class="p">,</span> <span class="s1">&#39;ant&#39;</span><span class="p">,</span> <span class="s1">&#39;humanoid&#39;</span><span class="p">,</span> <span class="s1">&#39;humanoid_flagrun&#39;</span><span class="p">,</span>
<span class="s1">&#39;humanoid_flagrun_harder&#39;</span><span class="p">,</span> <span class="s1">&#39;pong&#39;</span><span class="p">]</span>
<span class="n">roboschool_v0</span> <span class="o">=</span> <span class="p">{</span><span class="n">e</span><span class="p">:</span> <span class="s2">&quot;</span><span class="si">{}</span><span class="s2">&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">lower_under_to_upper</span><span class="p">(</span><span class="n">e</span><span class="p">)</span> <span class="o">+</span> <span class="s1">&#39;-v0&#39;</span><span class="p">)</span> <span class="k">for</span> <span class="n">e</span> <span class="ow">in</span> <span class="n">gym_roboschool_envs</span><span class="p">}</span>
<span class="n">roboschool_v1</span> <span class="o">=</span> <span class="p">{</span><span class="n">e</span><span class="p">:</span> <span class="s2">&quot;Roboschool</span><span class="si">{}</span><span class="s2">&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">lower_under_to_upper</span><span class="p">(</span><span class="n">e</span><span class="p">)</span> <span class="o">+</span> <span class="s1">&#39;-v1&#39;</span><span class="p">)</span> <span class="k">for</span> <span class="n">e</span> <span class="ow">in</span> <span class="n">gym_roboschool_envs</span><span class="p">}</span>
<span class="c1"># Mujoco</span>
<span class="n">gym_mujoco_envs</span> <span class="o">=</span> <span class="p">[</span><span class="s1">&#39;inverted_pendulum&#39;</span><span class="p">,</span> <span class="s1">&#39;inverted_double_pendulum&#39;</span><span class="p">,</span> <span class="s1">&#39;reacher&#39;</span><span class="p">,</span> <span class="s1">&#39;hopper&#39;</span><span class="p">,</span> <span class="s1">&#39;walker2d&#39;</span><span class="p">,</span> <span class="s1">&#39;half_cheetah&#39;</span><span class="p">,</span>
@@ -351,11 +358,26 @@
<span class="c1"># Environment</span>
<span class="k">class</span> <span class="nc">ObservationSpaceType</span><span class="p">(</span><span class="n">IntEnum</span><span class="p">):</span>
<span class="n">Tensor</span> <span class="o">=</span> <span class="mi">0</span>
<span class="n">Image</span> <span class="o">=</span> <span class="mi">1</span>
<span class="n">Vector</span> <span class="o">=</span> <span class="mi">2</span>
<div class="viewcode-block" id="GymEnvironment"><a class="viewcode-back" href="../../../components/environments/index.html#rl_coach.environments.gym_environment.GymEnvironment">[docs]</a><span class="k">class</span> <span class="nc">GymEnvironment</span><span class="p">(</span><span class="n">Environment</span><span class="p">):</span>
<span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">level</span><span class="p">:</span> <span class="n">LevelSelection</span><span class="p">,</span> <span class="n">frame_skip</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span> <span class="n">visualization_parameters</span><span class="p">:</span> <span class="n">VisualizationParameters</span><span class="p">,</span>
<span class="n">target_success_rate</span><span class="p">:</span> <span class="nb">float</span><span class="o">=</span><span class="mf">1.0</span><span class="p">,</span> <span class="n">additional_simulator_parameters</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> <span class="o">=</span> <span class="p">{},</span> <span class="n">seed</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="kc">None</span><span class="p">,</span> <span class="nb">int</span><span class="p">]</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="n">human_control</span><span class="p">:</span> <span class="nb">bool</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">custom_reward_threshold</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">float</span><span class="p">]</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="n">random_initialization_steps</span><span class="p">:</span> <span class="nb">int</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">max_over_num_frames</span><span class="p">:</span> <span class="nb">int</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
<span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span>
<span class="n">level</span><span class="p">:</span> <span class="n">LevelSelection</span><span class="p">,</span>
<span class="n">frame_skip</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span>
<span class="n">visualization_parameters</span><span class="p">:</span> <span class="n">VisualizationParameters</span><span class="p">,</span>
<span class="n">target_success_rate</span><span class="p">:</span> <span class="nb">float</span><span class="o">=</span><span class="mf">1.0</span><span class="p">,</span>
<span class="n">additional_simulator_parameters</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> <span class="o">=</span> <span class="p">{},</span>
<span class="n">seed</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="kc">None</span><span class="p">,</span> <span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">human_control</span><span class="p">:</span> <span class="nb">bool</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span>
<span class="n">custom_reward_threshold</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">float</span><span class="p">]</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="n">random_initialization_steps</span><span class="p">:</span> <span class="nb">int</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
<span class="n">max_over_num_frames</span><span class="p">:</span> <span class="nb">int</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
<span class="n">observation_space_type</span><span class="p">:</span> <span class="n">ObservationSpaceType</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> :param level: (str)</span>
<span class="sd"> A string representing the gym level to run. This can also be a LevelSelection object.</span>
@@ -390,6 +412,11 @@
<span class="sd"> This value will be used for merging multiple frames into a single frame by taking the maximum value for each</span>
<span class="sd"> of the pixels in the frame. This is particularly used in Atari games, where the frames flicker, and objects</span>
<span class="sd"> can be seen in one frame but disappear in the next.</span>
<span class="sd"> :param observation_space_type:</span>
<span class="sd"> This value will be used for generating observation space. Allows a custom space. Should be one of</span>
<span class="sd"> ObservationSpaceType. If not specified, observation space is inferred from the number of dimensions</span>
<span class="sd"> of the observation: 1D: Vector space, 3D: Image space if 1 or 3 channels, PlanarMaps space otherwise.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">level</span><span class="p">,</span> <span class="n">seed</span><span class="p">,</span> <span class="n">frame_skip</span><span class="p">,</span> <span class="n">human_control</span><span class="p">,</span> <span class="n">custom_reward_threshold</span><span class="p">,</span>
<span class="n">visualization_parameters</span><span class="p">,</span> <span class="n">target_success_rate</span><span class="p">)</span>
@@ -455,6 +482,7 @@
<span class="c1"># frame skip and max between consecutive frames</span>
<span class="bp">self</span><span class="o">.</span><span class="n">is_robotics_env</span> <span class="o">=</span> <span class="s1">&#39;robotics&#39;</span> <span class="ow">in</span> <span class="nb">str</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">env</span><span class="o">.</span><span class="n">unwrapped</span><span class="o">.</span><span class="vm">__class__</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">is_mujoco_env</span> <span class="o">=</span> <span class="s1">&#39;mujoco&#39;</span> <span class="ow">in</span> <span class="nb">str</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">env</span><span class="o">.</span><span class="n">unwrapped</span><span class="o">.</span><span class="vm">__class__</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">is_roboschool_env</span> <span class="o">=</span> <span class="s1">&#39;roboschool&#39;</span> <span class="ow">in</span> <span class="nb">str</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">env</span><span class="o">.</span><span class="n">unwrapped</span><span class="o">.</span><span class="vm">__class__</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">is_atari_env</span> <span class="o">=</span> <span class="s1">&#39;Atari&#39;</span> <span class="ow">in</span> <span class="nb">str</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">env</span><span class="o">.</span><span class="n">unwrapped</span><span class="o">.</span><span class="vm">__class__</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">timelimit_env_wrapper</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">env</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">is_atari_env</span><span class="p">:</span>
@@ -479,20 +507,40 @@
<span class="n">state_space</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">env</span><span class="o">.</span><span class="n">observation_space</span><span class="o">.</span><span class="n">spaces</span>
<span class="k">for</span> <span class="n">observation_space_name</span><span class="p">,</span> <span class="n">observation_space</span> <span class="ow">in</span> <span class="n">state_space</span><span class="o">.</span><span class="n">items</span><span class="p">():</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">observation_space</span><span class="o">.</span><span class="n">shape</span><span class="p">)</span> <span class="o">==</span> <span class="mi">3</span><span class="p">:</span>
<span class="k">if</span> <span class="n">observation_space_type</span> <span class="o">==</span> <span class="n">ObservationSpaceType</span><span class="o">.</span><span class="n">Tensor</span><span class="p">:</span>
<span class="c1"># we consider arbitrary input tensor which does not necessarily represent images</span>
<span class="bp">self</span><span class="o">.</span><span class="n">state_space</span><span class="p">[</span><span class="n">observation_space_name</span><span class="p">]</span> <span class="o">=</span> <span class="n">TensorObservationSpace</span><span class="p">(</span>
<span class="n">shape</span><span class="o">=</span><span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">(</span><span class="n">observation_space</span><span class="o">.</span><span class="n">shape</span><span class="p">),</span>
<span class="n">low</span><span class="o">=</span><span class="n">observation_space</span><span class="o">.</span><span class="n">low</span><span class="p">,</span>
<span class="n">high</span><span class="o">=</span><span class="n">observation_space</span><span class="o">.</span><span class="n">high</span>
<span class="p">)</span>
<span class="k">elif</span> <span class="n">observation_space_type</span> <span class="o">==</span> <span class="n">ObservationSpaceType</span><span class="o">.</span><span class="n">Image</span> <span class="ow">or</span> <span class="nb">len</span><span class="p">(</span><span class="n">observation_space</span><span class="o">.</span><span class="n">shape</span><span class="p">)</span> <span class="o">==</span> <span class="mi">3</span><span class="p">:</span>
<span class="c1"># we assume gym has image observations (with arbitrary number of channels) where their values are</span>
<span class="c1"># within 0-255, and where the channel dimension is the last dimension</span>
<span class="k">if</span> <span class="n">observation_space</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span> <span class="ow">in</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">3</span><span class="p">]:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">state_space</span><span class="p">[</span><span class="n">observation_space_name</span><span class="p">]</span> <span class="o">=</span> <span class="n">ImageObservationSpace</span><span class="p">(</span>
<span class="n">shape</span><span class="o">=</span><span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">(</span><span class="n">observation_space</span><span class="o">.</span><span class="n">shape</span><span class="p">),</span>
<span class="n">high</span><span class="o">=</span><span class="mi">255</span><span class="p">,</span>
<span class="n">channels_axis</span><span class="o">=-</span><span class="mi">1</span>
<span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="c1"># For any number of channels other than 1 or 3, use the generic PlanarMaps space</span>
<span class="bp">self</span><span class="o">.</span><span class="n">state_space</span><span class="p">[</span><span class="n">observation_space_name</span><span class="p">]</span> <span class="o">=</span> <span class="n">PlanarMapsObservationSpace</span><span class="p">(</span>
<span class="n">shape</span><span class="o">=</span><span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">(</span><span class="n">observation_space</span><span class="o">.</span><span class="n">shape</span><span class="p">),</span>
<span class="n">low</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span>
<span class="n">high</span><span class="o">=</span><span class="mi">255</span><span class="p">,</span>
<span class="n">channels_axis</span><span class="o">=-</span><span class="mi">1</span>
<span class="p">)</span>
<span class="k">elif</span> <span class="n">observation_space_type</span> <span class="o">==</span> <span class="n">ObservationSpaceType</span><span class="o">.</span><span class="n">Vector</span> <span class="ow">or</span> <span class="nb">len</span><span class="p">(</span><span class="n">observation_space</span><span class="o">.</span><span class="n">shape</span><span class="p">)</span> <span class="o">==</span> <span class="mi">1</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">state_space</span><span class="p">[</span><span class="n">observation_space_name</span><span class="p">]</span> <span class="o">=</span> <span class="n">VectorObservationSpace</span><span class="p">(</span>
<span class="n">shape</span><span class="o">=</span><span class="n">observation_space</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span>
<span class="n">low</span><span class="o">=</span><span class="n">observation_space</span><span class="o">.</span><span class="n">low</span><span class="p">,</span>
<span class="n">high</span><span class="o">=</span><span class="n">observation_space</span><span class="o">.</span><span class="n">high</span>
<span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">raise</span> <span class="n">screen</span><span class="o">.</span><span class="n">error</span><span class="p">(</span><span class="s2">&quot;Failed to instantiate Gym environment class </span><span class="si">%s</span><span class="s2"> with observation space type </span><span class="si">%s</span><span class="s2">&quot;</span> <span class="o">%</span>
<span class="p">(</span><span class="n">env_class</span><span class="p">,</span> <span class="n">observation_space_type</span><span class="p">),</span> <span class="n">crash</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="k">if</span> <span class="s1">&#39;desired_goal&#39;</span> <span class="ow">in</span> <span class="n">state_space</span><span class="o">.</span><span class="n">keys</span><span class="p">():</span>
<span class="bp">self</span><span class="o">.</span><span class="n">goal_space</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">state_space</span><span class="p">[</span><span class="s1">&#39;desired_goal&#39;</span><span class="p">]</span>
@@ -618,8 +666,7 @@
<span class="sd"> :param camera_idx: The index of the camera to use. Should be defined in the model</span>
<span class="sd"> :return: None</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">env</span><span class="o">.</span><span class="n">unwrapped</span><span class="o">.</span><span class="n">viewer</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">env</span><span class="o">.</span><span class="n">unwrapped</span><span class="o">.</span><span class="n">viewer</span><span class="o">.</span><span class="n">cam</span><span class="o">.</span><span class="n">fixedcamid</span> <span class="o">!=</span> <span class="n">camera_idx</span> <span class="ow">and</span>\
<span class="bp">self</span><span class="o">.</span><span class="n">env</span><span class="o">.</span><span class="n">unwrapped</span><span class="o">.</span><span class="n">viewer</span><span class="o">.</span><span class="n">_ncam</span> <span class="o">&gt;</span> <span class="n">camera_idx</span><span class="p">:</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">env</span><span class="o">.</span><span class="n">unwrapped</span><span class="o">.</span><span class="n">viewer</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">env</span><span class="o">.</span><span class="n">unwrapped</span><span class="o">.</span><span class="n">viewer</span><span class="o">.</span><span class="n">cam</span><span class="o">.</span><span class="n">fixedcamid</span> <span class="o">!=</span> <span class="n">camera_idx</span><span class="p">:</span>
<span class="kn">from</span> <span class="nn">mujoco_py.generated</span> <span class="k">import</span> <span class="n">const</span>
<span class="bp">self</span><span class="o">.</span><span class="n">env</span><span class="o">.</span><span class="n">unwrapped</span><span class="o">.</span><span class="n">viewer</span><span class="o">.</span><span class="n">cam</span><span class="o">.</span><span class="n">type</span> <span class="o">=</span> <span class="n">const</span><span class="o">.</span><span class="n">CAMERA_FIXED</span>
<span class="bp">self</span><span class="o">.</span><span class="n">env</span><span class="o">.</span><span class="n">unwrapped</span><span class="o">.</span><span class="n">viewer</span><span class="o">.</span><span class="n">cam</span><span class="o">.</span><span class="n">fixedcamid</span> <span class="o">=</span> <span class="n">camera_idx</span>
@@ -633,7 +680,7 @@
<span class="k">def</span> <span class="nf">_render</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">(</span><span class="n">mode</span><span class="o">=</span><span class="s1">&#39;human&#39;</span><span class="p">)</span>
<span class="c1"># required for setting up a fixed camera for mujoco</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">is_mujoco_env</span><span class="p">:</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">is_mujoco_env</span> <span class="ow">and</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">is_roboschool_env</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_set_mujoco_camera</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">get_rendered_image</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
@@ -643,12 +690,20 @@
<span class="k">else</span><span class="p">:</span>
<span class="n">image</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">(</span><span class="n">mode</span><span class="o">=</span><span class="s1">&#39;rgb_array&#39;</span><span class="p">)</span>
<span class="c1"># required for setting up a fixed camera for mujoco</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">is_mujoco_env</span><span class="p">:</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">is_mujoco_env</span> <span class="ow">and</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">is_roboschool_env</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_set_mujoco_camera</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span>
<span class="k">return</span> <span class="n">image</span>
<span class="k">def</span> <span class="nf">get_target_success_rate</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">float</span><span class="p">:</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">target_success_rate</span></div>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">target_success_rate</span>
<span class="k">def</span> <span class="nf">close</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Clean up to close rendering windows.</span>
<span class="sd"> :return: None</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span></div>
</pre></div>
</div>
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/additional_parameters.html">Additional Parameters</a></li>
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/additional_parameters.html">Additional Parameters</a></li>
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/additional_parameters.html">Additional Parameters</a></li>
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/additional_parameters.html">Additional Parameters</a></li>
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/additional_parameters.html">Additional Parameters</a></li>
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/additional_parameters.html">Additional Parameters</a></li>
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/additional_parameters.html">Additional Parameters</a></li>
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/additional_parameters.html">Additional Parameters</a></li>
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/additional_parameters.html">Additional Parameters</a></li>
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/additional_parameters.html">Additional Parameters</a></li>
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/additional_parameters.html">Additional Parameters</a></li>
@@ -194,7 +199,7 @@
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
<span class="kn">from</span> <span class="nn">rl_coach.agents.dqn_agent</span> <span class="k">import</span> <span class="n">DQNAgentParameters</span>
<span class="kn">from</span> <span class="nn">rl_coach.architectures.tensorflow_components.layers</span> <span class="k">import</span> <span class="n">NoisyNetDense</span>
<span class="kn">from</span> <span class="nn">rl_coach.architectures.layers</span> <span class="k">import</span> <span class="n">NoisyNetDense</span>
<span class="kn">from</span> <span class="nn">rl_coach.base_parameters</span> <span class="k">import</span> <span class="n">AgentParameters</span><span class="p">,</span> <span class="n">NetworkParameters</span>
<span class="kn">from</span> <span class="nn">rl_coach.spaces</span> <span class="k">import</span> <span class="n">ActionSpace</span><span class="p">,</span> <span class="n">BoxActionSpace</span><span class="p">,</span> <span class="n">DiscreteActionSpace</span>
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/additional_parameters.html">Additional Parameters</a></li>
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/additional_parameters.html">Additional Parameters</a></li>
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/additional_parameters.html">Additional Parameters</a></li>
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/additional_parameters.html">Additional Parameters</a></li>
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/additional_parameters.html">Additional Parameters</a></li>
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/additional_parameters.html">Additional Parameters</a></li>
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/additional_parameters.html">Additional Parameters</a></li>
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/additional_parameters.html">Additional Parameters</a></li>
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/additional_parameters.html">Additional Parameters</a></li>
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/additional_parameters.html">Additional Parameters</a></li>
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/additional_parameters.html">Additional Parameters</a></li>
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/additional_parameters.html">Additional Parameters</a></li>
@@ -188,14 +193,16 @@
<span class="c1"># See the License for the specific language governing permissions and</span>
<span class="c1"># limitations under the License.</span>
<span class="c1">#</span>
<span class="kn">import</span> <span class="nn">os</span>
<span class="kn">import</span> <span class="nn">pickle</span>
<span class="kn">from</span> <span class="nn">typing</span> <span class="k">import</span> <span class="n">List</span>
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
<span class="kn">from</span> <span class="nn">rl_coach.architectures.tensorflow_components.shared_variables</span> <span class="k">import</span> <span class="n">SharedRunningStats</span>
<span class="kn">from</span> <span class="nn">rl_coach.core_types</span> <span class="k">import</span> <span class="n">ObservationType</span>
<span class="kn">from</span> <span class="nn">rl_coach.filters.observation.observation_filter</span> <span class="k">import</span> <span class="n">ObservationFilter</span>
<span class="kn">from</span> <span class="nn">rl_coach.spaces</span> <span class="k">import</span> <span class="n">ObservationSpace</span>
<span class="kn">from</span> <span class="nn">rl_coach.utilities.shared_running_stats</span> <span class="k">import</span> <span class="n">NumpySharedRunningStats</span><span class="p">,</span> <span class="n">NumpySharedRunningStats</span>
<div class="viewcode-block" id="ObservationNormalizationFilter"><a class="viewcode-back" href="../../../../components/filters/input_filters.html#rl_coach.filters.observation.ObservationNormalizationFilter">[docs]</a><span class="k">class</span> <span class="nc">ObservationNormalizationFilter</span><span class="p">(</span><span class="n">ObservationFilter</span><span class="p">):</span>
@@ -217,13 +224,20 @@
<span class="bp">self</span><span class="o">.</span><span class="n">supports_batching</span> <span class="o">=</span> <span class="kc">True</span>
<span class="bp">self</span><span class="o">.</span><span class="n">observation_space</span> <span class="o">=</span> <span class="kc">None</span>
<span class="k">def</span> <span class="nf">set_device</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">device</span><span class="p">,</span> <span class="n">memory_backend_params</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">def</span> <span class="nf">set_device</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">device</span><span class="p">,</span> <span class="n">memory_backend_params</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">mode</span><span class="o">=</span><span class="s1">&#39;numpy&#39;</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> An optional function that allows the filter to get the device if it is required to use tensorflow ops</span>
<span class="sd"> :param device: the device to use</span>
<span class="sd"> :memory_backend_params: if not None, holds params for a memory backend for sharing data (e.g. Redis)</span>
<span class="sd"> :param mode: the arithmetic module to use {&#39;tf&#39; | &#39;numpy&#39;}</span>
<span class="sd"> :return: None</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">running_observation_stats</span> <span class="o">=</span> <span class="n">SharedRunningStats</span><span class="p">(</span><span class="n">device</span><span class="p">,</span> <span class="n">name</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">,</span> <span class="n">create_ops</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span>
<span class="k">if</span> <span class="n">mode</span> <span class="o">==</span> <span class="s1">&#39;tf&#39;</span><span class="p">:</span>
<span class="kn">from</span> <span class="nn">rl_coach.architectures.tensorflow_components.shared_variables</span> <span class="k">import</span> <span class="n">TFSharedRunningStats</span>
<span class="bp">self</span><span class="o">.</span><span class="n">running_observation_stats</span> <span class="o">=</span> <span class="n">TFSharedRunningStats</span><span class="p">(</span><span class="n">device</span><span class="p">,</span> <span class="n">name</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">,</span> <span class="n">create_ops</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span>
<span class="n">pubsub_params</span><span class="o">=</span><span class="n">memory_backend_params</span><span class="p">)</span>
<span class="k">elif</span> <span class="n">mode</span> <span class="o">==</span> <span class="s1">&#39;numpy&#39;</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">running_observation_stats</span> <span class="o">=</span> <span class="n">NumpySharedRunningStats</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">,</span>
<span class="n">pubsub_params</span><span class="o">=</span><span class="n">memory_backend_params</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">set_session</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">sess</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
@@ -241,13 +255,18 @@
<span class="bp">self</span><span class="o">.</span><span class="n">last_mean</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">running_observation_stats</span><span class="o">.</span><span class="n">mean</span>
<span class="bp">self</span><span class="o">.</span><span class="n">last_stdev</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">running_observation_stats</span><span class="o">.</span><span class="n">std</span>
<span class="c1"># TODO: make sure that a batch is given here</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">running_observation_stats</span><span class="o">.</span><span class="n">normalize</span><span class="p">(</span><span class="n">observations</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">get_filtered_observation_space</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">input_observation_space</span><span class="p">:</span> <span class="n">ObservationSpace</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">ObservationSpace</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">running_observation_stats</span><span class="o">.</span><span class="n">create_ops</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="n">input_observation_space</span><span class="o">.</span><span class="n">shape</span><span class="p">,</span>
<span class="bp">self</span><span class="o">.</span><span class="n">running_observation_stats</span><span class="o">.</span><span class="n">set_params</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="n">input_observation_space</span><span class="o">.</span><span class="n">shape</span><span class="p">,</span>
<span class="n">clip_values</span><span class="o">=</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">clip_min</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">clip_max</span><span class="p">))</span>
<span class="k">return</span> <span class="n">input_observation_space</span></div>
<span class="k">return</span> <span class="n">input_observation_space</span>
<span class="k">def</span> <span class="nf">save_state_to_checkpoint</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">checkpoint_dir</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">checkpoint_prefix</span><span class="p">:</span> <span class="nb">str</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">running_observation_stats</span><span class="o">.</span><span class="n">save_state_to_checkpoint</span><span class="p">(</span><span class="n">checkpoint_dir</span><span class="p">,</span> <span class="n">checkpoint_prefix</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">restore_state_from_checkpoint</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">checkpoint_dir</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">checkpoint_prefix</span><span class="p">:</span> <span class="nb">str</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">running_observation_stats</span><span class="o">.</span><span class="n">restore_state_from_checkpoint</span><span class="p">(</span><span class="n">checkpoint_dir</span><span class="p">,</span> <span class="n">checkpoint_prefix</span><span class="p">)</span></div>
</pre></div>
</div>
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/additional_parameters.html">Additional Parameters</a></li>
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/additional_parameters.html">Additional Parameters</a></li>
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/additional_parameters.html">Additional Parameters</a></li>
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/additional_parameters.html">Additional Parameters</a></li>
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/additional_parameters.html">Additional Parameters</a></li>
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/additional_parameters.html">Additional Parameters</a></li>
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/additional_parameters.html">Additional Parameters</a></li>
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/additional_parameters.html">Additional Parameters</a></li>
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/additional_parameters.html">Additional Parameters</a></li>
@@ -188,14 +193,14 @@
<span class="c1"># See the License for the specific language governing permissions and</span>
<span class="c1"># limitations under the License.</span>
<span class="c1">#</span>
<span class="kn">import</span> <span class="nn">os</span>
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
<span class="kn">from</span> <span class="nn">rl_coach.architectures.tensorflow_components.shared_variables</span> <span class="k">import</span> <span class="n">SharedRunningStats</span>
<span class="kn">from</span> <span class="nn">rl_coach.core_types</span> <span class="k">import</span> <span class="n">RewardType</span>
<span class="kn">from</span> <span class="nn">rl_coach.filters.reward.reward_filter</span> <span class="k">import</span> <span class="n">RewardFilter</span>
<span class="kn">from</span> <span class="nn">rl_coach.spaces</span> <span class="k">import</span> <span class="n">RewardSpace</span>
<span class="kn">from</span> <span class="nn">rl_coach.utilities.shared_running_stats</span> <span class="k">import</span> <span class="n">NumpySharedRunningStats</span>
<div class="viewcode-block" id="RewardNormalizationFilter"><a class="viewcode-back" href="../../../../components/filters/input_filters.html#rl_coach.filters.reward.RewardNormalizationFilter">[docs]</a><span class="k">class</span> <span class="nc">RewardNormalizationFilter</span><span class="p">(</span><span class="n">RewardFilter</span><span class="p">):</span>
@@ -214,13 +219,19 @@
<span class="bp">self</span><span class="o">.</span><span class="n">clip_max</span> <span class="o">=</span> <span class="n">clip_max</span>
<span class="bp">self</span><span class="o">.</span><span class="n">running_rewards_stats</span> <span class="o">=</span> <span class="kc">None</span>
<span class="k">def</span> <span class="nf">set_device</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">device</span><span class="p">,</span> <span class="n">memory_backend_params</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">def</span> <span class="nf">set_device</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">device</span><span class="p">,</span> <span class="n">memory_backend_params</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">mode</span><span class="o">=</span><span class="s1">&#39;numpy&#39;</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> An optional function that allows the filter to get the device if it is required to use tensorflow ops</span>
<span class="sd"> :param device: the device to use</span>
<span class="sd"> :return: None</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">running_rewards_stats</span> <span class="o">=</span> <span class="n">SharedRunningStats</span><span class="p">(</span><span class="n">device</span><span class="p">,</span> <span class="n">name</span><span class="o">=</span><span class="s1">&#39;rewards_stats&#39;</span><span class="p">,</span>
<span class="k">if</span> <span class="n">mode</span> <span class="o">==</span> <span class="s1">&#39;tf&#39;</span><span class="p">:</span>
<span class="kn">from</span> <span class="nn">rl_coach.architectures.tensorflow_components.shared_variables</span> <span class="k">import</span> <span class="n">TFSharedRunningStats</span>
<span class="bp">self</span><span class="o">.</span><span class="n">running_rewards_stats</span> <span class="o">=</span> <span class="n">TFSharedRunningStats</span><span class="p">(</span><span class="n">device</span><span class="p">,</span> <span class="n">name</span><span class="o">=</span><span class="s1">&#39;rewards_stats&#39;</span><span class="p">,</span> <span class="n">create_ops</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span>
<span class="n">pubsub_params</span><span class="o">=</span><span class="n">memory_backend_params</span><span class="p">)</span>
<span class="k">elif</span> <span class="n">mode</span> <span class="o">==</span> <span class="s1">&#39;numpy&#39;</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">running_rewards_stats</span> <span class="o">=</span> <span class="n">NumpySharedRunningStats</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="s1">&#39;rewards_stats&#39;</span><span class="p">,</span>
<span class="n">pubsub_params</span><span class="o">=</span><span class="n">memory_backend_params</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">set_session</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">sess</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
@@ -242,7 +253,13 @@
<span class="k">return</span> <span class="n">reward</span>
<span class="k">def</span> <span class="nf">get_filtered_reward_space</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">input_reward_space</span><span class="p">:</span> <span class="n">RewardSpace</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">RewardSpace</span><span class="p">:</span>
<span class="k">return</span> <span class="n">input_reward_space</span></div>
<span class="k">return</span> <span class="n">input_reward_space</span>
<span class="k">def</span> <span class="nf">save_state_to_checkpoint</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">checkpoint_dir</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">checkpoint_prefix</span><span class="p">:</span> <span class="nb">str</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">running_rewards_stats</span><span class="o">.</span><span class="n">save_state_to_checkpoint</span><span class="p">(</span><span class="n">checkpoint_dir</span><span class="p">,</span> <span class="n">checkpoint_prefix</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">restore_state_from_checkpoint</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">checkpoint_dir</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">checkpoint_prefix</span><span class="p">:</span> <span class="nb">str</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">running_rewards_stats</span><span class="o">.</span><span class="n">restore_state_from_checkpoint</span><span class="p">(</span><span class="n">checkpoint_dir</span><span class="p">,</span> <span class="n">checkpoint_prefix</span><span class="p">)</span></div>
</pre></div>
</div>
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/additional_parameters.html">Additional Parameters</a></li>
@@ -0,0 +1,441 @@
<!DOCTYPE html>
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>rl_coach.memories.backend.redis &mdash; Reinforcement Learning Coach 0.11.0 documentation</title>
<link rel="stylesheet" href="../../../../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../../../../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../../../../_static/css/custom.css" type="text/css" />
<link rel="index" title="Index" href="../../../../genindex.html" />
<link rel="search" title="Search" href="../../../../search.html" />
<link href="../../../../_static/css/custom.css" rel="stylesheet" type="text/css">
<script src="../../../../_static/js/modernizr.min.js"></script>
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search">
<a href="../../../../index.html" class="icon icon-home"> Reinforcement Learning Coach
<img src="../../../../_static/dark_logo.png" class="logo" alt="Logo"/>
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../../../../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div>
<div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../dashboard.html">Coach Dashboard</a></li>
</ul>
<p class="caption"><span class="caption-text">Design</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../contributing/add_agent.html">Adding a New Agent</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../contributing/add_env.html">Adding a New Environment</a></li>
</ul>
<p class="caption"><span class="caption-text">Components</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/additional_parameters.html">Additional Parameters</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
<nav class="wy-nav-top" aria-label="top navigation">
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../../../../index.html">Reinforcement Learning Coach</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="breadcrumbs navigation">
<ul class="wy-breadcrumbs">
<li><a href="../../../../index.html">Docs</a> &raquo;</li>
<li><a href="../../../index.html">Module code</a> &raquo;</li>
<li>rl_coach.memories.backend.redis</li>
<li class="wy-breadcrumbs-aside">
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<h1>Source code for rl_coach.memories.backend.redis</h1><div class="highlight"><pre>
<span></span>
<span class="kn">import</span> <span class="nn">redis</span>
<span class="kn">import</span> <span class="nn">pickle</span>
<span class="kn">import</span> <span class="nn">uuid</span>
<span class="kn">import</span> <span class="nn">time</span>
<span class="kn">from</span> <span class="nn">rl_coach.memories.backend.memory</span> <span class="k">import</span> <span class="n">MemoryBackend</span><span class="p">,</span> <span class="n">MemoryBackendParameters</span>
<span class="kn">from</span> <span class="nn">rl_coach.core_types</span> <span class="k">import</span> <span class="n">Transition</span><span class="p">,</span> <span class="n">Episode</span><span class="p">,</span> <span class="n">EnvironmentSteps</span><span class="p">,</span> <span class="n">EnvironmentEpisodes</span>
<span class="k">class</span> <span class="nc">RedisPubSubMemoryBackendParameters</span><span class="p">(</span><span class="n">MemoryBackendParameters</span><span class="p">):</span>
<span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">redis_address</span><span class="p">:</span> <span class="nb">str</span><span class="o">=</span><span class="s2">&quot;&quot;</span><span class="p">,</span> <span class="n">redis_port</span><span class="p">:</span> <span class="nb">int</span><span class="o">=</span><span class="mi">6379</span><span class="p">,</span> <span class="n">channel</span><span class="p">:</span> <span class="nb">str</span><span class="o">=</span><span class="s2">&quot;channel-</span><span class="si">{}</span><span class="s2">&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">uuid</span><span class="o">.</span><span class="n">uuid4</span><span class="p">()),</span>
<span class="n">orchestrator_params</span><span class="p">:</span> <span class="nb">dict</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">run_type</span><span class="o">=</span><span class="s1">&#39;trainer&#39;</span><span class="p">,</span> <span class="n">orchestrator_type</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;kubernetes&quot;</span><span class="p">,</span> <span class="n">deployed</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="kc">False</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">redis_address</span> <span class="o">=</span> <span class="n">redis_address</span>
<span class="bp">self</span><span class="o">.</span><span class="n">redis_port</span> <span class="o">=</span> <span class="n">redis_port</span>
<span class="bp">self</span><span class="o">.</span><span class="n">channel</span> <span class="o">=</span> <span class="n">channel</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">orchestrator_params</span><span class="p">:</span>
<span class="n">orchestrator_params</span> <span class="o">=</span> <span class="p">{}</span>
<span class="bp">self</span><span class="o">.</span><span class="n">orchestrator_params</span> <span class="o">=</span> <span class="n">orchestrator_params</span>
<span class="bp">self</span><span class="o">.</span><span class="n">run_type</span> <span class="o">=</span> <span class="n">run_type</span>
<span class="bp">self</span><span class="o">.</span><span class="n">store_type</span> <span class="o">=</span> <span class="s2">&quot;redispubsub&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">orchestrator_type</span> <span class="o">=</span> <span class="n">orchestrator_type</span>
<span class="bp">self</span><span class="o">.</span><span class="n">deployed</span> <span class="o">=</span> <span class="n">deployed</span>
<div class="viewcode-block" id="RedisPubSubBackend"><a class="viewcode-back" href="../../../../components/memory_backends/index.html#rl_coach.memories.backend.redis.RedisPubSubBackend">[docs]</a><span class="k">class</span> <span class="nc">RedisPubSubBackend</span><span class="p">(</span><span class="n">MemoryBackend</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> A memory backend which transfers the experiences from the rollout to the training worker using Redis Pub/Sub in</span>
<span class="sd"> Coach when distributed mode is used.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">params</span><span class="p">:</span> <span class="n">RedisPubSubMemoryBackendParameters</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> :param params: The Redis parameters to be used with this Redis Pub/Sub instance.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">params</span> <span class="o">=</span> <span class="n">params</span>
<span class="bp">self</span><span class="o">.</span><span class="n">redis_connection</span> <span class="o">=</span> <span class="n">redis</span><span class="o">.</span><span class="n">Redis</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">redis_address</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">redis_port</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">redis_server_name</span> <span class="o">=</span> <span class="s1">&#39;redis-server-</span><span class="si">{}</span><span class="s1">&#39;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">uuid</span><span class="o">.</span><span class="n">uuid4</span><span class="p">())</span>
<span class="bp">self</span><span class="o">.</span><span class="n">redis_service_name</span> <span class="o">=</span> <span class="s1">&#39;redis-service-</span><span class="si">{}</span><span class="s1">&#39;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">uuid</span><span class="o">.</span><span class="n">uuid4</span><span class="p">())</span>
<span class="k">def</span> <span class="nf">store</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">obj</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> :param obj: The object to store in memory. The object is either a Tranisition or Episode type.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">redis_connection</span><span class="o">.</span><span class="n">publish</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">channel</span><span class="p">,</span> <span class="n">pickle</span><span class="o">.</span><span class="n">dumps</span><span class="p">(</span><span class="n">obj</span><span class="p">))</span>
<span class="k">def</span> <span class="nf">deploy</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Deploy the Redis Pub/Sub service in an orchestrator.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">deployed</span><span class="p">:</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">orchestrator_type</span> <span class="o">==</span> <span class="s1">&#39;kubernetes&#39;</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">deploy_kubernetes</span><span class="p">()</span>
<span class="c1"># Wait till subscribe to the channel is possible or else it will cause delays in the trainer.</span>
<span class="n">time</span><span class="o">.</span><span class="n">sleep</span><span class="p">(</span><span class="mi">10</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">deploy_kubernetes</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Deploy the Redis Pub/Sub service in Kubernetes orchestrator.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="s1">&#39;namespace&#39;</span> <span class="ow">not</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">orchestrator_params</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">orchestrator_params</span><span class="p">[</span><span class="s1">&#39;namespace&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="s2">&quot;default&quot;</span>
<span class="kn">from</span> <span class="nn">kubernetes</span> <span class="k">import</span> <span class="n">client</span>
<span class="n">container</span> <span class="o">=</span> <span class="n">client</span><span class="o">.</span><span class="n">V1Container</span><span class="p">(</span>
<span class="n">name</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">redis_server_name</span><span class="p">,</span>
<span class="n">image</span><span class="o">=</span><span class="s1">&#39;redis:4-alpine&#39;</span><span class="p">,</span>
<span class="p">)</span>
<span class="n">template</span> <span class="o">=</span> <span class="n">client</span><span class="o">.</span><span class="n">V1PodTemplateSpec</span><span class="p">(</span>
<span class="n">metadata</span><span class="o">=</span><span class="n">client</span><span class="o">.</span><span class="n">V1ObjectMeta</span><span class="p">(</span><span class="n">labels</span><span class="o">=</span><span class="p">{</span><span class="s1">&#39;app&#39;</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">redis_server_name</span><span class="p">}),</span>
<span class="n">spec</span><span class="o">=</span><span class="n">client</span><span class="o">.</span><span class="n">V1PodSpec</span><span class="p">(</span>
<span class="n">containers</span><span class="o">=</span><span class="p">[</span><span class="n">container</span><span class="p">]</span>
<span class="p">)</span>
<span class="p">)</span>
<span class="n">deployment_spec</span> <span class="o">=</span> <span class="n">client</span><span class="o">.</span><span class="n">V1DeploymentSpec</span><span class="p">(</span>
<span class="n">replicas</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
<span class="n">template</span><span class="o">=</span><span class="n">template</span><span class="p">,</span>
<span class="n">selector</span><span class="o">=</span><span class="n">client</span><span class="o">.</span><span class="n">V1LabelSelector</span><span class="p">(</span>
<span class="n">match_labels</span><span class="o">=</span><span class="p">{</span><span class="s1">&#39;app&#39;</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">redis_server_name</span><span class="p">}</span>
<span class="p">)</span>
<span class="p">)</span>
<span class="n">deployment</span> <span class="o">=</span> <span class="n">client</span><span class="o">.</span><span class="n">V1Deployment</span><span class="p">(</span>
<span class="n">api_version</span><span class="o">=</span><span class="s1">&#39;apps/v1&#39;</span><span class="p">,</span>
<span class="n">kind</span><span class="o">=</span><span class="s1">&#39;Deployment&#39;</span><span class="p">,</span>
<span class="n">metadata</span><span class="o">=</span><span class="n">client</span><span class="o">.</span><span class="n">V1ObjectMeta</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">redis_server_name</span><span class="p">,</span> <span class="n">labels</span><span class="o">=</span><span class="p">{</span><span class="s1">&#39;app&#39;</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">redis_server_name</span><span class="p">}),</span>
<span class="n">spec</span><span class="o">=</span><span class="n">deployment_spec</span>
<span class="p">)</span>
<span class="n">api_client</span> <span class="o">=</span> <span class="n">client</span><span class="o">.</span><span class="n">AppsV1Api</span><span class="p">()</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">api_client</span><span class="o">.</span><span class="n">create_namespaced_deployment</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">orchestrator_params</span><span class="p">[</span><span class="s1">&#39;namespace&#39;</span><span class="p">],</span> <span class="n">deployment</span><span class="p">)</span>
<span class="k">except</span> <span class="n">client</span><span class="o">.</span><span class="n">rest</span><span class="o">.</span><span class="n">ApiException</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Got exception: </span><span class="si">%s</span><span class="se">\n</span><span class="s2"> while creating redis-server&quot;</span><span class="p">,</span> <span class="n">e</span><span class="p">)</span>
<span class="k">return</span> <span class="kc">False</span>
<span class="n">core_v1_api</span> <span class="o">=</span> <span class="n">client</span><span class="o">.</span><span class="n">CoreV1Api</span><span class="p">()</span>
<span class="n">service</span> <span class="o">=</span> <span class="n">client</span><span class="o">.</span><span class="n">V1Service</span><span class="p">(</span>
<span class="n">api_version</span><span class="o">=</span><span class="s1">&#39;v1&#39;</span><span class="p">,</span>
<span class="n">kind</span><span class="o">=</span><span class="s1">&#39;Service&#39;</span><span class="p">,</span>
<span class="n">metadata</span><span class="o">=</span><span class="n">client</span><span class="o">.</span><span class="n">V1ObjectMeta</span><span class="p">(</span>
<span class="n">name</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">redis_service_name</span>
<span class="p">),</span>
<span class="n">spec</span><span class="o">=</span><span class="n">client</span><span class="o">.</span><span class="n">V1ServiceSpec</span><span class="p">(</span>
<span class="n">selector</span><span class="o">=</span><span class="p">{</span><span class="s1">&#39;app&#39;</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">redis_server_name</span><span class="p">},</span>
<span class="n">ports</span><span class="o">=</span><span class="p">[</span><span class="n">client</span><span class="o">.</span><span class="n">V1ServicePort</span><span class="p">(</span>
<span class="n">protocol</span><span class="o">=</span><span class="s1">&#39;TCP&#39;</span><span class="p">,</span>
<span class="n">port</span><span class="o">=</span><span class="mi">6379</span><span class="p">,</span>
<span class="n">target_port</span><span class="o">=</span><span class="mi">6379</span>
<span class="p">)]</span>
<span class="p">)</span>
<span class="p">)</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">core_v1_api</span><span class="o">.</span><span class="n">create_namespaced_service</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">orchestrator_params</span><span class="p">[</span><span class="s1">&#39;namespace&#39;</span><span class="p">],</span> <span class="n">service</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">redis_address</span> <span class="o">=</span> <span class="s1">&#39;</span><span class="si">{}</span><span class="s1">.</span><span class="si">{}</span><span class="s1">.svc&#39;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">redis_service_name</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">orchestrator_params</span><span class="p">[</span><span class="s1">&#39;namespace&#39;</span><span class="p">]</span>
<span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">redis_port</span> <span class="o">=</span> <span class="mi">6379</span>
<span class="k">return</span> <span class="kc">True</span>
<span class="k">except</span> <span class="n">client</span><span class="o">.</span><span class="n">rest</span><span class="o">.</span><span class="n">ApiException</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Got exception: </span><span class="si">%s</span><span class="se">\n</span><span class="s2"> while creating a service for redis-server&quot;</span><span class="p">,</span> <span class="n">e</span><span class="p">)</span>
<span class="k">return</span> <span class="kc">False</span>
<span class="k">def</span> <span class="nf">undeploy</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Undeploy the Redis Pub/Sub service in an orchestrator.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="kn">from</span> <span class="nn">kubernetes</span> <span class="k">import</span> <span class="n">client</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">deployed</span><span class="p">:</span>
<span class="k">return</span>
<span class="kn">from</span> <span class="nn">kubernetes</span> <span class="k">import</span> <span class="n">client</span>
<span class="n">api_client</span> <span class="o">=</span> <span class="n">client</span><span class="o">.</span><span class="n">AppsV1Api</span><span class="p">()</span>
<span class="n">delete_options</span> <span class="o">=</span> <span class="n">client</span><span class="o">.</span><span class="n">V1DeleteOptions</span><span class="p">()</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">api_client</span><span class="o">.</span><span class="n">delete_namespaced_deployment</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">redis_server_name</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">orchestrator_params</span><span class="p">[</span><span class="s1">&#39;namespace&#39;</span><span class="p">],</span> <span class="n">delete_options</span><span class="p">)</span>
<span class="k">except</span> <span class="n">client</span><span class="o">.</span><span class="n">rest</span><span class="o">.</span><span class="n">ApiException</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Got exception: </span><span class="si">%s</span><span class="se">\n</span><span class="s2"> while deleting redis-server&quot;</span><span class="p">,</span> <span class="n">e</span><span class="p">)</span>
<span class="n">api_client</span> <span class="o">=</span> <span class="n">client</span><span class="o">.</span><span class="n">CoreV1Api</span><span class="p">()</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">api_client</span><span class="o">.</span><span class="n">delete_namespaced_service</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">redis_service_name</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">orchestrator_params</span><span class="p">[</span><span class="s1">&#39;namespace&#39;</span><span class="p">],</span> <span class="n">delete_options</span><span class="p">)</span>
<span class="k">except</span> <span class="n">client</span><span class="o">.</span><span class="n">rest</span><span class="o">.</span><span class="n">ApiException</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Got exception: </span><span class="si">%s</span><span class="se">\n</span><span class="s2"> while deleting redis-server&quot;</span><span class="p">,</span> <span class="n">e</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">sample</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">size</span><span class="p">):</span>
<span class="k">pass</span>
<span class="k">def</span> <span class="nf">fetch</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">num_consecutive_playing_steps</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> :param num_consecutive_playing_steps: The number steps to fetch.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="n">RedisSub</span><span class="p">(</span><span class="n">redis_address</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">redis_address</span><span class="p">,</span> <span class="n">redis_port</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">redis_port</span><span class="p">,</span> <span class="n">channel</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">channel</span><span class="p">)</span><span class="o">.</span><span class="n">run</span><span class="p">(</span><span class="n">num_consecutive_playing_steps</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">subscribe</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">agent</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> :param agent: The agent in use.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">redis_sub</span> <span class="o">=</span> <span class="n">RedisSub</span><span class="p">(</span><span class="n">redis_address</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">redis_address</span><span class="p">,</span> <span class="n">redis_port</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">redis_port</span><span class="p">,</span> <span class="n">channel</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">channel</span><span class="p">)</span>
<span class="k">return</span> <span class="n">redis_sub</span>
<span class="k">def</span> <span class="nf">get_endpoint</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="k">return</span> <span class="p">{</span><span class="s1">&#39;redis_address&#39;</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">redis_address</span><span class="p">,</span>
<span class="s1">&#39;redis_port&#39;</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">redis_port</span><span class="p">}</span></div>
<span class="k">class</span> <span class="nc">RedisSub</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span>
<span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">redis_address</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;localhost&quot;</span><span class="p">,</span> <span class="n">redis_port</span><span class="p">:</span> <span class="nb">int</span><span class="o">=</span><span class="mi">6379</span><span class="p">,</span> <span class="n">channel</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;PubsubChannel&quot;</span><span class="p">):</span>
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
<span class="bp">self</span><span class="o">.</span><span class="n">redis_connection</span> <span class="o">=</span> <span class="n">redis</span><span class="o">.</span><span class="n">Redis</span><span class="p">(</span><span class="n">redis_address</span><span class="p">,</span> <span class="n">redis_port</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">pubsub</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">redis_connection</span><span class="o">.</span><span class="n">pubsub</span><span class="p">()</span>
<span class="bp">self</span><span class="o">.</span><span class="n">subscriber</span> <span class="o">=</span> <span class="kc">None</span>
<span class="bp">self</span><span class="o">.</span><span class="n">channel</span> <span class="o">=</span> <span class="n">channel</span>
<span class="bp">self</span><span class="o">.</span><span class="n">subscriber</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">pubsub</span><span class="o">.</span><span class="n">subscribe</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">channel</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">run</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">num_consecutive_playing_steps</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> :param num_consecutive_playing_steps: The number steps to fetch.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">transitions</span> <span class="o">=</span> <span class="mi">0</span>
<span class="n">episodes</span> <span class="o">=</span> <span class="mi">0</span>
<span class="n">steps</span> <span class="o">=</span> <span class="mi">0</span>
<span class="k">for</span> <span class="n">message</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">pubsub</span><span class="o">.</span><span class="n">listen</span><span class="p">():</span>
<span class="k">if</span> <span class="n">message</span> <span class="ow">and</span> <span class="s1">&#39;data&#39;</span> <span class="ow">in</span> <span class="n">message</span><span class="p">:</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">obj</span> <span class="o">=</span> <span class="n">pickle</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span><span class="n">message</span><span class="p">[</span><span class="s1">&#39;data&#39;</span><span class="p">])</span>
<span class="k">if</span> <span class="nb">type</span><span class="p">(</span><span class="n">obj</span><span class="p">)</span> <span class="o">==</span> <span class="n">Transition</span><span class="p">:</span>
<span class="n">transitions</span> <span class="o">+=</span> <span class="mi">1</span>
<span class="k">if</span> <span class="n">obj</span><span class="o">.</span><span class="n">game_over</span><span class="p">:</span>
<span class="n">episodes</span> <span class="o">+=</span> <span class="mi">1</span>
<span class="k">yield</span> <span class="n">obj</span>
<span class="k">elif</span> <span class="nb">type</span><span class="p">(</span><span class="n">obj</span><span class="p">)</span> <span class="o">==</span> <span class="n">Episode</span><span class="p">:</span>
<span class="n">episodes</span> <span class="o">+=</span> <span class="mi">1</span>
<span class="n">transitions</span> <span class="o">+=</span> <span class="nb">len</span><span class="p">(</span><span class="n">obj</span><span class="o">.</span><span class="n">transitions</span><span class="p">)</span>
<span class="k">yield from</span> <span class="n">obj</span><span class="o">.</span><span class="n">transitions</span>
<span class="k">except</span> <span class="ne">Exception</span><span class="p">:</span>
<span class="k">continue</span>
<span class="k">if</span> <span class="nb">type</span><span class="p">(</span><span class="n">num_consecutive_playing_steps</span><span class="p">)</span> <span class="o">==</span> <span class="n">EnvironmentSteps</span><span class="p">:</span>
<span class="n">steps</span> <span class="o">=</span> <span class="n">transitions</span>
<span class="k">if</span> <span class="nb">type</span><span class="p">(</span><span class="n">num_consecutive_playing_steps</span><span class="p">)</span> <span class="o">==</span> <span class="n">EnvironmentEpisodes</span><span class="p">:</span>
<span class="n">steps</span> <span class="o">=</span> <span class="n">episodes</span>
<span class="k">if</span> <span class="n">steps</span> <span class="o">&gt;=</span> <span class="n">num_consecutive_playing_steps</span><span class="o">.</span><span class="n">num_steps</span><span class="p">:</span>
<span class="k">break</span>
</pre></div>
</div>
</div>
<footer>
<hr/>
<div role="contentinfo">
<p>
&copy; Copyright 2018, Intel AI Lab
</p>
</div>
Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script type="text/javascript" id="documentation_options" data-url_root="../../../../" src="../../../../_static/documentation_options.js"></script>
<script type="text/javascript" src="../../../../_static/jquery.js"></script>
<script type="text/javascript" src="../../../../_static/underscore.js"></script>
<script type="text/javascript" src="../../../../_static/doctools.js"></script>
<script async="async" type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.1/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
<script type="text/javascript" src="../../../../_static/js/theme.js"></script>
<script type="text/javascript">
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/additional_parameters.html">Additional Parameters</a></li>
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/additional_parameters.html">Additional Parameters</a></li>
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/additional_parameters.html">Additional Parameters</a></li>
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/additional_parameters.html">Additional Parameters</a></li>
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/additional_parameters.html">Additional Parameters</a></li>
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/additional_parameters.html">Additional Parameters</a></li>
@@ -444,14 +449,19 @@
<span class="k">def</span> <span class="nf">load_dnd</span><span class="p">(</span><span class="n">model_dir</span><span class="p">):</span>
<span class="n">max_id</span> <span class="o">=</span> <span class="mi">0</span>
<span class="n">latest_checkpoint_id</span> <span class="o">=</span> <span class="o">-</span><span class="mi">1</span>
<span class="n">latest_checkpoint</span> <span class="o">=</span> <span class="s1">&#39;&#39;</span>
<span class="c1"># get all checkpoint files</span>
<span class="k">for</span> <span class="n">fname</span> <span class="ow">in</span> <span class="n">os</span><span class="o">.</span><span class="n">listdir</span><span class="p">(</span><span class="n">model_dir</span><span class="p">):</span>
<span class="n">path</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">model_dir</span><span class="p">,</span> <span class="n">fname</span><span class="p">)</span>
<span class="k">if</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">isdir</span><span class="p">(</span><span class="n">path</span><span class="p">)</span> <span class="ow">or</span> <span class="n">fname</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">&#39;.&#39;</span><span class="p">)[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span> <span class="o">!=</span> <span class="s1">&#39;srs&#39;</span><span class="p">:</span>
<span class="k">continue</span>
<span class="n">checkpoint_id</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">fname</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">&#39;_&#39;</span><span class="p">)[</span><span class="mi">0</span><span class="p">])</span>
<span class="k">if</span> <span class="n">checkpoint_id</span> <span class="o">&gt;</span> <span class="n">latest_checkpoint_id</span><span class="p">:</span>
<span class="n">latest_checkpoint</span> <span class="o">=</span> <span class="n">fname</span>
<span class="n">latest_checkpoint_id</span> <span class="o">=</span> <span class="n">checkpoint_id</span>
<span class="k">for</span> <span class="n">f</span> <span class="ow">in</span> <span class="p">[</span><span class="n">s</span> <span class="k">for</span> <span class="n">s</span> <span class="ow">in</span> <span class="n">os</span><span class="o">.</span><span class="n">listdir</span><span class="p">(</span><span class="n">model_dir</span><span class="p">)</span> <span class="k">if</span> <span class="n">s</span><span class="o">.</span><span class="n">endswith</span><span class="p">(</span><span class="s1">&#39;.dnd&#39;</span><span class="p">)]:</span>
<span class="k">if</span> <span class="nb">int</span><span class="p">(</span><span class="n">f</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">&#39;.&#39;</span><span class="p">)[</span><span class="mi">0</span><span class="p">])</span> <span class="o">&gt;</span> <span class="n">max_id</span><span class="p">:</span>
<span class="n">max_id</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">f</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">&#39;.&#39;</span><span class="p">)[</span><span class="mi">0</span><span class="p">])</span>
<span class="n">model_path</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">max_id</span><span class="p">)</span> <span class="o">+</span> <span class="s1">&#39;.dnd&#39;</span>
<span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">model_dir</span><span class="p">,</span> <span class="n">model_path</span><span class="p">),</span> <span class="s1">&#39;rb&#39;</span><span class="p">)</span> <span class="k">as</span> <span class="n">f</span><span class="p">:</span>
<span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">model_dir</span><span class="p">,</span> <span class="nb">str</span><span class="p">(</span><span class="n">latest_checkpoint</span><span class="p">)),</span> <span class="s1">&#39;rb&#39;</span><span class="p">)</span> <span class="k">as</span> <span class="n">f</span><span class="p">:</span>
<span class="n">DND</span> <span class="o">=</span> <span class="n">pickle</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="n">f</span><span class="p">)</span>
<span class="k">for</span> <span class="n">a</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">DND</span><span class="o">.</span><span class="n">num_actions</span><span class="p">):</span>
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/additional_parameters.html">Additional Parameters</a></li>
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/additional_parameters.html">Additional Parameters</a></li>
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../components/additional_parameters.html">Additional Parameters</a></li>
@@ -0,0 +1,627 @@
<!DOCTYPE html>
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>rl_coach.orchestrators.kubernetes_orchestrator &mdash; Reinforcement Learning Coach 0.11.0 documentation</title>
<link rel="stylesheet" href="../../../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../../../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../../../_static/css/custom.css" type="text/css" />
<link rel="index" title="Index" href="../../../genindex.html" />
<link rel="search" title="Search" href="../../../search.html" />
<link href="../../../_static/css/custom.css" rel="stylesheet" type="text/css">
<script src="../../../_static/js/modernizr.min.js"></script>
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search">
<a href="../../../index.html" class="icon icon-home"> Reinforcement Learning Coach
<img src="../../../_static/dark_logo.png" class="logo" alt="Logo"/>
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div>
<div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dashboard.html">Coach Dashboard</a></li>
</ul>
<p class="caption"><span class="caption-text">Design</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../contributing/add_agent.html">Adding a New Agent</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../contributing/add_env.html">Adding a New Environment</a></li>
</ul>
<p class="caption"><span class="caption-text">Components</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../components/additional_parameters.html">Additional Parameters</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
<nav class="wy-nav-top" aria-label="top navigation">
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../../../index.html">Reinforcement Learning Coach</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="breadcrumbs navigation">
<ul class="wy-breadcrumbs">
<li><a href="../../../index.html">Docs</a> &raquo;</li>
<li><a href="../../index.html">Module code</a> &raquo;</li>
<li>rl_coach.orchestrators.kubernetes_orchestrator</li>
<li class="wy-breadcrumbs-aside">
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<h1>Source code for rl_coach.orchestrators.kubernetes_orchestrator</h1><div class="highlight"><pre>
<span></span><span class="kn">import</span> <span class="nn">os</span>
<span class="kn">import</span> <span class="nn">uuid</span>
<span class="kn">import</span> <span class="nn">json</span>
<span class="kn">import</span> <span class="nn">time</span>
<span class="kn">import</span> <span class="nn">sys</span>
<span class="kn">from</span> <span class="nn">enum</span> <span class="k">import</span> <span class="n">Enum</span>
<span class="kn">from</span> <span class="nn">typing</span> <span class="k">import</span> <span class="n">List</span>
<span class="kn">from</span> <span class="nn">configparser</span> <span class="k">import</span> <span class="n">ConfigParser</span><span class="p">,</span> <span class="n">Error</span>
<span class="kn">from</span> <span class="nn">multiprocessing</span> <span class="k">import</span> <span class="n">Process</span>
<span class="kn">from</span> <span class="nn">rl_coach.base_parameters</span> <span class="k">import</span> <span class="n">RunType</span>
<span class="kn">from</span> <span class="nn">rl_coach.orchestrators.deploy</span> <span class="k">import</span> <span class="n">Deploy</span><span class="p">,</span> <span class="n">DeployParameters</span>
<span class="kn">from</span> <span class="nn">kubernetes</span> <span class="k">import</span> <span class="n">client</span> <span class="k">as</span> <span class="n">k8sclient</span><span class="p">,</span> <span class="n">config</span> <span class="k">as</span> <span class="n">k8sconfig</span>
<span class="kn">from</span> <span class="nn">rl_coach.memories.backend.memory</span> <span class="k">import</span> <span class="n">MemoryBackendParameters</span>
<span class="kn">from</span> <span class="nn">rl_coach.memories.backend.memory_impl</span> <span class="k">import</span> <span class="n">get_memory_backend</span>
<span class="kn">from</span> <span class="nn">rl_coach.data_stores.data_store</span> <span class="k">import</span> <span class="n">DataStoreParameters</span>
<span class="kn">from</span> <span class="nn">rl_coach.data_stores.data_store_impl</span> <span class="k">import</span> <span class="n">get_data_store</span>
<span class="k">class</span> <span class="nc">RunTypeParameters</span><span class="p">():</span>
<span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">image</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">command</span><span class="p">:</span> <span class="nb">list</span><span class="p">(),</span> <span class="n">arguments</span><span class="p">:</span> <span class="nb">list</span><span class="p">()</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">run_type</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">RunType</span><span class="o">.</span><span class="n">TRAINER</span><span class="p">),</span> <span class="n">checkpoint_dir</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;/checkpoint&quot;</span><span class="p">,</span>
<span class="n">num_replicas</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">1</span><span class="p">,</span> <span class="n">orchestration_params</span><span class="p">:</span> <span class="nb">dict</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">image</span> <span class="o">=</span> <span class="n">image</span>
<span class="bp">self</span><span class="o">.</span><span class="n">command</span> <span class="o">=</span> <span class="n">command</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">arguments</span><span class="p">:</span>
<span class="n">arguments</span> <span class="o">=</span> <span class="nb">list</span><span class="p">()</span>
<span class="bp">self</span><span class="o">.</span><span class="n">arguments</span> <span class="o">=</span> <span class="n">arguments</span>
<span class="bp">self</span><span class="o">.</span><span class="n">run_type</span> <span class="o">=</span> <span class="n">run_type</span>
<span class="bp">self</span><span class="o">.</span><span class="n">checkpoint_dir</span> <span class="o">=</span> <span class="n">checkpoint_dir</span>
<span class="bp">self</span><span class="o">.</span><span class="n">num_replicas</span> <span class="o">=</span> <span class="n">num_replicas</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">orchestration_params</span><span class="p">:</span>
<span class="n">orchestration_params</span> <span class="o">=</span> <span class="nb">dict</span><span class="p">()</span>
<span class="bp">self</span><span class="o">.</span><span class="n">orchestration_params</span> <span class="o">=</span> <span class="n">orchestration_params</span>
<span class="k">class</span> <span class="nc">KubernetesParameters</span><span class="p">(</span><span class="n">DeployParameters</span><span class="p">):</span>
<span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">run_type_params</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="n">RunTypeParameters</span><span class="p">],</span> <span class="n">kubeconfig</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">namespace</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">nfs_server</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">nfs_path</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">checkpoint_dir</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s1">&#39;/checkpoint&#39;</span><span class="p">,</span>
<span class="n">memory_backend_parameters</span><span class="p">:</span> <span class="n">MemoryBackendParameters</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">data_store_params</span><span class="p">:</span> <span class="n">DataStoreParameters</span> <span class="o">=</span> <span class="kc">None</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">run_type_params</span> <span class="o">=</span> <span class="p">{}</span>
<span class="k">for</span> <span class="n">run_type_param</span> <span class="ow">in</span> <span class="n">run_type_params</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">run_type_params</span><span class="p">[</span><span class="n">run_type_param</span><span class="o">.</span><span class="n">run_type</span><span class="p">]</span> <span class="o">=</span> <span class="n">run_type_param</span>
<span class="bp">self</span><span class="o">.</span><span class="n">kubeconfig</span> <span class="o">=</span> <span class="n">kubeconfig</span>
<span class="bp">self</span><span class="o">.</span><span class="n">namespace</span> <span class="o">=</span> <span class="n">namespace</span>
<span class="bp">self</span><span class="o">.</span><span class="n">nfs_server</span> <span class="o">=</span> <span class="n">nfs_server</span>
<span class="bp">self</span><span class="o">.</span><span class="n">nfs_path</span> <span class="o">=</span> <span class="n">nfs_path</span>
<span class="bp">self</span><span class="o">.</span><span class="n">checkpoint_dir</span> <span class="o">=</span> <span class="n">checkpoint_dir</span>
<span class="bp">self</span><span class="o">.</span><span class="n">memory_backend_parameters</span> <span class="o">=</span> <span class="n">memory_backend_parameters</span>
<span class="bp">self</span><span class="o">.</span><span class="n">data_store_params</span> <span class="o">=</span> <span class="n">data_store_params</span>
<div class="viewcode-block" id="Kubernetes"><a class="viewcode-back" href="../../../components/orchestrators/index.html#rl_coach.orchestrators.kubernetes_orchestrator.Kubernetes">[docs]</a><span class="k">class</span> <span class="nc">Kubernetes</span><span class="p">(</span><span class="n">Deploy</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> An orchestrator implmentation which uses Kubernetes to deploy the components such as training and rollout workers</span>
<span class="sd"> and Redis Pub/Sub in Coach when used in the distributed mode.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">params</span><span class="p">:</span> <span class="n">KubernetesParameters</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> :param params: The Kubernetes parameters which are used for deploying the components in Coach. These parameters</span>
<span class="sd"> include namespace and kubeconfig.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">params</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">params</span> <span class="o">=</span> <span class="n">params</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">kubeconfig</span><span class="p">:</span>
<span class="n">k8sconfig</span><span class="o">.</span><span class="n">load_kube_config</span><span class="p">()</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">k8sconfig</span><span class="o">.</span><span class="n">load_incluster_config</span><span class="p">()</span>
<span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">namespace</span><span class="p">:</span>
<span class="n">_</span><span class="p">,</span> <span class="n">current_context</span> <span class="o">=</span> <span class="n">k8sconfig</span><span class="o">.</span><span class="n">list_kube_config_contexts</span><span class="p">()</span>
<span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">namespace</span> <span class="o">=</span> <span class="n">current_context</span><span class="p">[</span><span class="s1">&#39;context&#39;</span><span class="p">][</span><span class="s1">&#39;namespace&#39;</span><span class="p">]</span>
<span class="k">if</span> <span class="n">os</span><span class="o">.</span><span class="n">environ</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">&#39;http_proxy&#39;</span><span class="p">):</span>
<span class="n">k8sclient</span><span class="o">.</span><span class="n">Configuration</span><span class="o">.</span><span class="n">_default</span><span class="o">.</span><span class="n">proxy</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">environ</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">&#39;http_proxy&#39;</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">memory_backend_parameters</span><span class="o">.</span><span class="n">orchestrator_params</span> <span class="o">=</span> <span class="p">{</span><span class="s1">&#39;namespace&#39;</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">namespace</span><span class="p">}</span>
<span class="bp">self</span><span class="o">.</span><span class="n">memory_backend</span> <span class="o">=</span> <span class="n">get_memory_backend</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">memory_backend_parameters</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">data_store_params</span><span class="o">.</span><span class="n">orchestrator_params</span> <span class="o">=</span> <span class="p">{</span><span class="s1">&#39;namespace&#39;</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">namespace</span><span class="p">}</span>
<span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">data_store_params</span><span class="o">.</span><span class="n">namespace</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">namespace</span>
<span class="bp">self</span><span class="o">.</span><span class="n">data_store</span> <span class="o">=</span> <span class="n">get_data_store</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">data_store_params</span><span class="p">)</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">data_store_params</span><span class="o">.</span><span class="n">store_type</span> <span class="o">==</span> <span class="s2">&quot;s3&quot;</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">s3_access_key</span> <span class="o">=</span> <span class="kc">None</span>
<span class="bp">self</span><span class="o">.</span><span class="n">s3_secret_key</span> <span class="o">=</span> <span class="kc">None</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">data_store_params</span><span class="o">.</span><span class="n">creds_file</span><span class="p">:</span>
<span class="n">s3config</span> <span class="o">=</span> <span class="n">ConfigParser</span><span class="p">()</span>
<span class="n">s3config</span><span class="o">.</span><span class="n">read</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">data_store_params</span><span class="o">.</span><span class="n">creds_file</span><span class="p">)</span>
<span class="k">try</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">s3_access_key</span> <span class="o">=</span> <span class="n">s3config</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">&#39;default&#39;</span><span class="p">,</span> <span class="s1">&#39;aws_access_key_id&#39;</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">s3_secret_key</span> <span class="o">=</span> <span class="n">s3config</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">&#39;default&#39;</span><span class="p">,</span> <span class="s1">&#39;aws_secret_access_key&#39;</span><span class="p">)</span>
<span class="k">except</span> <span class="n">Error</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Error when reading S3 credentials file: </span><span class="si">%s</span><span class="s2">&quot;</span><span class="p">,</span> <span class="n">e</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">s3_access_key</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">environ</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">&#39;ACCESS_KEY_ID&#39;</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">s3_secret_key</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">environ</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">&#39;SECRET_ACCESS_KEY&#39;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">setup</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">bool</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Deploys the memory backend and data stores if required.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">memory_backend</span><span class="o">.</span><span class="n">deploy</span><span class="p">()</span>
<span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">data_store</span><span class="o">.</span><span class="n">deploy</span><span class="p">():</span>
<span class="k">return</span> <span class="kc">False</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">data_store_params</span><span class="o">.</span><span class="n">store_type</span> <span class="o">==</span> <span class="s2">&quot;nfs&quot;</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">nfs_pvc</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">data_store</span><span class="o">.</span><span class="n">get_info</span><span class="p">()</span>
<span class="k">return</span> <span class="kc">True</span>
<span class="k">def</span> <span class="nf">deploy_trainer</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">bool</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Deploys the training worker in Kubernetes.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">trainer_params</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">run_type_params</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="nb">str</span><span class="p">(</span><span class="n">RunType</span><span class="o">.</span><span class="n">TRAINER</span><span class="p">),</span> <span class="kc">None</span><span class="p">)</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">trainer_params</span><span class="p">:</span>
<span class="k">return</span> <span class="kc">False</span>
<span class="n">trainer_params</span><span class="o">.</span><span class="n">command</span> <span class="o">+=</span> <span class="p">[</span><span class="s1">&#39;--memory_backend_params&#39;</span><span class="p">,</span> <span class="n">json</span><span class="o">.</span><span class="n">dumps</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">memory_backend_parameters</span><span class="o">.</span><span class="vm">__dict__</span><span class="p">)]</span>
<span class="n">trainer_params</span><span class="o">.</span><span class="n">command</span> <span class="o">+=</span> <span class="p">[</span><span class="s1">&#39;--data_store_params&#39;</span><span class="p">,</span> <span class="n">json</span><span class="o">.</span><span class="n">dumps</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">data_store_params</span><span class="o">.</span><span class="vm">__dict__</span><span class="p">)]</span>
<span class="n">name</span> <span class="o">=</span> <span class="s2">&quot;</span><span class="si">{}</span><span class="s2">-</span><span class="si">{}</span><span class="s2">&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">trainer_params</span><span class="o">.</span><span class="n">run_type</span><span class="p">,</span> <span class="n">uuid</span><span class="o">.</span><span class="n">uuid4</span><span class="p">())</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">data_store_params</span><span class="o">.</span><span class="n">store_type</span> <span class="o">==</span> <span class="s2">&quot;nfs&quot;</span><span class="p">:</span>
<span class="n">container</span> <span class="o">=</span> <span class="n">k8sclient</span><span class="o">.</span><span class="n">V1Container</span><span class="p">(</span>
<span class="n">name</span><span class="o">=</span><span class="n">name</span><span class="p">,</span>
<span class="n">image</span><span class="o">=</span><span class="n">trainer_params</span><span class="o">.</span><span class="n">image</span><span class="p">,</span>
<span class="n">command</span><span class="o">=</span><span class="n">trainer_params</span><span class="o">.</span><span class="n">command</span><span class="p">,</span>
<span class="n">args</span><span class="o">=</span><span class="n">trainer_params</span><span class="o">.</span><span class="n">arguments</span><span class="p">,</span>
<span class="n">image_pull_policy</span><span class="o">=</span><span class="s1">&#39;Always&#39;</span><span class="p">,</span>
<span class="n">volume_mounts</span><span class="o">=</span><span class="p">[</span><span class="n">k8sclient</span><span class="o">.</span><span class="n">V1VolumeMount</span><span class="p">(</span>
<span class="n">name</span><span class="o">=</span><span class="s1">&#39;nfs-pvc&#39;</span><span class="p">,</span>
<span class="n">mount_path</span><span class="o">=</span><span class="n">trainer_params</span><span class="o">.</span><span class="n">checkpoint_dir</span>
<span class="p">)],</span>
<span class="n">stdin</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span>
<span class="n">tty</span><span class="o">=</span><span class="kc">True</span>
<span class="p">)</span>
<span class="n">template</span> <span class="o">=</span> <span class="n">k8sclient</span><span class="o">.</span><span class="n">V1PodTemplateSpec</span><span class="p">(</span>
<span class="n">metadata</span><span class="o">=</span><span class="n">k8sclient</span><span class="o">.</span><span class="n">V1ObjectMeta</span><span class="p">(</span><span class="n">labels</span><span class="o">=</span><span class="p">{</span><span class="s1">&#39;app&#39;</span><span class="p">:</span> <span class="n">name</span><span class="p">}),</span>
<span class="n">spec</span><span class="o">=</span><span class="n">k8sclient</span><span class="o">.</span><span class="n">V1PodSpec</span><span class="p">(</span>
<span class="n">containers</span><span class="o">=</span><span class="p">[</span><span class="n">container</span><span class="p">],</span>
<span class="n">volumes</span><span class="o">=</span><span class="p">[</span><span class="n">k8sclient</span><span class="o">.</span><span class="n">V1Volume</span><span class="p">(</span>
<span class="n">name</span><span class="o">=</span><span class="s2">&quot;nfs-pvc&quot;</span><span class="p">,</span>
<span class="n">persistent_volume_claim</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">nfs_pvc</span>
<span class="p">)],</span>
<span class="n">restart_policy</span><span class="o">=</span><span class="s1">&#39;OnFailure&#39;</span>
<span class="p">),</span>
<span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">container</span> <span class="o">=</span> <span class="n">k8sclient</span><span class="o">.</span><span class="n">V1Container</span><span class="p">(</span>
<span class="n">name</span><span class="o">=</span><span class="n">name</span><span class="p">,</span>
<span class="n">image</span><span class="o">=</span><span class="n">trainer_params</span><span class="o">.</span><span class="n">image</span><span class="p">,</span>
<span class="n">command</span><span class="o">=</span><span class="n">trainer_params</span><span class="o">.</span><span class="n">command</span><span class="p">,</span>
<span class="n">args</span><span class="o">=</span><span class="n">trainer_params</span><span class="o">.</span><span class="n">arguments</span><span class="p">,</span>
<span class="n">image_pull_policy</span><span class="o">=</span><span class="s1">&#39;Always&#39;</span><span class="p">,</span>
<span class="n">env</span><span class="o">=</span><span class="p">[</span><span class="n">k8sclient</span><span class="o">.</span><span class="n">V1EnvVar</span><span class="p">(</span><span class="s2">&quot;ACCESS_KEY_ID&quot;</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">s3_access_key</span><span class="p">),</span>
<span class="n">k8sclient</span><span class="o">.</span><span class="n">V1EnvVar</span><span class="p">(</span><span class="s2">&quot;SECRET_ACCESS_KEY&quot;</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">s3_secret_key</span><span class="p">)],</span>
<span class="n">stdin</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span>
<span class="n">tty</span><span class="o">=</span><span class="kc">True</span>
<span class="p">)</span>
<span class="n">template</span> <span class="o">=</span> <span class="n">k8sclient</span><span class="o">.</span><span class="n">V1PodTemplateSpec</span><span class="p">(</span>
<span class="n">metadata</span><span class="o">=</span><span class="n">k8sclient</span><span class="o">.</span><span class="n">V1ObjectMeta</span><span class="p">(</span><span class="n">labels</span><span class="o">=</span><span class="p">{</span><span class="s1">&#39;app&#39;</span><span class="p">:</span> <span class="n">name</span><span class="p">}),</span>
<span class="n">spec</span><span class="o">=</span><span class="n">k8sclient</span><span class="o">.</span><span class="n">V1PodSpec</span><span class="p">(</span>
<span class="n">containers</span><span class="o">=</span><span class="p">[</span><span class="n">container</span><span class="p">],</span>
<span class="n">restart_policy</span><span class="o">=</span><span class="s1">&#39;OnFailure&#39;</span>
<span class="p">),</span>
<span class="p">)</span>
<span class="n">job_spec</span> <span class="o">=</span> <span class="n">k8sclient</span><span class="o">.</span><span class="n">V1JobSpec</span><span class="p">(</span>
<span class="n">completions</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
<span class="n">template</span><span class="o">=</span><span class="n">template</span>
<span class="p">)</span>
<span class="n">job</span> <span class="o">=</span> <span class="n">k8sclient</span><span class="o">.</span><span class="n">V1Job</span><span class="p">(</span>
<span class="n">api_version</span><span class="o">=</span><span class="s2">&quot;batch/v1&quot;</span><span class="p">,</span>
<span class="n">kind</span><span class="o">=</span><span class="s2">&quot;Job&quot;</span><span class="p">,</span>
<span class="n">metadata</span><span class="o">=</span><span class="n">k8sclient</span><span class="o">.</span><span class="n">V1ObjectMeta</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="n">name</span><span class="p">),</span>
<span class="n">spec</span><span class="o">=</span><span class="n">job_spec</span>
<span class="p">)</span>
<span class="n">api_client</span> <span class="o">=</span> <span class="n">k8sclient</span><span class="o">.</span><span class="n">BatchV1Api</span><span class="p">()</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">api_client</span><span class="o">.</span><span class="n">create_namespaced_job</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">namespace</span><span class="p">,</span> <span class="n">job</span><span class="p">)</span>
<span class="n">trainer_params</span><span class="o">.</span><span class="n">orchestration_params</span><span class="p">[</span><span class="s1">&#39;job_name&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">name</span>
<span class="k">return</span> <span class="kc">True</span>
<span class="k">except</span> <span class="n">k8sclient</span><span class="o">.</span><span class="n">rest</span><span class="o">.</span><span class="n">ApiException</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Got exception: </span><span class="si">%s</span><span class="se">\n</span><span class="s2"> while creating job&quot;</span><span class="p">,</span> <span class="n">e</span><span class="p">)</span>
<span class="k">return</span> <span class="kc">False</span>
<span class="k">def</span> <span class="nf">deploy_worker</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Deploys the rollout worker(s) in Kubernetes.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">worker_params</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">run_type_params</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="nb">str</span><span class="p">(</span><span class="n">RunType</span><span class="o">.</span><span class="n">ROLLOUT_WORKER</span><span class="p">),</span> <span class="kc">None</span><span class="p">)</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">worker_params</span><span class="p">:</span>
<span class="k">return</span> <span class="kc">False</span>
<span class="n">worker_params</span><span class="o">.</span><span class="n">command</span> <span class="o">+=</span> <span class="p">[</span><span class="s1">&#39;--memory_backend_params&#39;</span><span class="p">,</span> <span class="n">json</span><span class="o">.</span><span class="n">dumps</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">memory_backend_parameters</span><span class="o">.</span><span class="vm">__dict__</span><span class="p">)]</span>
<span class="n">worker_params</span><span class="o">.</span><span class="n">command</span> <span class="o">+=</span> <span class="p">[</span><span class="s1">&#39;--data_store_params&#39;</span><span class="p">,</span> <span class="n">json</span><span class="o">.</span><span class="n">dumps</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">data_store_params</span><span class="o">.</span><span class="vm">__dict__</span><span class="p">)]</span>
<span class="n">worker_params</span><span class="o">.</span><span class="n">command</span> <span class="o">+=</span> <span class="p">[</span><span class="s1">&#39;--num_workers&#39;</span><span class="p">,</span> <span class="s1">&#39;</span><span class="si">{}</span><span class="s1">&#39;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">worker_params</span><span class="o">.</span><span class="n">num_replicas</span><span class="p">)]</span>
<span class="n">name</span> <span class="o">=</span> <span class="s2">&quot;</span><span class="si">{}</span><span class="s2">-</span><span class="si">{}</span><span class="s2">&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">worker_params</span><span class="o">.</span><span class="n">run_type</span><span class="p">,</span> <span class="n">uuid</span><span class="o">.</span><span class="n">uuid4</span><span class="p">())</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">data_store_params</span><span class="o">.</span><span class="n">store_type</span> <span class="o">==</span> <span class="s2">&quot;nfs&quot;</span><span class="p">:</span>
<span class="n">container</span> <span class="o">=</span> <span class="n">k8sclient</span><span class="o">.</span><span class="n">V1Container</span><span class="p">(</span>
<span class="n">name</span><span class="o">=</span><span class="n">name</span><span class="p">,</span>
<span class="n">image</span><span class="o">=</span><span class="n">worker_params</span><span class="o">.</span><span class="n">image</span><span class="p">,</span>
<span class="n">command</span><span class="o">=</span><span class="n">worker_params</span><span class="o">.</span><span class="n">command</span><span class="p">,</span>
<span class="n">args</span><span class="o">=</span><span class="n">worker_params</span><span class="o">.</span><span class="n">arguments</span><span class="p">,</span>
<span class="n">image_pull_policy</span><span class="o">=</span><span class="s1">&#39;Always&#39;</span><span class="p">,</span>
<span class="n">volume_mounts</span><span class="o">=</span><span class="p">[</span><span class="n">k8sclient</span><span class="o">.</span><span class="n">V1VolumeMount</span><span class="p">(</span>
<span class="n">name</span><span class="o">=</span><span class="s1">&#39;nfs-pvc&#39;</span><span class="p">,</span>
<span class="n">mount_path</span><span class="o">=</span><span class="n">worker_params</span><span class="o">.</span><span class="n">checkpoint_dir</span>
<span class="p">)],</span>
<span class="n">stdin</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span>
<span class="n">tty</span><span class="o">=</span><span class="kc">True</span>
<span class="p">)</span>
<span class="n">template</span> <span class="o">=</span> <span class="n">k8sclient</span><span class="o">.</span><span class="n">V1PodTemplateSpec</span><span class="p">(</span>
<span class="n">metadata</span><span class="o">=</span><span class="n">k8sclient</span><span class="o">.</span><span class="n">V1ObjectMeta</span><span class="p">(</span><span class="n">labels</span><span class="o">=</span><span class="p">{</span><span class="s1">&#39;app&#39;</span><span class="p">:</span> <span class="n">name</span><span class="p">}),</span>
<span class="n">spec</span><span class="o">=</span><span class="n">k8sclient</span><span class="o">.</span><span class="n">V1PodSpec</span><span class="p">(</span>
<span class="n">containers</span><span class="o">=</span><span class="p">[</span><span class="n">container</span><span class="p">],</span>
<span class="n">volumes</span><span class="o">=</span><span class="p">[</span><span class="n">k8sclient</span><span class="o">.</span><span class="n">V1Volume</span><span class="p">(</span>
<span class="n">name</span><span class="o">=</span><span class="s2">&quot;nfs-pvc&quot;</span><span class="p">,</span>
<span class="n">persistent_volume_claim</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">nfs_pvc</span>
<span class="p">)],</span>
<span class="n">restart_policy</span><span class="o">=</span><span class="s1">&#39;OnFailure&#39;</span>
<span class="p">),</span>
<span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">container</span> <span class="o">=</span> <span class="n">k8sclient</span><span class="o">.</span><span class="n">V1Container</span><span class="p">(</span>
<span class="n">name</span><span class="o">=</span><span class="n">name</span><span class="p">,</span>
<span class="n">image</span><span class="o">=</span><span class="n">worker_params</span><span class="o">.</span><span class="n">image</span><span class="p">,</span>
<span class="n">command</span><span class="o">=</span><span class="n">worker_params</span><span class="o">.</span><span class="n">command</span><span class="p">,</span>
<span class="n">args</span><span class="o">=</span><span class="n">worker_params</span><span class="o">.</span><span class="n">arguments</span><span class="p">,</span>
<span class="n">image_pull_policy</span><span class="o">=</span><span class="s1">&#39;Always&#39;</span><span class="p">,</span>
<span class="n">env</span><span class="o">=</span><span class="p">[</span><span class="n">k8sclient</span><span class="o">.</span><span class="n">V1EnvVar</span><span class="p">(</span><span class="s2">&quot;ACCESS_KEY_ID&quot;</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">s3_access_key</span><span class="p">),</span>
<span class="n">k8sclient</span><span class="o">.</span><span class="n">V1EnvVar</span><span class="p">(</span><span class="s2">&quot;SECRET_ACCESS_KEY&quot;</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">s3_secret_key</span><span class="p">)],</span>
<span class="n">stdin</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span>
<span class="n">tty</span><span class="o">=</span><span class="kc">True</span>
<span class="p">)</span>
<span class="n">template</span> <span class="o">=</span> <span class="n">k8sclient</span><span class="o">.</span><span class="n">V1PodTemplateSpec</span><span class="p">(</span>
<span class="n">metadata</span><span class="o">=</span><span class="n">k8sclient</span><span class="o">.</span><span class="n">V1ObjectMeta</span><span class="p">(</span><span class="n">labels</span><span class="o">=</span><span class="p">{</span><span class="s1">&#39;app&#39;</span><span class="p">:</span> <span class="n">name</span><span class="p">}),</span>
<span class="n">spec</span><span class="o">=</span><span class="n">k8sclient</span><span class="o">.</span><span class="n">V1PodSpec</span><span class="p">(</span>
<span class="n">containers</span><span class="o">=</span><span class="p">[</span><span class="n">container</span><span class="p">],</span>
<span class="n">restart_policy</span><span class="o">=</span><span class="s1">&#39;OnFailure&#39;</span>
<span class="p">)</span>
<span class="p">)</span>
<span class="n">job_spec</span> <span class="o">=</span> <span class="n">k8sclient</span><span class="o">.</span><span class="n">V1JobSpec</span><span class="p">(</span>
<span class="n">completions</span><span class="o">=</span><span class="n">worker_params</span><span class="o">.</span><span class="n">num_replicas</span><span class="p">,</span>
<span class="n">parallelism</span><span class="o">=</span><span class="n">worker_params</span><span class="o">.</span><span class="n">num_replicas</span><span class="p">,</span>
<span class="n">template</span><span class="o">=</span><span class="n">template</span>
<span class="p">)</span>
<span class="n">job</span> <span class="o">=</span> <span class="n">k8sclient</span><span class="o">.</span><span class="n">V1Job</span><span class="p">(</span>
<span class="n">api_version</span><span class="o">=</span><span class="s2">&quot;batch/v1&quot;</span><span class="p">,</span>
<span class="n">kind</span><span class="o">=</span><span class="s2">&quot;Job&quot;</span><span class="p">,</span>
<span class="n">metadata</span><span class="o">=</span><span class="n">k8sclient</span><span class="o">.</span><span class="n">V1ObjectMeta</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="n">name</span><span class="p">),</span>
<span class="n">spec</span><span class="o">=</span><span class="n">job_spec</span>
<span class="p">)</span>
<span class="n">api_client</span> <span class="o">=</span> <span class="n">k8sclient</span><span class="o">.</span><span class="n">BatchV1Api</span><span class="p">()</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">api_client</span><span class="o">.</span><span class="n">create_namespaced_job</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">namespace</span><span class="p">,</span> <span class="n">job</span><span class="p">)</span>
<span class="n">worker_params</span><span class="o">.</span><span class="n">orchestration_params</span><span class="p">[</span><span class="s1">&#39;job_name&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">name</span>
<span class="k">return</span> <span class="kc">True</span>
<span class="k">except</span> <span class="n">k8sclient</span><span class="o">.</span><span class="n">rest</span><span class="o">.</span><span class="n">ApiException</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Got exception: </span><span class="si">%s</span><span class="se">\n</span><span class="s2"> while creating Job&quot;</span><span class="p">,</span> <span class="n">e</span><span class="p">)</span>
<span class="k">return</span> <span class="kc">False</span>
<span class="k">def</span> <span class="nf">worker_logs</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">path</span><span class="o">=</span><span class="s1">&#39;./logs&#39;</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> :param path: Path to store the worker logs.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">worker_params</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">run_type_params</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="nb">str</span><span class="p">(</span><span class="n">RunType</span><span class="o">.</span><span class="n">ROLLOUT_WORKER</span><span class="p">),</span> <span class="kc">None</span><span class="p">)</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">worker_params</span><span class="p">:</span>
<span class="k">return</span>
<span class="n">api_client</span> <span class="o">=</span> <span class="n">k8sclient</span><span class="o">.</span><span class="n">CoreV1Api</span><span class="p">()</span>
<span class="n">pods</span> <span class="o">=</span> <span class="kc">None</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">pods</span> <span class="o">=</span> <span class="n">api_client</span><span class="o">.</span><span class="n">list_namespaced_pod</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">namespace</span><span class="p">,</span> <span class="n">label_selector</span><span class="o">=</span><span class="s1">&#39;app=</span><span class="si">{}</span><span class="s1">&#39;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
<span class="n">worker_params</span><span class="o">.</span><span class="n">orchestration_params</span><span class="p">[</span><span class="s1">&#39;job_name&#39;</span><span class="p">]</span>
<span class="p">))</span>
<span class="c1"># pod = pods.items[0]</span>
<span class="k">except</span> <span class="n">k8sclient</span><span class="o">.</span><span class="n">rest</span><span class="o">.</span><span class="n">ApiException</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Got exception: </span><span class="si">%s</span><span class="se">\n</span><span class="s2"> while reading pods&quot;</span><span class="p">,</span> <span class="n">e</span><span class="p">)</span>
<span class="k">return</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">pods</span> <span class="ow">or</span> <span class="nb">len</span><span class="p">(</span><span class="n">pods</span><span class="o">.</span><span class="n">items</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
<span class="k">return</span>
<span class="k">for</span> <span class="n">pod</span> <span class="ow">in</span> <span class="n">pods</span><span class="o">.</span><span class="n">items</span><span class="p">:</span>
<span class="n">Process</span><span class="p">(</span><span class="n">target</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_tail_log_file</span><span class="p">,</span> <span class="n">args</span><span class="o">=</span><span class="p">(</span><span class="n">pod</span><span class="o">.</span><span class="n">metadata</span><span class="o">.</span><span class="n">name</span><span class="p">,</span> <span class="n">api_client</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">namespace</span><span class="p">,</span> <span class="n">path</span><span class="p">))</span><span class="o">.</span><span class="n">start</span><span class="p">()</span>
<span class="k">def</span> <span class="nf">_tail_log_file</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">pod_name</span><span class="p">,</span> <span class="n">api_client</span><span class="p">,</span> <span class="n">namespace</span><span class="p">,</span> <span class="n">path</span><span class="p">):</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">exists</span><span class="p">(</span><span class="n">path</span><span class="p">):</span>
<span class="n">os</span><span class="o">.</span><span class="n">mkdir</span><span class="p">(</span><span class="n">path</span><span class="p">)</span>
<span class="n">sys</span><span class="o">.</span><span class="n">stdout</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="n">pod_name</span><span class="p">),</span> <span class="s1">&#39;w&#39;</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">tail_log</span><span class="p">(</span><span class="n">pod_name</span><span class="p">,</span> <span class="n">api_client</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">trainer_logs</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Get the logs from trainer.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">trainer_params</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">run_type_params</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="nb">str</span><span class="p">(</span><span class="n">RunType</span><span class="o">.</span><span class="n">TRAINER</span><span class="p">),</span> <span class="kc">None</span><span class="p">)</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">trainer_params</span><span class="p">:</span>
<span class="k">return</span>
<span class="n">api_client</span> <span class="o">=</span> <span class="n">k8sclient</span><span class="o">.</span><span class="n">CoreV1Api</span><span class="p">()</span>
<span class="n">pod</span> <span class="o">=</span> <span class="kc">None</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">pods</span> <span class="o">=</span> <span class="n">api_client</span><span class="o">.</span><span class="n">list_namespaced_pod</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">namespace</span><span class="p">,</span> <span class="n">label_selector</span><span class="o">=</span><span class="s1">&#39;app=</span><span class="si">{}</span><span class="s1">&#39;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
<span class="n">trainer_params</span><span class="o">.</span><span class="n">orchestration_params</span><span class="p">[</span><span class="s1">&#39;job_name&#39;</span><span class="p">]</span>
<span class="p">))</span>
<span class="n">pod</span> <span class="o">=</span> <span class="n">pods</span><span class="o">.</span><span class="n">items</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
<span class="k">except</span> <span class="n">k8sclient</span><span class="o">.</span><span class="n">rest</span><span class="o">.</span><span class="n">ApiException</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Got exception: </span><span class="si">%s</span><span class="se">\n</span><span class="s2"> while reading pods&quot;</span><span class="p">,</span> <span class="n">e</span><span class="p">)</span>
<span class="k">return</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">pod</span><span class="p">:</span>
<span class="k">return</span>
<span class="bp">self</span><span class="o">.</span><span class="n">tail_log</span><span class="p">(</span><span class="n">pod</span><span class="o">.</span><span class="n">metadata</span><span class="o">.</span><span class="n">name</span><span class="p">,</span> <span class="n">api_client</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">tail_log</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">pod_name</span><span class="p">,</span> <span class="n">corev1_api</span><span class="p">):</span>
<span class="k">while</span> <span class="kc">True</span><span class="p">:</span>
<span class="n">time</span><span class="o">.</span><span class="n">sleep</span><span class="p">(</span><span class="mi">10</span><span class="p">)</span>
<span class="c1"># Try to tail the pod logs</span>
<span class="k">try</span><span class="p">:</span>
<span class="k">for</span> <span class="n">line</span> <span class="ow">in</span> <span class="n">corev1_api</span><span class="o">.</span><span class="n">read_namespaced_pod_log</span><span class="p">(</span>
<span class="n">pod_name</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">namespace</span><span class="p">,</span> <span class="n">follow</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span>
<span class="n">_preload_content</span><span class="o">=</span><span class="kc">False</span>
<span class="p">):</span>
<span class="nb">print</span><span class="p">(</span><span class="n">line</span><span class="o">.</span><span class="n">decode</span><span class="p">(</span><span class="s1">&#39;utf-8&#39;</span><span class="p">),</span> <span class="n">flush</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">end</span><span class="o">=</span><span class="s1">&#39;&#39;</span><span class="p">)</span>
<span class="k">except</span> <span class="n">k8sclient</span><span class="o">.</span><span class="n">rest</span><span class="o">.</span><span class="n">ApiException</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<span class="k">pass</span>
<span class="c1"># This part will get executed if the pod is one of the following phases: not ready, failed or terminated.</span>
<span class="c1"># Check if the pod has errored out, else just try again.</span>
<span class="c1"># Get the pod</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">pod</span> <span class="o">=</span> <span class="n">corev1_api</span><span class="o">.</span><span class="n">read_namespaced_pod</span><span class="p">(</span><span class="n">pod_name</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">namespace</span><span class="p">)</span>
<span class="k">except</span> <span class="n">k8sclient</span><span class="o">.</span><span class="n">rest</span><span class="o">.</span><span class="n">ApiException</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<span class="k">continue</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">hasattr</span><span class="p">(</span><span class="n">pod</span><span class="p">,</span> <span class="s1">&#39;status&#39;</span><span class="p">)</span> <span class="ow">or</span> <span class="ow">not</span> <span class="n">pod</span><span class="o">.</span><span class="n">status</span><span class="p">:</span>
<span class="k">continue</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">hasattr</span><span class="p">(</span><span class="n">pod</span><span class="o">.</span><span class="n">status</span><span class="p">,</span> <span class="s1">&#39;container_statuses&#39;</span><span class="p">)</span> <span class="ow">or</span> <span class="ow">not</span> <span class="n">pod</span><span class="o">.</span><span class="n">status</span><span class="o">.</span><span class="n">container_statuses</span><span class="p">:</span>
<span class="k">continue</span>
<span class="k">for</span> <span class="n">container_status</span> <span class="ow">in</span> <span class="n">pod</span><span class="o">.</span><span class="n">status</span><span class="o">.</span><span class="n">container_statuses</span><span class="p">:</span>
<span class="k">if</span> <span class="n">container_status</span><span class="o">.</span><span class="n">state</span><span class="o">.</span><span class="n">waiting</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">if</span> <span class="n">container_status</span><span class="o">.</span><span class="n">state</span><span class="o">.</span><span class="n">waiting</span><span class="o">.</span><span class="n">reason</span> <span class="o">==</span> <span class="s1">&#39;Error&#39;</span> <span class="ow">or</span> \
<span class="n">container_status</span><span class="o">.</span><span class="n">state</span><span class="o">.</span><span class="n">waiting</span><span class="o">.</span><span class="n">reason</span> <span class="o">==</span> <span class="s1">&#39;CrashLoopBackOff&#39;</span> <span class="ow">or</span> \
<span class="n">container_status</span><span class="o">.</span><span class="n">state</span><span class="o">.</span><span class="n">waiting</span><span class="o">.</span><span class="n">reason</span> <span class="o">==</span> <span class="s1">&#39;ImagePullBackOff&#39;</span> <span class="ow">or</span> \
<span class="n">container_status</span><span class="o">.</span><span class="n">state</span><span class="o">.</span><span class="n">waiting</span><span class="o">.</span><span class="n">reason</span> <span class="o">==</span> <span class="s1">&#39;ErrImagePull&#39;</span><span class="p">:</span>
<span class="k">return</span>
<span class="k">if</span> <span class="n">container_status</span><span class="o">.</span><span class="n">state</span><span class="o">.</span><span class="n">terminated</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">return</span>
<span class="k">def</span> <span class="nf">undeploy</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Undeploy all the components, such as trainer and rollout worker(s), Redis pub/sub and data store, when required.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">trainer_params</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">run_type_params</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="nb">str</span><span class="p">(</span><span class="n">RunType</span><span class="o">.</span><span class="n">TRAINER</span><span class="p">),</span> <span class="kc">None</span><span class="p">)</span>
<span class="n">api_client</span> <span class="o">=</span> <span class="n">k8sclient</span><span class="o">.</span><span class="n">BatchV1Api</span><span class="p">()</span>
<span class="n">delete_options</span> <span class="o">=</span> <span class="n">k8sclient</span><span class="o">.</span><span class="n">V1DeleteOptions</span><span class="p">(</span>
<span class="n">propagation_policy</span><span class="o">=</span><span class="s2">&quot;Foreground&quot;</span>
<span class="p">)</span>
<span class="k">if</span> <span class="n">trainer_params</span><span class="p">:</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">api_client</span><span class="o">.</span><span class="n">delete_namespaced_job</span><span class="p">(</span><span class="n">trainer_params</span><span class="o">.</span><span class="n">orchestration_params</span><span class="p">[</span><span class="s1">&#39;job_name&#39;</span><span class="p">],</span> <span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">namespace</span><span class="p">,</span> <span class="n">delete_options</span><span class="p">)</span>
<span class="k">except</span> <span class="n">k8sclient</span><span class="o">.</span><span class="n">rest</span><span class="o">.</span><span class="n">ApiException</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Got exception: </span><span class="si">%s</span><span class="se">\n</span><span class="s2"> while deleting trainer&quot;</span><span class="p">,</span> <span class="n">e</span><span class="p">)</span>
<span class="n">worker_params</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">run_type_params</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="nb">str</span><span class="p">(</span><span class="n">RunType</span><span class="o">.</span><span class="n">ROLLOUT_WORKER</span><span class="p">),</span> <span class="kc">None</span><span class="p">)</span>
<span class="k">if</span> <span class="n">worker_params</span><span class="p">:</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">api_client</span><span class="o">.</span><span class="n">delete_namespaced_job</span><span class="p">(</span><span class="n">worker_params</span><span class="o">.</span><span class="n">orchestration_params</span><span class="p">[</span><span class="s1">&#39;job_name&#39;</span><span class="p">],</span> <span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">namespace</span><span class="p">,</span> <span class="n">delete_options</span><span class="p">)</span>
<span class="k">except</span> <span class="n">k8sclient</span><span class="o">.</span><span class="n">rest</span><span class="o">.</span><span class="n">ApiException</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Got exception: </span><span class="si">%s</span><span class="se">\n</span><span class="s2"> while deleting workers&quot;</span><span class="p">,</span> <span class="n">e</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">memory_backend</span><span class="o">.</span><span class="n">undeploy</span><span class="p">()</span>
<span class="bp">self</span><span class="o">.</span><span class="n">data_store</span><span class="o">.</span><span class="n">undeploy</span><span class="p">()</span></div>
</pre></div>
</div>
</div>
<footer>
<hr/>
<div role="contentinfo">
<p>
&copy; Copyright 2018, Intel AI Lab
</p>
</div>
Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script type="text/javascript" id="documentation_options" data-url_root="../../../" src="../../../_static/documentation_options.js"></script>
<script type="text/javascript" src="../../../_static/jquery.js"></script>
<script type="text/javascript" src="../../../_static/underscore.js"></script>
<script type="text/javascript" src="../../../_static/doctools.js"></script>
<script async="async" type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.1/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
<script type="text/javascript" src="../../../_static/js/theme.js"></script>
<script type="text/javascript">
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>
+16 -1
View File
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../components/agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../components/architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../components/data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../components/environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../components/exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../components/filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../components/memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../components/memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../components/orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../components/core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../components/spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../components/additional_parameters.html">Additional Parameters</a></li>
@@ -358,7 +363,7 @@
<div class="viewcode-block" id="VectorObservationSpace"><a class="viewcode-back" href="../../components/spaces.html#rl_coach.spaces.VectorObservationSpace">[docs]</a><span class="k">class</span> <span class="nc">VectorObservationSpace</span><span class="p">(</span><span class="n">ObservationSpace</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> An observation space which is defined as a vector of elements. This can be particularly useful for environments</span>
<span class="sd"> which return measurements, such as in robotic environmnets.</span>
<span class="sd"> which return measurements, such as in robotic environments.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">shape</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span> <span class="n">low</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="kc">None</span><span class="p">,</span> <span class="nb">int</span><span class="p">,</span> <span class="nb">float</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">]</span><span class="o">=-</span><span class="n">np</span><span class="o">.</span><span class="n">inf</span><span class="p">,</span>
<span class="n">high</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="kc">None</span><span class="p">,</span> <span class="nb">int</span><span class="p">,</span> <span class="nb">float</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">]</span><span class="o">=</span><span class="n">np</span><span class="o">.</span><span class="n">inf</span><span class="p">,</span> <span class="n">measurements_names</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
@@ -372,6 +377,16 @@
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">shape</span><span class="p">,</span> <span class="n">low</span><span class="p">,</span> <span class="n">high</span><span class="p">)</span></div>
<span class="k">class</span> <span class="nc">TensorObservationSpace</span><span class="p">(</span><span class="n">ObservationSpace</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> An observation space which defines observations with arbitrary shape. This can be particularly useful for</span>
<span class="sd"> environments with non image input.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">shape</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">,</span> <span class="n">low</span><span class="p">:</span> <span class="o">-</span><span class="n">np</span><span class="o">.</span><span class="n">inf</span><span class="p">,</span>
<span class="n">high</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">inf</span><span class="p">):</span>
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">shape</span><span class="p">,</span> <span class="n">low</span><span class="p">,</span> <span class="n">high</span><span class="p">)</span>
<div class="viewcode-block" id="PlanarMapsObservationSpace"><a class="viewcode-back" href="../../components/spaces.html#rl_coach.spaces.PlanarMapsObservationSpace">[docs]</a><span class="k">class</span> <span class="nc">PlanarMapsObservationSpace</span><span class="p">(</span><span class="n">ObservationSpace</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> An observation space which defines a stack of 2D observations. For example, an environment which returns</span>
@@ -0,0 +1,10 @@
Data Stores
===========
S3DataStore
-----------
.. autoclass:: rl_coach.data_stores.s3_data_store.S3DataStore
NFSDataStore
------------
.. autoclass:: rl_coach.data_stores.nfs_data_store.NFSDataStore
@@ -0,0 +1,6 @@
Memory Backends
===============
RedisPubSubBackend
------------------
.. autoclass:: rl_coach.memories.backend.redis.RedisPubSubBackend
@@ -0,0 +1,7 @@
Orchestrators
=============
Kubernetes
----------
.. autoclass:: rl_coach.orchestrators.kubernetes_orchestrator.Kubernetes
+29 -138
View File
@@ -1,148 +1,39 @@
# Scaling out rollout workers
.. _dist-coach-design:
This document contains some options for how we could implement horizontal scaling of rollout workers in coach, though most details are not specific to coach. A few options are laid out, my current suggestion would be to start with Option 1, and move on to Option 1a or Option 1b as required.
Distributed Coach - Horizontal Scale-Out
========================================
## Off Policy Algorithms
Coach supports the horizontal scale-out of rollout workers using `--distributed_coach` or `-dc` options. Coach uses
three interfaces for horizontal scale-out, which allows for integration with different technologies and flexibility.
These three interfaces are orchestrator, memory backend and data store.
### Option 1 - master polls file system
* **Orchestrator** - The orchestrator interface provides basic interaction points for orchestration, scheduling and
resource management of training and rollout workers in the distributed coach mode. The interactions points define
how Coach should deploy, undeploy and monitor the workers spawned by Coach.
- one master process samples memories and updates the policy
- many worker processes execute rollouts
- coordinate using a single shared networked file system: nfs, ceph, dat, s3fs, etc.
- policy sync communication method:
- master process occasionally writes policy to shared file system
- worker processes occasionally read policy from shared file system
- prevent workers from reading a policy which has not been completely written to disk using either:
- redis lock
- write to temporary files and then rename
- rollout memories:
- sync communication method:
- worker processes write rollout memories as they are generated to shared filesystem
- master process occasionally reads rollout memories from shared file system
- master process must be resilient to corrupted or incompletely written memories
- sampling method:
- master process keeps all rollouts in memory utilizing existing coach memory classes
- control flow:
- master:
- run training updates interleaved with loading of any newly available rollouts in memory
- periodically write policy to disk
- workers:
- periodically read policy from disk
- evaluate rollouts and write them to disk
- ops:
- kubernetes yaml, kml, docker compose, etc
- a default shared file system can be provided, while allowing the user to specify something else if desired
- a default method of launching the workers and master (in kubernetes, gce, aws, etc) can be provided
* **Memory Backend** - This interface is used as the backing store or stream for the memory abstraction in
distributed Coach. The implementation of this module is mainly used for communicating experiences (transitions
and episodes) from the rollout to the training worker.
#### Pros
* **Data Store** - This interface is used as a backing store for the policy checkpoints. It is mainly used to
synchronizing policy checkpoints from the training to the rollout worker.
- very simple to implement, infrastructure already available in ai-lab-kubernetes
- fast enough for proof of concept and iteration of interface design
- rollout memories are durable and can be easily reused in later off policy training
- if designed properly, there is a clear path towards:
- decreasing latency using in-memory store (option 1a/b)
- increasing rollout memory size using distributed sampling methods (option 1c)
.. image:: /_static/img/horizontal-scale-out.png
:width: 800px
:align: center
#### Cons
Supported Synchronization Types
-------------------------------
- file system interface incurs additional latency. rollout memories must be written to disk, and later read from disk, instead of going directly from memory to memory.
- will require modifying standard control flow. there will be an impact on algorithms which expect particular training regimens. Specifically, algorithms which are sensitive to the number of update steps between target/online network updates
- will not be particularly efficient in strictly on policy algorithms where each rollout must use the most recent policy available
Synchronization type refers to the mechanism by which the policy checkpoints are synchronized from the training to the
rollout worker. For each algorithm, it is specified by using the `DistributedCoachSynchronizationType` as a part of
`agent_params.algorithm.distributed_coach_synchronization_type` in the preset. In distributed Coach, two types of
synchronization modes are supported: `SYNC` and `ASYNC`.
### Option 1a - master polls (redis) list
* **SYNC** - In this type, the trainer waits for all the experiences to be gathered from distributed rollout workers
before training a new policy and the rollout workers wait for a new policy before gathering experiences. It is suitable
for ON policy algorithms.
- instead of using a file system as in Option 1, redis lists can be used
- policy is stored as a single key/value pair (locking no longer necessary)
- rollout memory communication:
- workers: redis list push
- master: redis list len, redis list range
- note: many databases are interchangeable with redis protocol: google memorystore, aws elasticache, etc.
- note: many databases can implement this interface with minimal glue: SQL, any objectstore, etc.
#### Pros
- lower latency than disk since it is all in memory
- clear path toward scaling to large number of workers
- no concern about reading partially written rollouts
- no synchronization or additional threads necessary, though an additional thread would be helpful for concurrent reads from redis and training
- will be slightly more efficient in the case of strictly on policy algorithms
#### Cons
- more complex to set up, especially if you are concerned about rollout memory durability
### Option 1b - master subscribes to (redis) pub sub
- instead of using a file system as in Option 1, redis pub sub can be used
- policy is stored as a single key/value pair (locking no longer necessary)
- rollout memory communication:
- workers: redis publish
- master: redis subscribe
- no synchronization necessary, however an additional thread would be necessary?
- it looks like the python client might handle this already, would need further investigation
- note: many possible pub sub systems could be used with different characteristics under specific contexts: kafka, google pub/sub, aws kinesis, etc
#### Pros
- lower latency than disk since it is all in memory
- clear path toward scaling to large number of workers
- no concern about reading partially written rollouts
- will be slightly more efficient in the case of strictly on policy algorithms
#### Cons
- more complex to set up then shared file system
- on its own, does not persist worker rollouts for future off policy training
### Option 1c - distributed rollout memory sampling
- if rollout memories do not fit in memory of a single machine, a distributed storage and sampling method would be necessary
- for example:
- rollout memory store: redis set add
- rollout memory sample: redis set randmember
#### Pros
- capable of taking advantage of rollout memory larger than the available memory of a single machine
- reduce resource constraints on training machine
#### Cons
- distributed versions of each memory type/sampling method need to be custom built
- off-the-shelf implementations may not be available for complex memory types/sampling methods
### Option 2 - master listens to workers
- rollout memories:
- workers send memories directly to master via: mpi, 0mq, etc
- master policy thread listens for new memories and stores them in shared memory
- policy sync communication memory:
- master policy occasionally sends policies directly to workers via: mpi, 0mq, etc
- master and workers must synchronize so that all workers are listening when the master is ready to send a new policy
#### Pros
- lower latency than option 1 (for a small number of workers)
- will potentially be the optimal choice in the case of strictly on policy algorithms with relatively small number of worker nodes (small enough that more complex communication typologies would be necessary: rings, p2p, etc)
#### Cons
- much less robust and more difficult to debug requiring lots of synchronization
- much more difficult to be resiliency worker failure
- more custom communication/synchronization code
- as the number of workers scale up, a larger and larger fraction of time will be spent waiting and synchronizing
### Option 3 - Ray
#### Pros
- Ray would allow us to easily convert our current algorithms to distributed versions, with minimal change to our code.
#### Cons
- performance from naïve/simple use would be very similar to Option 2
- nontrivial to replace with a higher performance system if desired. Additional performance will require significant code changes.
## On Policy Algorithms
TODO
* **ASYNC** - In this type, the trainer doesn't wait for any set of experiences to be gathered from distributed
rollout workers and the rollout workers continously gather experiences loading new policies, whenever they become
available. It is suitable for OFF policy algorithms.
+239
View File
@@ -0,0 +1,239 @@
.. _dist-coach-usage:
Usage - Distributed Coach
=========================
Coach supports the horizontal scale-out of rollout workers in distributed mode. For more information on the design and
implementation of distributed Coach, see :ref:`dist-coach-design`. In the rest of this section, we will describe how to
get started with distributed Coach.
Interfaces and Implementations
------------------------------
Coach uses three interfaces to orchestrate, schedule and manager the resources of workers it spawns in the distributed
mode. These interfaces are the orchestrator, memory backend and the data store. Refer to :ref:`dist-coach-design` for
more information. The following implementation(s) are available for each interface:
* **Orchestrator** - `Kubernetes <https://kubernetes.io>`_.
* **Memory Backend** - `Redis Pub/Sub <https://redis.io/topics/pubsub>`_.
* **Data Store** - `S3 <https://aws.amazon.com/s3>`_ and `NFS <https://en.wikipedia.org/wiki/Network_File_System>`_.
Prerequisites
-------------
* Building and pushing containers - `Docker <https://docs.docker.com/install/linux/docker-ce/ubuntu>`_.
* Container registry access for hosting container images - `Docker Hub <https://hub.docker.com>`_
* Using Kubernetes for orchestration - `Kubernetes configuration <https://kubernetes.io/docs/tasks/access-application-cluster/configure-access-multiple-clusters/>`_.
* Using S3 for storing policy checkpoints - `AWS CLI <https://docs.aws.amazon.com/cli/latest/userguide/installing.html>_,
`AWS credentials <https://aws.amazon.com/blogs/security/a-new-and-standardized-way-to-manage-credentials-in-the-aws-sdks>`_
and `S3 bucket <https://docs.aws.amazon.com/AmazonS3/latest/user-guide/create-bucket.html>`_.
Clone the Repository
--------------------
.. code-block:: bash
$ git clone git@github.com:NervanaSystems/coach.git
$ cd coach
Build Container Image and Push
------------------------------
Create a directory `docker`.
.. code-block:: bash
$ mkdir docker
Create docker files in the `docker` directory.
A sample base docker file (Dockerfile.base) would look like this:
.. code-block:: bash
FROM nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04
################################
# Install apt-get Requirements #
################################
# General
RUN apt-get update && \
apt-get install -y python3-pip cmake zlib1g-dev python3-tk python-opencv \
# Boost libraries
libboost-all-dev \
# Scipy requirements
libblas-dev liblapack-dev libatlas-base-dev gfortran \
# Pygame requirements
libsdl-dev libsdl-image1.2-dev libsdl-mixer1.2-dev libsdl-ttf2.0-dev \
libsmpeg-dev libportmidi-dev libavformat-dev libswscale-dev \
# Dashboard
dpkg-dev build-essential python3.5-dev libjpeg-dev libtiff-dev libsdl1.2-dev libnotify-dev \
freeglut3 freeglut3-dev libsm-dev libgtk2.0-dev libgtk-3-dev libwebkitgtk-dev libgtk-3-dev \
libwebkitgtk-3.0-dev libgstreamer-plugins-base1.0-dev \
# Gym
libav-tools libsdl2-dev swig cmake \
# Mujoco_py
curl libgl1-mesa-dev libgl1-mesa-glx libglew-dev libosmesa6-dev software-properties-common \
# ViZDoom
build-essential zlib1g-dev libsdl2-dev libjpeg-dev \
nasm tar libbz2-dev libgtk2.0-dev cmake git libfluidsynth-dev libgme-dev \
libopenal-dev timidity libwildmidi-dev unzip wget && \
apt-get clean autoclean && \
apt-get autoremove -y
############################
# Install Pip Requirements #
############################
RUN pip3 install --upgrade pip
RUN pip3 install setuptools==39.1.0 && pip3 install pytest && pip3 install pytest-xdist
RUN curl -o /usr/local/bin/patchelf https://s3-us-west-2.amazonaws.com/openai-sci-artifacts/manual-builds/patchelf_0.9_amd64.elf \
&& chmod +x /usr/local/bin/patchelf
A sample docker file for the gym environment would look like this:
.. code-block:: bash
FROM coach-base:master as builder
# prep gym and any of its related requirements.
RUN pip3 install gym[atari,box2d,classic_control]==0.10.5
# add coach source starting with files that could trigger
# re-build if dependencies change.
RUN mkdir /root/src
COPY setup.py /root/src/.
COPY requirements.txt /root/src/.
RUN pip3 install -r /root/src/requirements.txt
FROM coach-base:master
WORKDIR /root/src
COPY --from=builder /root/.cache /root/.cache
COPY setup.py /root/src/.
COPY requirements.txt /root/src/.
COPY README.md /root/src/.
RUN pip3 install gym[atari,box2d,classic_control]==0.10.5 && pip3 install -e .[all] && rm -rf /root/.cache
COPY . /root/src
A sample docker file for the Mujoco environment would look like this:
.. code-block:: bash
FROM coach-base:master as builder
# prep mujoco and any of its related requirements.
# Mujoco
RUN mkdir -p ~/.mujoco \
&& wget https://www.roboti.us/download/mjpro150_linux.zip -O mujoco.zip \
&& unzip -n mujoco.zip -d ~/.mujoco \
&& rm mujoco.zip
ARG MUJOCO_KEY
ENV MUJOCO_KEY=$MUJOCO_KEY
ENV LD_LIBRARY_PATH /root/.mujoco/mjpro150/bin:$LD_LIBRARY_PATH
RUN echo $MUJOCO_KEY | base64 --decode > /root/.mujoco/mjkey.txt
RUN pip3 install mujoco_py
# add coach source starting with files that could trigger
# re-build if dependencies change.
RUN mkdir /root/src
COPY setup.py /root/src/.
COPY requirements.txt /root/src/.
RUN pip3 install -r /root/src/requirements.txt
FROM coach-base:master
WORKDIR /root/src
COPY --from=builder /root/.mujoco /root/.mujoco
ENV LD_LIBRARY_PATH /root/.mujoco/mjpro150/bin:$LD_LIBRARY_PATH
COPY --from=builder /root/.cache /root/.cache
COPY setup.py /root/src/.
COPY requirements.txt /root/src/.
COPY README.md /root/src/.
RUN pip3 install mujoco_py && pip3 install -e .[all] && rm -rf /root/.cache
COPY . /root/src
A sample docker file for the ViZDoom environment would look like this:
.. code-block:: bash
FROM coach-base:master as builder
# prep vizdoom and any of its related requirements.
RUN pip3 install vizdoom
# add coach source starting with files that could trigger
# re-build if dependencies change.
RUN mkdir /root/src
COPY setup.py /root/src/.
COPY requirements.txt /root/src/.
RUN pip3 install -r /root/src/requirements.txt
FROM coach-base:master
WORKDIR /root/src
COPY --from=builder /root/.cache /root/.cache
COPY setup.py /root/src/.
COPY requirements.txt /root/src/.
COPY README.md /root/src/.
RUN pip3 install vizdoom && pip3 install -e .[all] && rm -rf /root/.cache
COPY . /root/src
Build the base container. Make sure you are in the Coach root directory before building.
.. code-block:: bash
$ docker build -t coach-base:master -f docker/Dockerfile.base .
If you would like to use the Mujoco environment, save this key as an environment variable. Replace `<mujoco_key>` with the
contents of your mujoco key.
.. code-block:: bash
$ export MUJOCO_KEY=<mujoco_key>
Build the container for your environment.
Replace `<env>` with your choice of environment. The choices are `gym`, `mujoco` and `doom`.
Replace `<user-name>`, `<image-name>` and `<tag>` with appropriate values.
.. code-block:: bash
$ docker build --build-arg MUJOCO_KEY=${MUJOCO_KEY} -t <user-name>/<image-name>:<tag> -f docker/Dockerfile.<env> .
Push the container to a registry of your choice. Replace `<user-name>`, `<image-name>` and `<tag>` with appropriate values.
.. code-block:: bash
$ docker push <user-name>/<image-name>:<tag>
Create a Config file
--------------------
Add the following contents to file.
Replace `<user-name>`, `<image-name>`, `<tag>`, `<bucket-name>` and `<path-to-aws-credentials>` with appropriate values.
.. code-block:: bash
[coach]
image = <user-name>/<image-name>:<tag>
memory_backend = redispubsub
data_store = s3
s3_end_point = s3.amazonaws.com
s3_bucket_name = <bucket-name>
s3_creds_file = <path-to-aws-credentials>
Run Distributed Coach
---------------------
The following command will run distributed Coach with CartPole_ClippedPPO preset, Redis Pub/Sub as the memory backend, S3 as the data store in Kubernetes
with three rollout workers.
.. code-block:: bash
$ python3 rl_coach/coach.py -p CartPole_ClippedPPO \
-dc \
-e <experiment-name> \
-n 3 \
-dcp <path-to-config-file>
+5
View File
@@ -36,6 +36,7 @@ You can find more details in the `GitHub repository <https://github.com/NervanaS
:titlesonly:
usage
dist_usage
features/index
selecting_an_algorithm
dashboard
@@ -47,6 +48,7 @@ You can find more details in the `GitHub repository <https://github.com/NervanaS
design/control_flow
design/network
design/horizontal_scaling
.. toctree::
:maxdepth: 1
@@ -61,10 +63,13 @@ You can find more details in the `GitHub repository <https://github.com/NervanaS
components/agents/index
components/architectures/index
components/data_stores/index
components/environments/index
components/exploration_policies/index
components/filters/index
components/memories/index
components/memory_backends/index
components/orchestrators/index
components/core_types
components/spaces
components/additional_parameters
+16 -2
View File
@@ -1,7 +1,7 @@
Usage
=====
One of the mechanism Coach uses for running experiments is the **Preset** mechanism.
One of the mechanisms Coach uses for running experiments is the **Preset** mechanism.
As its name implies, a preset defines a set of predefined experiment parameters.
This allows defining a *complex* agent-environment interaction, with multiple parameters, and later running it through
a very *simple* command line.
@@ -29,7 +29,7 @@ To list the available presets, use the `-l` flag.
Multi-threaded Algorithms
+++++++++++++++++++++++++
Multi-threaded algorithms are very common this days.
Multi-threaded algorithms are very common these days.
They typically achieve the best results, and scale gracefully with the number of threads.
In Coach, running such algorithms is done by selecting a suitable preset, and choosing the number of threads to run using the :code:`-n` flag.
@@ -39,6 +39,20 @@ In Coach, running such algorithms is done by selecting a suitable preset, and ch
coach -p CartPole_A3C -n 8
Multi-Node Algorithms
+++++++++++++++++++++++++
Coach supports the multi-node runs in distributed mode. Specifically, the horizontal scale-out of rollout workers is implemented.
In Coach, running such algorithms is done by selecting a suitable preset, enabling distributed coach using :code:`-dc` flag,
passing distributed coach parameters using :code:`dcp` and choosing the number of to run using the :code:`-n` flag.
For more details and instructions on how to use distributed Coach, see :ref:`dist-coach-usage`.
*Example:*
.. code-block:: python
coach -p CartPole_ClippedPPO -dc -dcp <path-to-config-file> -n 8
Evaluating an Agent
-------------------
+10 -2
View File
@@ -86,6 +86,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../dashboard.html">Coach Dashboard</a></li>
@@ -94,6 +95,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -104,10 +106,13 @@
<ul class="current">
<li class="toctree-l1"><a class="reference internal" href="agents/index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="spaces.html">Spaces</a></li>
<li class="toctree-l1 current"><a class="current reference internal" href="#">Additional Parameters</a><ul>
@@ -271,7 +276,7 @@ of the trace tests suite.</li>
<h2>TaskParameters<a class="headerlink" href="#taskparameters" title="Permalink to this headline"></a></h2>
<dl class="class">
<dt id="rl_coach.base_parameters.TaskParameters">
<em class="property">class </em><code class="descclassname">rl_coach.base_parameters.</code><code class="descname">TaskParameters</code><span class="sig-paren">(</span><em>framework_type: rl_coach.base_parameters.Frameworks = &lt;Frameworks.tensorflow: 'TensorFlow'&gt;</em>, <em>evaluate_only: bool = False</em>, <em>use_cpu: bool = False</em>, <em>experiment_path='/tmp'</em>, <em>seed=None</em>, <em>checkpoint_save_secs=None</em>, <em>checkpoint_restore_dir=None</em>, <em>checkpoint_save_dir=None</em>, <em>export_onnx_graph: bool = False</em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/rl_coach/base_parameters.html#TaskParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.base_parameters.TaskParameters" title="Permalink to this definition"></a></dt>
<em class="property">class </em><code class="descclassname">rl_coach.base_parameters.</code><code class="descname">TaskParameters</code><span class="sig-paren">(</span><em>framework_type: rl_coach.base_parameters.Frameworks = &lt;Frameworks.tensorflow: 'TensorFlow'&gt;</em>, <em>evaluate_only: bool = False</em>, <em>use_cpu: bool = False</em>, <em>experiment_path='/tmp'</em>, <em>seed=None</em>, <em>checkpoint_save_secs=None</em>, <em>checkpoint_restore_dir=None</em>, <em>checkpoint_save_dir=None</em>, <em>export_onnx_graph: bool = False</em>, <em>apply_stop_condition: bool = False</em>, <em>num_gpu: int = 1</em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/rl_coach/base_parameters.html#TaskParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.base_parameters.TaskParameters" title="Permalink to this definition"></a></dt>
<dd><table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
@@ -286,6 +291,8 @@ of the trace tests suite.</li>
<li><strong>checkpoint_restore_dir</strong> the directory to restore the checkpoints from</li>
<li><strong>checkpoint_save_dir</strong> the directory to store the checkpoints in</li>
<li><strong>export_onnx_graph</strong> If set to True, this will export an onnx graph each time a checkpoint is saved</li>
<li><strong>apply_stop_condition</strong> If set to True, this will apply the stop condition defined by reaching a target success rate</li>
<li><strong>num_gpu</strong> number of GPUs to use</li>
</ul>
</td>
</tr>
@@ -298,7 +305,7 @@ of the trace tests suite.</li>
<h2>DistributedTaskParameters<a class="headerlink" href="#distributedtaskparameters" title="Permalink to this headline"></a></h2>
<dl class="class">
<dt id="rl_coach.base_parameters.DistributedTaskParameters">
<em class="property">class </em><code class="descclassname">rl_coach.base_parameters.</code><code class="descname">DistributedTaskParameters</code><span class="sig-paren">(</span><em>framework_type: rl_coach.base_parameters.Frameworks</em>, <em>parameters_server_hosts: str</em>, <em>worker_hosts: str</em>, <em>job_type: str</em>, <em>task_index: int</em>, <em>evaluate_only: bool = False</em>, <em>num_tasks: int = None</em>, <em>num_training_tasks: int = None</em>, <em>use_cpu: bool = False</em>, <em>experiment_path=None</em>, <em>dnd=None</em>, <em>shared_memory_scratchpad=None</em>, <em>seed=None</em>, <em>checkpoint_save_secs=None</em>, <em>checkpoint_restore_dir=None</em>, <em>checkpoint_save_dir=None</em>, <em>export_onnx_graph: bool = False</em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/rl_coach/base_parameters.html#DistributedTaskParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.base_parameters.DistributedTaskParameters" title="Permalink to this definition"></a></dt>
<em class="property">class </em><code class="descclassname">rl_coach.base_parameters.</code><code class="descname">DistributedTaskParameters</code><span class="sig-paren">(</span><em>framework_type: rl_coach.base_parameters.Frameworks</em>, <em>parameters_server_hosts: str</em>, <em>worker_hosts: str</em>, <em>job_type: str</em>, <em>task_index: int</em>, <em>evaluate_only: bool = False</em>, <em>num_tasks: int = None</em>, <em>num_training_tasks: int = None</em>, <em>use_cpu: bool = False</em>, <em>experiment_path=None</em>, <em>dnd=None</em>, <em>shared_memory_scratchpad=None</em>, <em>seed=None</em>, <em>checkpoint_save_secs=None</em>, <em>checkpoint_restore_dir=None</em>, <em>checkpoint_save_dir=None</em>, <em>export_onnx_graph: bool = False</em>, <em>apply_stop_condition: bool = False</em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/rl_coach/base_parameters.html#DistributedTaskParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.base_parameters.DistributedTaskParameters" title="Permalink to this definition"></a></dt>
<dd><table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
@@ -321,6 +328,7 @@ assigned</li>
<li><strong>checkpoint_restore_dir</strong> the directory to restore the checkpoints from</li>
<li><strong>checkpoint_save_dir</strong> the directory to store the checkpoints in</li>
<li><strong>export_onnx_graph</strong> If set to True, this will export an onnx graph each time a checkpoint is saved</li>
<li><strong>apply_stop_condition</strong> If set to True, this will apply the stop condition defined by reaching a target success rate</li>
</ul>
</td>
</tr>
+5
View File
@@ -87,6 +87,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dashboard.html">Coach Dashboard</a></li>
@@ -95,6 +96,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -134,10 +136,13 @@
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../../architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../additional_parameters.html">Additional Parameters</a></li>
@@ -87,6 +87,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dashboard.html">Coach Dashboard</a></li>
@@ -95,6 +96,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -134,10 +136,13 @@
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../../architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../additional_parameters.html">Additional Parameters</a></li>
+60 -2
View File
@@ -87,6 +87,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../dashboard.html">Coach Dashboard</a></li>
@@ -95,6 +96,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -127,10 +129,13 @@
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../additional_parameters.html">Additional Parameters</a></li>
@@ -325,6 +330,23 @@ training or testing.</p>
</table>
</dd></dl>
<dl class="method">
<dt id="rl_coach.agents.agent.Agent.collect_savers">
<code class="descname">collect_savers</code><span class="sig-paren">(</span><em>parent_path_suffix: str</em><span class="sig-paren">)</span> &#x2192; rl_coach.saver.SaverCollection<a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.collect_savers"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.collect_savers" title="Permalink to this definition"></a></dt>
<dd><p>Collect all of agents network savers
:param parent_path_suffix: path suffix of the parent of the agent</p>
<blockquote>
<div>(could be name of level manager or composite agent)</div></blockquote>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Returns:</th><td class="field-body">collection of all agent savers</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="rl_coach.agents.agent.Agent.create_networks">
<code class="descname">create_networks</code><span class="sig-paren">(</span><span class="sig-paren">)</span> &#x2192; Dict[str, rl_coach.architectures.network_wrapper.NetworkWrapper]<a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.create_networks"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.create_networks" title="Permalink to this definition"></a></dt>
@@ -341,6 +363,26 @@ for creating the network.</p>
</table>
</dd></dl>
<dl class="method">
<dt id="rl_coach.agents.agent.Agent.emulate_act_on_trainer">
<code class="descname">emulate_act_on_trainer</code><span class="sig-paren">(</span><em>transition: rl_coach.core_types.Transition</em><span class="sig-paren">)</span> &#x2192; rl_coach.core_types.ActionInfo<a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.emulate_act_on_trainer"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.emulate_act_on_trainer" title="Permalink to this definition"></a></dt>
<dd><p>This emulates the act using the transition obtained from the rollout worker on the training worker
in case of distributed training.
Given the agents current knowledge, decide on the next action to apply to the environment
:return: an action and a dictionary containing any additional info from the action decision process</p>
</dd></dl>
<dl class="method">
<dt id="rl_coach.agents.agent.Agent.emulate_observe_on_trainer">
<code class="descname">emulate_observe_on_trainer</code><span class="sig-paren">(</span><em>transition: rl_coach.core_types.Transition</em><span class="sig-paren">)</span> &#x2192; bool<a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.emulate_observe_on_trainer"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.emulate_observe_on_trainer" title="Permalink to this definition"></a></dt>
<dd><p>This emulates the observe using the transition obtained from the rollout worker on the training worker
in case of distributed training.
Given a response from the environment, distill the observation from it and store it for later use.
The response should be a dictionary containing the performed action, the new observation and measurements,
the reward, a game over flag and any additional information necessary.
:return:</p>
</dd></dl>
<dl class="method">
<dt id="rl_coach.agents.agent.Agent.get_predictions">
<code class="descname">get_predictions</code><span class="sig-paren">(</span><em>states: List[Dict[str, numpy.ndarray]], prediction_type: rl_coach.core_types.PredictionType</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.get_predictions"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.get_predictions" title="Permalink to this definition"></a></dt>
@@ -580,6 +622,22 @@ by val, and by the current phase set in self.phase.</p>
</table>
</dd></dl>
<dl class="method">
<dt id="rl_coach.agents.agent.Agent.restore_checkpoint">
<code class="descname">restore_checkpoint</code><span class="sig-paren">(</span><em>checkpoint_dir: str</em><span class="sig-paren">)</span> &#x2192; None<a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.restore_checkpoint"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.restore_checkpoint" title="Permalink to this definition"></a></dt>
<dd><p>Allows agents to store additional information when saving checkpoints.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>checkpoint_dir</strong> The checkpoint dir to restore from</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">None</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="rl_coach.agents.agent.Agent.run_pre_network_filter_for_inference">
<code class="descname">run_pre_network_filter_for_inference</code><span class="sig-paren">(</span><em>state: Dict[str, numpy.ndarray]</em><span class="sig-paren">)</span> &#x2192; Dict[str, numpy.ndarray]<a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.run_pre_network_filter_for_inference"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.run_pre_network_filter_for_inference" title="Permalink to this definition"></a></dt>
@@ -598,13 +656,13 @@ by val, and by the current phase set in self.phase.</p>
<dl class="method">
<dt id="rl_coach.agents.agent.Agent.save_checkpoint">
<code class="descname">save_checkpoint</code><span class="sig-paren">(</span><em>checkpoint_id: int</em><span class="sig-paren">)</span> &#x2192; None<a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.save_checkpoint"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.save_checkpoint" title="Permalink to this definition"></a></dt>
<code class="descname">save_checkpoint</code><span class="sig-paren">(</span><em>checkpoint_prefix: str</em><span class="sig-paren">)</span> &#x2192; None<a class="reference internal" href="../../_modules/rl_coach/agents/agent.html#Agent.save_checkpoint"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.agent.Agent.save_checkpoint" title="Permalink to this definition"></a></dt>
<dd><p>Allows agents to store additional information when saving checkpoints.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>checkpoint_id</strong> the id of the checkpoint</td>
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>checkpoint_prefix</strong> The prefix of the checkpoint file to save</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">None</td>
</tr>
+5
View File
@@ -87,6 +87,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dashboard.html">Coach Dashboard</a></li>
@@ -95,6 +96,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -135,10 +137,13 @@
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../../architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../additional_parameters.html">Additional Parameters</a></li>
@@ -87,6 +87,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dashboard.html">Coach Dashboard</a></li>
@@ -95,6 +96,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -135,10 +137,13 @@
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../../architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../additional_parameters.html">Additional Parameters</a></li>
@@ -87,6 +87,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dashboard.html">Coach Dashboard</a></li>
@@ -95,6 +96,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -135,10 +137,13 @@
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../../architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../additional_parameters.html">Additional Parameters</a></li>
@@ -87,6 +87,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dashboard.html">Coach Dashboard</a></li>
@@ -95,6 +96,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -135,10 +137,13 @@
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../../architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../additional_parameters.html">Additional Parameters</a></li>
@@ -85,6 +85,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dashboard.html">Coach Dashboard</a></li>
@@ -93,6 +94,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -103,10 +105,13 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../index.html">Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../additional_parameters.html">Additional Parameters</a></li>
@@ -87,6 +87,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dashboard.html">Coach Dashboard</a></li>
@@ -95,6 +96,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -135,10 +137,13 @@
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../../architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../additional_parameters.html">Additional Parameters</a></li>
@@ -87,6 +87,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dashboard.html">Coach Dashboard</a></li>
@@ -95,6 +96,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -135,10 +137,13 @@
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../../architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../additional_parameters.html">Additional Parameters</a></li>
@@ -87,6 +87,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dashboard.html">Coach Dashboard</a></li>
@@ -95,6 +96,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -136,10 +138,13 @@
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../../architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../additional_parameters.html">Additional Parameters</a></li>
@@ -87,6 +87,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dashboard.html">Coach Dashboard</a></li>
@@ -95,6 +96,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -134,10 +136,13 @@
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../../architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../additional_parameters.html">Additional Parameters</a></li>
@@ -87,6 +87,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dashboard.html">Coach Dashboard</a></li>
@@ -95,6 +96,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -134,10 +136,13 @@
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../../architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../additional_parameters.html">Additional Parameters</a></li>
@@ -87,6 +87,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dashboard.html">Coach Dashboard</a></li>
@@ -95,6 +96,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -134,10 +136,13 @@
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../../architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../additional_parameters.html">Additional Parameters</a></li>
@@ -87,6 +87,7 @@
<p class="caption"><span class="caption-text">Intro</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dist_usage.html">Usage - Distributed Coach</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../features/index.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../selecting_an_algorithm.html">Selecting an Algorithm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dashboard.html">Coach Dashboard</a></li>
@@ -95,6 +96,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../design/control_flow.html">Control Flow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/network.html">Network Design</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../design/horizontal_scaling.html">Distributed Coach - Horizontal Scale-Out</a></li>
</ul>
<p class="caption"><span class="caption-text">Contributing</span></p>
<ul>
@@ -131,10 +133,13 @@
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../../architectures/index.html">Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../data_stores/index.html">Data Stores</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../environments/index.html">Environments</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../exploration_policies/index.html">Exploration Policies</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../filters/index.html">Filters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../memories/index.html">Memories</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../memory_backends/index.html">Memory Backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../orchestrators/index.html">Orchestrators</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../core_types.html">Core Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../spaces.html">Spaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../additional_parameters.html">Additional Parameters</a></li>

Some files were not shown because too many files have changed in this diff Show More