Mirror of https://github.com/gryf/coach.git, synced 2025-12-17 19:20:19 +01:00

Docs changes - fixing blogpost links, removing importing all exploration policies (#139)

* updated docs
* removing imports for all exploration policies in __init__ + setting the right blog-post link
* small cleanups

Committed by Scott Leishman · parent 155b78b995 · commit f12857a8c7
@@ -28,7 +28,7 @@ Blog posts from the Intel® AI website:

* [Release 0.8.0](https://ai.intel.com/reinforcement-learning-coach-intel/) (initial release)
* [Release 0.9.0](https://ai.intel.com/reinforcement-learning-coach-carla-qr-dqn/)
* [Release 0.10.0](https://ai.intel.com/introducing-reinforcement-learning-coach-0-10-0/)
-* Release 0.11 (current release)
+* [Release 0.11.0](https://ai.intel.com/rl-coach-data-science-at-scale) (current release)

Contacting the Coach development team is also possible through the email [coach@intel.com](coach@intel.com)
@@ -195,13 +195,11 @@
#

import copy
import os
import random
from collections import OrderedDict
from typing import Dict, List, Union, Tuple

import numpy as np
from pandas import read_pickle
from six.moves import range

from rl_coach.agents.agent_interface import AgentInterface
@@ -215,7 +215,8 @@
        super().__init__()
        self.num_steps_between_copying_online_weights_to_target = EnvironmentSteps(10000)
        self.num_consecutive_playing_steps = EnvironmentSteps(4)
-        self.discount = 0.99</div>
+        self.discount = 0.99
+        self.supports_parameter_noise = True</div>


class DQNNetworkParameters(NetworkParameters):


@@ -391,6 +391,9 @@
        # Should the workers wait for full episode
        self.act_for_full_episodes = False

+        # Support for parameter noise
+        self.supports_parameter_noise = False


[docs](../../components/additional_parameters.html#rl_coach.base_parameters.PresetValidationParameters) class PresetValidationParameters(Parameters):
    def __init__(self,
@@ -178,7 +178,24 @@
Source code for rl_coach.data_stores.nfs_data_store
-import uuid
+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#


+import uuid

from rl_coach.data_stores.data_store import DataStore, DataStoreParameters
@@ -178,7 +178,24 @@
Source code for rl_coach.data_stores.s3_data_store
-from rl_coach.data_stores.data_store import DataStore, DataStoreParameters
+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#


+from rl_coach.data_stores.data_store import DataStore, DataStoreParameters
from minio import Minio
from minio.error import ResponseError
from configparser import ConfigParser, Error
@@ -216,7 +216,7 @@
        return 'rl_coach.exploration_policies.additive_noise:AdditiveNoise'


-[docs](../../../components/exploration_policies/index.html#rl_coach.exploration_policies.AdditiveNoise) class AdditiveNoise(ExplorationPolicy):
+[docs](../../../components/exploration_policies/index.html#rl_coach.exploration_policies.additive_noise.AdditiveNoise) class AdditiveNoise(ExplorationPolicy):
    """
    AdditiveNoise is an exploration policy intended for continuous action spaces. It takes the action from the agent
    and adds a Gaussian distributed noise to it. The amount of noise added to the action follows the noise amount that
@@ -215,7 +215,7 @@


-[docs](../../../components/exploration_policies/index.html#rl_coach.exploration_policies.Boltzmann) class Boltzmann(ExplorationPolicy):
+[docs](../../../components/exploration_policies/index.html#rl_coach.exploration_policies.boltzmann.Boltzmann) class Boltzmann(ExplorationPolicy):
    """
    The Boltzmann exploration policy is intended for discrete action spaces. It assumes that each of the possible
    actions has some value assigned to it (such as the Q value), and uses a softmax function to convert these values
@@ -218,7 +218,7 @@
        return 'rl_coach.exploration_policies.bootstrapped:Bootstrapped'


-[docs](../../../components/exploration_policies/index.html#rl_coach.exploration_policies.Bootstrapped) class Bootstrapped(EGreedy):
+[docs](../../../components/exploration_policies/index.html#rl_coach.exploration_policies.bootstrapped.Bootstrapped) class Bootstrapped(EGreedy):
    """
    Bootstrapped exploration policy is currently only used for discrete action spaces along with the
    Bootstrapped DQN agent. It assumes that there is an ensemble of network heads, where each one predicts the
@@ -209,7 +209,7 @@
        return 'rl_coach.exploration_policies.categorical:Categorical'


-[docs](../../../components/exploration_policies/index.html#rl_coach.exploration_policies.Categorical) class Categorical(ExplorationPolicy):
+[docs](../../../components/exploration_policies/index.html#rl_coach.exploration_policies.categorical.Categorical) class Categorical(ExplorationPolicy):
    """
    Categorical exploration policy is intended for discrete action spaces. It expects the action values to
    represent a probability distribution over the action, from which a single action will be sampled.
@@ -203,7 +203,7 @@
        return 'rl_coach.exploration_policies.continuous_entropy:ContinuousEntropy'


-[docs](../../../components/exploration_policies/index.html#rl_coach.exploration_policies.ContinuousEntropy) class ContinuousEntropy(AdditiveNoise):
+[docs](../../../components/exploration_policies/index.html#rl_coach.exploration_policies.continuous_entropy.ContinuousEntropy) class ContinuousEntropy(AdditiveNoise):
    """
    Continuous entropy is an exploration policy that is actually implemented as part of the network.
    The exploration policy class is only a placeholder for choosing this policy. The exploration policy is
@@ -222,7 +222,7 @@
        return 'rl_coach.exploration_policies.e_greedy:EGreedy'


-[docs](../../../components/exploration_policies/index.html#rl_coach.exploration_policies.EGreedy) class EGreedy(ExplorationPolicy):
+[docs](../../../components/exploration_policies/index.html#rl_coach.exploration_policies.e_greedy.EGreedy) class EGreedy(ExplorationPolicy):
    """
    e-greedy is an exploration policy that is intended for both discrete and continuous action spaces.
@@ -210,7 +210,7 @@
        return 'rl_coach.exploration_policies.exploration_policy:ExplorationPolicy'


-[docs](../../../components/exploration_policies/index.html#rl_coach.exploration_policies.ExplorationPolicy) class ExplorationPolicy(object):
+[docs](../../../components/exploration_policies/index.html#rl_coach.exploration_policies.exploration_policy.ExplorationPolicy) class ExplorationPolicy(object):
    """
    An exploration policy takes the predicted actions or action values from the agent, and selects the action to
    actually apply to the environment using some predefined algorithm.

@@ -222,14 +222,14 @@
        self.phase = RunPhase.HEATUP
        self.action_space = action_space

-    [docs](../../../components/exploration_policies/index.html#rl_coach.exploration_policies.ExplorationPolicy.reset) def reset(self):
+    [docs](../../../components/exploration_policies/index.html#rl_coach.exploration_policies.exploration_policy.ExplorationPolicy.reset) def reset(self):
        """
        Used for resetting the exploration policy parameters when needed
        :return: None
        """
        pass

-    [docs](../../../components/exploration_policies/index.html#rl_coach.exploration_policies.ExplorationPolicy.get_action) def get_action(self, action_values: List[ActionType]) -> ActionType:
+    [docs](../../../components/exploration_policies/index.html#rl_coach.exploration_policies.exploration_policy.ExplorationPolicy.get_action) def get_action(self, action_values: List[ActionType]) -> ActionType:
        """
        Given a list of values corresponding to each action,
        choose one actions according to the exploration policy

@@ -243,7 +243,7 @@
        else:
            raise ValueError("The get_action function should be overridden in the inheriting exploration class")

-    [docs](../../../components/exploration_policies/index.html#rl_coach.exploration_policies.ExplorationPolicy.change_phase) def change_phase(self, phase):
+    [docs](../../../components/exploration_policies/index.html#rl_coach.exploration_policies.exploration_policy.ExplorationPolicy.change_phase) def change_phase(self, phase):
        """
        Change between running phases of the algorithm
        :param phase: Either Heatup or Train

@@ -251,7 +251,7 @@
        """
        self.phase = phase

-    [docs](../../../components/exploration_policies/index.html#rl_coach.exploration_policies.ExplorationPolicy.requires_action_values) def requires_action_values(self) -> bool:
+    [docs](../../../components/exploration_policies/index.html#rl_coach.exploration_policies.exploration_policy.ExplorationPolicy.requires_action_values) def requires_action_values(self) -> bool:
        """
        Allows exploration policies to define if they require the action values for the current step.
        This can save up a lot of computation. For example in e-greedy, if the random value generated is smaller
@@ -209,7 +209,7 @@
        return 'rl_coach.exploration_policies.greedy:Greedy'


-[docs](../../../components/exploration_policies/index.html#rl_coach.exploration_policies.Greedy) class Greedy(ExplorationPolicy):
+[docs](../../../components/exploration_policies/index.html#rl_coach.exploration_policies.greedy.Greedy) class Greedy(ExplorationPolicy):
    """
    The Greedy exploration policy is intended for both discrete and continuous action spaces.
    For discrete action spaces, it always selects the action with the maximum value, as given by the agent.
@@ -219,7 +219,7 @@


# Ornstein-Uhlenbeck process
-[docs](../../../components/exploration_policies/index.html#rl_coach.exploration_policies.OUProcess) class OUProcess(ExplorationPolicy):
+[docs](../../../components/exploration_policies/index.html#rl_coach.exploration_policies.ou_process.OUProcess) class OUProcess(ExplorationPolicy):
    """
    OUProcess exploration policy is intended for continuous action spaces, and selects the action according to
    an Ornstein-Uhlenbeck process. The Ornstein-Uhlenbeck process implements the action as a Gaussian process, where
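For reference, a minimal discretized Ornstein-Uhlenbeck update matching the parameter names (mu, theta, sigma, dt) in the OUProcess signature documented later in this diff; this is the textbook form of the process, not Coach's implementation:

    import numpy as np

    def ou_step(state, mu=0.0, theta=0.15, sigma=0.2, dt=0.01):
        # One Euler step of an Ornstein-Uhlenbeck process: mean-reverting Gaussian noise.
        return state + theta * (mu - state) * dt + sigma * np.sqrt(dt) * np.random.randn(*state.shape)

    # Example: accumulate temporally correlated noise and add it to a deterministic action.
    noise = np.zeros(2)
    action = np.array([0.1, -0.3])
    for _ in range(5):
        noise = ou_step(noise)
    exploratory_action = action + noise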
@@ -210,7 +210,8 @@
class ParameterNoiseParameters(ExplorationParameters):
    def __init__(self, agent_params: AgentParameters):
        super().__init__()
-        if not isinstance(agent_params, DQNAgentParameters):
+        if not agent_params.algorithm.supports_parameter_noise:
            raise ValueError("Currently only DQN variants are supported for using an exploration type of "
                             "ParameterNoise.")

@@ -221,7 +222,7 @@
        return 'rl_coach.exploration_policies.parameter_noise:ParameterNoise'


-[docs](../../../components/exploration_policies/index.html#rl_coach.exploration_policies.ParameterNoise) class ParameterNoise(ExplorationPolicy):
+[docs](../../../components/exploration_policies/index.html#rl_coach.exploration_policies.parameter_noise.ParameterNoise) class ParameterNoise(ExplorationPolicy):
    """
    The ParameterNoise exploration policy is intended for both discrete and continuous action spaces.
    It applies the exploration policy by replacing all the dense network layers with noisy layers.
@@ -218,7 +218,7 @@
        return 'rl_coach.exploration_policies.truncated_normal:TruncatedNormal'


-[docs](../../../components/exploration_policies/index.html#rl_coach.exploration_policies.TruncatedNormal) class TruncatedNormal(ExplorationPolicy):
+[docs](../../../components/exploration_policies/index.html#rl_coach.exploration_policies.truncated_normal.TruncatedNormal) class TruncatedNormal(ExplorationPolicy):
    """
    The TruncatedNormal exploration policy is intended for continuous action spaces. It samples the action from a
    normal distribution, where the mean action is given by the agent, and the standard deviation can be given in t
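A generic sketch of sampling from a truncated normal around the agent's mean action, using scipy.stats.truncnorm; the bounds and std handling here are assumptions for illustration, not Coach's implementation:

    import numpy as np
    from scipy.stats import truncnorm

    def truncated_normal_action(mean_action, std, low, high):
        # Convert absolute bounds to the standard-normal coordinates truncnorm expects.
        a = (low - mean_action) / std
        b = (high - mean_action) / std
        return truncnorm.rvs(a, b, loc=mean_action, scale=std)

    # Example: mean action 0.2 with std 0.1, bounded action space [-1, 1].
    print(truncated_normal_action(0.2, 0.1, -1.0, 1.0))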
@@ -222,7 +222,7 @@
        return 'rl_coach.exploration_policies.ucb:UCB'


-[docs](../../../components/exploration_policies/index.html#rl_coach.exploration_policies.UCB) class UCB(EGreedy):
+[docs](../../../components/exploration_policies/index.html#rl_coach.exploration_policies.ucb.UCB) class UCB(EGreedy):
    """
    UCB exploration policy is following the upper confidence bound heuristic to sample actions in discrete action spaces.
    It assumes that there are multiple network heads that are predicting action values, and that the standard deviation
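A minimal sketch of the upper-confidence-bound selection described in the docstring above, assuming the spread across heads is used as the uncertainty term; the bonus weight is a hypothetical parameter, not Coach's:

    import numpy as np

    def ucb_action(per_head_q_values, uncertainty_bonus=1.0):
        # Pick the action maximizing mean Q across heads plus a bonus times the std across heads.
        q = np.asarray(per_head_q_values)          # shape: (num_heads, num_actions)
        score = q.mean(axis=0) + uncertainty_bonus * q.std(axis=0)
        return int(np.argmax(score))

    # Example: 3 heads, 4 actions.
    print(ucb_action([[1.0, 0.2, 0.3, 0.1],
                      [0.9, 0.4, 0.2, 0.2],
                      [1.1, 0.1, 0.5, 0.0]]))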
@@ -178,7 +178,23 @@
Source code for rl_coach.memories.backend.redis
-
+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#


import redis
import pickle
import uuid
@@ -178,7 +178,24 @@
Source code for rl_coach.memories.non_episodic.transition_collection
-from rl_coach.core_types import Transition
+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#


+from rl_coach.core_types import Transition


[docs](../../../../components/memories/index.html#rl_coach.memories.non_episodic.TransitionCollection) class TransitionCollection(object):
@@ -178,7 +178,24 @@
Source code for rl_coach.orchestrators.kubernetes_orchestrator
-import os
+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#


+import os
import uuid
import json
import time
@@ -38,50 +38,50 @@ spaces.

ExplorationPolicy
-----------------
-.. autoclass:: rl_coach.exploration_policies.ExplorationPolicy
+.. autoclass:: rl_coach.exploration_policies.exploration_policy.ExplorationPolicy
   :members:
   :inherited-members:

AdditiveNoise
-------------
-.. autoclass:: rl_coach.exploration_policies.AdditiveNoise
+.. autoclass:: rl_coach.exploration_policies.additive_noise.AdditiveNoise

Boltzmann
---------
-.. autoclass:: rl_coach.exploration_policies.Boltzmann
+.. autoclass:: rl_coach.exploration_policies.boltzmann.Boltzmann

Bootstrapped
------------
-.. autoclass:: rl_coach.exploration_policies.Bootstrapped
+.. autoclass:: rl_coach.exploration_policies.bootstrapped.Bootstrapped

Categorical
-----------
-.. autoclass:: rl_coach.exploration_policies.Categorical
+.. autoclass:: rl_coach.exploration_policies.categorical.Categorical

ContinuousEntropy
-----------------
-.. autoclass:: rl_coach.exploration_policies.ContinuousEntropy
+.. autoclass:: rl_coach.exploration_policies.continuous_entropy.ContinuousEntropy

EGreedy
-------
-.. autoclass:: rl_coach.exploration_policies.EGreedy
+.. autoclass:: rl_coach.exploration_policies.e_greedy.EGreedy

Greedy
------
-.. autoclass:: rl_coach.exploration_policies.Greedy
+.. autoclass:: rl_coach.exploration_policies.greedy.Greedy

OUProcess
---------
-.. autoclass:: rl_coach.exploration_policies.OUProcess
+.. autoclass:: rl_coach.exploration_policies.ou_process.OUProcess

ParameterNoise
--------------
-.. autoclass:: rl_coach.exploration_policies.ParameterNoise
+.. autoclass:: rl_coach.exploration_policies.parameter_noise.ParameterNoise

TruncatedNormal
---------------
-.. autoclass:: rl_coach.exploration_policies.TruncatedNormal
+.. autoclass:: rl_coach.exploration_policies.truncated_normal.TruncatedNormal

UCB
---
-.. autoclass:: rl_coach.exploration_policies.UCB
+.. autoclass:: rl_coach.exploration_policies.ucb.UCB
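Since the package-level re-exports are being dropped (per the commit message), downstream code presumably has to import each policy from its defining module, mirroring the autoclass paths above; a hypothetical before/after sketch:

    # Before: relied on the re-exports in rl_coach/exploration_policies/__init__.py
    # from rl_coach.exploration_policies import EGreedy
    # After: import from the defining submodule, matching the autoclass paths above.
    from rl_coach.exploration_policies.e_greedy import EGreedy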
@@ -25,7 +25,7 @@ Blog posts from the Intel® AI website:

* `Release 0.10.0 <https://ai.intel.com/introducing-reinforcement-learning-coach-0-10-0/)>`_

-* `Release 0.11.0 <https://ai.intel.com/>`_ (current release)
+* `Release 0.11.0 <https://ai.intel.com/rl-coach-data-science-at-scale/>`_ (current release)

You can find more details in the `GitHub repository <https://github.com/NervanaSystems/coach>`_.
@@ -264,8 +264,8 @@ spaces.</p>
ExplorationPolicy
-<dt id="rl_coach.exploration_policies.ExplorationPolicy">
-class rl_coach.exploration_policies.ExplorationPolicy(action_space: rl_coach.spaces.ActionSpace) [source]
+<dt id="rl_coach.exploration_policies.exploration_policy.ExplorationPolicy">
+class rl_coach.exploration_policies.exploration_policy.ExplorationPolicy(action_space: rl_coach.spaces.ActionSpace) [source]
An exploration policy takes the predicted actions or action values from the agent, and selects the action to
actually apply to the environment using some predefined algorithm.
@@ -277,16 +277,16 @@ actually apply to the environment using some predefined algorithm.</p>
-<dt id="rl_coach.exploration_policies.ExplorationPolicy.change_phase">
+<dt id="rl_coach.exploration_policies.exploration_policy.ExplorationPolicy.change_phase">
change_phase(phase) [source]
Change between running phases of the algorithm
:param phase: Either Heatup or Train
:return: none

-<dt id="rl_coach.exploration_policies.ExplorationPolicy.get_action">
+<dt id="rl_coach.exploration_policies.exploration_policy.ExplorationPolicy.get_action">
get_action(action_values: List[Union[int, float, numpy.ndarray, List]]) → Union[int, float, numpy.ndarray, List] [source]
Given a list of values corresponding to each action,
choose one actions according to the exploration policy
:param action_values: A list of action values

@@ -294,8 +294,8 @@ choose one actions according to the exploration policy
-<dt id="rl_coach.exploration_policies.ExplorationPolicy.requires_action_values">
+<dt id="rl_coach.exploration_policies.exploration_policy.ExplorationPolicy.requires_action_values">
requires_action_values() → bool [source]
Allows exploration policies to define if they require the action values for the current step.
This can save up a lot of computation. For example in e-greedy, if the random value generated is smaller
than epsilon, the action is completely random, and the action values don’t need to be calculated

@@ -303,8 +303,8 @@ than epsilon, the action is completely random, and the action values don’t nee
-<dt id="rl_coach.exploration_policies.ExplorationPolicy.reset">
+<dt id="rl_coach.exploration_policies.exploration_policy.ExplorationPolicy.reset">
reset() [source]
Used for resetting the exploration policy parameters when needed
:return: None
@@ -315,8 +315,8 @@ than epsilon, the action is completely random, and the action values don’t nee
AdditiveNoise
-<dt id="rl_coach.exploration_policies.AdditiveNoise">
-class rl_coach.exploration_policies.AdditiveNoise(action_space: rl_coach.spaces.ActionSpace, noise_percentage_schedule: rl_coach.schedules.Schedule, evaluation_noise_percentage: float) [source]
+<dt id="rl_coach.exploration_policies.additive_noise.AdditiveNoise">
+class rl_coach.exploration_policies.additive_noise.AdditiveNoise(action_space: rl_coach.spaces.ActionSpace, noise_percentage_schedule: rl_coach.schedules.Schedule, evaluation_noise_percentage: float) [source]
AdditiveNoise is an exploration policy intended for continuous action spaces. It takes the action from the agent
and adds a Gaussian distributed noise to it. The amount of noise added to the action follows the noise amount that
can be given in two different ways:
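As an illustration of the behaviour described above, a minimal numpy sketch; treating the noise std as a percentage of the action-space range is an assumption suggested by the noise_percentage_schedule parameter, not Coach's exact scaling:

    import numpy as np

    def additive_noise_action(action, noise_percentage, low, high):
        # Add zero-mean Gaussian noise whose std is a percentage of the action-space range.
        std = noise_percentage * (np.asarray(high) - np.asarray(low))
        return np.asarray(action) + np.random.normal(0.0, std)

    # Example: 10% noise in a [-1, 1] box.
    print(additive_noise_action([0.2, -0.5], 0.1, [-1, -1], [1, 1]))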
@@ -343,8 +343,8 @@ of the action space</li>
Boltzmann
-<dt id="rl_coach.exploration_policies.Boltzmann">
-class rl_coach.exploration_policies.Boltzmann(action_space: rl_coach.spaces.ActionSpace, temperature_schedule: rl_coach.schedules.Schedule) [source]
+<dt id="rl_coach.exploration_policies.boltzmann.Boltzmann">
+class rl_coach.exploration_policies.boltzmann.Boltzmann(action_space: rl_coach.spaces.ActionSpace, temperature_schedule: rl_coach.schedules.Schedule) [source]
The Boltzmann exploration policy is intended for discrete action spaces. It assumes that each of the possible
actions has some value assigned to it (such as the Q value), and uses a softmax function to convert these values
into a distribution over the actions. It then samples the action for playing out of the calculated distribution.
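A minimal standalone sketch of the softmax sampling described above; the temperature handling is an assumption for illustration and is not Coach's implementation:

    import numpy as np

    def boltzmann_action(q_values, temperature=1.0):
        # Softmax the action values at the given temperature and sample an action from the result.
        q = np.asarray(q_values, dtype=float)
        logits = (q - q.max()) / temperature          # subtract the max for numerical stability
        probs = np.exp(logits) / np.exp(logits).sum()
        return int(np.random.choice(len(probs), p=probs))

    print(boltzmann_action([1.0, 2.0, 0.5], temperature=0.5))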
@@ -367,8 +367,8 @@ An additional temperature schedule can be given by the user, and will control th
Bootstrapped
-<dt id="rl_coach.exploration_policies.Bootstrapped">
-class rl_coach.exploration_policies.Bootstrapped(action_space: rl_coach.spaces.ActionSpace, epsilon_schedule: rl_coach.schedules.Schedule, evaluation_epsilon: float, architecture_num_q_heads: int, continuous_exploration_policy_parameters: rl_coach.exploration_policies.exploration_policy.ExplorationParameters = <rl_coach.exploration_policies.additive_noise.AdditiveNoiseParameters object>) [source]
+<dt id="rl_coach.exploration_policies.bootstrapped.Bootstrapped">
+class rl_coach.exploration_policies.bootstrapped.Bootstrapped(action_space: rl_coach.spaces.ActionSpace, epsilon_schedule: rl_coach.schedules.Schedule, evaluation_epsilon: float, architecture_num_q_heads: int, continuous_exploration_policy_parameters: rl_coach.exploration_policies.exploration_policy.ExplorationParameters = <rl_coach.exploration_policies.additive_noise.AdditiveNoiseParameters object>) [source]
Bootstrapped exploration policy is currently only used for discrete action spaces along with the
Bootstrapped DQN agent. It assumes that there is an ensemble of network heads, where each one predicts the
values for all the possible actions. For each episode, a single head is selected to lead the agent, according
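A minimal sketch of the per-episode head selection described above; it is illustrative only, and the BootstrappedHeadSelector name is hypothetical, not a Coach class:

    import numpy as np

    class BootstrappedHeadSelector:
        # Pick one head per episode and act greedily with that head's predictions.
        def __init__(self, num_heads):
            self.num_heads = num_heads
            self.current_head = 0

        def start_episode(self):
            # A single head is chosen to lead the agent for the whole episode.
            self.current_head = np.random.randint(self.num_heads)

        def get_action(self, per_head_q_values):
            q = np.asarray(per_head_q_values)        # shape: (num_heads, num_actions)
            return int(np.argmax(q[self.current_head]))

    selector = BootstrappedHeadSelector(num_heads=10)
    selector.start_episode()
    print(selector.get_action(np.random.rand(10, 4)))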
@@ -401,8 +401,8 @@ if the e-greedy is used for a continuous policy</li>
Categorical
-<dt id="rl_coach.exploration_policies.Categorical">
-class rl_coach.exploration_policies.Categorical(action_space: rl_coach.spaces.ActionSpace) [source]
+<dt id="rl_coach.exploration_policies.categorical.Categorical">
+class rl_coach.exploration_policies.categorical.Categorical(action_space: rl_coach.spaces.ActionSpace) [source]
Categorical exploration policy is intended for discrete action spaces. It expects the action values to
represent a probability distribution over the action, from which a single action will be sampled.
In evaluation, the action that has the highest probability will be selected. This is particularly useful for
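A minimal sketch of the sampling behaviour described above; the evaluation flag is an assumption standing in for Coach's run phases:

    import numpy as np

    def categorical_action(action_probabilities, evaluation=False):
        # Sample from the given distribution during training; take the most likely action in evaluation.
        p = np.asarray(action_probabilities, dtype=float)
        if evaluation:
            return int(np.argmax(p))
        return int(np.random.choice(len(p), p=p))

    print(categorical_action([0.7, 0.2, 0.1]))
    print(categorical_action([0.7, 0.2, 0.1], evaluation=True))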
@@ -421,8 +421,8 @@ actor-critic schemes, where the actors output is a probability distribution over
ContinuousEntropy
-<dt id="rl_coach.exploration_policies.ContinuousEntropy">
-class rl_coach.exploration_policies.ContinuousEntropy(action_space: rl_coach.spaces.ActionSpace, noise_percentage_schedule: rl_coach.schedules.Schedule, evaluation_noise_percentage: float) [source]
+<dt id="rl_coach.exploration_policies.continuous_entropy.ContinuousEntropy">
+class rl_coach.exploration_policies.continuous_entropy.ContinuousEntropy(action_space: rl_coach.spaces.ActionSpace, noise_percentage_schedule: rl_coach.schedules.Schedule, evaluation_noise_percentage: float) [source]
Continuous entropy is an exploration policy that is actually implemented as part of the network.
The exploration policy class is only a placeholder for choosing this policy. The exploration policy is
implemented by adding a regularization factor to the network loss, which regularizes the entropy of the action.
@@ -453,8 +453,8 @@ of the action space</li>
|
||||
<div class="section" id="egreedy">
|
||||
<h2>EGreedy<a class="headerlink" href="#egreedy" title="Permalink to this headline">¶</a></h2>
|
||||
<dl class="class">
|
||||
<dt id="rl_coach.exploration_policies.EGreedy">
|
||||
<em class="property">class </em><code class="descclassname">rl_coach.exploration_policies.</code><code class="descname">EGreedy</code><span class="sig-paren">(</span><em>action_space: rl_coach.spaces.ActionSpace</em>, <em>epsilon_schedule: rl_coach.schedules.Schedule</em>, <em>evaluation_epsilon: float</em>, <em>continuous_exploration_policy_parameters: rl_coach.exploration_policies.exploration_policy.ExplorationParameters = <rl_coach.exploration_policies.additive_noise.AdditiveNoiseParameters object></em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/exploration_policies/e_greedy.html#EGreedy"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.exploration_policies.EGreedy" title="Permalink to this definition">¶</a></dt>
|
||||
<dt id="rl_coach.exploration_policies.e_greedy.EGreedy">
|
||||
<em class="property">class </em><code class="descclassname">rl_coach.exploration_policies.e_greedy.</code><code class="descname">EGreedy</code><span class="sig-paren">(</span><em>action_space: rl_coach.spaces.ActionSpace</em>, <em>epsilon_schedule: rl_coach.schedules.Schedule</em>, <em>evaluation_epsilon: float</em>, <em>continuous_exploration_policy_parameters: rl_coach.exploration_policies.exploration_policy.ExplorationParameters = <rl_coach.exploration_policies.additive_noise.AdditiveNoiseParameters object></em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/exploration_policies/e_greedy.html#EGreedy"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.exploration_policies.e_greedy.EGreedy" title="Permalink to this definition">¶</a></dt>
|
||||
<dd><p>e-greedy is an exploration policy that is intended for both discrete and continuous action spaces.</p>
|
||||
<p>For discrete action spaces, it assumes that each action is assigned a value, and it selects the action with the
|
||||
highest value with probability 1 - epsilon. Otherwise, it selects an action sampled uniformly out of all the
|
||||
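A minimal sketch of the discrete case, assuming q_values holds the agent's per-action value estimates (illustrative only, not Coach's implementation):

import numpy as np

def e_greedy_action(q_values, epsilon):
    if np.random.rand() < epsilon:
        # explore: sample an action uniformly
        return np.random.randint(len(q_values))
    # exploit: take the highest-valued action
    return int(np.argmax(q_values))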
@@ -485,8 +485,8 @@ if the e-greedy is used for a continuous policy</li>
|
||||
<div class="section" id="greedy">
|
||||
<h2>Greedy<a class="headerlink" href="#greedy" title="Permalink to this headline">¶</a></h2>
|
||||
<dl class="class">
|
||||
<dt id="rl_coach.exploration_policies.Greedy">
|
||||
<em class="property">class </em><code class="descclassname">rl_coach.exploration_policies.</code><code class="descname">Greedy</code><span class="sig-paren">(</span><em>action_space: rl_coach.spaces.ActionSpace</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/exploration_policies/greedy.html#Greedy"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.exploration_policies.Greedy" title="Permalink to this definition">¶</a></dt>
|
||||
<dt id="rl_coach.exploration_policies.greedy.Greedy">
|
||||
<em class="property">class </em><code class="descclassname">rl_coach.exploration_policies.greedy.</code><code class="descname">Greedy</code><span class="sig-paren">(</span><em>action_space: rl_coach.spaces.ActionSpace</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/exploration_policies/greedy.html#Greedy"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.exploration_policies.greedy.Greedy" title="Permalink to this definition">¶</a></dt>
|
||||
<dd><p>The Greedy exploration policy is intended for both discrete and continuous action spaces.
|
||||
For discrete action spaces, it always selects the action with the maximum value, as given by the agent.
|
||||
For continuous action spaces, it always returns the exact action as it was given by the agent.</p>
|
||||
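A minimal sketch of the discrete case (the continuous case would simply return the agent's action unchanged):

import numpy as np

def greedy_action(action_values):
    # always take the highest-valued action reported by the agent
    return int(np.argmax(action_values))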
@@ -504,8 +504,8 @@ For continuous action spaces, it always return the exact action, as it was given
|
||||
<div class="section" id="ouprocess">
|
||||
<h2>OUProcess<a class="headerlink" href="#ouprocess" title="Permalink to this headline">¶</a></h2>
|
||||
<dl class="class">
|
||||
<dt id="rl_coach.exploration_policies.OUProcess">
|
||||
<em class="property">class </em><code class="descclassname">rl_coach.exploration_policies.</code><code class="descname">OUProcess</code><span class="sig-paren">(</span><em>action_space: rl_coach.spaces.ActionSpace</em>, <em>mu: float = 0</em>, <em>theta: float = 0.15</em>, <em>sigma: float = 0.2</em>, <em>dt: float = 0.01</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/exploration_policies/ou_process.html#OUProcess"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.exploration_policies.OUProcess" title="Permalink to this definition">¶</a></dt>
|
||||
<dt id="rl_coach.exploration_policies.ou_process.OUProcess">
|
||||
<em class="property">class </em><code class="descclassname">rl_coach.exploration_policies.ou_process.</code><code class="descname">OUProcess</code><span class="sig-paren">(</span><em>action_space: rl_coach.spaces.ActionSpace</em>, <em>mu: float = 0</em>, <em>theta: float = 0.15</em>, <em>sigma: float = 0.2</em>, <em>dt: float = 0.01</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/exploration_policies/ou_process.html#OUProcess"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.exploration_policies.ou_process.OUProcess" title="Permalink to this definition">¶</a></dt>
|
||||
<dd><p>The OUProcess exploration policy is intended for continuous action spaces, and selects the action according to
|
||||
an Ornstein-Uhlenbeck process. The Ornstein-Uhlenbeck process models the action as a Gaussian process, where
|
||||
the samples are correlated between consecutive time steps.</p>
|
||||
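A minimal sketch of the noise process, with parameter names mirroring the constructor above (mu, theta, sigma, dt); this is illustrative, not Coach's implementation:

import numpy as np

class OUNoise:
    def __init__(self, shape, mu=0.0, theta=0.15, sigma=0.2, dt=0.01):
        self.mu, self.theta, self.sigma, self.dt = mu, theta, sigma, dt
        self.state = np.ones(shape) * mu

    def sample(self):
        # mean-reverting step plus scaled Gaussian noise; successive samples
        # are therefore correlated in time
        dx = self.theta * (self.mu - self.state) * self.dt \
             + self.sigma * np.sqrt(self.dt) * np.random.randn(*self.state.shape)
        self.state = self.state + dx
        return self.state  # typically added to the agent's action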
@@ -523,8 +523,8 @@ the samples are correlated between consequent time steps.</p>
|
||||
<div class="section" id="parameternoise">
|
||||
<h2>ParameterNoise<a class="headerlink" href="#parameternoise" title="Permalink to this headline">¶</a></h2>
|
||||
<dl class="class">
|
||||
<dt id="rl_coach.exploration_policies.ParameterNoise">
|
||||
<em class="property">class </em><code class="descclassname">rl_coach.exploration_policies.</code><code class="descname">ParameterNoise</code><span class="sig-paren">(</span><em>network_params: Dict[str, rl_coach.base_parameters.NetworkParameters], action_space: rl_coach.spaces.ActionSpace</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/exploration_policies/parameter_noise.html#ParameterNoise"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.exploration_policies.ParameterNoise" title="Permalink to this definition">¶</a></dt>
|
||||
<dt id="rl_coach.exploration_policies.parameter_noise.ParameterNoise">
|
||||
<em class="property">class </em><code class="descclassname">rl_coach.exploration_policies.parameter_noise.</code><code class="descname">ParameterNoise</code><span class="sig-paren">(</span><em>network_params: Dict[str, rl_coach.base_parameters.NetworkParameters], action_space: rl_coach.spaces.ActionSpace</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/exploration_policies/parameter_noise.html#ParameterNoise"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.exploration_policies.parameter_noise.ParameterNoise" title="Permalink to this definition">¶</a></dt>
|
||||
<dd><p>The ParameterNoise exploration policy is intended for both discrete and continuous action spaces.
|
||||
It applies exploration by replacing all of the network's dense layers with noisy layers.
|
||||
The noisy layers have both weight means and weight standard deviations, and for each forward pass of the network
|
||||
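A minimal sketch of a noisy dense layer, assuming learned weight means and standard deviations; this is illustrative only, Coach's noisy layers live in the network implementation itself:

import numpy as np

class NoisyDense:
    def __init__(self, in_dim, out_dim):
        self.w_mu = np.random.randn(in_dim, out_dim) * 0.1   # weight means
        self.w_sigma = np.full((in_dim, out_dim), 0.017)     # weight standard deviations

    def forward(self, x):
        # draw a fresh weight perturbation on every forward pass
        w = self.w_mu + self.w_sigma * np.random.randn(*self.w_mu.shape)
        return x @ w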
@@ -545,8 +545,8 @@ values.</p>
|
||||
<div class="section" id="truncatednormal">
|
||||
<h2>TruncatedNormal<a class="headerlink" href="#truncatednormal" title="Permalink to this headline">¶</a></h2>
|
||||
<dl class="class">
|
||||
<dt id="rl_coach.exploration_policies.TruncatedNormal">
|
||||
<em class="property">class </em><code class="descclassname">rl_coach.exploration_policies.</code><code class="descname">TruncatedNormal</code><span class="sig-paren">(</span><em>action_space: rl_coach.spaces.ActionSpace</em>, <em>noise_percentage_schedule: rl_coach.schedules.Schedule</em>, <em>evaluation_noise_percentage: float</em>, <em>clip_low: float</em>, <em>clip_high: float</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/exploration_policies/truncated_normal.html#TruncatedNormal"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.exploration_policies.TruncatedNormal" title="Permalink to this definition">¶</a></dt>
|
||||
<dt id="rl_coach.exploration_policies.truncated_normal.TruncatedNormal">
|
||||
<em class="property">class </em><code class="descclassname">rl_coach.exploration_policies.truncated_normal.</code><code class="descname">TruncatedNormal</code><span class="sig-paren">(</span><em>action_space: rl_coach.spaces.ActionSpace</em>, <em>noise_percentage_schedule: rl_coach.schedules.Schedule</em>, <em>evaluation_noise_percentage: float</em>, <em>clip_low: float</em>, <em>clip_high: float</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/exploration_policies/truncated_normal.html#TruncatedNormal"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.exploration_policies.truncated_normal.TruncatedNormal" title="Permalink to this definition">¶</a></dt>
|
||||
<dd><p>The TruncatedNormal exploration policy is intended for continuous action spaces. It samples the action from a
|
||||
normal distribution, where the mean action is given by the agent, and the standard deviation can be given in
|
||||
two different ways:
|
||||
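A minimal sketch of the sampling step, assuming the mean comes from the agent and the standard deviation from the noise-percentage schedule; out-of-range samples are simply redrawn (illustrative only):

import numpy as np

def truncated_normal_action(mean, std, clip_low, clip_high):
    while True:
        sample = np.random.normal(mean, std)
        # keep only samples inside the truncation bounds
        if np.all(sample >= clip_low) and np.all(sample <= clip_high):
            return sample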
@@ -575,8 +575,8 @@ of the action space</li>
|
||||
<div class="section" id="ucb">
|
||||
<h2>UCB<a class="headerlink" href="#ucb" title="Permalink to this headline">¶</a></h2>
|
||||
<dl class="class">
|
||||
<dt id="rl_coach.exploration_policies.UCB">
|
||||
<em class="property">class </em><code class="descclassname">rl_coach.exploration_policies.</code><code class="descname">UCB</code><span class="sig-paren">(</span><em>action_space: rl_coach.spaces.ActionSpace</em>, <em>epsilon_schedule: rl_coach.schedules.Schedule</em>, <em>evaluation_epsilon: float</em>, <em>architecture_num_q_heads: int</em>, <em>lamb: int</em>, <em>continuous_exploration_policy_parameters: rl_coach.exploration_policies.exploration_policy.ExplorationParameters = <rl_coach.exploration_policies.additive_noise.AdditiveNoiseParameters object></em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/exploration_policies/ucb.html#UCB"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.exploration_policies.UCB" title="Permalink to this definition">¶</a></dt>
|
||||
<dt id="rl_coach.exploration_policies.ucb.UCB">
|
||||
<em class="property">class </em><code class="descclassname">rl_coach.exploration_policies.ucb.</code><code class="descname">UCB</code><span class="sig-paren">(</span><em>action_space: rl_coach.spaces.ActionSpace</em>, <em>epsilon_schedule: rl_coach.schedules.Schedule</em>, <em>evaluation_epsilon: float</em>, <em>architecture_num_q_heads: int</em>, <em>lamb: int</em>, <em>continuous_exploration_policy_parameters: rl_coach.exploration_policies.exploration_policy.ExplorationParameters = <rl_coach.exploration_policies.additive_noise.AdditiveNoiseParameters object></em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/rl_coach/exploration_policies/ucb.html#UCB"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.exploration_policies.ucb.UCB" title="Permalink to this definition">¶</a></dt>
|
||||
<dd><p>The UCB exploration policy follows the upper confidence bound heuristic to sample actions in discrete action spaces.
|
||||
It assumes that there are multiple network heads predicting action values, and that the standard deviation
|
||||
between the heads' predictions represents the agent's uncertainty in each of the actions.
|
||||
|
||||
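A minimal sketch of UCB selection over bootstrapped Q-heads, where lamb scales the disagreement between heads (illustrative, not Coach's implementation):

import numpy as np

def ucb_action(q_values_per_head, lamb=0.1):
    # q_values_per_head has shape (num_heads, num_actions)
    mean = q_values_per_head.mean(axis=0)
    std = q_values_per_head.std(axis=0)   # head disagreement acts as uncertainty
    return int(np.argmax(mean + lamb * std))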
@@ -226,7 +226,7 @@
|
||||
</li>
|
||||
<li><a href="components/agents/policy_optimization/ac.html#rl_coach.agents.actor_critic_agent.ActorCriticAlgorithmParameters">ActorCriticAlgorithmParameters (class in rl_coach.agents.actor_critic_agent)</a>
|
||||
</li>
|
||||
<li><a href="components/exploration_policies/index.html#rl_coach.exploration_policies.AdditiveNoise">AdditiveNoise (class in rl_coach.exploration_policies)</a>
|
||||
<li><a href="components/exploration_policies/index.html#rl_coach.exploration_policies.additive_noise.AdditiveNoise">AdditiveNoise (class in rl_coach.exploration_policies.additive_noise)</a>
|
||||
</li>
|
||||
</ul></td>
|
||||
<td style="width: 33%; vertical-align: top;"><ul>
|
||||
@@ -262,11 +262,11 @@
|
||||
</li>
|
||||
<li><a href="components/agents/imitation/bc.html#rl_coach.agents.bc_agent.BCAlgorithmParameters">BCAlgorithmParameters (class in rl_coach.agents.bc_agent)</a>
|
||||
</li>
|
||||
<li><a href="components/exploration_policies/index.html#rl_coach.exploration_policies.Boltzmann">Boltzmann (class in rl_coach.exploration_policies)</a>
|
||||
<li><a href="components/exploration_policies/index.html#rl_coach.exploration_policies.boltzmann.Boltzmann">Boltzmann (class in rl_coach.exploration_policies.boltzmann)</a>
|
||||
</li>
|
||||
</ul></td>
|
||||
<td style="width: 33%; vertical-align: top;"><ul>
|
||||
<li><a href="components/exploration_policies/index.html#rl_coach.exploration_policies.Bootstrapped">Bootstrapped (class in rl_coach.exploration_policies)</a>
|
||||
<li><a href="components/exploration_policies/index.html#rl_coach.exploration_policies.bootstrapped.Bootstrapped">Bootstrapped (class in rl_coach.exploration_policies.bootstrapped)</a>
|
||||
</li>
|
||||
<li><a href="components/spaces.html#rl_coach.spaces.BoxActionSpace">BoxActionSpace (class in rl_coach.spaces)</a>
|
||||
</li>
|
||||
@@ -288,11 +288,11 @@
|
||||
</ul></li>
|
||||
<li><a href="components/environments/index.html#rl_coach.environments.carla_environment.CarlaEnvironment">CarlaEnvironment (class in rl_coach.environments.carla_environment)</a>
|
||||
</li>
|
||||
<li><a href="components/exploration_policies/index.html#rl_coach.exploration_policies.Categorical">Categorical (class in rl_coach.exploration_policies)</a>
|
||||
<li><a href="components/exploration_policies/index.html#rl_coach.exploration_policies.categorical.Categorical">Categorical (class in rl_coach.exploration_policies.categorical)</a>
|
||||
</li>
|
||||
<li><a href="components/agents/value_optimization/categorical_dqn.html#rl_coach.agents.categorical_dqn_agent.CategoricalDQNAlgorithmParameters">CategoricalDQNAlgorithmParameters (class in rl_coach.agents.categorical_dqn_agent)</a>
|
||||
</li>
|
||||
<li><a href="components/exploration_policies/index.html#rl_coach.exploration_policies.ExplorationPolicy.change_phase">change_phase() (rl_coach.exploration_policies.ExplorationPolicy method)</a>
|
||||
<li><a href="components/exploration_policies/index.html#rl_coach.exploration_policies.exploration_policy.ExplorationPolicy.change_phase">change_phase() (rl_coach.exploration_policies.exploration_policy.ExplorationPolicy method)</a>
|
||||
</li>
|
||||
<li><a href="components/agents/index.html#rl_coach.agents.agent.Agent.choose_action">choose_action() (rl_coach.agents.agent.Agent method)</a>
|
||||
|
||||
@@ -328,7 +328,7 @@
|
||||
</li>
|
||||
<li><a href="components/architectures/index.html#rl_coach.architectures.architecture.Architecture.construct">construct() (rl_coach.architectures.architecture.Architecture static method)</a>
|
||||
</li>
|
||||
<li><a href="components/exploration_policies/index.html#rl_coach.exploration_policies.ContinuousEntropy">ContinuousEntropy (class in rl_coach.exploration_policies)</a>
|
||||
<li><a href="components/exploration_policies/index.html#rl_coach.exploration_policies.continuous_entropy.ContinuousEntropy">ContinuousEntropy (class in rl_coach.exploration_policies.continuous_entropy)</a>
|
||||
</li>
|
||||
<li><a href="components/environments/index.html#rl_coach.environments.control_suite_environment.ControlSuiteEnvironment">ControlSuiteEnvironment (class in rl_coach.environments.control_suite_environment)</a>
|
||||
</li>
|
||||
@@ -368,7 +368,7 @@
|
||||
<h2 id="E">E</h2>
|
||||
<table style="width: 100%" class="indextable genindextable"><tr>
|
||||
<td style="width: 33%; vertical-align: top;"><ul>
|
||||
<li><a href="components/exploration_policies/index.html#rl_coach.exploration_policies.EGreedy">EGreedy (class in rl_coach.exploration_policies)</a>
|
||||
<li><a href="components/exploration_policies/index.html#rl_coach.exploration_policies.e_greedy.EGreedy">EGreedy (class in rl_coach.exploration_policies.e_greedy)</a>
|
||||
</li>
|
||||
<li><a href="components/agents/index.html#rl_coach.agents.agent.Agent.emulate_act_on_trainer">emulate_act_on_trainer() (rl_coach.agents.agent.Agent method)</a>
|
||||
|
||||
@@ -398,7 +398,7 @@
|
||||
</li>
|
||||
<li><a href="components/memories/index.html#rl_coach.memories.non_episodic.ExperienceReplay">ExperienceReplay (class in rl_coach.memories.non_episodic)</a>
|
||||
</li>
|
||||
<li><a href="components/exploration_policies/index.html#rl_coach.exploration_policies.ExplorationPolicy">ExplorationPolicy (class in rl_coach.exploration_policies)</a>
|
||||
<li><a href="components/exploration_policies/index.html#rl_coach.exploration_policies.exploration_policy.ExplorationPolicy">ExplorationPolicy (class in rl_coach.exploration_policies.exploration_policy)</a>
|
||||
</li>
|
||||
</ul></td>
|
||||
</tr></table>
|
||||
@@ -416,7 +416,7 @@
|
||||
<td style="width: 33%; vertical-align: top;"><ul>
|
||||
<li><a href="components/core_types.html#rl_coach.core_types.Batch.game_overs">game_overs() (rl_coach.core_types.Batch method)</a>
|
||||
</li>
|
||||
<li><a href="components/exploration_policies/index.html#rl_coach.exploration_policies.ExplorationPolicy.get_action">get_action() (rl_coach.exploration_policies.ExplorationPolicy method)</a>
|
||||
<li><a href="components/exploration_policies/index.html#rl_coach.exploration_policies.exploration_policy.ExplorationPolicy.get_action">get_action() (rl_coach.exploration_policies.exploration_policy.ExplorationPolicy method)</a>
|
||||
</li>
|
||||
<li><a href="components/environments/index.html#rl_coach.environments.environment.Environment.get_action_from_user">get_action_from_user() (rl_coach.environments.environment.Environment method)</a>
|
||||
</li>
|
||||
@@ -466,7 +466,7 @@
|
||||
</li>
|
||||
<li><a href="components/spaces.html#rl_coach.spaces.GoalsSpace.DistanceMetric">GoalsSpace.DistanceMetric (class in rl_coach.spaces)</a>
|
||||
</li>
|
||||
<li><a href="components/exploration_policies/index.html#rl_coach.exploration_policies.Greedy">Greedy (class in rl_coach.exploration_policies)</a>
|
||||
<li><a href="components/exploration_policies/index.html#rl_coach.exploration_policies.greedy.Greedy">Greedy (class in rl_coach.exploration_policies.greedy)</a>
|
||||
</li>
|
||||
<li><a href="components/environments/index.html#rl_coach.environments.gym_environment.GymEnvironment">GymEnvironment (class in rl_coach.environments.gym_environment)</a>
|
||||
</li>
|
||||
@@ -626,7 +626,7 @@
|
||||
<li><a href="test.html#rl_coach.agents.dqn_agent.DQNAgent.observe">(rl_coach.agents.dqn_agent.DQNAgent method)</a>
|
||||
</li>
|
||||
</ul></li>
|
||||
<li><a href="components/exploration_policies/index.html#rl_coach.exploration_policies.OUProcess">OUProcess (class in rl_coach.exploration_policies)</a>
|
||||
<li><a href="components/exploration_policies/index.html#rl_coach.exploration_policies.ou_process.OUProcess">OUProcess (class in rl_coach.exploration_policies.ou_process)</a>
|
||||
</li>
|
||||
</ul></td>
|
||||
</tr></table>
|
||||
@@ -640,7 +640,7 @@
|
||||
</li>
|
||||
<li><a href="components/architectures/index.html#rl_coach.architectures.network_wrapper.NetworkWrapper.parallel_prediction">parallel_prediction() (rl_coach.architectures.network_wrapper.NetworkWrapper method)</a>
|
||||
</li>
|
||||
<li><a href="components/exploration_policies/index.html#rl_coach.exploration_policies.ParameterNoise">ParameterNoise (class in rl_coach.exploration_policies)</a>
|
||||
<li><a href="components/exploration_policies/index.html#rl_coach.exploration_policies.parameter_noise.ParameterNoise">ParameterNoise (class in rl_coach.exploration_policies.parameter_noise)</a>
|
||||
</li>
|
||||
<li><a href="components/agents/index.html#rl_coach.agents.agent.Agent.parent">parent (rl_coach.agents.agent.Agent attribute)</a>
|
||||
|
||||
@@ -714,9 +714,9 @@
|
||||
</ul></li>
|
||||
<li><a href="components/environments/index.html#rl_coach.environments.environment.Environment.render">render() (rl_coach.environments.environment.Environment method)</a>
|
||||
</li>
|
||||
<li><a href="components/exploration_policies/index.html#rl_coach.exploration_policies.ExplorationPolicy.requires_action_values">requires_action_values() (rl_coach.exploration_policies.ExplorationPolicy method)</a>
|
||||
<li><a href="components/exploration_policies/index.html#rl_coach.exploration_policies.exploration_policy.ExplorationPolicy.requires_action_values">requires_action_values() (rl_coach.exploration_policies.exploration_policy.ExplorationPolicy method)</a>
|
||||
</li>
|
||||
<li><a href="components/exploration_policies/index.html#rl_coach.exploration_policies.ExplorationPolicy.reset">reset() (rl_coach.exploration_policies.ExplorationPolicy method)</a>
|
||||
<li><a href="components/exploration_policies/index.html#rl_coach.exploration_policies.exploration_policy.ExplorationPolicy.reset">reset() (rl_coach.exploration_policies.exploration_policy.ExplorationPolicy method)</a>
|
||||
</li>
|
||||
<li><a href="components/architectures/index.html#rl_coach.architectures.architecture.Architecture.reset_accumulated_gradients">reset_accumulated_gradients() (rl_coach.architectures.architecture.Architecture method)</a>
|
||||
</li>
|
||||
@@ -870,7 +870,7 @@
|
||||
</li>
|
||||
<li><a href="components/memories/index.html#rl_coach.memories.non_episodic.TransitionCollection">TransitionCollection (class in rl_coach.memories.non_episodic)</a>
|
||||
</li>
|
||||
<li><a href="components/exploration_policies/index.html#rl_coach.exploration_policies.TruncatedNormal">TruncatedNormal (class in rl_coach.exploration_policies)</a>
|
||||
<li><a href="components/exploration_policies/index.html#rl_coach.exploration_policies.truncated_normal.TruncatedNormal">TruncatedNormal (class in rl_coach.exploration_policies.truncated_normal)</a>
|
||||
</li>
|
||||
</ul></td>
|
||||
</tr></table>
|
||||
@@ -878,7 +878,7 @@
|
||||
<h2 id="U">U</h2>
|
||||
<table style="width: 100%" class="indextable genindextable"><tr>
|
||||
<td style="width: 33%; vertical-align: top;"><ul>
|
||||
<li><a href="components/exploration_policies/index.html#rl_coach.exploration_policies.UCB">UCB (class in rl_coach.exploration_policies)</a>
|
||||
<li><a href="components/exploration_policies/index.html#rl_coach.exploration_policies.ucb.UCB">UCB (class in rl_coach.exploration_policies.ucb)</a>
|
||||
</li>
|
||||
<li><a href="components/core_types.html#rl_coach.core_types.Episode.update_discounted_rewards">update_discounted_rewards() (rl_coach.core_types.Episode method)</a>
|
||||
</li>
|
||||
|
||||
@@ -194,7 +194,7 @@ Coach collects statistics from the training process and supports advanced visual
|
||||
<li><a class="reference external" href="https://ai.intel.com/reinforcement-learning-coach-intel/">Release 0.8.0</a> (initial release)</li>
|
||||
<li><a class="reference external" href="https://ai.intel.com/reinforcement-learning-coach-carla-qr-dqn/">Release 0.9.0</a></li>
|
||||
<li><a class="reference external" href="https://ai.intel.com/introducing-reinforcement-learning-coach-0-10-0/)">Release 0.10.0</a></li>
|
||||
<li><a class="reference external" href="https://ai.intel.com/">Release 0.11.0</a> (current release)</li>
|
||||
<li><a class="reference external" href="https://ai.intel.com/rl-coach-data-science-at-scale/">Release 0.11.0</a> (current release)</li>
|
||||
</ul>
|
||||
<p>You can find more details in the <a class="reference external" href="https://github.com/NervanaSystems/coach">GitHub repository</a>.</p>
|
||||
<div class="toctree-wrapper compound">
|
||||
|
||||
BIN
docs/objects.inv
Binary file not shown.
File diff suppressed because one or more lines are too long
@@ -38,50 +38,50 @@ spaces.
|
||||
|
||||
ExplorationPolicy
|
||||
-----------------
|
||||
.. autoclass:: rl_coach.exploration_policies.ExplorationPolicy
|
||||
.. autoclass:: rl_coach.exploration_policies.exploration_policy.ExplorationPolicy
|
||||
:members:
|
||||
:inherited-members:
|
||||
|
||||
AdditiveNoise
|
||||
-------------
|
||||
.. autoclass:: rl_coach.exploration_policies.AdditiveNoise
|
||||
.. autoclass:: rl_coach.exploration_policies.additive_noise.AdditiveNoise
|
||||
|
||||
Boltzmann
|
||||
---------
|
||||
.. autoclass:: rl_coach.exploration_policies.Boltzmann
|
||||
.. autoclass:: rl_coach.exploration_policies.boltzmann.Boltzmann
|
||||
|
||||
Bootstrapped
|
||||
------------
|
||||
.. autoclass:: rl_coach.exploration_policies.Bootstrapped
|
||||
.. autoclass:: rl_coach.exploration_policies.bootstrapped.Bootstrapped
|
||||
|
||||
Categorical
|
||||
-----------
|
||||
.. autoclass:: rl_coach.exploration_policies.Categorical
|
||||
.. autoclass:: rl_coach.exploration_policies.categorical.Categorical
|
||||
|
||||
ContinuousEntropy
|
||||
-----------------
|
||||
.. autoclass:: rl_coach.exploration_policies.ContinuousEntropy
|
||||
.. autoclass:: rl_coach.exploration_policies.continuous_entropy.ContinuousEntropy
|
||||
|
||||
EGreedy
|
||||
-------
|
||||
.. autoclass:: rl_coach.exploration_policies.EGreedy
|
||||
.. autoclass:: rl_coach.exploration_policies.e_greedy.EGreedy
|
||||
|
||||
Greedy
|
||||
------
|
||||
.. autoclass:: rl_coach.exploration_policies.Greedy
|
||||
.. autoclass:: rl_coach.exploration_policies.greedy.Greedy
|
||||
|
||||
OUProcess
|
||||
---------
|
||||
.. autoclass:: rl_coach.exploration_policies.OUProcess
|
||||
.. autoclass:: rl_coach.exploration_policies.ou_process.OUProcess
|
||||
|
||||
ParameterNoise
|
||||
--------------
|
||||
.. autoclass:: rl_coach.exploration_policies.ParameterNoise
|
||||
.. autoclass:: rl_coach.exploration_policies.parameter_noise.ParameterNoise
|
||||
|
||||
TruncatedNormal
|
||||
---------------
|
||||
.. autoclass:: rl_coach.exploration_policies.TruncatedNormal
|
||||
.. autoclass:: rl_coach.exploration_policies.truncated_normal.TruncatedNormal
|
||||
|
||||
UCB
|
||||
---
|
||||
.. autoclass:: rl_coach.exploration_policies.UCB
|
||||
.. autoclass:: rl_coach.exploration_policies.ucb.UCB
|
||||
@@ -25,7 +25,7 @@ Blog posts from the Intel® AI website:
|
||||
|
||||
* `Release 0.10.0 <https://ai.intel.com/introducing-reinforcement-learning-coach-0-10-0/>`_
|
||||
|
||||
* `Release 0.11.0 <https://ai.intel.com/>`_ (current release)
|
||||
* `Release 0.11.0 <https://ai.intel.com/rl-coach-data-science-at-scale/>`_ (current release)
|
||||
|
||||
You can find more details in the `GitHub repository <https://github.com/NervanaSystems/coach>`_.
|
||||
|
||||
|
||||
@@ -15,13 +15,11 @@
|
||||
#
|
||||
|
||||
import copy
|
||||
import os
|
||||
import random
|
||||
from collections import OrderedDict
|
||||
from typing import Dict, List, Union, Tuple
|
||||
|
||||
import numpy as np
|
||||
from pandas import read_pickle
|
||||
from six.moves import range
|
||||
|
||||
from rl_coach.agents.agent_interface import AgentInterface
|
||||
|
||||
@@ -35,7 +35,6 @@ from multiprocessing.managers import BaseManager
|
||||
import subprocess
|
||||
from rl_coach.graph_managers.graph_manager import HumanPlayScheduleParameters, GraphManager
|
||||
from rl_coach.utils import list_all_presets, short_dynamic_import, get_open_port, SharedMemoryScratchPad, get_base_dir
|
||||
from rl_coach.agents.human_agent import HumanAgentParameters
|
||||
from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
|
||||
from rl_coach.environments.environment import SingleLevelSelection
|
||||
from rl_coach.memories.backend.redis import RedisPubSubMemoryBackendParameters
|
||||
@@ -229,6 +228,8 @@ class CoachLauncher(object):
|
||||
|
||||
# for human play we need to create a custom graph manager
|
||||
if args.play:
|
||||
from rl_coach.agents.human_agent import HumanAgentParameters
|
||||
|
||||
env_params = short_dynamic_import(args.environment_type, ignore_module_case=True)()
|
||||
env_params.human_control = True
|
||||
schedule_params = HumanPlayScheduleParameters()
|
||||
|
||||
@@ -1,55 +0,0 @@
|
||||
#
|
||||
# Copyright (c) 2017 Intel Corporation
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
from .additive_noise import AdditiveNoiseParameters, AdditiveNoise
|
||||
from .boltzmann import BoltzmannParameters, Boltzmann
|
||||
from .bootstrapped import BootstrappedParameters, Bootstrapped
|
||||
from .categorical import CategoricalParameters, Categorical
|
||||
from .continuous_entropy import ContinuousEntropyParameters, ContinuousEntropy
|
||||
from .e_greedy import EGreedyParameters, EGreedy
|
||||
from .exploration_policy import ExplorationParameters, ExplorationPolicy
|
||||
from .greedy import GreedyParameters, Greedy
|
||||
from .ou_process import OUProcessParameters, OUProcess
|
||||
from .parameter_noise import ParameterNoiseParameters, ParameterNoise
|
||||
from .truncated_normal import TruncatedNormalParameters, TruncatedNormal
|
||||
from .ucb import UCBParameters, UCB
|
||||
|
||||
__all__ = [
|
||||
'AdditiveNoiseParameters',
|
||||
'AdditiveNoise',
|
||||
'BoltzmannParameters',
|
||||
'Boltzmann',
|
||||
'BootstrappedParameters',
|
||||
'Bootstrapped',
|
||||
'CategoricalParameters',
|
||||
'Categorical',
|
||||
'ContinuousEntropyParameters',
|
||||
'ContinuousEntropy',
|
||||
'EGreedyParameters',
|
||||
'EGreedy',
|
||||
'ExplorationParameters',
|
||||
'ExplorationPolicy',
|
||||
'GreedyParameters',
|
||||
'Greedy',
|
||||
'OUProcessParameters',
|
||||
'OUProcess',
|
||||
'ParameterNoiseParameters',
|
||||
'ParameterNoise',
|
||||
'TruncatedNormalParameters',
|
||||
'TruncatedNormal',
|
||||
'UCBParameters',
|
||||
'UCB'
|
||||
]
|
||||
|
||||