mirror of
https://github.com/gryf/coach.git
synced 2025-12-17 19:20:19 +01:00
Docs changes - fixing blogpost links, removing importing all exploration policies (#139)
* updated docs * removing imports for all exploration policies in __init__ + setting the right blog-post link * small cleanups
This commit is contained in:
committed by
Scott Leishman
parent
155b78b995
commit
f12857a8c7
@@ -195,13 +195,11 @@
|
||||
<span class="c1">#</span>
|
||||
|
||||
<span class="kn">import</span> <span class="nn">copy</span>
|
||||
<span class="kn">import</span> <span class="nn">os</span>
|
||||
<span class="kn">import</span> <span class="nn">random</span>
|
||||
<span class="kn">from</span> <span class="nn">collections</span> <span class="k">import</span> <span class="n">OrderedDict</span>
|
||||
<span class="kn">from</span> <span class="nn">typing</span> <span class="k">import</span> <span class="n">Dict</span><span class="p">,</span> <span class="n">List</span><span class="p">,</span> <span class="n">Union</span><span class="p">,</span> <span class="n">Tuple</span>
|
||||
|
||||
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
||||
<span class="kn">from</span> <span class="nn">pandas</span> <span class="k">import</span> <span class="n">read_pickle</span>
|
||||
<span class="kn">from</span> <span class="nn">six.moves</span> <span class="k">import</span> <span class="nb">range</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">rl_coach.agents.agent_interface</span> <span class="k">import</span> <span class="n">AgentInterface</span>
|
||||
|
||||
@@ -215,7 +215,8 @@
|
||||
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">num_steps_between_copying_online_weights_to_target</span> <span class="o">=</span> <span class="n">EnvironmentSteps</span><span class="p">(</span><span class="mi">10000</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">num_consecutive_playing_steps</span> <span class="o">=</span> <span class="n">EnvironmentSteps</span><span class="p">(</span><span class="mi">4</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">discount</span> <span class="o">=</span> <span class="mf">0.99</span></div>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">discount</span> <span class="o">=</span> <span class="mf">0.99</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">supports_parameter_noise</span> <span class="o">=</span> <span class="kc">True</span></div>
|
||||
|
||||
|
||||
<span class="k">class</span> <span class="nc">DQNNetworkParameters</span><span class="p">(</span><span class="n">NetworkParameters</span><span class="p">):</span>
|
||||
|
||||
@@ -391,6 +391,9 @@
|
||||
<span class="c1"># Should the workers wait for full episode</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">act_for_full_episodes</span> <span class="o">=</span> <span class="kc">False</span>
|
||||
|
||||
<span class="c1"># Support for parameter noise</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">supports_parameter_noise</span> <span class="o">=</span> <span class="kc">False</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="PresetValidationParameters"><a class="viewcode-back" href="../../components/additional_parameters.html#rl_coach.base_parameters.PresetValidationParameters">[docs]</a><span class="k">class</span> <span class="nc">PresetValidationParameters</span><span class="p">(</span><span class="n">Parameters</span><span class="p">):</span>
|
||||
<span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span>
|
||||
|
||||
@@ -178,7 +178,24 @@
|
||||
<div itemprop="articleBody">
|
||||
|
||||
<h1>Source code for rl_coach.data_stores.nfs_data_store</h1><div class="highlight"><pre>
|
||||
<span></span><span class="kn">import</span> <span class="nn">uuid</span>
|
||||
<span></span><span class="c1">#</span>
|
||||
<span class="c1"># Copyright (c) 2017 Intel Corporation</span>
|
||||
<span class="c1">#</span>
|
||||
<span class="c1"># Licensed under the Apache License, Version 2.0 (the "License");</span>
|
||||
<span class="c1"># you may not use this file except in compliance with the License.</span>
|
||||
<span class="c1"># You may obtain a copy of the License at</span>
|
||||
<span class="c1">#</span>
|
||||
<span class="c1"># http://www.apache.org/licenses/LICENSE-2.0</span>
|
||||
<span class="c1">#</span>
|
||||
<span class="c1"># Unless required by applicable law or agreed to in writing, software</span>
|
||||
<span class="c1"># distributed under the License is distributed on an "AS IS" BASIS,</span>
|
||||
<span class="c1"># WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</span>
|
||||
<span class="c1"># See the License for the specific language governing permissions and</span>
|
||||
<span class="c1"># limitations under the License.</span>
|
||||
<span class="c1">#</span>
|
||||
|
||||
|
||||
<span class="kn">import</span> <span class="nn">uuid</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">rl_coach.data_stores.data_store</span> <span class="k">import</span> <span class="n">DataStore</span><span class="p">,</span> <span class="n">DataStoreParameters</span>
|
||||
|
||||
|
||||
@@ -178,7 +178,24 @@
|
||||
<div itemprop="articleBody">
|
||||
|
||||
<h1>Source code for rl_coach.data_stores.s3_data_store</h1><div class="highlight"><pre>
|
||||
<span></span><span class="kn">from</span> <span class="nn">rl_coach.data_stores.data_store</span> <span class="k">import</span> <span class="n">DataStore</span><span class="p">,</span> <span class="n">DataStoreParameters</span>
|
||||
<span></span><span class="c1">#</span>
|
||||
<span class="c1"># Copyright (c) 2017 Intel Corporation</span>
|
||||
<span class="c1">#</span>
|
||||
<span class="c1"># Licensed under the Apache License, Version 2.0 (the "License");</span>
|
||||
<span class="c1"># you may not use this file except in compliance with the License.</span>
|
||||
<span class="c1"># You may obtain a copy of the License at</span>
|
||||
<span class="c1">#</span>
|
||||
<span class="c1"># http://www.apache.org/licenses/LICENSE-2.0</span>
|
||||
<span class="c1">#</span>
|
||||
<span class="c1"># Unless required by applicable law or agreed to in writing, software</span>
|
||||
<span class="c1"># distributed under the License is distributed on an "AS IS" BASIS,</span>
|
||||
<span class="c1"># WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</span>
|
||||
<span class="c1"># See the License for the specific language governing permissions and</span>
|
||||
<span class="c1"># limitations under the License.</span>
|
||||
<span class="c1">#</span>
|
||||
|
||||
|
||||
<span class="kn">from</span> <span class="nn">rl_coach.data_stores.data_store</span> <span class="k">import</span> <span class="n">DataStore</span><span class="p">,</span> <span class="n">DataStoreParameters</span>
|
||||
<span class="kn">from</span> <span class="nn">minio</span> <span class="k">import</span> <span class="n">Minio</span>
|
||||
<span class="kn">from</span> <span class="nn">minio.error</span> <span class="k">import</span> <span class="n">ResponseError</span>
|
||||
<span class="kn">from</span> <span class="nn">configparser</span> <span class="k">import</span> <span class="n">ConfigParser</span><span class="p">,</span> <span class="n">Error</span>
|
||||
|
||||
@@ -216,7 +216,7 @@
|
||||
<span class="k">return</span> <span class="s1">'rl_coach.exploration_policies.additive_noise:AdditiveNoise'</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="AdditiveNoise"><a class="viewcode-back" href="../../../components/exploration_policies/index.html#rl_coach.exploration_policies.AdditiveNoise">[docs]</a><span class="k">class</span> <span class="nc">AdditiveNoise</span><span class="p">(</span><span class="n">ExplorationPolicy</span><span class="p">):</span>
|
||||
<div class="viewcode-block" id="AdditiveNoise"><a class="viewcode-back" href="../../../components/exploration_policies/index.html#rl_coach.exploration_policies.additive_noise.AdditiveNoise">[docs]</a><span class="k">class</span> <span class="nc">AdditiveNoise</span><span class="p">(</span><span class="n">ExplorationPolicy</span><span class="p">):</span>
|
||||
<span class="sd">"""</span>
|
||||
<span class="sd"> AdditiveNoise is an exploration policy intended for continuous action spaces. It takes the action from the agent</span>
|
||||
<span class="sd"> and adds a Gaussian distributed noise to it. The amount of noise added to the action follows the noise amount that</span>
|
||||
|
||||
@@ -215,7 +215,7 @@
|
||||
|
||||
|
||||
|
||||
<div class="viewcode-block" id="Boltzmann"><a class="viewcode-back" href="../../../components/exploration_policies/index.html#rl_coach.exploration_policies.Boltzmann">[docs]</a><span class="k">class</span> <span class="nc">Boltzmann</span><span class="p">(</span><span class="n">ExplorationPolicy</span><span class="p">):</span>
|
||||
<div class="viewcode-block" id="Boltzmann"><a class="viewcode-back" href="../../../components/exploration_policies/index.html#rl_coach.exploration_policies.boltzmann.Boltzmann">[docs]</a><span class="k">class</span> <span class="nc">Boltzmann</span><span class="p">(</span><span class="n">ExplorationPolicy</span><span class="p">):</span>
|
||||
<span class="sd">"""</span>
|
||||
<span class="sd"> The Boltzmann exploration policy is intended for discrete action spaces. It assumes that each of the possible</span>
|
||||
<span class="sd"> actions has some value assigned to it (such as the Q value), and uses a softmax function to convert these values</span>
|
||||
|
||||
@@ -218,7 +218,7 @@
|
||||
<span class="k">return</span> <span class="s1">'rl_coach.exploration_policies.bootstrapped:Bootstrapped'</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="Bootstrapped"><a class="viewcode-back" href="../../../components/exploration_policies/index.html#rl_coach.exploration_policies.Bootstrapped">[docs]</a><span class="k">class</span> <span class="nc">Bootstrapped</span><span class="p">(</span><span class="n">EGreedy</span><span class="p">):</span>
|
||||
<div class="viewcode-block" id="Bootstrapped"><a class="viewcode-back" href="../../../components/exploration_policies/index.html#rl_coach.exploration_policies.bootstrapped.Bootstrapped">[docs]</a><span class="k">class</span> <span class="nc">Bootstrapped</span><span class="p">(</span><span class="n">EGreedy</span><span class="p">):</span>
|
||||
<span class="sd">"""</span>
|
||||
<span class="sd"> Bootstrapped exploration policy is currently only used for discrete action spaces along with the</span>
|
||||
<span class="sd"> Bootstrapped DQN agent. It assumes that there is an ensemble of network heads, where each one predicts the</span>
|
||||
|
||||
@@ -209,7 +209,7 @@
|
||||
<span class="k">return</span> <span class="s1">'rl_coach.exploration_policies.categorical:Categorical'</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="Categorical"><a class="viewcode-back" href="../../../components/exploration_policies/index.html#rl_coach.exploration_policies.Categorical">[docs]</a><span class="k">class</span> <span class="nc">Categorical</span><span class="p">(</span><span class="n">ExplorationPolicy</span><span class="p">):</span>
|
||||
<div class="viewcode-block" id="Categorical"><a class="viewcode-back" href="../../../components/exploration_policies/index.html#rl_coach.exploration_policies.categorical.Categorical">[docs]</a><span class="k">class</span> <span class="nc">Categorical</span><span class="p">(</span><span class="n">ExplorationPolicy</span><span class="p">):</span>
|
||||
<span class="sd">"""</span>
|
||||
<span class="sd"> Categorical exploration policy is intended for discrete action spaces. It expects the action values to</span>
|
||||
<span class="sd"> represent a probability distribution over the action, from which a single action will be sampled.</span>
|
||||
|
||||
@@ -203,7 +203,7 @@
|
||||
<span class="k">return</span> <span class="s1">'rl_coach.exploration_policies.continuous_entropy:ContinuousEntropy'</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="ContinuousEntropy"><a class="viewcode-back" href="../../../components/exploration_policies/index.html#rl_coach.exploration_policies.ContinuousEntropy">[docs]</a><span class="k">class</span> <span class="nc">ContinuousEntropy</span><span class="p">(</span><span class="n">AdditiveNoise</span><span class="p">):</span>
|
||||
<div class="viewcode-block" id="ContinuousEntropy"><a class="viewcode-back" href="../../../components/exploration_policies/index.html#rl_coach.exploration_policies.continuous_entropy.ContinuousEntropy">[docs]</a><span class="k">class</span> <span class="nc">ContinuousEntropy</span><span class="p">(</span><span class="n">AdditiveNoise</span><span class="p">):</span>
|
||||
<span class="sd">"""</span>
|
||||
<span class="sd"> Continuous entropy is an exploration policy that is actually implemented as part of the network.</span>
|
||||
<span class="sd"> The exploration policy class is only a placeholder for choosing this policy. The exploration policy is</span>
|
||||
|
||||
@@ -222,7 +222,7 @@
|
||||
<span class="k">return</span> <span class="s1">'rl_coach.exploration_policies.e_greedy:EGreedy'</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="EGreedy"><a class="viewcode-back" href="../../../components/exploration_policies/index.html#rl_coach.exploration_policies.EGreedy">[docs]</a><span class="k">class</span> <span class="nc">EGreedy</span><span class="p">(</span><span class="n">ExplorationPolicy</span><span class="p">):</span>
|
||||
<div class="viewcode-block" id="EGreedy"><a class="viewcode-back" href="../../../components/exploration_policies/index.html#rl_coach.exploration_policies.e_greedy.EGreedy">[docs]</a><span class="k">class</span> <span class="nc">EGreedy</span><span class="p">(</span><span class="n">ExplorationPolicy</span><span class="p">):</span>
|
||||
<span class="sd">"""</span>
|
||||
<span class="sd"> e-greedy is an exploration policy that is intended for both discrete and continuous action spaces.</span>
|
||||
|
||||
|
||||
@@ -210,7 +210,7 @@
|
||||
<span class="k">return</span> <span class="s1">'rl_coach.exploration_policies.exploration_policy:ExplorationPolicy'</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="ExplorationPolicy"><a class="viewcode-back" href="../../../components/exploration_policies/index.html#rl_coach.exploration_policies.ExplorationPolicy">[docs]</a><span class="k">class</span> <span class="nc">ExplorationPolicy</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span>
|
||||
<div class="viewcode-block" id="ExplorationPolicy"><a class="viewcode-back" href="../../../components/exploration_policies/index.html#rl_coach.exploration_policies.exploration_policy.ExplorationPolicy">[docs]</a><span class="k">class</span> <span class="nc">ExplorationPolicy</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span>
|
||||
<span class="sd">"""</span>
|
||||
<span class="sd"> An exploration policy takes the predicted actions or action values from the agent, and selects the action to</span>
|
||||
<span class="sd"> actually apply to the environment using some predefined algorithm.</span>
|
||||
@@ -222,14 +222,14 @@
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">phase</span> <span class="o">=</span> <span class="n">RunPhase</span><span class="o">.</span><span class="n">HEATUP</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">action_space</span> <span class="o">=</span> <span class="n">action_space</span>
|
||||
|
||||
<div class="viewcode-block" id="ExplorationPolicy.reset"><a class="viewcode-back" href="../../../components/exploration_policies/index.html#rl_coach.exploration_policies.ExplorationPolicy.reset">[docs]</a> <span class="k">def</span> <span class="nf">reset</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<div class="viewcode-block" id="ExplorationPolicy.reset"><a class="viewcode-back" href="../../../components/exploration_policies/index.html#rl_coach.exploration_policies.exploration_policy.ExplorationPolicy.reset">[docs]</a> <span class="k">def</span> <span class="nf">reset</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="sd">"""</span>
|
||||
<span class="sd"> Used for resetting the exploration policy parameters when needed</span>
|
||||
<span class="sd"> :return: None</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">pass</span></div>
|
||||
|
||||
<div class="viewcode-block" id="ExplorationPolicy.get_action"><a class="viewcode-back" href="../../../components/exploration_policies/index.html#rl_coach.exploration_policies.ExplorationPolicy.get_action">[docs]</a> <span class="k">def</span> <span class="nf">get_action</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">action_values</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="n">ActionType</span><span class="p">])</span> <span class="o">-></span> <span class="n">ActionType</span><span class="p">:</span>
|
||||
<div class="viewcode-block" id="ExplorationPolicy.get_action"><a class="viewcode-back" href="../../../components/exploration_policies/index.html#rl_coach.exploration_policies.exploration_policy.ExplorationPolicy.get_action">[docs]</a> <span class="k">def</span> <span class="nf">get_action</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">action_values</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="n">ActionType</span><span class="p">])</span> <span class="o">-></span> <span class="n">ActionType</span><span class="p">:</span>
|
||||
<span class="sd">"""</span>
|
||||
<span class="sd"> Given a list of values corresponding to each action, </span>
|
||||
<span class="sd"> choose one actions according to the exploration policy</span>
|
||||
@@ -243,7 +243,7 @@
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">"The get_action function should be overridden in the inheriting exploration class"</span><span class="p">)</span></div>
|
||||
|
||||
<div class="viewcode-block" id="ExplorationPolicy.change_phase"><a class="viewcode-back" href="../../../components/exploration_policies/index.html#rl_coach.exploration_policies.ExplorationPolicy.change_phase">[docs]</a> <span class="k">def</span> <span class="nf">change_phase</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">phase</span><span class="p">):</span>
|
||||
<div class="viewcode-block" id="ExplorationPolicy.change_phase"><a class="viewcode-back" href="../../../components/exploration_policies/index.html#rl_coach.exploration_policies.exploration_policy.ExplorationPolicy.change_phase">[docs]</a> <span class="k">def</span> <span class="nf">change_phase</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">phase</span><span class="p">):</span>
|
||||
<span class="sd">"""</span>
|
||||
<span class="sd"> Change between running phases of the algorithm</span>
|
||||
<span class="sd"> :param phase: Either Heatup or Train</span>
|
||||
@@ -251,7 +251,7 @@
|
||||
<span class="sd"> """</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">phase</span> <span class="o">=</span> <span class="n">phase</span></div>
|
||||
|
||||
<div class="viewcode-block" id="ExplorationPolicy.requires_action_values"><a class="viewcode-back" href="../../../components/exploration_policies/index.html#rl_coach.exploration_policies.ExplorationPolicy.requires_action_values">[docs]</a> <span class="k">def</span> <span class="nf">requires_action_values</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">bool</span><span class="p">:</span>
|
||||
<div class="viewcode-block" id="ExplorationPolicy.requires_action_values"><a class="viewcode-back" href="../../../components/exploration_policies/index.html#rl_coach.exploration_policies.exploration_policy.ExplorationPolicy.requires_action_values">[docs]</a> <span class="k">def</span> <span class="nf">requires_action_values</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">bool</span><span class="p">:</span>
|
||||
<span class="sd">"""</span>
|
||||
<span class="sd"> Allows exploration policies to define if they require the action values for the current step.</span>
|
||||
<span class="sd"> This can save up a lot of computation. For example in e-greedy, if the random value generated is smaller</span>
|
||||
|
||||
@@ -209,7 +209,7 @@
|
||||
<span class="k">return</span> <span class="s1">'rl_coach.exploration_policies.greedy:Greedy'</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="Greedy"><a class="viewcode-back" href="../../../components/exploration_policies/index.html#rl_coach.exploration_policies.Greedy">[docs]</a><span class="k">class</span> <span class="nc">Greedy</span><span class="p">(</span><span class="n">ExplorationPolicy</span><span class="p">):</span>
|
||||
<div class="viewcode-block" id="Greedy"><a class="viewcode-back" href="../../../components/exploration_policies/index.html#rl_coach.exploration_policies.greedy.Greedy">[docs]</a><span class="k">class</span> <span class="nc">Greedy</span><span class="p">(</span><span class="n">ExplorationPolicy</span><span class="p">):</span>
|
||||
<span class="sd">"""</span>
|
||||
<span class="sd"> The Greedy exploration policy is intended for both discrete and continuous action spaces.</span>
|
||||
<span class="sd"> For discrete action spaces, it always selects the action with the maximum value, as given by the agent.</span>
|
||||
|
||||
@@ -219,7 +219,7 @@
|
||||
|
||||
|
||||
<span class="c1"># Ornstein-Uhlenbeck process</span>
|
||||
<div class="viewcode-block" id="OUProcess"><a class="viewcode-back" href="../../../components/exploration_policies/index.html#rl_coach.exploration_policies.OUProcess">[docs]</a><span class="k">class</span> <span class="nc">OUProcess</span><span class="p">(</span><span class="n">ExplorationPolicy</span><span class="p">):</span>
|
||||
<div class="viewcode-block" id="OUProcess"><a class="viewcode-back" href="../../../components/exploration_policies/index.html#rl_coach.exploration_policies.ou_process.OUProcess">[docs]</a><span class="k">class</span> <span class="nc">OUProcess</span><span class="p">(</span><span class="n">ExplorationPolicy</span><span class="p">):</span>
|
||||
<span class="sd">"""</span>
|
||||
<span class="sd"> OUProcess exploration policy is intended for continuous action spaces, and selects the action according to</span>
|
||||
<span class="sd"> an Ornstein-Uhlenbeck process. The Ornstein-Uhlenbeck process implements the action as a Gaussian process, where</span>
|
||||
|
||||
@@ -210,7 +210,8 @@
|
||||
<span class="k">class</span> <span class="nc">ParameterNoiseParameters</span><span class="p">(</span><span class="n">ExplorationParameters</span><span class="p">):</span>
|
||||
<span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">agent_params</span><span class="p">:</span> <span class="n">AgentParameters</span><span class="p">):</span>
|
||||
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">agent_params</span><span class="p">,</span> <span class="n">DQNAgentParameters</span><span class="p">):</span>
|
||||
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="n">agent_params</span><span class="o">.</span><span class="n">algorithm</span><span class="o">.</span><span class="n">supports_parameter_noise</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">"Currently only DQN variants are supported for using an exploration type of "</span>
|
||||
<span class="s2">"ParameterNoise."</span><span class="p">)</span>
|
||||
|
||||
@@ -221,7 +222,7 @@
|
||||
<span class="k">return</span> <span class="s1">'rl_coach.exploration_policies.parameter_noise:ParameterNoise'</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="ParameterNoise"><a class="viewcode-back" href="../../../components/exploration_policies/index.html#rl_coach.exploration_policies.ParameterNoise">[docs]</a><span class="k">class</span> <span class="nc">ParameterNoise</span><span class="p">(</span><span class="n">ExplorationPolicy</span><span class="p">):</span>
|
||||
<div class="viewcode-block" id="ParameterNoise"><a class="viewcode-back" href="../../../components/exploration_policies/index.html#rl_coach.exploration_policies.parameter_noise.ParameterNoise">[docs]</a><span class="k">class</span> <span class="nc">ParameterNoise</span><span class="p">(</span><span class="n">ExplorationPolicy</span><span class="p">):</span>
|
||||
<span class="sd">"""</span>
|
||||
<span class="sd"> The ParameterNoise exploration policy is intended for both discrete and continuous action spaces.</span>
|
||||
<span class="sd"> It applies the exploration policy by replacing all the dense network layers with noisy layers.</span>
|
||||
|
||||
@@ -218,7 +218,7 @@
|
||||
<span class="k">return</span> <span class="s1">'rl_coach.exploration_policies.truncated_normal:TruncatedNormal'</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="TruncatedNormal"><a class="viewcode-back" href="../../../components/exploration_policies/index.html#rl_coach.exploration_policies.TruncatedNormal">[docs]</a><span class="k">class</span> <span class="nc">TruncatedNormal</span><span class="p">(</span><span class="n">ExplorationPolicy</span><span class="p">):</span>
|
||||
<div class="viewcode-block" id="TruncatedNormal"><a class="viewcode-back" href="../../../components/exploration_policies/index.html#rl_coach.exploration_policies.truncated_normal.TruncatedNormal">[docs]</a><span class="k">class</span> <span class="nc">TruncatedNormal</span><span class="p">(</span><span class="n">ExplorationPolicy</span><span class="p">):</span>
|
||||
<span class="sd">"""</span>
|
||||
<span class="sd"> The TruncatedNormal exploration policy is intended for continuous action spaces. It samples the action from a</span>
|
||||
<span class="sd"> normal distribution, where the mean action is given by the agent, and the standard deviation can be given in t</span>
|
||||
|
||||
@@ -222,7 +222,7 @@
|
||||
<span class="k">return</span> <span class="s1">'rl_coach.exploration_policies.ucb:UCB'</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="UCB"><a class="viewcode-back" href="../../../components/exploration_policies/index.html#rl_coach.exploration_policies.UCB">[docs]</a><span class="k">class</span> <span class="nc">UCB</span><span class="p">(</span><span class="n">EGreedy</span><span class="p">):</span>
|
||||
<div class="viewcode-block" id="UCB"><a class="viewcode-back" href="../../../components/exploration_policies/index.html#rl_coach.exploration_policies.ucb.UCB">[docs]</a><span class="k">class</span> <span class="nc">UCB</span><span class="p">(</span><span class="n">EGreedy</span><span class="p">):</span>
|
||||
<span class="sd">"""</span>
|
||||
<span class="sd"> UCB exploration policy is following the upper confidence bound heuristic to sample actions in discrete action spaces.</span>
|
||||
<span class="sd"> It assumes that there are multiple network heads that are predicting action values, and that the standard deviation</span>
|
||||
|
||||
@@ -178,7 +178,23 @@
|
||||
<div itemprop="articleBody">
|
||||
|
||||
<h1>Source code for rl_coach.memories.backend.redis</h1><div class="highlight"><pre>
|
||||
<span></span>
|
||||
<span></span><span class="c1">#</span>
|
||||
<span class="c1"># Copyright (c) 2017 Intel Corporation</span>
|
||||
<span class="c1">#</span>
|
||||
<span class="c1"># Licensed under the Apache License, Version 2.0 (the "License");</span>
|
||||
<span class="c1"># you may not use this file except in compliance with the License.</span>
|
||||
<span class="c1"># You may obtain a copy of the License at</span>
|
||||
<span class="c1">#</span>
|
||||
<span class="c1"># http://www.apache.org/licenses/LICENSE-2.0</span>
|
||||
<span class="c1">#</span>
|
||||
<span class="c1"># Unless required by applicable law or agreed to in writing, software</span>
|
||||
<span class="c1"># distributed under the License is distributed on an "AS IS" BASIS,</span>
|
||||
<span class="c1"># WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</span>
|
||||
<span class="c1"># See the License for the specific language governing permissions and</span>
|
||||
<span class="c1"># limitations under the License.</span>
|
||||
<span class="c1">#</span>
|
||||
|
||||
|
||||
<span class="kn">import</span> <span class="nn">redis</span>
|
||||
<span class="kn">import</span> <span class="nn">pickle</span>
|
||||
<span class="kn">import</span> <span class="nn">uuid</span>
|
||||
|
||||
@@ -178,7 +178,24 @@
|
||||
<div itemprop="articleBody">
|
||||
|
||||
<h1>Source code for rl_coach.memories.non_episodic.transition_collection</h1><div class="highlight"><pre>
|
||||
<span></span><span class="kn">from</span> <span class="nn">rl_coach.core_types</span> <span class="k">import</span> <span class="n">Transition</span>
|
||||
<span></span><span class="c1">#</span>
|
||||
<span class="c1"># Copyright (c) 2017 Intel Corporation</span>
|
||||
<span class="c1">#</span>
|
||||
<span class="c1"># Licensed under the Apache License, Version 2.0 (the "License");</span>
|
||||
<span class="c1"># you may not use this file except in compliance with the License.</span>
|
||||
<span class="c1"># You may obtain a copy of the License at</span>
|
||||
<span class="c1">#</span>
|
||||
<span class="c1"># http://www.apache.org/licenses/LICENSE-2.0</span>
|
||||
<span class="c1">#</span>
|
||||
<span class="c1"># Unless required by applicable law or agreed to in writing, software</span>
|
||||
<span class="c1"># distributed under the License is distributed on an "AS IS" BASIS,</span>
|
||||
<span class="c1"># WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</span>
|
||||
<span class="c1"># See the License for the specific language governing permissions and</span>
|
||||
<span class="c1"># limitations under the License.</span>
|
||||
<span class="c1">#</span>
|
||||
|
||||
|
||||
<span class="kn">from</span> <span class="nn">rl_coach.core_types</span> <span class="k">import</span> <span class="n">Transition</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="TransitionCollection"><a class="viewcode-back" href="../../../../components/memories/index.html#rl_coach.memories.non_episodic.TransitionCollection">[docs]</a><span class="k">class</span> <span class="nc">TransitionCollection</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span>
|
||||
|
||||
@@ -178,7 +178,24 @@
|
||||
<div itemprop="articleBody">
|
||||
|
||||
<h1>Source code for rl_coach.orchestrators.kubernetes_orchestrator</h1><div class="highlight"><pre>
|
||||
<span></span><span class="kn">import</span> <span class="nn">os</span>
|
||||
<span></span><span class="c1">#</span>
|
||||
<span class="c1"># Copyright (c) 2017 Intel Corporation</span>
|
||||
<span class="c1">#</span>
|
||||
<span class="c1"># Licensed under the Apache License, Version 2.0 (the "License");</span>
|
||||
<span class="c1"># you may not use this file except in compliance with the License.</span>
|
||||
<span class="c1"># You may obtain a copy of the License at</span>
|
||||
<span class="c1">#</span>
|
||||
<span class="c1"># http://www.apache.org/licenses/LICENSE-2.0</span>
|
||||
<span class="c1">#</span>
|
||||
<span class="c1"># Unless required by applicable law or agreed to in writing, software</span>
|
||||
<span class="c1"># distributed under the License is distributed on an "AS IS" BASIS,</span>
|
||||
<span class="c1"># WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</span>
|
||||
<span class="c1"># See the License for the specific language governing permissions and</span>
|
||||
<span class="c1"># limitations under the License.</span>
|
||||
<span class="c1">#</span>
|
||||
|
||||
|
||||
<span class="kn">import</span> <span class="nn">os</span>
|
||||
<span class="kn">import</span> <span class="nn">uuid</span>
|
||||
<span class="kn">import</span> <span class="nn">json</span>
|
||||
<span class="kn">import</span> <span class="nn">time</span>
|
||||
|
||||
Reference in New Issue
Block a user