1
0
mirror of https://github.com/gryf/coach.git synced 2025-12-17 19:20:19 +01:00

Docs changes - fixing blogpost links, removing importing all exploration policies (#139)

* updated docs

* removing imports for all exploration policies in __init__ + setting the right blog-post link

* small cleanups
This commit is contained in:
Gal Leibovich
2018-12-05 23:16:16 +02:00
committed by Scott Leishman
parent 155b78b995
commit f12857a8c7
33 changed files with 191 additions and 160 deletions

View File

@@ -195,13 +195,11 @@
<span class="c1">#</span>
<span class="kn">import</span> <span class="nn">copy</span>
<span class="kn">import</span> <span class="nn">os</span>
<span class="kn">import</span> <span class="nn">random</span>
<span class="kn">from</span> <span class="nn">collections</span> <span class="k">import</span> <span class="n">OrderedDict</span>
<span class="kn">from</span> <span class="nn">typing</span> <span class="k">import</span> <span class="n">Dict</span><span class="p">,</span> <span class="n">List</span><span class="p">,</span> <span class="n">Union</span><span class="p">,</span> <span class="n">Tuple</span>
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
<span class="kn">from</span> <span class="nn">pandas</span> <span class="k">import</span> <span class="n">read_pickle</span>
<span class="kn">from</span> <span class="nn">six.moves</span> <span class="k">import</span> <span class="nb">range</span>
<span class="kn">from</span> <span class="nn">rl_coach.agents.agent_interface</span> <span class="k">import</span> <span class="n">AgentInterface</span>

View File

@@ -215,7 +215,8 @@
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
<span class="bp">self</span><span class="o">.</span><span class="n">num_steps_between_copying_online_weights_to_target</span> <span class="o">=</span> <span class="n">EnvironmentSteps</span><span class="p">(</span><span class="mi">10000</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">num_consecutive_playing_steps</span> <span class="o">=</span> <span class="n">EnvironmentSteps</span><span class="p">(</span><span class="mi">4</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">discount</span> <span class="o">=</span> <span class="mf">0.99</span></div>
<span class="bp">self</span><span class="o">.</span><span class="n">discount</span> <span class="o">=</span> <span class="mf">0.99</span>
<span class="bp">self</span><span class="o">.</span><span class="n">supports_parameter_noise</span> <span class="o">=</span> <span class="kc">True</span></div>
<span class="k">class</span> <span class="nc">DQNNetworkParameters</span><span class="p">(</span><span class="n">NetworkParameters</span><span class="p">):</span>