mirror of
https://github.com/gryf/coach.git
synced 2025-12-18 11:40:18 +01:00
TD3 (#338)
This commit is contained in:
@@ -365,7 +365,7 @@
|
||||
<span class="n">action_values</span> <span class="o">=</span> <span class="kc">None</span>
|
||||
|
||||
<span class="c1"># choose action according to the exploration policy and the current phase (evaluating or training the agent)</span>
|
||||
<span class="n">action</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">exploration_policy</span><span class="o">.</span><span class="n">get_action</span><span class="p">(</span><span class="n">action_values</span><span class="p">)</span>
|
||||
<span class="n">action</span><span class="p">,</span> <span class="n">_</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">exploration_policy</span><span class="o">.</span><span class="n">get_action</span><span class="p">(</span><span class="n">action_values</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">action_values</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="n">action_values</span> <span class="o">=</span> <span class="n">action_values</span><span class="o">.</span><span class="n">squeeze</span><span class="p">()</span>
|
||||
|
||||
Reference in New Issue
Block a user