mirror of
https://github.com/gryf/coach.git
synced 2025-12-18 03:30:19 +01:00
TD3 (#338)
This commit is contained in:
@@ -37,7 +37,7 @@
|
||||
<link rel="stylesheet" href="../../../_static/css/custom.css" type="text/css" />
|
||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../../search.html" />
|
||||
<link rel="next" title="Soft Actor-Critic" href="sac.html" />
|
||||
<link rel="next" title="Twin Delayed Deep Deterministic Policy Gradient" href="td3.html" />
|
||||
<link rel="prev" title="Clipped Proximal Policy Optimization" href="cppo.html" />
|
||||
<link href="../../../_static/css/custom.css" rel="stylesheet" type="text/css">
|
||||
|
||||
@@ -125,6 +125,7 @@
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="td3.html">Twin Delayed Deep Deterministic Policy Gradient</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="sac.html">Soft Actor-Critic</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../other/dfp.html">Direct Future Prediction</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../value_optimization/double_dqn.html">Double DQN</a></li>
|
||||
@@ -257,7 +258,7 @@ given <span class="math notranslate nohighlight">\(\nabla_a Q(s,a)\)</span>. Fin
|
||||
<p>After every training step, do a soft update of the critic and actor target networks’ weights from the online networks.</p>
|
||||
<dl class="class">
|
||||
<dt id="rl_coach.agents.ddpg_agent.DDPGAlgorithmParameters">
|
||||
<em class="property">class </em><code class="descclassname">rl_coach.agents.ddpg_agent.</code><code class="descname">DDPGAlgorithmParameters</code><a class="reference internal" href="../../../_modules/rl_coach/agents/ddpg_agent.html#DDPGAlgorithmParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.ddpg_agent.DDPGAlgorithmParameters" title="Permalink to this definition">¶</a></dt>
|
||||
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.agents.ddpg_agent.</code><code class="sig-name descname">DDPGAlgorithmParameters</code><a class="reference internal" href="../../../_modules/rl_coach/agents/ddpg_agent.html#DDPGAlgorithmParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.ddpg_agent.DDPGAlgorithmParameters" title="Permalink to this definition">¶</a></dt>
|
||||
<dd><dl class="field-list simple">
|
||||
<dt class="field-odd">Parameters</dt>
|
||||
<dd class="field-odd"><ul class="simple">
|
||||
@@ -297,7 +298,7 @@ values. If set to False, the terminal states reward will be taken as the target
|
||||
|
||||
<div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
|
||||
|
||||
<a href="sac.html" class="btn btn-neutral float-right" title="Soft Actor-Critic" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right"></span></a>
|
||||
<a href="td3.html" class="btn btn-neutral float-right" title="Twin Delayed Deep Deterministic Policy Gradient" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right"></span></a>
|
||||
|
||||
|
||||
<a href="cppo.html" class="btn btn-neutral float-left" title="Clipped Proximal Policy Optimization" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left"></span> Previous</a>
|
||||
|
||||
Reference in New Issue
Block a user