1
0
mirror of https://github.com/gryf/coach.git synced 2025-12-18 03:30:19 +01:00
This commit is contained in:
Gal Leibovich
2019-06-16 11:11:21 +03:00
committed by GitHub
parent 8df3c46756
commit 7eb884c5b2
107 changed files with 2200 additions and 495 deletions

View File

@@ -126,6 +126,7 @@
<li class="toctree-l2"><a class="reference internal" href="../imitation/cil.html">Conditional Imitation Learning</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/cppo.html">Clipped Proximal Policy Optimization</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/ddpg.html">Deep Deterministic Policy Gradient</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/td3.html">Twin Delayed Deep Deterministic Policy Gradient</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/sac.html">Soft Actor-Critic</a></li>
<li class="toctree-l2"><a class="reference internal" href="../other/dfp.html">Direct Future Prediction</a></li>
<li class="toctree-l2"><a class="reference internal" href="double_dqn.html">Double DQN</a></li>

View File

@@ -124,6 +124,7 @@
<li class="toctree-l2"><a class="reference internal" href="../imitation/cil.html">Conditional Imitation Learning</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/cppo.html">Clipped Proximal Policy Optimization</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/ddpg.html">Deep Deterministic Policy Gradient</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/td3.html">Twin Delayed Deep Deterministic Policy Gradient</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/sac.html">Soft Actor-Critic</a></li>
<li class="toctree-l2"><a class="reference internal" href="../other/dfp.html">Direct Future Prediction</a></li>
<li class="toctree-l2"><a class="reference internal" href="double_dqn.html">Double DQN</a></li>
@@ -244,7 +245,7 @@ probability distribution. Only the target of the actions that were actually ta
</ol>
<dl class="class">
<dt id="rl_coach.agents.categorical_dqn_agent.CategoricalDQNAlgorithmParameters">
<em class="property">class </em><code class="descclassname">rl_coach.agents.categorical_dqn_agent.</code><code class="descname">CategoricalDQNAlgorithmParameters</code><a class="reference internal" href="../../../_modules/rl_coach/agents/categorical_dqn_agent.html#CategoricalDQNAlgorithmParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.categorical_dqn_agent.CategoricalDQNAlgorithmParameters" title="Permalink to this definition"></a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.agents.categorical_dqn_agent.</code><code class="sig-name descname">CategoricalDQNAlgorithmParameters</code><a class="reference internal" href="../../../_modules/rl_coach/agents/categorical_dqn_agent.html#CategoricalDQNAlgorithmParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.categorical_dqn_agent.CategoricalDQNAlgorithmParameters" title="Permalink to this definition"></a></dt>
<dd><dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">

View File

@@ -117,6 +117,7 @@
<li class="toctree-l2"><a class="reference internal" href="../imitation/cil.html">Conditional Imitation Learning</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/cppo.html">Clipped Proximal Policy Optimization</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/ddpg.html">Deep Deterministic Policy Gradient</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/td3.html">Twin Delayed Deep Deterministic Policy Gradient</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/sac.html">Soft Actor-Critic</a></li>
<li class="toctree-l2"><a class="reference internal" href="../other/dfp.html">Direct Future Prediction</a></li>
<li class="toctree-l2 current"><a class="current reference internal" href="#">Double DQN</a><ul>

View File

@@ -117,6 +117,7 @@
<li class="toctree-l2"><a class="reference internal" href="../imitation/cil.html">Conditional Imitation Learning</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/cppo.html">Clipped Proximal Policy Optimization</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/ddpg.html">Deep Deterministic Policy Gradient</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/td3.html">Twin Delayed Deep Deterministic Policy Gradient</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/sac.html">Soft Actor-Critic</a></li>
<li class="toctree-l2"><a class="reference internal" href="../other/dfp.html">Direct Future Prediction</a></li>
<li class="toctree-l2"><a class="reference internal" href="double_dqn.html">Double DQN</a></li>
@@ -243,7 +244,7 @@ Set those values as the targets for the actions that were not actually played.</
</ol>
<dl class="class">
<dt id="rl_coach.agents.dqn_agent.DQNAlgorithmParameters">
<em class="property">class </em><code class="descclassname">rl_coach.agents.dqn_agent.</code><code class="descname">DQNAlgorithmParameters</code><a class="reference internal" href="../../../_modules/rl_coach/agents/dqn_agent.html#DQNAlgorithmParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.dqn_agent.DQNAlgorithmParameters" title="Permalink to this definition"></a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.agents.dqn_agent.</code><code class="sig-name descname">DQNAlgorithmParameters</code><a class="reference internal" href="../../../_modules/rl_coach/agents/dqn_agent.html#DQNAlgorithmParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.dqn_agent.DQNAlgorithmParameters" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
</div>

View File

@@ -117,6 +117,7 @@
<li class="toctree-l2"><a class="reference internal" href="../imitation/cil.html">Conditional Imitation Learning</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/cppo.html">Clipped Proximal Policy Optimization</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/ddpg.html">Deep Deterministic Policy Gradient</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/td3.html">Twin Delayed Deep Deterministic Policy Gradient</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/sac.html">Soft Actor-Critic</a></li>
<li class="toctree-l2"><a class="reference internal" href="../other/dfp.html">Direct Future Prediction</a></li>
<li class="toctree-l2"><a class="reference internal" href="double_dqn.html">Double DQN</a></li>

View File

@@ -117,6 +117,7 @@
<li class="toctree-l2"><a class="reference internal" href="../imitation/cil.html">Conditional Imitation Learning</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/cppo.html">Clipped Proximal Policy Optimization</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/ddpg.html">Deep Deterministic Policy Gradient</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/td3.html">Twin Delayed Deep Deterministic Policy Gradient</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/sac.html">Soft Actor-Critic</a></li>
<li class="toctree-l2"><a class="reference internal" href="../other/dfp.html">Direct Future Prediction</a></li>
<li class="toctree-l2"><a class="reference internal" href="double_dqn.html">Double DQN</a></li>
@@ -240,7 +241,7 @@
Once in every few thousand steps, copy the weights from the online network to the target network.</p>
<dl class="class">
<dt id="rl_coach.agents.mmc_agent.MixedMonteCarloAlgorithmParameters">
<em class="property">class </em><code class="descclassname">rl_coach.agents.mmc_agent.</code><code class="descname">MixedMonteCarloAlgorithmParameters</code><a class="reference internal" href="../../../_modules/rl_coach/agents/mmc_agent.html#MixedMonteCarloAlgorithmParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.mmc_agent.MixedMonteCarloAlgorithmParameters" title="Permalink to this definition"></a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.agents.mmc_agent.</code><code class="sig-name descname">MixedMonteCarloAlgorithmParameters</code><a class="reference internal" href="../../../_modules/rl_coach/agents/mmc_agent.html#MixedMonteCarloAlgorithmParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.mmc_agent.MixedMonteCarloAlgorithmParameters" title="Permalink to this definition"></a></dt>
<dd><dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><p><strong>monte_carlo_mixing_rate</strong> (float)

View File

@@ -117,6 +117,7 @@
<li class="toctree-l2"><a class="reference internal" href="../imitation/cil.html">Conditional Imitation Learning</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/cppo.html">Clipped Proximal Policy Optimization</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/ddpg.html">Deep Deterministic Policy Gradient</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/td3.html">Twin Delayed Deep Deterministic Policy Gradient</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/sac.html">Soft Actor-Critic</a></li>
<li class="toctree-l2"><a class="reference internal" href="../other/dfp.html">Direct Future Prediction</a></li>
<li class="toctree-l2"><a class="reference internal" href="double_dqn.html">Double DQN</a></li>
@@ -242,7 +243,7 @@ where <span class="math notranslate nohighlight">\(k\)</span> is <span class="ma
</ol>
<dl class="class">
<dt id="rl_coach.agents.n_step_q_agent.NStepQAlgorithmParameters">
<em class="property">class </em><code class="descclassname">rl_coach.agents.n_step_q_agent.</code><code class="descname">NStepQAlgorithmParameters</code><a class="reference internal" href="../../../_modules/rl_coach/agents/n_step_q_agent.html#NStepQAlgorithmParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.n_step_q_agent.NStepQAlgorithmParameters" title="Permalink to this definition"></a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.agents.n_step_q_agent.</code><code class="sig-name descname">NStepQAlgorithmParameters</code><a class="reference internal" href="../../../_modules/rl_coach/agents/n_step_q_agent.html#NStepQAlgorithmParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.n_step_q_agent.NStepQAlgorithmParameters" title="Permalink to this definition"></a></dt>
<dd><dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">

View File

@@ -117,6 +117,7 @@
<li class="toctree-l2"><a class="reference internal" href="../imitation/cil.html">Conditional Imitation Learning</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/cppo.html">Clipped Proximal Policy Optimization</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/ddpg.html">Deep Deterministic Policy Gradient</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/td3.html">Twin Delayed Deep Deterministic Policy Gradient</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/sac.html">Soft Actor-Critic</a></li>
<li class="toctree-l2"><a class="reference internal" href="../other/dfp.html">Direct Future Prediction</a></li>
<li class="toctree-l2"><a class="reference internal" href="double_dqn.html">Double DQN</a></li>
@@ -243,7 +244,7 @@ and <span class="math notranslate nohighlight">\(y_t\)</span> as the targets.
After every training step, use a soft update in order to copy the weights from the online network to the target network.</p>
<dl class="class">
<dt id="rl_coach.agents.naf_agent.NAFAlgorithmParameters">
<em class="property">class </em><code class="descclassname">rl_coach.agents.naf_agent.</code><code class="descname">NAFAlgorithmParameters</code><a class="reference internal" href="../../../_modules/rl_coach/agents/naf_agent.html#NAFAlgorithmParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.naf_agent.NAFAlgorithmParameters" title="Permalink to this definition"></a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.agents.naf_agent.</code><code class="sig-name descname">NAFAlgorithmParameters</code><a class="reference internal" href="../../../_modules/rl_coach/agents/naf_agent.html#NAFAlgorithmParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.naf_agent.NAFAlgorithmParameters" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
</div>

View File

@@ -117,6 +117,7 @@
<li class="toctree-l2"><a class="reference internal" href="../imitation/cil.html">Conditional Imitation Learning</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/cppo.html">Clipped Proximal Policy Optimization</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/ddpg.html">Deep Deterministic Policy Gradient</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/td3.html">Twin Delayed Deep Deterministic Policy Gradient</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/sac.html">Soft Actor-Critic</a></li>
<li class="toctree-l2"><a class="reference internal" href="../other/dfp.html">Direct Future Prediction</a></li>
<li class="toctree-l2"><a class="reference internal" href="double_dqn.html">Double DQN</a></li>
@@ -258,7 +259,7 @@ the network if necessary:
<span class="math notranslate nohighlight">\(y_t=\sum_{j=0}^{N-1}\gamma^j r(s_{t+j},a_{t+j} ) +\gamma^N max_a Q(s_{t+N},a)\)</span></p>
<dl class="class">
<dt id="rl_coach.agents.nec_agent.NECAlgorithmParameters">
<em class="property">class </em><code class="descclassname">rl_coach.agents.nec_agent.</code><code class="descname">NECAlgorithmParameters</code><a class="reference internal" href="../../../_modules/rl_coach/agents/nec_agent.html#NECAlgorithmParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.nec_agent.NECAlgorithmParameters" title="Permalink to this definition"></a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.agents.nec_agent.</code><code class="sig-name descname">NECAlgorithmParameters</code><a class="reference internal" href="../../../_modules/rl_coach/agents/nec_agent.html#NECAlgorithmParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.nec_agent.NECAlgorithmParameters" title="Permalink to this definition"></a></dt>
<dd><dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">

View File

@@ -117,6 +117,7 @@
<li class="toctree-l2"><a class="reference internal" href="../imitation/cil.html">Conditional Imitation Learning</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/cppo.html">Clipped Proximal Policy Optimization</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/ddpg.html">Deep Deterministic Policy Gradient</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/td3.html">Twin Delayed Deep Deterministic Policy Gradient</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/sac.html">Soft Actor-Critic</a></li>
<li class="toctree-l2"><a class="reference internal" href="../other/dfp.html">Direct Future Prediction</a></li>
<li class="toctree-l2"><a class="reference internal" href="double_dqn.html">Double DQN</a></li>
@@ -251,7 +252,7 @@ has the highest predicted <span class="math notranslate nohighlight">\(Q\)</span
</ol>
<dl class="class">
<dt id="rl_coach.agents.pal_agent.PALAlgorithmParameters">
<em class="property">class </em><code class="descclassname">rl_coach.agents.pal_agent.</code><code class="descname">PALAlgorithmParameters</code><a class="reference internal" href="../../../_modules/rl_coach/agents/pal_agent.html#PALAlgorithmParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.pal_agent.PALAlgorithmParameters" title="Permalink to this definition"></a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.agents.pal_agent.</code><code class="sig-name descname">PALAlgorithmParameters</code><a class="reference internal" href="../../../_modules/rl_coach/agents/pal_agent.html#PALAlgorithmParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.pal_agent.PALAlgorithmParameters" title="Permalink to this definition"></a></dt>
<dd><dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">

View File

@@ -117,6 +117,7 @@
<li class="toctree-l2"><a class="reference internal" href="../imitation/cil.html">Conditional Imitation Learning</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/cppo.html">Clipped Proximal Policy Optimization</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/ddpg.html">Deep Deterministic Policy Gradient</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/td3.html">Twin Delayed Deep Deterministic Policy Gradient</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/sac.html">Soft Actor-Critic</a></li>
<li class="toctree-l2"><a class="reference internal" href="../other/dfp.html">Direct Future Prediction</a></li>
<li class="toctree-l2"><a class="reference internal" href="double_dqn.html">Double DQN</a></li>
@@ -241,7 +242,7 @@ quantile locations. Only the targets of the actions that were actually taken are
</ol>
<dl class="class">
<dt id="rl_coach.agents.qr_dqn_agent.QuantileRegressionDQNAlgorithmParameters">
<em class="property">class </em><code class="descclassname">rl_coach.agents.qr_dqn_agent.</code><code class="descname">QuantileRegressionDQNAlgorithmParameters</code><a class="reference internal" href="../../../_modules/rl_coach/agents/qr_dqn_agent.html#QuantileRegressionDQNAlgorithmParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.qr_dqn_agent.QuantileRegressionDQNAlgorithmParameters" title="Permalink to this definition"></a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.agents.qr_dqn_agent.</code><code class="sig-name descname">QuantileRegressionDQNAlgorithmParameters</code><a class="reference internal" href="../../../_modules/rl_coach/agents/qr_dqn_agent.html#QuantileRegressionDQNAlgorithmParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.qr_dqn_agent.QuantileRegressionDQNAlgorithmParameters" title="Permalink to this definition"></a></dt>
<dd><dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">

View File

@@ -117,6 +117,7 @@
<li class="toctree-l2"><a class="reference internal" href="../imitation/cil.html">Conditional Imitation Learning</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/cppo.html">Clipped Proximal Policy Optimization</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/ddpg.html">Deep Deterministic Policy Gradient</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/td3.html">Twin Delayed Deep Deterministic Policy Gradient</a></li>
<li class="toctree-l2"><a class="reference internal" href="../policy_optimization/sac.html">Soft Actor-Critic</a></li>
<li class="toctree-l2"><a class="reference internal" href="../other/dfp.html">Direct Future Prediction</a></li>
<li class="toctree-l2"><a class="reference internal" href="double_dqn.html">Double DQN</a></li>
@@ -256,7 +257,7 @@ using the KL divergence loss that is returned from the network.</p></li>
</ol>
<dl class="class">
<dt id="rl_coach.agents.rainbow_dqn_agent.RainbowDQNAlgorithmParameters">
<em class="property">class </em><code class="descclassname">rl_coach.agents.rainbow_dqn_agent.</code><code class="descname">RainbowDQNAlgorithmParameters</code><a class="reference internal" href="../../../_modules/rl_coach/agents/rainbow_dqn_agent.html#RainbowDQNAlgorithmParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.rainbow_dqn_agent.RainbowDQNAlgorithmParameters" title="Permalink to this definition"></a></dt>
<em class="property">class </em><code class="sig-prename descclassname">rl_coach.agents.rainbow_dqn_agent.</code><code class="sig-name descname">RainbowDQNAlgorithmParameters</code><a class="reference internal" href="../../../_modules/rl_coach/agents/rainbow_dqn_agent.html#RainbowDQNAlgorithmParameters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#rl_coach.agents.rainbow_dqn_agent.RainbowDQNAlgorithmParameters" title="Permalink to this definition"></a></dt>
<dd><dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">