mirror of
https://github.com/gryf/coach.git
synced 2026-04-28 19:44:09 +02:00
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>Overview: module code — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>Overview: module code — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.agents.acer_agent — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.agents.acer_agent — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.agents.actor_critic_agent — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.agents.actor_critic_agent — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.agents.agent — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.agents.agent — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.agents.agent_interface — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.agents.agent_interface — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.agents.bc_agent — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.agents.bc_agent — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.agents.categorical_dqn_agent — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.agents.categorical_dqn_agent — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.agents.cil_agent — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.agents.cil_agent — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.agents.clipped_ppo_agent — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.agents.clipped_ppo_agent — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.agents.ddpg_agent — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.agents.ddpg_agent — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.agents.dfp_agent — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.agents.dfp_agent — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.agents.dqn_agent — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.agents.dqn_agent — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.agents.mmc_agent — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.agents.mmc_agent — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.agents.n_step_q_agent — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.agents.n_step_q_agent — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.agents.naf_agent — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.agents.naf_agent — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.agents.nec_agent — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.agents.nec_agent — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.agents.pal_agent — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.agents.pal_agent — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.agents.policy_gradients_agent — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.agents.policy_gradients_agent — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.agents.ppo_agent — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.agents.ppo_agent — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.agents.qr_dqn_agent — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.agents.qr_dqn_agent — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.agents.rainbow_dqn_agent — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.agents.rainbow_dqn_agent — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.agents.soft_actor_critic_agent — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.agents.soft_actor_critic_agent — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.agents.value_optimization_agent — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.agents.value_optimization_agent — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.architectures.architecture — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.architectures.architecture — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.architectures.network_wrapper — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.architectures.network_wrapper — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.base_parameters — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.base_parameters — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.core_types — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.core_types — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.data_stores.nfs_data_store — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.data_stores.nfs_data_store — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.data_stores.s3_data_store — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.data_stores.s3_data_store — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.environments.carla_environment — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.environments.carla_environment — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.environments.control_suite_environment — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.environments.control_suite_environment — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.environments.doom_environment — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.environments.doom_environment — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.environments.environment — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.environments.environment — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.environments.gym_environment — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.environments.gym_environment — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
@@ -486,7 +486,6 @@
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">is_mujoco_env</span> <span class="o">=</span> <span class="s1">'mujoco'</span> <span class="ow">in</span> <span class="nb">str</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">env</span><span class="o">.</span><span class="n">unwrapped</span><span class="o">.</span><span class="vm">__class__</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">is_roboschool_env</span> <span class="o">=</span> <span class="s1">'roboschool'</span> <span class="ow">in</span> <span class="nb">str</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">env</span><span class="o">.</span><span class="n">unwrapped</span><span class="o">.</span><span class="vm">__class__</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">is_atari_env</span> <span class="o">=</span> <span class="s1">'Atari'</span> <span class="ow">in</span> <span class="nb">str</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">env</span><span class="o">.</span><span class="n">unwrapped</span><span class="o">.</span><span class="vm">__class__</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">timelimit_env_wrapper</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">env</span>
|
||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">is_atari_env</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">env</span><span class="o">.</span><span class="n">unwrapped</span><span class="o">.</span><span class="n">frameskip</span> <span class="o">=</span> <span class="mi">1</span> <span class="c1"># this accesses the atari env that is wrapped with a timelimit wrapper env</span>
|
||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">env_id</span> <span class="o">==</span> <span class="s2">"SpaceInvadersDeterministic-v4"</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">frame_skip</span> <span class="o">==</span> <span class="mi">4</span><span class="p">:</span>
|
||||
@@ -588,12 +587,6 @@
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">native_rendering</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">renderer</span><span class="o">.</span><span class="n">create_screen</span><span class="p">(</span><span class="n">image</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">*</span><span class="n">scale</span><span class="p">,</span> <span class="n">image</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">*</span><span class="n">scale</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># measurements</span>
|
||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">env</span><span class="o">.</span><span class="n">spec</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">timestep_limit</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">env</span><span class="o">.</span><span class="n">spec</span><span class="o">.</span><span class="n">timestep_limit</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">timestep_limit</span> <span class="o">=</span> <span class="kc">None</span>
|
||||
|
||||
<span class="c1"># the info is only updated after the first step</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">state</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">default_action</span><span class="p">)</span><span class="o">.</span><span class="n">next_state</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">state_space</span><span class="p">[</span><span class="s1">'measurements'</span><span class="p">]</span> <span class="o">=</span> <span class="n">VectorObservationSpace</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">info</span><span class="o">.</span><span class="n">keys</span><span class="p">()))</span>
|
||||
@@ -653,7 +646,7 @@
|
||||
<span class="k">def</span> <span class="nf">_restart_environment_episode</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">force_environment_reset</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
|
||||
<span class="c1"># prevent reset of environment if there are ale lives left</span>
|
||||
<span class="k">if</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">is_atari_env</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">env</span><span class="o">.</span><span class="n">unwrapped</span><span class="o">.</span><span class="n">ale</span><span class="o">.</span><span class="n">lives</span><span class="p">()</span> <span class="o">></span> <span class="mi">0</span><span class="p">)</span> \
|
||||
<span class="ow">and</span> <span class="ow">not</span> <span class="n">force_environment_reset</span> <span class="ow">and</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">timelimit_env_wrapper</span><span class="o">.</span><span class="n">_past_limit</span><span class="p">():</span>
|
||||
<span class="ow">and</span> <span class="ow">not</span> <span class="n">force_environment_reset</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">default_action</span><span class="p">)</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">state</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.environments.starcraft2_environment — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.environments.starcraft2_environment — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.exploration_policies.additive_noise — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.exploration_policies.additive_noise — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.exploration_policies.boltzmann — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.exploration_policies.boltzmann — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.exploration_policies.bootstrapped — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.exploration_policies.bootstrapped — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.exploration_policies.categorical — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.exploration_policies.categorical — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.exploration_policies.continuous_entropy — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.exploration_policies.continuous_entropy — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.exploration_policies.e_greedy — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.exploration_policies.e_greedy — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.exploration_policies.exploration_policy — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.exploration_policies.exploration_policy — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.exploration_policies.greedy — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.exploration_policies.greedy — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.exploration_policies.ou_process — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.exploration_policies.ou_process — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.exploration_policies.parameter_noise — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.exploration_policies.parameter_noise — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.exploration_policies.truncated_normal — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.exploration_policies.truncated_normal — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.exploration_policies.ucb — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.exploration_policies.ucb — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.filters.action.attention_discretization — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.filters.action.attention_discretization — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.filters.action.box_discretization — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.filters.action.box_discretization — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.filters.action.box_masking — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.filters.action.box_masking — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.filters.action.full_discrete_action_space_map — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.filters.action.full_discrete_action_space_map — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.filters.action.linear_box_to_box_map — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.filters.action.linear_box_to_box_map — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.filters.action.partial_discrete_action_space_map — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.filters.action.partial_discrete_action_space_map — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.filters.observation.observation_clipping_filter — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.filters.observation.observation_clipping_filter — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.filters.observation.observation_crop_filter — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.filters.observation.observation_crop_filter — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.filters.observation.observation_move_axis_filter — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.filters.observation.observation_move_axis_filter — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.filters.observation.observation_normalization_filter — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.filters.observation.observation_normalization_filter — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
+1
-1
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.filters.observation.observation_reduction_by_sub_parts_name_filter — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.filters.observation.observation_reduction_by_sub_parts_name_filter — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
+1
-1
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.filters.observation.observation_rescale_size_by_factor_filter — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.filters.observation.observation_rescale_size_by_factor_filter — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.filters.observation.observation_rescale_to_size_filter — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.filters.observation.observation_rescale_to_size_filter — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.filters.observation.observation_rgb_to_y_filter — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.filters.observation.observation_rgb_to_y_filter — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.filters.observation.observation_squeeze_filter — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.filters.observation.observation_squeeze_filter — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.filters.observation.observation_stacking_filter — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.filters.observation.observation_stacking_filter — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.filters.observation.observation_to_uint8_filter — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.filters.observation.observation_to_uint8_filter — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.filters.reward.reward_clipping_filter — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.filters.reward.reward_clipping_filter — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.filters.reward.reward_normalization_filter — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.filters.reward.reward_normalization_filter — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.filters.reward.reward_rescale_filter — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.filters.reward.reward_rescale_filter — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.memories.backend.redis — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.memories.backend.redis — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.memories.episodic.episodic_experience_replay — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.memories.episodic.episodic_experience_replay — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.memories.episodic.episodic_hindsight_experience_replay — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.memories.episodic.episodic_hindsight_experience_replay — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
+1
-1
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.memories.episodic.episodic_hrl_hindsight_experience_replay — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.memories.episodic.episodic_hrl_hindsight_experience_replay — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.memories.episodic.single_episode_buffer — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.memories.episodic.single_episode_buffer — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.memories.non_episodic.balanced_experience_replay — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.memories.non_episodic.balanced_experience_replay — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.memories.non_episodic.differentiable_neural_dictionary — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.memories.non_episodic.differentiable_neural_dictionary — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.memories.non_episodic.experience_replay — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.memories.non_episodic.experience_replay — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.memories.non_episodic.prioritized_experience_replay — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.memories.non_episodic.prioritized_experience_replay — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.memories.non_episodic.transition_collection — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.memories.non_episodic.transition_collection — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.orchestrators.kubernetes_orchestrator — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.orchestrators.kubernetes_orchestrator — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.spaces — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.spaces — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -27,7 +27,7 @@ Blog posts from the Intel® AI website:
|
||||
|
||||
* `Release 0.11.0 <https://ai.intel.com/rl-coach-data-science-at-scale/>`_
|
||||
|
||||
* Release 0.12.1 (current release)
|
||||
* Release 0.12.0 (current release)
|
||||
|
||||
You can find more details in the `GitHub repository <https://github.com/NervanaSystems/coach>`_.
|
||||
|
||||
|
||||
+1
-1
@@ -1,6 +1,6 @@
|
||||
var DOCUMENTATION_OPTIONS = {
|
||||
URL_ROOT: document.getElementById("documentation_options").getAttribute('data-url_root'),
|
||||
VERSION: '0.12.1',
|
||||
VERSION: '0.12.0',
|
||||
LANGUAGE: 'None',
|
||||
COLLAPSE_INDEX: false,
|
||||
FILE_SUFFIX: '.html',
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>Additional Parameters — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>Additional Parameters — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>Behavioral Cloning — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>Behavioral Cloning — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>Conditional Imitation Learning — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>Conditional Imitation Learning — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>Agents — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>Agents — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>Direct Future Prediction — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>Direct Future Prediction — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>Actor-Critic — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>Actor-Critic — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>ACER — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>ACER — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>Clipped Proximal Policy Optimization — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>Clipped Proximal Policy Optimization — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>Deep Deterministic Policy Gradient — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>Deep Deterministic Policy Gradient — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>Hierarchical Actor Critic — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>Hierarchical Actor Critic — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>Policy Gradient — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>Policy Gradient — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>Proximal Policy Optimization — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>Proximal Policy Optimization — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>Soft Actor-Critic — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>Soft Actor-Critic — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>Bootstrapped DQN — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>Bootstrapped DQN — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>Categorical DQN — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>Categorical DQN — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>Double DQN — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>Double DQN — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>Deep Q Networks — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>Deep Q Networks — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>Dueling DQN — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>Dueling DQN — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>Mixed Monte Carlo — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>Mixed Monte Carlo — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user