mirror of
https://github.com/gryf/coach.git
synced 2025-12-17 19:20:19 +01:00
@@ -8,7 +8,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>rl_coach.environments.gym_environment — Reinforcement Learning Coach 0.12.1 documentation</title>
|
||||
<title>rl_coach.environments.gym_environment — Reinforcement Learning Coach 0.12.0 documentation</title>
|
||||
|
||||
|
||||
|
||||
@@ -486,7 +486,6 @@
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">is_mujoco_env</span> <span class="o">=</span> <span class="s1">'mujoco'</span> <span class="ow">in</span> <span class="nb">str</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">env</span><span class="o">.</span><span class="n">unwrapped</span><span class="o">.</span><span class="vm">__class__</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">is_roboschool_env</span> <span class="o">=</span> <span class="s1">'roboschool'</span> <span class="ow">in</span> <span class="nb">str</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">env</span><span class="o">.</span><span class="n">unwrapped</span><span class="o">.</span><span class="vm">__class__</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">is_atari_env</span> <span class="o">=</span> <span class="s1">'Atari'</span> <span class="ow">in</span> <span class="nb">str</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">env</span><span class="o">.</span><span class="n">unwrapped</span><span class="o">.</span><span class="vm">__class__</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">timelimit_env_wrapper</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">env</span>
|
||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">is_atari_env</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">env</span><span class="o">.</span><span class="n">unwrapped</span><span class="o">.</span><span class="n">frameskip</span> <span class="o">=</span> <span class="mi">1</span> <span class="c1"># this accesses the atari env that is wrapped with a timelimit wrapper env</span>
|
||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">env_id</span> <span class="o">==</span> <span class="s2">"SpaceInvadersDeterministic-v4"</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">frame_skip</span> <span class="o">==</span> <span class="mi">4</span><span class="p">:</span>
|
||||
@@ -588,12 +587,6 @@
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">native_rendering</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">renderer</span><span class="o">.</span><span class="n">create_screen</span><span class="p">(</span><span class="n">image</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">*</span><span class="n">scale</span><span class="p">,</span> <span class="n">image</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">*</span><span class="n">scale</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># measurements</span>
|
||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">env</span><span class="o">.</span><span class="n">spec</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">timestep_limit</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">env</span><span class="o">.</span><span class="n">spec</span><span class="o">.</span><span class="n">timestep_limit</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">timestep_limit</span> <span class="o">=</span> <span class="kc">None</span>
|
||||
|
||||
<span class="c1"># the info is only updated after the first step</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">state</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">default_action</span><span class="p">)</span><span class="o">.</span><span class="n">next_state</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">state_space</span><span class="p">[</span><span class="s1">'measurements'</span><span class="p">]</span> <span class="o">=</span> <span class="n">VectorObservationSpace</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">info</span><span class="o">.</span><span class="n">keys</span><span class="p">()))</span>
|
||||
@@ -653,7 +646,7 @@
|
||||
<span class="k">def</span> <span class="nf">_restart_environment_episode</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">force_environment_reset</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
|
||||
<span class="c1"># prevent reset of environment if there are ale lives left</span>
|
||||
<span class="k">if</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">is_atari_env</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">env</span><span class="o">.</span><span class="n">unwrapped</span><span class="o">.</span><span class="n">ale</span><span class="o">.</span><span class="n">lives</span><span class="p">()</span> <span class="o">></span> <span class="mi">0</span><span class="p">)</span> \
|
||||
<span class="ow">and</span> <span class="ow">not</span> <span class="n">force_environment_reset</span> <span class="ow">and</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">timelimit_env_wrapper</span><span class="o">.</span><span class="n">_past_limit</span><span class="p">():</span>
|
||||
<span class="ow">and</span> <span class="ow">not</span> <span class="n">force_environment_reset</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">default_action</span><span class="p">)</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">state</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
|
||||
|
||||
Reference in New Issue
Block a user