mirror of
https://github.com/gryf/coach.git
synced 2025-12-18 03:30:19 +01:00
Batch RL Tutorial (#372)
This commit is contained in:
@@ -198,6 +198,8 @@
|
||||
<span class="c1"># limitations under the License.</span>
|
||||
<span class="c1">#</span>
|
||||
<span class="kn">import</span> <span class="nn">ast</span>
|
||||
|
||||
<span class="kn">import</span> <span class="nn">pickle</span>
|
||||
<span class="kn">from</span> <span class="nn">copy</span> <span class="k">import</span> <span class="n">deepcopy</span>
|
||||
|
||||
<span class="kn">import</span> <span class="nn">math</span>
|
||||
@@ -324,14 +326,27 @@
|
||||
|
||||
<span class="k">def</span> <span class="nf">shuffle_episodes</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="sd">"""</span>
|
||||
<span class="sd"> Shuffle all the episodes in the replay buffer</span>
|
||||
<span class="sd"> Shuffle all the complete episodes in the replay buffer, while deleting the last non-complete episode</span>
|
||||
<span class="sd"> :return:</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">reader_writer_lock</span><span class="o">.</span><span class="n">lock_writing</span><span class="p">()</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">assert_not_frozen</span><span class="p">()</span>
|
||||
|
||||
<span class="c1"># unlike the standard usage of the EpisodicExperienceReplay, where we always leave an empty episode after</span>
|
||||
<span class="c1"># the last full one, so that new transitions will have where to be added, in this case we delibrately remove</span>
|
||||
<span class="c1"># that empty last episode, as we are about to shuffle the memory, and we don't want it to be shuffled in</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">remove_last_episode</span><span class="p">(</span><span class="n">lock</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
|
||||
|
||||
<span class="n">random</span><span class="o">.</span><span class="n">shuffle</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_buffer</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">transitions</span> <span class="o">=</span> <span class="p">[</span><span class="n">t</span> <span class="k">for</span> <span class="n">e</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_buffer</span> <span class="k">for</span> <span class="n">t</span> <span class="ow">in</span> <span class="n">e</span><span class="o">.</span><span class="n">transitions</span><span class="p">]</span>
|
||||
|
||||
<span class="c1"># create a new Episode for the next transitions to be placed into</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">_buffer</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">Episode</span><span class="p">(</span><span class="n">n_step</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">n_step</span><span class="p">))</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">_length</span> <span class="o">+=</span> <span class="mi">1</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">reader_writer_lock</span><span class="o">.</span><span class="n">release_writing</span><span class="p">()</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">get_shuffled_training_data_generator</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">size</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="n">List</span><span class="p">[</span><span class="n">Transition</span><span class="p">]:</span>
|
||||
<span class="sd">"""</span>
|
||||
<span class="sd"> Get an generator for iterating through the shuffled replay buffer, for processing the data in epochs.</span>
|
||||
@@ -384,10 +399,10 @@
|
||||
<span class="n">granularity</span><span class="p">,</span> <span class="n">size</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">max_size</span>
|
||||
<span class="k">if</span> <span class="n">granularity</span> <span class="o">==</span> <span class="n">MemoryGranularity</span><span class="o">.</span><span class="n">Transitions</span><span class="p">:</span>
|
||||
<span class="k">while</span> <span class="n">size</span> <span class="o">!=</span> <span class="mi">0</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">num_transitions</span><span class="p">()</span> <span class="o">></span> <span class="n">size</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">_remove_episode</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">remove_first_episode</span><span class="p">(</span><span class="n">lock</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
|
||||
<span class="k">elif</span> <span class="n">granularity</span> <span class="o">==</span> <span class="n">MemoryGranularity</span><span class="o">.</span><span class="n">Episodes</span><span class="p">:</span>
|
||||
<span class="k">while</span> <span class="bp">self</span><span class="o">.</span><span class="n">length</span><span class="p">()</span> <span class="o">></span> <span class="n">size</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">_remove_episode</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">remove_first_episode</span><span class="p">(</span><span class="n">lock</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">_update_episode</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">episode</span><span class="p">:</span> <span class="n">Episode</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="n">episode</span><span class="o">.</span><span class="n">update_transitions_rewards_and_bootstrap_data</span><span class="p">()</span>
|
||||
@@ -504,31 +519,53 @@
|
||||
|
||||
<span class="k">def</span> <span class="nf">_remove_episode</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">episode_index</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="sd">"""</span>
|
||||
<span class="sd"> Remove the episode in the given index (even if it is not complete yet)</span>
|
||||
<span class="sd"> :param episode_index: the index of the episode to remove</span>
|
||||
<span class="sd"> Remove either the first or the last index</span>
|
||||
<span class="sd"> :param episode_index: the index of the episode to remove (either 0 or -1)</span>
|
||||
<span class="sd"> :return: None</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">assert_not_frozen</span><span class="p">()</span>
|
||||
<span class="k">assert</span> <span class="n">episode_index</span> <span class="o">==</span> <span class="mi">0</span> <span class="ow">or</span> <span class="n">episode_index</span> <span class="o">==</span> <span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="s2">"_remove_episode only supports removing the first or the last "</span> \
|
||||
<span class="s2">"episode"</span>
|
||||
|
||||
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_buffer</span><span class="p">)</span> <span class="o">></span> <span class="n">episode_index</span><span class="p">:</span>
|
||||
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_buffer</span><span class="p">)</span> <span class="o">></span> <span class="mi">0</span><span class="p">:</span>
|
||||
<span class="n">episode_length</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_buffer</span><span class="p">[</span><span class="n">episode_index</span><span class="p">]</span><span class="o">.</span><span class="n">length</span><span class="p">()</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">_length</span> <span class="o">-=</span> <span class="mi">1</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">_num_transitions</span> <span class="o">-=</span> <span class="n">episode_length</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">_num_transitions_in_complete_episodes</span> <span class="o">-=</span> <span class="n">episode_length</span>
|
||||
<span class="k">del</span> <span class="bp">self</span><span class="o">.</span><span class="n">transitions</span><span class="p">[:</span><span class="n">episode_length</span><span class="p">]</span>
|
||||
<span class="k">if</span> <span class="n">episode_index</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
|
||||
<span class="k">del</span> <span class="bp">self</span><span class="o">.</span><span class="n">transitions</span><span class="p">[:</span><span class="n">episode_length</span><span class="p">]</span>
|
||||
<span class="k">else</span><span class="p">:</span> <span class="c1"># episode_index = -1</span>
|
||||
<span class="k">del</span> <span class="bp">self</span><span class="o">.</span><span class="n">transitions</span><span class="p">[</span><span class="o">-</span><span class="n">episode_length</span><span class="p">:]</span>
|
||||
<span class="k">del</span> <span class="bp">self</span><span class="o">.</span><span class="n">_buffer</span><span class="p">[</span><span class="n">episode_index</span><span class="p">]</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">remove_episode</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">episode_index</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="k">def</span> <span class="nf">remove_first_episode</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">lock</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="sd">"""</span>
|
||||
<span class="sd"> Remove the episode in the given index (even if it is not complete yet)</span>
|
||||
<span class="sd"> :param episode_index: the index of the episode to remove</span>
|
||||
<span class="sd"> Remove the first episode (even if it is not complete yet)</span>
|
||||
<span class="sd"> :param lock: if true, will lock the readers writers lock. this can cause a deadlock if an inheriting class</span>
|
||||
<span class="sd"> locks and then calls store with lock = True</span>
|
||||
<span class="sd"> :return: None</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">reader_writer_lock</span><span class="o">.</span><span class="n">lock_writing_and_reading</span><span class="p">()</span>
|
||||
<span class="k">if</span> <span class="n">lock</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">reader_writer_lock</span><span class="o">.</span><span class="n">lock_writing_and_reading</span><span class="p">()</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">_remove_episode</span><span class="p">(</span><span class="n">episode_index</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">_remove_episode</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">lock</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">reader_writer_lock</span><span class="o">.</span><span class="n">release_writing_and_reading</span><span class="p">()</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">reader_writer_lock</span><span class="o">.</span><span class="n">release_writing_and_reading</span><span class="p">()</span>
|
||||
<span class="k">def</span> <span class="nf">remove_last_episode</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">lock</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="sd">"""</span>
|
||||
<span class="sd"> Remove the last episode (even if it is not complete yet)</span>
|
||||
<span class="sd"> :param lock: if true, will lock the readers writers lock. this can cause a deadlock if an inheriting class</span>
|
||||
<span class="sd"> locks and then calls store with lock = True</span>
|
||||
<span class="sd"> :return: None</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">if</span> <span class="n">lock</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">reader_writer_lock</span><span class="o">.</span><span class="n">lock_writing_and_reading</span><span class="p">()</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">_remove_episode</span><span class="p">(</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">lock</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">reader_writer_lock</span><span class="o">.</span><span class="n">release_writing_and_reading</span><span class="p">()</span>
|
||||
|
||||
<span class="c1"># for API compatibility</span>
|
||||
<span class="k">def</span> <span class="nf">get</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">episode_index</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span> <span class="n">lock</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">)</span> <span class="o">-></span> <span class="n">Union</span><span class="p">[</span><span class="kc">None</span><span class="p">,</span> <span class="n">Episode</span><span class="p">]:</span>
|
||||
@@ -555,15 +592,6 @@
|
||||
|
||||
<span class="k">return</span> <span class="n">episode</span>
|
||||
|
||||
<span class="c1"># for API compatibility</span>
|
||||
<span class="k">def</span> <span class="nf">remove</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">episode_index</span><span class="p">:</span> <span class="nb">int</span><span class="p">):</span>
|
||||
<span class="sd">"""</span>
|
||||
<span class="sd"> Remove the episode in the given index (even if it is not complete yet)</span>
|
||||
<span class="sd"> :param episode_index: the index of the episode to remove</span>
|
||||
<span class="sd"> :return: None</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">remove_episode</span><span class="p">(</span><span class="n">episode_index</span><span class="p">)</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">clean</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="sd">"""</span>
|
||||
<span class="sd"> Clean the memory by removing all the episodes</span>
|
||||
@@ -629,7 +657,7 @@
|
||||
|
||||
<span class="n">transitions</span><span class="o">.</span><span class="n">append</span><span class="p">(</span>
|
||||
<span class="n">Transition</span><span class="p">(</span><span class="n">state</span><span class="o">=</span><span class="p">{</span><span class="s1">'observation'</span><span class="p">:</span> <span class="n">state</span><span class="p">},</span>
|
||||
<span class="n">action</span><span class="o">=</span><span class="n">current_transition</span><span class="p">[</span><span class="s1">'action'</span><span class="p">],</span> <span class="n">reward</span><span class="o">=</span><span class="n">current_transition</span><span class="p">[</span><span class="s1">'reward'</span><span class="p">],</span>
|
||||
<span class="n">action</span><span class="o">=</span><span class="nb">int</span><span class="p">(</span><span class="n">current_transition</span><span class="p">[</span><span class="s1">'action'</span><span class="p">]),</span> <span class="n">reward</span><span class="o">=</span><span class="n">current_transition</span><span class="p">[</span><span class="s1">'reward'</span><span class="p">],</span>
|
||||
<span class="n">next_state</span><span class="o">=</span><span class="p">{</span><span class="s1">'observation'</span><span class="p">:</span> <span class="n">next_state</span><span class="p">},</span> <span class="n">game_over</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span>
|
||||
<span class="n">info</span><span class="o">=</span><span class="p">{</span><span class="s1">'all_action_probabilities'</span><span class="p">:</span>
|
||||
<span class="n">ast</span><span class="o">.</span><span class="n">literal_eval</span><span class="p">(</span><span class="n">current_transition</span><span class="p">[</span><span class="s1">'all_action_probabilities'</span><span class="p">])}),</span>
|
||||
@@ -698,7 +726,40 @@
|
||||
<span class="n">episode_num</span><span class="p">,</span> <span class="n">episode</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_episode_for_transition</span><span class="p">(</span><span class="n">transition</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">last_training_set_episode_id</span> <span class="o">=</span> <span class="n">episode_num</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">last_training_set_transition_id</span> <span class="o">=</span> \
|
||||
<span class="nb">len</span><span class="p">([</span><span class="n">t</span> <span class="k">for</span> <span class="n">e</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_all_complete_episodes_from_to</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">last_training_set_episode_id</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span> <span class="k">for</span> <span class="n">t</span> <span class="ow">in</span> <span class="n">e</span><span class="p">])</span></div>
|
||||
<span class="nb">len</span><span class="p">([</span><span class="n">t</span> <span class="k">for</span> <span class="n">e</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_all_complete_episodes_from_to</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">last_training_set_episode_id</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span> <span class="k">for</span> <span class="n">t</span> <span class="ow">in</span> <span class="n">e</span><span class="p">])</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">save</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">file_path</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="sd">"""</span>
|
||||
<span class="sd"> Save the replay buffer contents to a pickle file</span>
|
||||
<span class="sd"> :param file_path: the path to the file that will be used to store the pickled transitions</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">file_path</span><span class="p">,</span> <span class="s1">'wb'</span><span class="p">)</span> <span class="k">as</span> <span class="n">file</span><span class="p">:</span>
|
||||
<span class="n">pickle</span><span class="o">.</span><span class="n">dump</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">get_all_complete_episodes</span><span class="p">(),</span> <span class="n">file</span><span class="p">)</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">load_pickled</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">file_path</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="sd">"""</span>
|
||||
<span class="sd"> Restore the replay buffer contents from a pickle file.</span>
|
||||
<span class="sd"> The pickle file is assumed to include a list of transitions.</span>
|
||||
<span class="sd"> :param file_path: The path to a pickle file to restore</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">assert_not_frozen</span><span class="p">()</span>
|
||||
|
||||
<span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">file_path</span><span class="p">,</span> <span class="s1">'rb'</span><span class="p">)</span> <span class="k">as</span> <span class="n">file</span><span class="p">:</span>
|
||||
<span class="n">episodes</span> <span class="o">=</span> <span class="n">pickle</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="n">file</span><span class="p">)</span>
|
||||
<span class="n">num_transitions</span> <span class="o">=</span> <span class="nb">sum</span><span class="p">([</span><span class="nb">len</span><span class="p">(</span><span class="n">e</span><span class="o">.</span><span class="n">transitions</span><span class="p">)</span> <span class="k">for</span> <span class="n">e</span> <span class="ow">in</span> <span class="n">episodes</span><span class="p">])</span>
|
||||
<span class="k">if</span> <span class="n">num_transitions</span> <span class="o">></span> <span class="bp">self</span><span class="o">.</span><span class="n">max_size</span><span class="p">[</span><span class="mi">1</span><span class="p">]:</span>
|
||||
<span class="n">screen</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span><span class="s2">"Warning! The number of transition to load into the replay buffer (</span><span class="si">{}</span><span class="s2">) is "</span>
|
||||
<span class="s2">"bigger than the max size of the replay buffer (</span><span class="si">{}</span><span class="s2">). The excessive transitions will "</span>
|
||||
<span class="s2">"not be stored."</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">num_transitions</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">max_size</span><span class="p">[</span><span class="mi">1</span><span class="p">]))</span>
|
||||
|
||||
<span class="n">progress_bar</span> <span class="o">=</span> <span class="n">ProgressBar</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">episodes</span><span class="p">))</span>
|
||||
<span class="k">for</span> <span class="n">episode_idx</span><span class="p">,</span> <span class="n">episode</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">episodes</span><span class="p">):</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">store_episode</span><span class="p">(</span><span class="n">episode</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># print progress</span>
|
||||
<span class="n">progress_bar</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">episode_idx</span><span class="p">)</span>
|
||||
|
||||
<span class="n">progress_bar</span><span class="o">.</span><span class="n">close</span><span class="p">()</span></div>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
|
||||
@@ -381,15 +381,6 @@
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_transition</span><span class="p">(</span><span class="n">transition_index</span><span class="p">,</span> <span class="n">lock</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># for API compatibility</span>
|
||||
<span class="k">def</span> <span class="nf">remove</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">transition_index</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span> <span class="n">lock</span><span class="p">:</span> <span class="nb">bool</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
|
||||
<span class="sd">"""</span>
|
||||
<span class="sd"> Remove the transition in the given index</span>
|
||||
<span class="sd"> :param transition_index: the index of the transition to remove</span>
|
||||
<span class="sd"> :return: None</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">remove_transition</span><span class="p">(</span><span class="n">transition_index</span><span class="p">,</span> <span class="n">lock</span><span class="p">)</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">clean</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">lock</span><span class="p">:</span> <span class="nb">bool</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="sd">"""</span>
|
||||
<span class="sd"> Clean the memory by removing all the episodes</span>
|
||||
|
||||
Reference in New Issue
Block a user