Multiple improvements and bug fixes (#66)

* Multiple improvements and bug fixes:
    * Using lazy stacking to save on memory when using a replay buffer
    * Remove step counting for evaluation episodes
    * Reset game between heatup and training
    * Major bug fixes in NEC (it now reproduces the paper results for Pong)
    * Image input rescaling to 0-1 is now optional
    * Change the terminal title to be the experiment name
    * Observation cropping for Atari is now optional
    * Added a random number of no-op actions for Gym to match the DQN paper
    * Fixed a bug where the evaluation episodes won't start with the max possible ALE lives
    * Added a script for plotting the results of an experiment over all the Atari games
2026-02-09 18:15:55 +01:00 · 2018-02-26 12:29:07 +02:00
parent 4fe9cba445
commit a7206ed702
20 changed files with 465 additions and 158 deletions
--- a/utils.py
+++ b/utils.py
@@ -21,6 +21,7 @@ import numpy as np
 import threading
 from subprocess import call, Popen
 import signal
+import copy

 killed_processes = []

@@ -333,6 +334,23 @@ def switch_axes_order(observation, from_type='channels_first', to_type='channels
        return np.transpose(observation, (1, 0))


+class LazyStack(object):
+    """
+    A lazy version of np.stack which avoids copying the memory until it is
+    needed.
+
+    Only a shallow copy of the container holding the arrays is kept, so the
+    arrays themselves are not duplicated until the stack is converted to an
+    array (e.g. through np.asarray or np.array).
+
+    :param history: a sequence of arrays that np.stack can join
+    :param axis: the axis along which the arrays are stacked. None falls back
+                 to 0, which is the default axis of np.stack
+    """
+
+    def __init__(self, history, axis=None):
+        # shallow copy - appending to or removing from the caller's container
+        # afterwards does not change the content of this stack
+        self.history = copy.copy(history)
+        self.axis = axis
+
+    def __array__(self, dtype=None, copy=None):
+        """
+        Materialize the stack. Called by numpy when converting this object.
+
+        :param dtype: optional dtype to cast the stacked array to
+        :param copy: accepted because NumPy 2 passes it through the array
+                     protocol. Stacking always builds a new array, so the
+                     value is not used. Note that inside this method the name
+                     shadows the copy module.
+        :return: a new array holding the stacked history
+        """
+        # np.stack requires an integer axis, so map the None default to 0
+        axis = 0 if self.axis is None else self.axis
+        array = np.stack(self.history, axis=axis)
+        if dtype is not None:
+            # the stacked array is already a fresh buffer - do not copy it
+            # again when it has the requested dtype
+            array = array.astype(dtype, copy=False)
+        return array
+
+
 def stack_observation(curr_stack, observation, stack_size):
    """
    Adds a new observation to an existing stack of observations from previous time-steps.