Multiple improvements and bug fixes (#66)
* Multiple improvements and bug fixes:
* Use lazy stacking to save memory when using a replay buffer (a short sketch of the idea follows this list)
* Remove step counting for evaluation episodes
* Reset the game between heatup and training
* Major bug fixes in NEC (it now reproduces the paper's results for Pong)
* Image input rescaling to 0-1 is now optional
* Change the terminal title to the experiment name
* Observation cropping for Atari is now optional
* Add a random number of no-op actions for gym to match the DQN paper
* Fix a bug where evaluation episodes would not start with the maximum possible ALE lives
* Add a script for plotting an experiment's results over all the Atari games
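The lazy stacking mentioned above follows the usual trick of keeping references to the individual frames and only building the stacked observation when a transition is actually read. The sketch below is illustrative only; the class name and interface are not taken from this commit, and it assumes grayscale frames stacked along the last axis.

import numpy as np

class LazyStackedFrames:
    """Stores references to the individual frames instead of a copied,
    pre-stacked array, so overlapping stacks in a replay buffer share memory."""

    def __init__(self, frames):
        self._frames = frames  # list of single-frame np.ndarray objects

    def __array__(self, dtype=None):
        # The full stack is materialized only when the object is read,
        # e.g. via np.asarray() while sampling a training batch.
        stacked = np.stack(self._frames, axis=-1)
        return stacked if dtype is None else stacked.astype(dtype)

# Usage sketch: the replay buffer keeps LazyStackedFrames objects and calls
# np.asarray(transition.state) only when building a batch for training.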
@@ -19,6 +19,7 @@ from logger import *
import gym
import numpy as np
import time
import random
try:
    import roboschool
    from OpenGL import GL
@@ -59,7 +60,7 @@ class GymEnvironmentWrapper(EnvironmentWrapper):
        # self.env_spec = gym.spec(self.env_id)
        self.env.frameskip = self.frame_skip
        self.discrete_controls = type(self.env.action_space) != gym.spaces.box.Box

        self.random_initialization_steps = 0
        self.state = self.reset(True)['state']

        # render
@@ -113,6 +114,7 @@ class GymEnvironmentWrapper(EnvironmentWrapper):
        else:
            self.timestep_limit = None
        self.measurements_size = len(self.step(0)['info'].keys())
        self.random_initialization_steps = self.tp.env.random_initialization_steps

    def _wrap_state(self, state):
        if isinstance(self.env.observation_space, gym.spaces.Dict):
@@ -155,8 +157,9 @@ class GymEnvironmentWrapper(EnvironmentWrapper):

    def _preprocess_state(self, state):
        # TODO: move this into wrapper
        if any(env in self.env_id for env in ["Breakout", "Pong"]):
            # crop image
        # crop image for atari games
        # the image from the environment is 210x160
        if self.tp.env.crop_observation and hasattr(self.env, 'env') and hasattr(self.env.env, 'ale'):
            state['observation'] = state['observation'][34:195, :, :]
        return state
@@ -170,7 +173,16 @@ class GymEnvironmentWrapper(EnvironmentWrapper):
        self.env.seed(self.seed)

        self.state = self._wrap_state(self.env.reset())
        while self.state is None:

        # initialize the number of lives
        if hasattr(self.env, 'env') and hasattr(self.env.env, 'ale'):
            self.current_ale_lives = self.env.env.ale.lives()

        # simulate a random initial environment state by stepping for a random number of times between 0 and 30
        step_count = 0
        random_initialization_steps = random.randint(0, self.random_initialization_steps)
        while self.state is None or step_count < random_initialization_steps:
            step_count += 1
            self.step(0)

        return self.state
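For reference, the two optional switches the wrapper reads above are crop_observation and random_initialization_steps on the experiment's env tuning parameters (self.tp.env). A minimal illustration of toggling them follows; the EnvParams container is hypothetical and only stands in for wherever those parameters are actually defined.

class EnvParams:
    """Hypothetical stand-in for the experiment's env tuning parameters."""
    def __init__(self):
        # When True, Atari frames are cropped to rows 34:195 of the 210x160
        # image (the playing area), matching the slice in _preprocess_state.
        self.crop_observation = True
        # Upper bound on the number of no-op steps taken after reset,
        # sampled uniformly per episode as in the DQN paper (up to 30).
        self.random_initialization_steps = 30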