Multiple improvements and bug fixes (#66)
* Multiple improvements and bug fixes:
* Use lazy stacking to save memory when using a replay buffer (a short sketch of the idea follows this list)
* Remove step counting for evaluation episodes
* Reset the game between heatup and training
* Major bug fixes in NEC (it now reproduces the paper's results for Pong)
* Image input rescaling to 0-1 is now optional
* Change the terminal title to the experiment name
* Observation cropping for Atari is now optional
* Add a random number of no-op actions for gym to match the DQN paper
* Fix a bug where evaluation episodes would not start with the maximum possible ALE lives
* Add a script for plotting an experiment's results over all the Atari games
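The lazy stacking mentioned above follows the usual trick of keeping references to the individual frames and only building the stacked observation when a transition is actually read. The sketch below is illustrative only; the class name and interface are not taken from this commit, and it assumes grayscale frames stacked along the last axis.

import numpy as np

class LazyStackedFrames:
    """Stores references to the individual frames instead of a copied,
    pre-stacked array, so overlapping stacks in a replay buffer share memory."""

    def __init__(self, frames):
        self._frames = frames  # list of single-frame np.ndarray objects

    def __array__(self, dtype=None):
        # The full stack is materialized only when the object is read,
        # e.g. via np.asarray() while sampling a training batch.
        stacked = np.stack(self._frames, axis=-1)
        return stacked if dtype is None else stacked.astype(dtype)

# Usage sketch: the replay buffer keeps LazyStackedFrames objects and calls
# np.asarray(transition.state) only when building a batch for training.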
@@ -19,6 +19,7 @@ from logger import *
import gym
import numpy as np
import time
import random
try:
    import roboschool
    from OpenGL import GL
@@ -59,7 +60,7 @@ class GymEnvironmentWrapper(EnvironmentWrapper):
        # self.env_spec = gym.spec(self.env_id)
        self.env.frameskip = self.frame_skip
        self.discrete_controls = type(self.env.action_space) != gym.spaces.box.Box

        self.random_initialization_steps = 0
        self.state = self.reset(True)['state']

        # render
@@ -113,6 +114,7 @@ class GymEnvironmentWrapper(EnvironmentWrapper):
        else:
            self.timestep_limit = None
        self.measurements_size = len(self.step(0)['info'].keys())
        self.random_initialization_steps = self.tp.env.random_initialization_steps

    def _wrap_state(self, state):
        if isinstance(self.env.observation_space, gym.spaces.Dict):
@@ -155,8 +157,9 @@ class GymEnvironmentWrapper(EnvironmentWrapper):

    def _preprocess_state(self, state):
        # TODO: move this into wrapper
        if any(env in self.env_id for env in ["Breakout", "Pong"]):
            # crop image
        # crop image for atari games
        # the image from the environment is 210x160
        if self.tp.env.crop_observation and hasattr(self.env, 'env') and hasattr(self.env.env, 'ale'):
            state['observation'] = state['observation'][34:195, :, :]
        return state
@@ -170,7 +173,16 @@ class GymEnvironmentWrapper(EnvironmentWrapper):
        self.env.seed(self.seed)

        self.state = self._wrap_state(self.env.reset())
        while self.state is None:

        # initialize the number of lives
        if hasattr(self.env, 'env') and hasattr(self.env.env, 'ale'):
            self.current_ale_lives = self.env.env.ale.lives()

        # simulate a random initial environment state by stepping for a random number of times between 0 and 30
        step_count = 0
        random_initialization_steps = random.randint(0, self.random_initialization_steps)
        while self.state is None or step_count < random_initialization_steps:
            step_count += 1
            self.step(0)

        return self.state
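For reference, the two optional switches the wrapper reads above are crop_observation and random_initialization_steps on the experiment's env tuning parameters (self.tp.env). A minimal illustration of toggling them follows; the EnvParams container is hypothetical and only stands in for wherever those parameters are actually defined.

class EnvParams:
    """Hypothetical stand-in for the experiment's env tuning parameters."""
    def __init__(self):
        # When True, Atari frames are cropped to rows 34:195 of the 210x160
        # image (the playing area), matching the slice in _preprocess_state.
        self.crop_observation = True
        # Upper bound on the number of no-op steps taken after reset,
        # sampled uniformly per episode as in the DQN paper (up to 30).
        self.random_initialization_steps = 30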