From cd6376f8210f72fe503e200ade83796dd8ce803c Mon Sep 17 00:00:00 2001 From: Roman Dobosz Date: Thu, 10 May 2018 09:19:32 +0200 Subject: [PATCH] removing doom env --- coach/configurations.py | 10 +- coach/environments/__init__.py | 2 - .../environments/doom_environment_wrapper.py | 158 ------------ coach/presets.py | 230 +----------------- 4 files changed, 6 insertions(+), 394 deletions(-) delete mode 100644 coach/environments/doom_environment_wrapper.py diff --git a/coach/configurations.py b/coach/configurations.py index a03c319..18127f6 100644 --- a/coach/configurations.py +++ b/coach/configurations.py @@ -148,7 +148,7 @@ class AgentParameters(Parameters): class EnvironmentParameters(Parameters): - type = 'Doom' + type = 'Gym' level = 'basic' observation_stack_size = 4 frame_skip = 4 @@ -295,14 +295,6 @@ class Atari(EnvironmentParameters): crop_observation = False # in the original paper the observation is cropped but not in the Nature paper -class Doom(EnvironmentParameters): - type = 'Doom' - frame_skip = 4 - observation_stack_size = 3 - desired_observation_height = 60 - desired_observation_width = 76 - - class Carla(EnvironmentParameters): type = 'Carla' frame_skip = 1 diff --git a/coach/environments/__init__.py b/coach/environments/__init__.py index 00ff609..a02e5a6 100644 --- a/coach/environments/__init__.py +++ b/coach/environments/__init__.py @@ -14,13 +14,11 @@ # limitations under the License. # from coach.environments.gym_environment_wrapper import GymEnvironmentWrapper -from coach.environments.doom_environment_wrapper import DoomEnvironmentWrapper from coach.environments.carla_environment_wrapper import CarlaEnvironmentWrapper from coach import utils class EnvTypes(utils.Enum): - Doom = "DoomEnvironmentWrapper" Gym = "GymEnvironmentWrapper" Carla = "CarlaEnvironmentWrapper" diff --git a/coach/environments/doom_environment_wrapper.py b/coach/environments/doom_environment_wrapper.py deleted file mode 100644 index 8f9117f..0000000 --- a/coach/environments/doom_environment_wrapper.py +++ /dev/null @@ -1,158 +0,0 @@ -# -# Copyright (c) 2017 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -import os - -import numpy as np - -from coach import logger -try: - import vizdoom -except ImportError: - logger.failed_imports.append("ViZDoom") - -from coach.environments import environment_wrapper as ew -from coach import utils - - -# enum of the available levels and their path -class DoomLevel(utils.Enum): - BASIC = "basic.cfg" - DEFEND = "defend_the_center.cfg" - DEATHMATCH = "deathmatch.cfg" - MY_WAY_HOME = "my_way_home.cfg" - TAKE_COVER = "take_cover.cfg" - HEALTH_GATHERING = "health_gathering.cfg" - HEALTH_GATHERING_SUPREME = "health_gathering_supreme.cfg" - DEFEND_THE_LINE = "defend_the_line.cfg" - DEADLY_CORRIDOR = "deadly_corridor.cfg" - - -key_map = { - 'NO-OP': 96, # ` - 'ATTACK': 13, # enter - 'CROUCH': 306, # ctrl - 'DROP_SELECTED_ITEM': ord("t"), - 'DROP_SELECTED_WEAPON': ord("t"), - 'JUMP': 32, # spacebar - 'LAND': ord("l"), - 'LOOK_DOWN': 274, # down arrow - 'LOOK_UP': 273, # up arrow - 'MOVE_BACKWARD': ord("s"), - 'MOVE_DOWN': ord("s"), - 'MOVE_FORWARD': ord("w"), - 'MOVE_LEFT': 276, - 'MOVE_RIGHT': 275, - 'MOVE_UP': ord("w"), - 'RELOAD': ord("r"), - 'SELECT_NEXT_WEAPON': ord("q"), - 'SELECT_PREV_WEAPON': ord("e"), - 'SELECT_WEAPON0': ord("0"), - 'SELECT_WEAPON1': ord("1"), - 'SELECT_WEAPON2': ord("2"), - 'SELECT_WEAPON3': ord("3"), - 'SELECT_WEAPON4': ord("4"), - 'SELECT_WEAPON5': ord("5"), - 'SELECT_WEAPON6': ord("6"), - 'SELECT_WEAPON7': ord("7"), - 'SELECT_WEAPON8': ord("8"), - 'SELECT_WEAPON9': ord("9"), - 'SPEED': 304, # shift - 'STRAFE': 9, # tab - 'TURN180': ord("u"), - 'TURN_LEFT': ord("a"), # left arrow - 'TURN_RIGHT': ord("d"), # right arrow - 'USE': ord("f"), -} - - -class DoomEnvironmentWrapper(ew.EnvironmentWrapper): - def __init__(self, tuning_parameters): - ew.EnvironmentWrapper.__init__(self, tuning_parameters) - - # load the emulator with the required level - self.level = DoomLevel().get(self.tp.env.level) - self.scenarios_dir = os.path.join(os.environ.get('VIZDOOM_ROOT'), - 'scenarios') - self.game = vizdoom.DoomGame() - self.game.load_config(os.path.join(self.scenarios_dir, self.level)) - self.game.set_window_visible(False) - self.game.add_game_args("+vid_forcesurface 1") - - if self.is_rendered: - self.game.set_screen_resolution(vizdoom.ScreenResolution.RES_320X240) - self.renderer.create_screen(320, 240) - else: - # lower resolution since we actually take only 76x60 and we don't need to render - self.game.set_screen_resolution(vizdoom.ScreenResolution.RES_160X120) - - self.game.set_render_hud(False) - self.game.set_render_crosshair(False) - self.game.set_render_decals(False) - self.game.set_render_particles(False) - self.game.init() - - # action space - self.action_space_abs_range = 0 - self.actions = {} - self.action_space_size = self.game.get_available_buttons_size() + 1 - self.action_vector_size = self.action_space_size - 1 - self.actions[0] = [0] * self.action_vector_size - for action_idx in range(self.action_vector_size): - self.actions[action_idx + 1] = [0] * self.action_vector_size - self.actions[action_idx + 1][action_idx] = 1 - self.actions_description = ['NO-OP'] - self.actions_description += [str(action).split(".")[1] for action in self.game.get_available_buttons()] - for idx, action in enumerate(self.actions_description): - if action in key_map.keys(): - self.key_to_action[(key_map[action],)] = idx - - # measurement - self.measurements_size = self.game.get_state().game_variables.shape - - self.width = self.game.get_screen_width() - self.height = self.game.get_screen_height() - if self.tp.seed is not None: - 
self.game.set_seed(self.tp.seed) - self.reset() - - def _update_state(self): - # extract all data from the current state - state = self.game.get_state() - if state is not None and state.screen_buffer is not None: - self.state = { - 'observation': state.screen_buffer, - 'measurements': state.game_variables, - } - self.reward = self.game.get_last_reward() - self.done = self.game.is_episode_finished() - - def _take_action(self, action_idx): - self.game.make_action(self._idx_to_action(action_idx), self.frame_skip) - - def _preprocess_observation(self, observation): - if observation is None: - return None - - # for the last step we get no new observation, so we shouldn't preprocess it - if self.done: - return observation - - # move the channel to the last axis - observation = np.transpose(observation, (1, 2, 0)) - return observation - - def _restart_environment_episode(self, force_environment_reset=False): - self.game.new_episode() diff --git a/coach/presets.py b/coach/presets.py index 5d3cce5..a954359 100644 --- a/coach/presets.py +++ b/coach/presets.py @@ -17,11 +17,11 @@ import ast import json import sys -from coach import agents +from coach import agents # noqa from coach import configurations as conf -from coach import environments as env -from coach import exploration_policies as ep -from coach import presets +from coach import environments as env # noqa +from coach import exploration_policies as ep # noqa +from coach import presets # noqa def json_to_preset(json_path): @@ -69,79 +69,6 @@ def json_to_preset(json_path): return tuning_parameters -class Doom_Basic_DQN(conf.Preset): - def __init__(self): - conf.Preset.__init__(self, conf.DQN, conf.Doom, conf.ExplorationParameters) - self.env.level = 'basic' - self.agent.num_episodes_in_experience_replay = 200 - self.learning_rate = 0.00025 - self.agent.num_steps_between_copying_online_weights_to_target = 1000 - self.num_heatup_steps = 1000 - - -class Doom_Basic_QRDQN(conf.Preset): - def __init__(self): - conf.Preset.__init__(self, conf.QuantileRegressionDQN, conf.Doom, conf.ExplorationParameters) - self.env.level = 'basic' - self.agent.num_steps_between_copying_online_weights_to_target = 1000 - self.learning_rate = 0.00025 - self.agent.num_episodes_in_experience_replay = 200 - self.num_heatup_steps = 1000 - - -class Doom_Basic_OneStepQ(conf.Preset): - def __init__(self): - conf.Preset.__init__(self, conf.NStepQ, conf.Doom, conf.ExplorationParameters) - self.env.level = 'basic' - self.learning_rate = 0.00025 - self.num_heatup_steps = 0 - self.agent.num_steps_between_copying_online_weights_to_target = 100 - self.agent.optimizer_type = 'Adam' - self.clip_gradients = 1000 - self.agent.targets_horizon = '1-Step' - - -class Doom_Basic_NStepQ(conf.Preset): - def __init__(self): - conf.Preset.__init__(self, conf.NStepQ, conf.Doom, conf.ExplorationParameters) - self.env.level = 'basic' - self.learning_rate = 0.000025 - self.num_heatup_steps = 0 - self.agent.num_steps_between_copying_online_weights_to_target = 1000 - self.agent.optimizer_type = 'Adam' - self.clip_gradients = 1000 - - -class Doom_Basic_A2C(conf.Preset): - def __init__(self): - conf.Preset.__init__(self, conf.ActorCritic, conf.Doom, conf.CategoricalExploration) - self.env.level = 'basic' - self.agent.policy_gradient_rescaler = 'A_VALUE' - self.learning_rate = 0.00025 - self.num_heatup_steps = 100 - self.env.reward_scaling = 100. 
- - -class Doom_Basic_Dueling_DDQN(conf.Preset): - def __init__(self): - conf.Preset.__init__(self, conf.DDQN, conf.Doom, conf.ExplorationParameters) - self.env.level = 'basic' - self.agent.output_types = [conf.OutputTypes.DuelingQ] - self.agent.num_episodes_in_experience_replay = 200 - self.learning_rate = 0.00025 - self.agent.num_steps_between_copying_online_weights_to_target = 1000 - self.num_heatup_steps = 1000 - -class Doom_Basic_Dueling_DQN(conf.Preset): - def __init__(self): - conf.Preset.__init__(self, conf.DuelingDQN, conf.Doom, conf.ExplorationParameters) - self.env.level = 'basic' - self.agent.num_episodes_in_experience_replay = 200 - self.learning_rate = 0.00025 - self.agent.num_steps_between_copying_online_weights_to_target = 1000 - self.num_heatup_steps = 1000 - - class CartPole_Dueling_DDQN(conf.Preset): def __init__(self): conf.Preset.__init__(self, conf.DDQN, conf.GymVectorObservation, conf.ExplorationParameters) @@ -158,17 +85,6 @@ class CartPole_Dueling_DDQN(conf.Preset): self.test_max_step_threshold = 100 self.test_min_return_threshold = 150 - -class Doom_Health_MMC(conf.Preset): - def __init__(self): - conf.Preset.__init__(self, conf.MMC, conf.Doom, conf.ExplorationParameters) - self.env.level = 'HEALTH_GATHERING' - self.agent.num_episodes_in_experience_replay = 200 - self.learning_rate = 0.00025 - self.agent.num_steps_between_copying_online_weights_to_target = 1000 - self.num_heatup_steps = 1000 - self.exploration.epsilon_decay_steps = 10000 - class CartPole_MMC(conf.Preset): def __init__(self): conf.Preset.__init__(self, conf.MMC, conf.GymVectorObservation, conf.ExplorationParameters) @@ -203,7 +119,7 @@ class CartPole_PAL(conf.Preset): class CartPole_DFP(conf.Preset): def __init__(self): - Preset.__init__(self, conf.DFP, conf.GymVectorObservation, conf.ExplorationParameters) + conf.Preset.__init__(self, conf.DFP, conf.GymVectorObservation, conf.ExplorationParameters) self.env.level = 'CartPole-v0' self.agent.num_episodes_in_experience_replay = 200 self.learning_rate = 0.0001 @@ -213,40 +129,6 @@ class CartPole_DFP(conf.Preset): self.agent.goal_vector = [1.0] -class Doom_Basic_DFP(conf.Preset): - def __init__(self): - conf.Preset.__init__(self, conf.DFP, conf.Doom, conf.ExplorationParameters) - self.env.level = 'BASIC' - self.agent.num_episodes_in_experience_replay = 200 - self.learning_rate = 0.0001 - self.num_heatup_steps = 1000 - self.exploration.epsilon_decay_steps = 10000 - self.agent.use_accumulated_reward_as_measurement = True - self.agent.goal_vector = [0.0, 1.0] - # self.agent.num_consecutive_playing_steps = 10 - - -class Doom_Health_DFP(conf.Preset): - def __init__(self): - conf.Preset.__init__(self, conf.DFP, conf.Doom, conf.ExplorationParameters) - self.env.level = 'HEALTH_GATHERING' - self.agent.num_episodes_in_experience_replay = 200 - self.learning_rate = 0.00025 - self.num_heatup_steps = 1000 - self.exploration.epsilon_decay_steps = 10000 - self.agent.use_accumulated_reward_as_measurement = True - - -class Doom_Deadly_Corridor_Bootstrapped_DQN(conf.Preset): - def __init__(self): - conf.Preset.__init__(self, conf.BootstrappedDQN, conf.Doom, conf.BootstrappedDQNExploration) - self.env.level = 'deadly_corridor' - self.agent.num_episodes_in_experience_replay = 200 - self.learning_rate = 0.00025 - self.agent.num_steps_between_copying_online_weights_to_target = 1000 - self.num_heatup_steps = 1000 - - class CartPole_Bootstrapped_DQN(conf.Preset): def __init__(self): conf.Preset.__init__(self, conf.BootstrappedDQN, conf.GymVectorObservation, 
conf.BootstrappedDQNExploration) @@ -538,16 +420,6 @@ class Atari_DQN_TestBench(conf.Preset): self.num_training_iterations = 500 -class Doom_Basic_PG(conf.Preset): - def __init__(self): - conf.Preset.__init__(self, conf.PolicyGradient, conf.Doom, conf.CategoricalExploration) - self.env.level = 'basic' - self.agent.policy_gradient_rescaler = 'FUTURE_RETURN_NORMALIZED_BY_TIMESTEP' - self.learning_rate = 0.00001 - self.num_heatup_steps = 0 - self.agent.beta_entropy = 0.01 - - class InvertedPendulum_PG(conf.Preset): def __init__(self): conf.Preset.__init__(self, conf.PolicyGradient, conf.GymVectorObservation, conf.AdditiveNoiseExploration) @@ -925,22 +797,6 @@ class CartPole_NEC(conf.Preset): self.test_min_return_threshold = 150 -class Doom_Basic_NEC(conf.Preset): - def __init__(self): - conf.Preset.__init__(self, conf.NEC, conf.Doom, conf.ExplorationParameters) - self.env.level = 'basic' - self.learning_rate = 0.00001 - self.agent.num_transitions_in_experience_replay = 100000 - # self.exploration.initial_epsilon = 0.1 # TODO: try exploration - # self.exploration.final_epsilon = 0.1 - # self.exploration.epsilon_decay_steps = 1000000 - self.num_heatup_steps = 200 - self.evaluation_episodes = 1 - self.evaluate_every_x_episodes = 5 - self.seed = 123 - - - class Montezuma_NEC(conf.Preset): def __init__(self): conf.Preset.__init__(self, conf.NEC, conf.Atari, conf.ExplorationParameters) @@ -971,28 +827,6 @@ class Breakout_NEC(conf.Preset): self.seed = 123 -class Doom_Health_NEC(conf.Preset): - def __init__(self): - conf.Preset.__init__(self, conf.NEC, conf.Doom, conf.ExplorationParameters) - self.env.level = 'HEALTH_GATHERING' - self.agent.num_episodes_in_experience_replay = 200 - self.learning_rate = 0.00025 - self.num_heatup_steps = 1000 - self.exploration.epsilon_decay_steps = 10000 - self.agent.num_playing_steps_between_two_training_steps = 1 - - -class Doom_Health_DQN(conf.Preset): - def __init__(self): - conf.Preset.__init__(self, conf.DQN, conf.Doom, conf.ExplorationParameters) - self.env.level = 'HEALTH_GATHERING' - self.agent.num_episodes_in_experience_replay = 200 - self.learning_rate = 0.00025 - self.num_heatup_steps = 1000 - self.exploration.epsilon_decay_steps = 10000 - self.agent.num_steps_between_copying_online_weights_to_target = 1000 - - class Pong_NEC_LSTM(conf.Preset): def __init__(self): conf.Preset.__init__(self, conf.NEC, conf.Atari, conf.ExplorationParameters) @@ -1285,23 +1119,6 @@ class BipedalWalker_A3C(conf.Preset): self.agent.middleware_type = conf.MiddlewareTypes.FC -class Doom_Basic_A3C(conf.Preset): - def __init__(self): - conf.Preset.__init__(self, conf.ActorCritic, conf.Doom, conf.CategoricalExploration) - self.env.level = 'basic' - self.agent.policy_gradient_rescaler = 'GAE' - self.learning_rate = 0.0001 - self.num_heatup_steps = 0 - self.env.reward_scaling = 100. 
- self.agent.discount = 0.99 - self.agent.apply_gradients_every_x_episodes = 1 - self.agent.num_steps_between_gradient_updates = 30 - self.agent.gae_lambda = 1 - self.agent.beta_entropy = 0.01 - self.clip_gradients = 40 - self.agent.middleware_type = conf.MiddlewareTypes.FC - - class Pong_A3C(conf.Preset): def __init__(self): conf.Preset.__init__(self, conf.ActorCritic, conf.Atari, conf.CategoricalExploration) @@ -1372,43 +1189,6 @@ class Carla_BC(conf.Preset): self.evaluate_every_x_training_iterations = 5000 -class Doom_Basic_BC(conf.Preset): - def __init__(self): - conf.Preset.__init__(self, conf.BC, conf.Doom, conf.ExplorationParameters) - self.env.level = 'basic' - self.agent.load_memory_from_file_path = 'datasets/doom_basic.p' - self.learning_rate = 0.0005 - self.num_heatup_steps = 0 - self.evaluation_episodes = 5 - self.batch_size = 120 - self.evaluate_every_x_training_iterations = 100 - self.num_training_iterations = 2000 - - -class Doom_Defend_BC(conf.Preset): - def __init__(self): - conf.Preset.__init__(self, conf.BC, conf.Doom, conf.ExplorationParameters) - self.env.level = 'defend' - self.agent.load_memory_from_file_path = 'datasets/doom_defend.p' - self.learning_rate = 0.0005 - self.num_heatup_steps = 0 - self.evaluation_episodes = 5 - self.batch_size = 120 - self.evaluate_every_x_training_iterations = 100 - - -class Doom_Deathmatch_BC(conf.Preset): - def __init__(self): - conf.Preset.__init__(self, conf.BC, conf.Doom, conf.ExplorationParameters) - self.env.level = 'deathmatch' - self.agent.load_memory_from_file_path = 'datasets/doom_deathmatch.p' - self.learning_rate = 0.0005 - self.num_heatup_steps = 0 - self.evaluation_episodes = 5 - self.batch_size = 120 - self.evaluate_every_x_training_iterations = 100 - - class MontezumaRevenge_BC(conf.Preset): def __init__(self): conf.Preset.__init__(self, conf.BC, conf.Atari, conf.ExplorationParameters)
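
With the Doom wrapper and its presets removed, `EnvironmentParameters.type` defaults to `'Gym'` and new presets are expected to target the environment types that remain in `EnvTypes` (`Gym`, `Carla`). For reference, below is a minimal sketch of a Gym-based preset following the pattern of the CartPole presets kept in `coach/presets.py`; the class name and the hyperparameter values are illustrative placeholders, not taken from this patch.

```python
from coach import configurations as conf


class CartPole_DQN_Example(conf.Preset):
    """Illustrative Gym-based preset (not part of the patch).

    Mirrors the structure of the surviving CartPole presets in
    coach/presets.py; the values below are placeholders.
    """

    def __init__(self):
        # DQN agent + Gym vector-observation environment + default
        # epsilon-greedy exploration, as in the remaining CartPole presets.
        conf.Preset.__init__(self, conf.DQN, conf.GymVectorObservation,
                             conf.ExplorationParameters)
        self.env.level = 'CartPole-v0'
        self.agent.num_episodes_in_experience_replay = 200
        self.learning_rate = 0.00025
        self.agent.num_steps_between_copying_online_weights_to_target = 1000
        self.num_heatup_steps = 1000
```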