mirror of
https://github.com/gryf/coach.git
synced 2025-12-17 11:10:20 +01:00
removing doom env
This commit is contained in:
@@ -148,7 +148,7 @@ class AgentParameters(Parameters):
|
|||||||
|
|
||||||
|
|
||||||
class EnvironmentParameters(Parameters):
|
class EnvironmentParameters(Parameters):
|
||||||
type = 'Doom'
|
type = 'Gym'
|
||||||
level = 'basic'
|
level = 'basic'
|
||||||
observation_stack_size = 4
|
observation_stack_size = 4
|
||||||
frame_skip = 4
|
frame_skip = 4
|
||||||
@@ -295,14 +295,6 @@ class Atari(EnvironmentParameters):
|
|||||||
crop_observation = False # in the original paper the observation is cropped but not in the Nature paper
|
crop_observation = False # in the original paper the observation is cropped but not in the Nature paper
|
||||||
|
|
||||||
|
|
||||||
class Doom(EnvironmentParameters):
|
|
||||||
type = 'Doom'
|
|
||||||
frame_skip = 4
|
|
||||||
observation_stack_size = 3
|
|
||||||
desired_observation_height = 60
|
|
||||||
desired_observation_width = 76
|
|
||||||
|
|
||||||
|
|
||||||
class Carla(EnvironmentParameters):
|
class Carla(EnvironmentParameters):
|
||||||
type = 'Carla'
|
type = 'Carla'
|
||||||
frame_skip = 1
|
frame_skip = 1
|
||||||
|
|||||||
@@ -14,13 +14,11 @@
|
|||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
#
|
#
|
||||||
from coach.environments.gym_environment_wrapper import GymEnvironmentWrapper
|
from coach.environments.gym_environment_wrapper import GymEnvironmentWrapper
|
||||||
from coach.environments.doom_environment_wrapper import DoomEnvironmentWrapper
|
|
||||||
from coach.environments.carla_environment_wrapper import CarlaEnvironmentWrapper
|
from coach.environments.carla_environment_wrapper import CarlaEnvironmentWrapper
|
||||||
from coach import utils
|
from coach import utils
|
||||||
|
|
||||||
|
|
||||||
class EnvTypes(utils.Enum):
|
class EnvTypes(utils.Enum):
|
||||||
Doom = "DoomEnvironmentWrapper"
|
|
||||||
Gym = "GymEnvironmentWrapper"
|
Gym = "GymEnvironmentWrapper"
|
||||||
Carla = "CarlaEnvironmentWrapper"
|
Carla = "CarlaEnvironmentWrapper"
|
||||||
|
|
||||||
|
|||||||
@@ -1,158 +0,0 @@
|
|||||||
#
|
|
||||||
# Copyright (c) 2017 Intel Corporation
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
#
|
|
||||||
import os
|
|
||||||
|
|
||||||
import numpy as np
|
|
||||||
|
|
||||||
from coach import logger
|
|
||||||
try:
|
|
||||||
import vizdoom
|
|
||||||
except ImportError:
|
|
||||||
logger.failed_imports.append("ViZDoom")
|
|
||||||
|
|
||||||
from coach.environments import environment_wrapper as ew
|
|
||||||
from coach import utils
|
|
||||||
|
|
||||||
|
|
||||||
# enum of the available levels and their path
|
|
||||||
class DoomLevel(utils.Enum):
|
|
||||||
BASIC = "basic.cfg"
|
|
||||||
DEFEND = "defend_the_center.cfg"
|
|
||||||
DEATHMATCH = "deathmatch.cfg"
|
|
||||||
MY_WAY_HOME = "my_way_home.cfg"
|
|
||||||
TAKE_COVER = "take_cover.cfg"
|
|
||||||
HEALTH_GATHERING = "health_gathering.cfg"
|
|
||||||
HEALTH_GATHERING_SUPREME = "health_gathering_supreme.cfg"
|
|
||||||
DEFEND_THE_LINE = "defend_the_line.cfg"
|
|
||||||
DEADLY_CORRIDOR = "deadly_corridor.cfg"
|
|
||||||
|
|
||||||
|
|
||||||
key_map = {
|
|
||||||
'NO-OP': 96, # `
|
|
||||||
'ATTACK': 13, # enter
|
|
||||||
'CROUCH': 306, # ctrl
|
|
||||||
'DROP_SELECTED_ITEM': ord("t"),
|
|
||||||
'DROP_SELECTED_WEAPON': ord("t"),
|
|
||||||
'JUMP': 32, # spacebar
|
|
||||||
'LAND': ord("l"),
|
|
||||||
'LOOK_DOWN': 274, # down arrow
|
|
||||||
'LOOK_UP': 273, # up arrow
|
|
||||||
'MOVE_BACKWARD': ord("s"),
|
|
||||||
'MOVE_DOWN': ord("s"),
|
|
||||||
'MOVE_FORWARD': ord("w"),
|
|
||||||
'MOVE_LEFT': 276,
|
|
||||||
'MOVE_RIGHT': 275,
|
|
||||||
'MOVE_UP': ord("w"),
|
|
||||||
'RELOAD': ord("r"),
|
|
||||||
'SELECT_NEXT_WEAPON': ord("q"),
|
|
||||||
'SELECT_PREV_WEAPON': ord("e"),
|
|
||||||
'SELECT_WEAPON0': ord("0"),
|
|
||||||
'SELECT_WEAPON1': ord("1"),
|
|
||||||
'SELECT_WEAPON2': ord("2"),
|
|
||||||
'SELECT_WEAPON3': ord("3"),
|
|
||||||
'SELECT_WEAPON4': ord("4"),
|
|
||||||
'SELECT_WEAPON5': ord("5"),
|
|
||||||
'SELECT_WEAPON6': ord("6"),
|
|
||||||
'SELECT_WEAPON7': ord("7"),
|
|
||||||
'SELECT_WEAPON8': ord("8"),
|
|
||||||
'SELECT_WEAPON9': ord("9"),
|
|
||||||
'SPEED': 304, # shift
|
|
||||||
'STRAFE': 9, # tab
|
|
||||||
'TURN180': ord("u"),
|
|
||||||
'TURN_LEFT': ord("a"), # left arrow
|
|
||||||
'TURN_RIGHT': ord("d"), # right arrow
|
|
||||||
'USE': ord("f"),
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class DoomEnvironmentWrapper(ew.EnvironmentWrapper):
|
|
||||||
def __init__(self, tuning_parameters):
|
|
||||||
ew.EnvironmentWrapper.__init__(self, tuning_parameters)
|
|
||||||
|
|
||||||
# load the emulator with the required level
|
|
||||||
self.level = DoomLevel().get(self.tp.env.level)
|
|
||||||
self.scenarios_dir = os.path.join(os.environ.get('VIZDOOM_ROOT'),
|
|
||||||
'scenarios')
|
|
||||||
self.game = vizdoom.DoomGame()
|
|
||||||
self.game.load_config(os.path.join(self.scenarios_dir, self.level))
|
|
||||||
self.game.set_window_visible(False)
|
|
||||||
self.game.add_game_args("+vid_forcesurface 1")
|
|
||||||
|
|
||||||
if self.is_rendered:
|
|
||||||
self.game.set_screen_resolution(vizdoom.ScreenResolution.RES_320X240)
|
|
||||||
self.renderer.create_screen(320, 240)
|
|
||||||
else:
|
|
||||||
# lower resolution since we actually take only 76x60 and we don't need to render
|
|
||||||
self.game.set_screen_resolution(vizdoom.ScreenResolution.RES_160X120)
|
|
||||||
|
|
||||||
self.game.set_render_hud(False)
|
|
||||||
self.game.set_render_crosshair(False)
|
|
||||||
self.game.set_render_decals(False)
|
|
||||||
self.game.set_render_particles(False)
|
|
||||||
self.game.init()
|
|
||||||
|
|
||||||
# action space
|
|
||||||
self.action_space_abs_range = 0
|
|
||||||
self.actions = {}
|
|
||||||
self.action_space_size = self.game.get_available_buttons_size() + 1
|
|
||||||
self.action_vector_size = self.action_space_size - 1
|
|
||||||
self.actions[0] = [0] * self.action_vector_size
|
|
||||||
for action_idx in range(self.action_vector_size):
|
|
||||||
self.actions[action_idx + 1] = [0] * self.action_vector_size
|
|
||||||
self.actions[action_idx + 1][action_idx] = 1
|
|
||||||
self.actions_description = ['NO-OP']
|
|
||||||
self.actions_description += [str(action).split(".")[1] for action in self.game.get_available_buttons()]
|
|
||||||
for idx, action in enumerate(self.actions_description):
|
|
||||||
if action in key_map.keys():
|
|
||||||
self.key_to_action[(key_map[action],)] = idx
|
|
||||||
|
|
||||||
# measurement
|
|
||||||
self.measurements_size = self.game.get_state().game_variables.shape
|
|
||||||
|
|
||||||
self.width = self.game.get_screen_width()
|
|
||||||
self.height = self.game.get_screen_height()
|
|
||||||
if self.tp.seed is not None:
|
|
||||||
self.game.set_seed(self.tp.seed)
|
|
||||||
self.reset()
|
|
||||||
|
|
||||||
def _update_state(self):
|
|
||||||
# extract all data from the current state
|
|
||||||
state = self.game.get_state()
|
|
||||||
if state is not None and state.screen_buffer is not None:
|
|
||||||
self.state = {
|
|
||||||
'observation': state.screen_buffer,
|
|
||||||
'measurements': state.game_variables,
|
|
||||||
}
|
|
||||||
self.reward = self.game.get_last_reward()
|
|
||||||
self.done = self.game.is_episode_finished()
|
|
||||||
|
|
||||||
def _take_action(self, action_idx):
|
|
||||||
self.game.make_action(self._idx_to_action(action_idx), self.frame_skip)
|
|
||||||
|
|
||||||
def _preprocess_observation(self, observation):
|
|
||||||
if observation is None:
|
|
||||||
return None
|
|
||||||
|
|
||||||
# for the last step we get no new observation, so we shouldn't preprocess it
|
|
||||||
if self.done:
|
|
||||||
return observation
|
|
||||||
|
|
||||||
# move the channel to the last axis
|
|
||||||
observation = np.transpose(observation, (1, 2, 0))
|
|
||||||
return observation
|
|
||||||
|
|
||||||
def _restart_environment_episode(self, force_environment_reset=False):
|
|
||||||
self.game.new_episode()
|
|
||||||
230
coach/presets.py
230
coach/presets.py
@@ -17,11 +17,11 @@ import ast
|
|||||||
import json
|
import json
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
from coach import agents
|
from coach import agents # noqa
|
||||||
from coach import configurations as conf
|
from coach import configurations as conf
|
||||||
from coach import environments as env
|
from coach import environments as env # noqa
|
||||||
from coach import exploration_policies as ep
|
from coach import exploration_policies as ep # noqa
|
||||||
from coach import presets
|
from coach import presets # noqa
|
||||||
|
|
||||||
|
|
||||||
def json_to_preset(json_path):
|
def json_to_preset(json_path):
|
||||||
@@ -69,79 +69,6 @@ def json_to_preset(json_path):
|
|||||||
return tuning_parameters
|
return tuning_parameters
|
||||||
|
|
||||||
|
|
||||||
class Doom_Basic_DQN(conf.Preset):
|
|
||||||
def __init__(self):
|
|
||||||
conf.Preset.__init__(self, conf.DQN, conf.Doom, conf.ExplorationParameters)
|
|
||||||
self.env.level = 'basic'
|
|
||||||
self.agent.num_episodes_in_experience_replay = 200
|
|
||||||
self.learning_rate = 0.00025
|
|
||||||
self.agent.num_steps_between_copying_online_weights_to_target = 1000
|
|
||||||
self.num_heatup_steps = 1000
|
|
||||||
|
|
||||||
|
|
||||||
class Doom_Basic_QRDQN(conf.Preset):
|
|
||||||
def __init__(self):
|
|
||||||
conf.Preset.__init__(self, conf.QuantileRegressionDQN, conf.Doom, conf.ExplorationParameters)
|
|
||||||
self.env.level = 'basic'
|
|
||||||
self.agent.num_steps_between_copying_online_weights_to_target = 1000
|
|
||||||
self.learning_rate = 0.00025
|
|
||||||
self.agent.num_episodes_in_experience_replay = 200
|
|
||||||
self.num_heatup_steps = 1000
|
|
||||||
|
|
||||||
|
|
||||||
class Doom_Basic_OneStepQ(conf.Preset):
|
|
||||||
def __init__(self):
|
|
||||||
conf.Preset.__init__(self, conf.NStepQ, conf.Doom, conf.ExplorationParameters)
|
|
||||||
self.env.level = 'basic'
|
|
||||||
self.learning_rate = 0.00025
|
|
||||||
self.num_heatup_steps = 0
|
|
||||||
self.agent.num_steps_between_copying_online_weights_to_target = 100
|
|
||||||
self.agent.optimizer_type = 'Adam'
|
|
||||||
self.clip_gradients = 1000
|
|
||||||
self.agent.targets_horizon = '1-Step'
|
|
||||||
|
|
||||||
|
|
||||||
class Doom_Basic_NStepQ(conf.Preset):
|
|
||||||
def __init__(self):
|
|
||||||
conf.Preset.__init__(self, conf.NStepQ, conf.Doom, conf.ExplorationParameters)
|
|
||||||
self.env.level = 'basic'
|
|
||||||
self.learning_rate = 0.000025
|
|
||||||
self.num_heatup_steps = 0
|
|
||||||
self.agent.num_steps_between_copying_online_weights_to_target = 1000
|
|
||||||
self.agent.optimizer_type = 'Adam'
|
|
||||||
self.clip_gradients = 1000
|
|
||||||
|
|
||||||
|
|
||||||
class Doom_Basic_A2C(conf.Preset):
|
|
||||||
def __init__(self):
|
|
||||||
conf.Preset.__init__(self, conf.ActorCritic, conf.Doom, conf.CategoricalExploration)
|
|
||||||
self.env.level = 'basic'
|
|
||||||
self.agent.policy_gradient_rescaler = 'A_VALUE'
|
|
||||||
self.learning_rate = 0.00025
|
|
||||||
self.num_heatup_steps = 100
|
|
||||||
self.env.reward_scaling = 100.
|
|
||||||
|
|
||||||
|
|
||||||
class Doom_Basic_Dueling_DDQN(conf.Preset):
|
|
||||||
def __init__(self):
|
|
||||||
conf.Preset.__init__(self, conf.DDQN, conf.Doom, conf.ExplorationParameters)
|
|
||||||
self.env.level = 'basic'
|
|
||||||
self.agent.output_types = [conf.OutputTypes.DuelingQ]
|
|
||||||
self.agent.num_episodes_in_experience_replay = 200
|
|
||||||
self.learning_rate = 0.00025
|
|
||||||
self.agent.num_steps_between_copying_online_weights_to_target = 1000
|
|
||||||
self.num_heatup_steps = 1000
|
|
||||||
|
|
||||||
class Doom_Basic_Dueling_DQN(conf.Preset):
|
|
||||||
def __init__(self):
|
|
||||||
conf.Preset.__init__(self, conf.DuelingDQN, conf.Doom, conf.ExplorationParameters)
|
|
||||||
self.env.level = 'basic'
|
|
||||||
self.agent.num_episodes_in_experience_replay = 200
|
|
||||||
self.learning_rate = 0.00025
|
|
||||||
self.agent.num_steps_between_copying_online_weights_to_target = 1000
|
|
||||||
self.num_heatup_steps = 1000
|
|
||||||
|
|
||||||
|
|
||||||
class CartPole_Dueling_DDQN(conf.Preset):
|
class CartPole_Dueling_DDQN(conf.Preset):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
conf.Preset.__init__(self, conf.DDQN, conf.GymVectorObservation, conf.ExplorationParameters)
|
conf.Preset.__init__(self, conf.DDQN, conf.GymVectorObservation, conf.ExplorationParameters)
|
||||||
@@ -158,17 +85,6 @@ class CartPole_Dueling_DDQN(conf.Preset):
|
|||||||
self.test_max_step_threshold = 100
|
self.test_max_step_threshold = 100
|
||||||
self.test_min_return_threshold = 150
|
self.test_min_return_threshold = 150
|
||||||
|
|
||||||
|
|
||||||
class Doom_Health_MMC(conf.Preset):
|
|
||||||
def __init__(self):
|
|
||||||
conf.Preset.__init__(self, conf.MMC, conf.Doom, conf.ExplorationParameters)
|
|
||||||
self.env.level = 'HEALTH_GATHERING'
|
|
||||||
self.agent.num_episodes_in_experience_replay = 200
|
|
||||||
self.learning_rate = 0.00025
|
|
||||||
self.agent.num_steps_between_copying_online_weights_to_target = 1000
|
|
||||||
self.num_heatup_steps = 1000
|
|
||||||
self.exploration.epsilon_decay_steps = 10000
|
|
||||||
|
|
||||||
class CartPole_MMC(conf.Preset):
|
class CartPole_MMC(conf.Preset):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
conf.Preset.__init__(self, conf.MMC, conf.GymVectorObservation, conf.ExplorationParameters)
|
conf.Preset.__init__(self, conf.MMC, conf.GymVectorObservation, conf.ExplorationParameters)
|
||||||
@@ -203,7 +119,7 @@ class CartPole_PAL(conf.Preset):
|
|||||||
|
|
||||||
class CartPole_DFP(conf.Preset):
|
class CartPole_DFP(conf.Preset):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
Preset.__init__(self, conf.DFP, conf.GymVectorObservation, conf.ExplorationParameters)
|
conf.Preset.__init__(self, conf.DFP, conf.GymVectorObservation, conf.ExplorationParameters)
|
||||||
self.env.level = 'CartPole-v0'
|
self.env.level = 'CartPole-v0'
|
||||||
self.agent.num_episodes_in_experience_replay = 200
|
self.agent.num_episodes_in_experience_replay = 200
|
||||||
self.learning_rate = 0.0001
|
self.learning_rate = 0.0001
|
||||||
@@ -213,40 +129,6 @@ class CartPole_DFP(conf.Preset):
|
|||||||
self.agent.goal_vector = [1.0]
|
self.agent.goal_vector = [1.0]
|
||||||
|
|
||||||
|
|
||||||
class Doom_Basic_DFP(conf.Preset):
|
|
||||||
def __init__(self):
|
|
||||||
conf.Preset.__init__(self, conf.DFP, conf.Doom, conf.ExplorationParameters)
|
|
||||||
self.env.level = 'BASIC'
|
|
||||||
self.agent.num_episodes_in_experience_replay = 200
|
|
||||||
self.learning_rate = 0.0001
|
|
||||||
self.num_heatup_steps = 1000
|
|
||||||
self.exploration.epsilon_decay_steps = 10000
|
|
||||||
self.agent.use_accumulated_reward_as_measurement = True
|
|
||||||
self.agent.goal_vector = [0.0, 1.0]
|
|
||||||
# self.agent.num_consecutive_playing_steps = 10
|
|
||||||
|
|
||||||
|
|
||||||
class Doom_Health_DFP(conf.Preset):
|
|
||||||
def __init__(self):
|
|
||||||
conf.Preset.__init__(self, conf.DFP, conf.Doom, conf.ExplorationParameters)
|
|
||||||
self.env.level = 'HEALTH_GATHERING'
|
|
||||||
self.agent.num_episodes_in_experience_replay = 200
|
|
||||||
self.learning_rate = 0.00025
|
|
||||||
self.num_heatup_steps = 1000
|
|
||||||
self.exploration.epsilon_decay_steps = 10000
|
|
||||||
self.agent.use_accumulated_reward_as_measurement = True
|
|
||||||
|
|
||||||
|
|
||||||
class Doom_Deadly_Corridor_Bootstrapped_DQN(conf.Preset):
|
|
||||||
def __init__(self):
|
|
||||||
conf.Preset.__init__(self, conf.BootstrappedDQN, conf.Doom, conf.BootstrappedDQNExploration)
|
|
||||||
self.env.level = 'deadly_corridor'
|
|
||||||
self.agent.num_episodes_in_experience_replay = 200
|
|
||||||
self.learning_rate = 0.00025
|
|
||||||
self.agent.num_steps_between_copying_online_weights_to_target = 1000
|
|
||||||
self.num_heatup_steps = 1000
|
|
||||||
|
|
||||||
|
|
||||||
class CartPole_Bootstrapped_DQN(conf.Preset):
|
class CartPole_Bootstrapped_DQN(conf.Preset):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
conf.Preset.__init__(self, conf.BootstrappedDQN, conf.GymVectorObservation, conf.BootstrappedDQNExploration)
|
conf.Preset.__init__(self, conf.BootstrappedDQN, conf.GymVectorObservation, conf.BootstrappedDQNExploration)
|
||||||
@@ -538,16 +420,6 @@ class Atari_DQN_TestBench(conf.Preset):
|
|||||||
self.num_training_iterations = 500
|
self.num_training_iterations = 500
|
||||||
|
|
||||||
|
|
||||||
class Doom_Basic_PG(conf.Preset):
|
|
||||||
def __init__(self):
|
|
||||||
conf.Preset.__init__(self, conf.PolicyGradient, conf.Doom, conf.CategoricalExploration)
|
|
||||||
self.env.level = 'basic'
|
|
||||||
self.agent.policy_gradient_rescaler = 'FUTURE_RETURN_NORMALIZED_BY_TIMESTEP'
|
|
||||||
self.learning_rate = 0.00001
|
|
||||||
self.num_heatup_steps = 0
|
|
||||||
self.agent.beta_entropy = 0.01
|
|
||||||
|
|
||||||
|
|
||||||
class InvertedPendulum_PG(conf.Preset):
|
class InvertedPendulum_PG(conf.Preset):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
conf.Preset.__init__(self, conf.PolicyGradient, conf.GymVectorObservation, conf.AdditiveNoiseExploration)
|
conf.Preset.__init__(self, conf.PolicyGradient, conf.GymVectorObservation, conf.AdditiveNoiseExploration)
|
||||||
@@ -925,22 +797,6 @@ class CartPole_NEC(conf.Preset):
|
|||||||
self.test_min_return_threshold = 150
|
self.test_min_return_threshold = 150
|
||||||
|
|
||||||
|
|
||||||
class Doom_Basic_NEC(conf.Preset):
|
|
||||||
def __init__(self):
|
|
||||||
conf.Preset.__init__(self, conf.NEC, conf.Doom, conf.ExplorationParameters)
|
|
||||||
self.env.level = 'basic'
|
|
||||||
self.learning_rate = 0.00001
|
|
||||||
self.agent.num_transitions_in_experience_replay = 100000
|
|
||||||
# self.exploration.initial_epsilon = 0.1 # TODO: try exploration
|
|
||||||
# self.exploration.final_epsilon = 0.1
|
|
||||||
# self.exploration.epsilon_decay_steps = 1000000
|
|
||||||
self.num_heatup_steps = 200
|
|
||||||
self.evaluation_episodes = 1
|
|
||||||
self.evaluate_every_x_episodes = 5
|
|
||||||
self.seed = 123
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class Montezuma_NEC(conf.Preset):
|
class Montezuma_NEC(conf.Preset):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
conf.Preset.__init__(self, conf.NEC, conf.Atari, conf.ExplorationParameters)
|
conf.Preset.__init__(self, conf.NEC, conf.Atari, conf.ExplorationParameters)
|
||||||
@@ -971,28 +827,6 @@ class Breakout_NEC(conf.Preset):
|
|||||||
self.seed = 123
|
self.seed = 123
|
||||||
|
|
||||||
|
|
||||||
class Doom_Health_NEC(conf.Preset):
|
|
||||||
def __init__(self):
|
|
||||||
conf.Preset.__init__(self, conf.NEC, conf.Doom, conf.ExplorationParameters)
|
|
||||||
self.env.level = 'HEALTH_GATHERING'
|
|
||||||
self.agent.num_episodes_in_experience_replay = 200
|
|
||||||
self.learning_rate = 0.00025
|
|
||||||
self.num_heatup_steps = 1000
|
|
||||||
self.exploration.epsilon_decay_steps = 10000
|
|
||||||
self.agent.num_playing_steps_between_two_training_steps = 1
|
|
||||||
|
|
||||||
|
|
||||||
class Doom_Health_DQN(conf.Preset):
|
|
||||||
def __init__(self):
|
|
||||||
conf.Preset.__init__(self, conf.DQN, conf.Doom, conf.ExplorationParameters)
|
|
||||||
self.env.level = 'HEALTH_GATHERING'
|
|
||||||
self.agent.num_episodes_in_experience_replay = 200
|
|
||||||
self.learning_rate = 0.00025
|
|
||||||
self.num_heatup_steps = 1000
|
|
||||||
self.exploration.epsilon_decay_steps = 10000
|
|
||||||
self.agent.num_steps_between_copying_online_weights_to_target = 1000
|
|
||||||
|
|
||||||
|
|
||||||
class Pong_NEC_LSTM(conf.Preset):
|
class Pong_NEC_LSTM(conf.Preset):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
conf.Preset.__init__(self, conf.NEC, conf.Atari, conf.ExplorationParameters)
|
conf.Preset.__init__(self, conf.NEC, conf.Atari, conf.ExplorationParameters)
|
||||||
@@ -1285,23 +1119,6 @@ class BipedalWalker_A3C(conf.Preset):
|
|||||||
self.agent.middleware_type = conf.MiddlewareTypes.FC
|
self.agent.middleware_type = conf.MiddlewareTypes.FC
|
||||||
|
|
||||||
|
|
||||||
class Doom_Basic_A3C(conf.Preset):
|
|
||||||
def __init__(self):
|
|
||||||
conf.Preset.__init__(self, conf.ActorCritic, conf.Doom, conf.CategoricalExploration)
|
|
||||||
self.env.level = 'basic'
|
|
||||||
self.agent.policy_gradient_rescaler = 'GAE'
|
|
||||||
self.learning_rate = 0.0001
|
|
||||||
self.num_heatup_steps = 0
|
|
||||||
self.env.reward_scaling = 100.
|
|
||||||
self.agent.discount = 0.99
|
|
||||||
self.agent.apply_gradients_every_x_episodes = 1
|
|
||||||
self.agent.num_steps_between_gradient_updates = 30
|
|
||||||
self.agent.gae_lambda = 1
|
|
||||||
self.agent.beta_entropy = 0.01
|
|
||||||
self.clip_gradients = 40
|
|
||||||
self.agent.middleware_type = conf.MiddlewareTypes.FC
|
|
||||||
|
|
||||||
|
|
||||||
class Pong_A3C(conf.Preset):
|
class Pong_A3C(conf.Preset):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
conf.Preset.__init__(self, conf.ActorCritic, conf.Atari, conf.CategoricalExploration)
|
conf.Preset.__init__(self, conf.ActorCritic, conf.Atari, conf.CategoricalExploration)
|
||||||
@@ -1372,43 +1189,6 @@ class Carla_BC(conf.Preset):
|
|||||||
self.evaluate_every_x_training_iterations = 5000
|
self.evaluate_every_x_training_iterations = 5000
|
||||||
|
|
||||||
|
|
||||||
class Doom_Basic_BC(conf.Preset):
|
|
||||||
def __init__(self):
|
|
||||||
conf.Preset.__init__(self, conf.BC, conf.Doom, conf.ExplorationParameters)
|
|
||||||
self.env.level = 'basic'
|
|
||||||
self.agent.load_memory_from_file_path = 'datasets/doom_basic.p'
|
|
||||||
self.learning_rate = 0.0005
|
|
||||||
self.num_heatup_steps = 0
|
|
||||||
self.evaluation_episodes = 5
|
|
||||||
self.batch_size = 120
|
|
||||||
self.evaluate_every_x_training_iterations = 100
|
|
||||||
self.num_training_iterations = 2000
|
|
||||||
|
|
||||||
|
|
||||||
class Doom_Defend_BC(conf.Preset):
|
|
||||||
def __init__(self):
|
|
||||||
conf.Preset.__init__(self, conf.BC, conf.Doom, conf.ExplorationParameters)
|
|
||||||
self.env.level = 'defend'
|
|
||||||
self.agent.load_memory_from_file_path = 'datasets/doom_defend.p'
|
|
||||||
self.learning_rate = 0.0005
|
|
||||||
self.num_heatup_steps = 0
|
|
||||||
self.evaluation_episodes = 5
|
|
||||||
self.batch_size = 120
|
|
||||||
self.evaluate_every_x_training_iterations = 100
|
|
||||||
|
|
||||||
|
|
||||||
class Doom_Deathmatch_BC(conf.Preset):
|
|
||||||
def __init__(self):
|
|
||||||
conf.Preset.__init__(self, conf.BC, conf.Doom, conf.ExplorationParameters)
|
|
||||||
self.env.level = 'deathmatch'
|
|
||||||
self.agent.load_memory_from_file_path = 'datasets/doom_deathmatch.p'
|
|
||||||
self.learning_rate = 0.0005
|
|
||||||
self.num_heatup_steps = 0
|
|
||||||
self.evaluation_episodes = 5
|
|
||||||
self.batch_size = 120
|
|
||||||
self.evaluate_every_x_training_iterations = 100
|
|
||||||
|
|
||||||
|
|
||||||
class MontezumaRevenge_BC(conf.Preset):
|
class MontezumaRevenge_BC(conf.Preset):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
conf.Preset.__init__(self, conf.BC, conf.Atari, conf.ExplorationParameters)
|
conf.Preset.__init__(self, conf.BC, conf.Atari, conf.ExplorationParameters)
|
||||||
|
|||||||
Reference in New Issue
Block a user