new feature - implementation of Quantile Regression DQN (https://arxiv.org/pdf/1710.10044v1.pdf)
API change - Distributional DQN renamed to Categorical DQN
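For context, the core of Quantile Regression DQN is the quantile regression Huber loss from the linked paper: the network predicts N quantile values theta_i at the midpoints tau_hat_i = (2i + 1) / 2N, and each theta_i is regressed against every sample of the Bellman target under an asymmetric Huber penalty. A minimal NumPy sketch of that loss follows; the function name, shapes, and kappa default are illustrative, not Coach's actual implementation:

    import numpy as np

    def quantile_huber_loss(pred, target, kappa=1.0):
        # pred:   (N,) predicted quantile values theta_i for the chosen action
        # target: (N,) quantile samples of the Bellman target r + gamma * theta'_j
        n = len(pred)
        tau_hat = (2 * np.arange(n) + 1) / (2.0 * n)   # quantile midpoints tau_hat_i
        u = target[None, :] - pred[:, None]            # pairwise TD errors u_ij
        # Huber loss L_kappa(u): quadratic near zero, linear in the tails
        huber = np.where(np.abs(u) <= kappa,
                         0.5 * u ** 2,
                         kappa * (np.abs(u) - 0.5 * kappa))
        # asymmetric quantile weight |tau_hat_i - 1{u_ij < 0}|
        weight = np.abs(tau_hat[:, None] - (u < 0))
        # average over target samples j, sum over quantiles i
        return (weight * huber / kappa).mean(axis=1).sum()

As kappa -> 0 this reduces to the plain quantile regression loss; the paper uses kappa = 1.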
Changed file: presets.py (32 lines changed)
@@ -70,6 +70,18 @@ class Doom_Basic_DQN(Preset):
         self.num_heatup_steps = 1000
 
 
+class Doom_Basic_QRDQN(Preset):
+    def __init__(self):
+        Preset.__init__(self, QuantileRegressionDQN, Doom, ExplorationParameters)
+        self.env.level = 'basic'
+        self.agent.num_steps_between_copying_online_weights_to_target = 1000
+        self.learning_rate = 0.00025
+        self.agent.num_episodes_in_experience_replay = 200
+        self.num_heatup_steps = 1000
+
+
 class Doom_Basic_OneStepQ(Preset):
     def __init__(self):
         Preset.__init__(self, NStepQ, Doom, ExplorationParameters)

@@ -340,9 +352,9 @@ class CartPole_DQN(Preset):
         self.test_min_return_threshold = 150
 
 
-class CartPole_DistributionalDQN(Preset):
+class CartPole_C51(Preset):
     def __init__(self):
-        Preset.__init__(self, DistributionalDQN, GymVectorObservation, ExplorationParameters)
+        Preset.__init__(self, CategoricalDQN, GymVectorObservation, ExplorationParameters)
         self.env.level = 'CartPole-v0'
         self.agent.num_steps_between_copying_online_weights_to_target = 100
         self.learning_rate = 0.00025

@@ -356,6 +368,18 @@ class CartPole_DistributionalDQN(Preset):
         self.test_min_return_threshold = 150
 
 
+class CartPole_QRDQN(Preset):
+    def __init__(self):
+        Preset.__init__(self, QuantileRegressionDQN, GymVectorObservation, ExplorationParameters)
+        self.env.level = 'CartPole-v0'
+        self.agent.num_steps_between_copying_online_weights_to_target = 100
+        self.learning_rate = 0.00025
+        self.agent.num_episodes_in_experience_replay = 200
+        self.num_heatup_steps = 1000
+        self.exploration.epsilon_decay_steps = 3000
+        self.agent.discount = 1.0
+
+
 # The below preset matches the hyper-parameters setting as in the original DQN paper.
 # This a very resource intensive preset, and might easily blow up your RAM (> 100GB of usage).
 # Try reducing the number of transitions in the experience replay (50e3 might be a reasonable number to start with),

@@ -377,9 +401,9 @@ class Breakout_DQN(Preset):
         self.evaluate_every_x_episodes = 100
 
 
-class Breakout_DistributionalDQN(Preset):
+class Breakout_C51(Preset):
     def __init__(self):
-        Preset.__init__(self, DistributionalDQN, Atari, ExplorationParameters)
+        Preset.__init__(self, CategoricalDQN, Atari, ExplorationParameters)
         self.env.level = 'BreakoutDeterministic-v4'
         self.agent.num_steps_between_copying_online_weights_to_target = 10000
         self.learning_rate = 0.00025
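The C51 preset names follow the literature: Categorical DQN is the algorithm from "A Distributional Perspective on Reinforcement Learning", and C51 is its common shorthand for the 51-atom configuration. Assuming Coach's usual preset runner, the new and renamed presets would be selected by class name, e.g. python coach.py -p CartPole_QRDQN or python coach.py -p Breakout_C51 (hypothetical invocations; check the repository README for the exact entry point).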