QR-DQN bug fix and improvements (#30)

* bug fix - QR-DQN used the signed error instead of the absolute error in the quantile Huber loss
* improvement - QR-DQN now sorts the quantiles only once instead of batch_size times
* new feature - added the Breakout QRDQN preset (verified to achieve good results)
2026-02-21 01:05:50 +01:00 · 2017-11-29 14:01:59 +02:00
parent 7bdba396d2
commit 11faf19649
3 changed files with 23 additions and 3 deletions
--- a/presets.py
+++ b/presets.py
@@ -422,6 +422,24 @@ class Breakout_C51(Preset):
        self.evaluate_every_x_episodes = 5000000


+
+class Breakout_QRDQN(Preset):  # preset combining QuantileRegressionDQN with the Atari environment on Breakout
+    def __init__(self):
+        Preset.__init__(self, QuantileRegressionDQN, Atari, ExplorationParameters)  # presumably (agent, env, exploration) parameter classes -- confirm against Preset
+        self.env.level = 'BreakoutDeterministic-v4'  # level id passed to the environment
+        self.agent.num_steps_between_copying_online_weights_to_target = 10000
+        self.learning_rate = 0.00025  # set on the preset itself, not on self.agent
+        self.agent.num_transitions_in_experience_replay = 1000000
+        self.exploration.initial_epsilon = 1.0  # epsilon schedule: initial value ...
+        self.exploration.final_epsilon = 0.01  # ... final value ...
+        self.exploration.epsilon_decay_steps = 1000000  # ... and the number of decay steps
+        self.exploration.evaluation_policy = 'EGreedy'  # evaluation uses its own policy and epsilon
+        self.exploration.evaluation_epsilon = 0.001
+        self.num_heatup_steps = 50000  # NOTE(review): presumably steps collected before training starts -- confirm
+        self.evaluation_episodes = 1  # NOTE(review): presumably episodes per evaluation phase -- confirm
+        self.evaluate_every_x_episodes = 50
+
+
 class Atari_DQN_TestBench(Preset):
    def __init__(self):
        Preset.__init__(self, DQN, Atari, ExplorationParameters)