
Multiple improvements and bug fixes (#66)

* Multiple improvements and bug fixes:

    * Using lazy stacking to save memory when using a replay buffer (see the sketch after this list)
    * Remove step counting for evaluation episodes
    * Reset the game between heatup and training
    * Major bug fixes in NEC (it now reproduces the paper's results for Pong)
    * Image input rescaling to 0-1 is now optional
    * Change the terminal title to be the experiment name
    * Observation cropping for Atari is now optional
    * Added a random number of no-op actions for gym to match the DQN paper
    * Fixed a bug where evaluation episodes wouldn't start with the maximum possible ALE lives
    * Added a script for plotting the results of an experiment over all the Atari games
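
The lazy stacking mentioned above follows the common "lazy frames" idiom for frame-stacked observations: the replay buffer stores references to the individual frames, and the dense stacked array is only built when a transition is sampled, so consecutive stacks that share frames do not duplicate pixel data in memory. A minimal sketch of the idea, assuming NumPy observations and a 4-frame stack (the class name and API below are illustrative, not Coach's actual implementation):

import numpy as np


class LazyFrameStack(object):
    """Holds references to the frames of a stacked observation and only
    materializes the dense array on demand. Overlapping stacks share the
    same frame objects, so storing them in a replay buffer costs roughly
    one new frame per step instead of one full stack per step."""

    def __init__(self, frames):
        self._frames = list(frames)  # keep references, do not copy pixel data

    def to_array(self):
        # build the (height, width, stack_size) array only when it is needed
        return np.stack(self._frames, axis=-1)


# illustrative usage: consecutive stacks reuse 3 of their 4 frames
frames = [np.zeros((84, 84), dtype=np.uint8) for _ in range(5)]
stack_t = LazyFrameStack(frames[0:4])
stack_t_plus_1 = LazyFrameStack(frames[1:5])
batch_observation = stack_t_plus_1.to_array()  # dense (84, 84, 4) array
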
Author: Itai Caspi (committed by GitHub)
Date: 2018-02-26 12:29:07 +02:00
Parent: 4fe9cba445
Commit: a7206ed702
20 changed files with 465 additions and 158 deletions


@@ -115,6 +115,7 @@ class AgentParameters(Parameters):
     replace_mse_with_huber_loss = False
     load_memory_from_file_path = None
     collect_new_data = True
+    input_rescaler = 255.0
 
     # PPO related params
     target_kl_divergence = 0.01
@@ -154,6 +155,8 @@ class EnvironmentParameters(Parameters):
     desired_observation_width = 76
     desired_observation_height = 60
     normalize_observation = False
+    crop_observation = False
+    random_initialization_steps = 0
     reward_scaling = 1.0
     reward_clipping_min = None
     reward_clipping_max = None
@@ -290,6 +293,8 @@ class Atari(EnvironmentParameters):
     desired_observation_width = 84
     reward_clipping_max = 1.0
     reward_clipping_min = -1.0
+    random_initialization_steps = 30
+    crop_observation = False # in the original paper the observation is cropped but not in the Nature paper
 
 
 class Doom(EnvironmentParameters):
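
The random_initialization_steps = 30 default added for Atari corresponds to the no-op starts used in the DQN paper: at the beginning of each episode the agent takes a random number of no-op actions (up to the configured maximum) so that episodes do not always begin from the same deterministic state. A minimal sketch of such a reset, assuming a classic Gym-style environment where action 0 is NOOP and step() returns a 4-tuple (the helper below is illustrative, not Coach's code):

import random


def reset_with_random_noops(env, max_noop_steps=30, noop_action=0):
    # Reset, then perform between 1 and max_noop_steps no-op actions so the
    # first real action is taken from a randomized starting state.
    observation = env.reset()
    for _ in range(random.randint(1, max_noop_steps)):
        observation, _, done, _ = env.step(noop_action)
        if done:
            # the no-ops happened to end the episode; start again
            observation = env.reset()
    return observation
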
@@ -355,6 +360,7 @@ class DQN(AgentParameters):
 
 class DDQN(DQN):
     type = 'DDQNAgent'
+    num_steps_between_copying_online_weights_to_target = 30000
 
 
 class DuelingDQN(DQN):
@@ -384,17 +390,19 @@ class QuantileRegressionDQN(DQN):
 
 class NEC(AgentParameters):
     type = 'NECAgent'
-    optimizer_type = 'RMSProp'
+    optimizer_type = 'Adam'
     input_types = {'observation': InputTypes.Observation}
     output_types = [OutputTypes.DNDQ]
     loss_weights = [1.0]
     dnd_size = 500000
     l2_norm_added_delta = 0.001
-    new_value_shift_coefficient = 0.1
+    new_value_shift_coefficient = 0.1 # alpha
     number_of_knn = 50
     n_step = 100
     bootstrap_total_return_from_old_policy = True
-    DND_key_error_threshold = 0.1
+    DND_key_error_threshold = 0
+    input_rescaler = 1.0
+    num_consecutive_playing_steps = 4
 
 
 class ActorCritic(AgentParameters):
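
The "# alpha" comment added above refers to the value-update rate of NEC's differentiable neural dictionary (DND): when a queried key is already stored, its Q value is moved toward the newly observed N-step return by a fraction alpha, i.e. Q_i <- Q_i + alpha * (Q_target - Q_i), while unseen keys are inserted directly. A small sketch of that tabular-style update, with a plain dict standing in for the DND (names here are illustrative, not Coach's code):

def update_dnd_value(dnd, key, q_target, alpha=0.1):
    # NEC-style value update: existing entries move toward the observed
    # N-step return with step size alpha; new keys are inserted directly.
    if key in dnd:
        dnd[key] = dnd[key] + alpha * (q_target - dnd[key])
    else:
        dnd[key] = q_target


# illustrative usage: 2.0 moves 10% of the way toward 4.0, giving 2.2
dnd = {('state_embedding_1',): 2.0}
update_dnd_value(dnd, ('state_embedding_1',), q_target=4.0, alpha=0.1)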