
Multiple improvements and bug fixes (#66)

* Multiple improvements and bug fixes:

    * Using lazy stacking to save memory when using a replay buffer (see the sketch after this list)
    * Remove step counting for evaluation episodes
    * Reset the game between heatup and training
    * Major bug fixes in NEC (it now reproduces the paper's results for Pong)
    * Image input rescaling to 0-1 is now optional
    * Change the terminal title to be the experiment name
    * Observation cropping for Atari is now optional
    * Added a random number of no-op actions for gym to match the DQN paper
    * Fixed a bug where evaluation episodes wouldn't start with the maximum possible ALE lives
    * Added a script for plotting the results of an experiment over all the Atari games
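
The lazy stacking mentioned above follows the common "lazy frames" idiom for frame-stacked observations: the replay buffer stores references to the individual frames, and the dense stacked array is only built when a transition is sampled, so consecutive stacks that share frames do not duplicate pixel data in memory. A minimal sketch of the idea, assuming NumPy observations and a 4-frame stack (the class name and API below are illustrative, not Coach's actual implementation):

import numpy as np


class LazyFrameStack(object):
    """Holds references to the frames of a stacked observation and only
    materializes the dense array on demand. Overlapping stacks share the
    same frame objects, so storing them in a replay buffer costs roughly
    one new frame per step instead of one full stack per step."""

    def __init__(self, frames):
        self._frames = list(frames)  # keep references, do not copy pixel data

    def to_array(self):
        # build the (height, width, stack_size) array only when it is needed
        return np.stack(self._frames, axis=-1)


# illustrative usage: consecutive stacks reuse 3 of their 4 frames
frames = [np.zeros((84, 84), dtype=np.uint8) for _ in range(5)]
stack_t = LazyFrameStack(frames[0:4])
stack_t_plus_1 = LazyFrameStack(frames[1:5])
batch_observation = stack_t_plus_1.to_array()  # dense (84, 84, 4) array
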
Author: Itai Caspi (committed by GitHub)
Date: 2018-02-26 12:29:07 +02:00
Parent: 4fe9cba445
Commit: a7206ed702
20 changed files with 465 additions and 158 deletions


@@ -115,6 +115,7 @@ class AgentParameters(Parameters):
     replace_mse_with_huber_loss = False
     load_memory_from_file_path = None
     collect_new_data = True
+    input_rescaler = 255.0
 
     # PPO related params
     target_kl_divergence = 0.01
@@ -154,6 +155,8 @@ class EnvironmentParameters(Parameters):
     desired_observation_width = 76
     desired_observation_height = 60
     normalize_observation = False
+    crop_observation = False
+    random_initialization_steps = 0
     reward_scaling = 1.0
     reward_clipping_min = None
     reward_clipping_max = None
@@ -290,6 +293,8 @@ class Atari(EnvironmentParameters):
     desired_observation_width = 84
     reward_clipping_max = 1.0
     reward_clipping_min = -1.0
+    random_initialization_steps = 30
+    crop_observation = False # in the original paper the observation is cropped but not in the Nature paper
 
 
 class Doom(EnvironmentParameters):
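
The random_initialization_steps = 30 default added for Atari corresponds to the no-op starts used in the DQN paper: at the beginning of each episode the agent takes a random number of no-op actions (up to the configured maximum) so that episodes do not always begin from the same deterministic state. A minimal sketch of such a reset, assuming a classic Gym-style environment where action 0 is NOOP and step() returns a 4-tuple (the helper below is illustrative, not Coach's code):

import random


def reset_with_random_noops(env, max_noop_steps=30, noop_action=0):
    # Reset, then perform between 1 and max_noop_steps no-op actions so the
    # first real action is taken from a randomized starting state.
    observation = env.reset()
    for _ in range(random.randint(1, max_noop_steps)):
        observation, _, done, _ = env.step(noop_action)
        if done:
            # the no-ops happened to end the episode; start again
            observation = env.reset()
    return observation
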
@@ -355,6 +360,7 @@ class DQN(AgentParameters):
 
 class DDQN(DQN):
     type = 'DDQNAgent'
+    num_steps_between_copying_online_weights_to_target = 30000
 
 
 class DuelingDQN(DQN):
@@ -384,17 +390,19 @@ class QuantileRegressionDQN(DQN):
 
 class NEC(AgentParameters):
     type = 'NECAgent'
-    optimizer_type = 'RMSProp'
+    optimizer_type = 'Adam'
     input_types = {'observation': InputTypes.Observation}
     output_types = [OutputTypes.DNDQ]
     loss_weights = [1.0]
     dnd_size = 500000
     l2_norm_added_delta = 0.001
-    new_value_shift_coefficient = 0.1
+    new_value_shift_coefficient = 0.1 # alpha
     number_of_knn = 50
     n_step = 100
     bootstrap_total_return_from_old_policy = True
-    DND_key_error_threshold = 0.1
+    DND_key_error_threshold = 0
+    input_rescaler = 1.0
+    num_consecutive_playing_steps = 4
 
 
 class ActorCritic(AgentParameters):
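
The "# alpha" comment added above refers to the value-update rate of NEC's differentiable neural dictionary (DND): when a queried key is already stored, its Q value is moved toward the newly observed N-step return by a fraction alpha, i.e. Q_i <- Q_i + alpha * (Q_target - Q_i), while unseen keys are inserted directly. A small sketch of that tabular-style update, with a plain dict standing in for the DND (names here are illustrative, not Coach's code):

def update_dnd_value(dnd, key, q_target, alpha=0.1):
    # NEC-style value update: existing entries move toward the observed
    # N-step return with step size alpha; new keys are inserted directly.
    if key in dnd:
        dnd[key] = dnd[key] + alpha * (q_target - dnd[key])
    else:
        dnd[key] = q_target


# illustrative usage: 2.0 moves 10% of the way toward 4.0, giving 2.2
dnd = {('state_embedding_1',): 2.0}
update_dnd_value(dnd, ('state_embedding_1',), q_target=4.0, alpha=0.1)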