Mirror of https://github.com/gryf/coach.git (synced 2025-12-17 19:20:19 +01:00)
Multiple improvements and bug fixes (#66)
* Multiple improvements and bug fixes:
* Use lazy stacking to save memory when using a replay buffer (see the sketch after this list)
* Remove step counting for evaluation episodes
* Reset the game between heatup and training
* Major bug fixes in NEC (it now reproduces the paper's results on Pong)
* Image input rescaling to 0-1 is now optional
* Change the terminal title to the experiment name
* Observation cropping for Atari is now optional
* Added a random number of no-op actions on reset for Gym, to match the DQN paper
* Fixed a bug where evaluation episodes wouldn't start with the maximum possible ALE lives
* Added a script for plotting the results of an experiment across all the Atari games
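The lazy-stacking change deserves a note. When observations are stacks of k consecutive frames, storing each stacked observation as its own array keeps roughly k copies of every frame in the replay buffer; lazy stacking stores references to the shared frames and materializes the stack only when a transition is actually read. Coach's implementation isn't shown in this diff, so the following is a minimal sketch of the idea, with the `LazyStack` name being hypothetical:

import numpy as np

class LazyStack:
    """Holds references to per-step frames and stacks them on access."""
    def __init__(self, frames):
        self._frames = list(frames)  # shared references, not copies

    def to_array(self):
        # Materialize the stacked observation only when it is read,
        # e.g. when a training minibatch is sampled.
        return np.stack(self._frames, axis=-1)

# Two consecutive observations share 3 of their 4 frames, so the
# buffer stores 5 frames here instead of 8.
frames = [np.zeros((84, 84), dtype=np.uint8) for _ in range(5)]
obs_t = LazyStack(frames[0:4])
obs_t_plus_1 = LazyStack(frames[1:5])
assert obs_t.to_array().shape == (84, 84, 4)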
@@ -115,6 +115,7 @@ class AgentParameters(Parameters):
     replace_mse_with_huber_loss = False
     load_memory_from_file_path = None
     collect_new_data = True
+    input_rescaler = 255.0
 
     # PPO related params
     target_kl_divergence = 0.01
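The `input_rescaler = 255.0` default added to AgentParameters corresponds to the "image input rescaling to 0-1 is now optional" item above: dividing uint8 pixels by 255.0 maps them into [0, 1], while a rescaler of 1.0 (as NEC sets further down) leaves the raw values untouched. A hedged sketch of how such a parameter would be applied; the helper name is hypothetical, not Coach's actual code:

import numpy as np

def rescale_input(observation, input_rescaler=255.0):
    # With the default of 255.0, uint8 frames land in [0, 1];
    # with input_rescaler=1.0 the observation passes through unscaled.
    return np.asarray(observation, dtype=np.float32) / input_rescaler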
@@ -154,6 +155,8 @@ class EnvironmentParameters(Parameters):
     desired_observation_width = 76
     desired_observation_height = 60
     normalize_observation = False
+    crop_observation = False
+    random_initialization_steps = 0
     reward_scaling = 1.0
     reward_clipping_min = None
     reward_clipping_max = None
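These two new EnvironmentParameters defaults make cropping and random initialization opt-in per environment. For Atari, cropping refers to the original DQN paper's preprocessing, which resizes the frame to 110x84 and then cuts an 84x84 window over the playing area, whereas the Nature paper resizes straight to 84x84. A sketch under that assumption, with illustrative crop offsets rather than Coach's actual ones:

import numpy as np

def maybe_crop(frame, crop_observation=False):
    # frame: grayscale image resized to 110x84 as in the original DQN
    # paper. Cropping keeps an 84x84 window over the playing area.
    return frame[18:102, :] if crop_observation else frame

assert maybe_crop(np.zeros((110, 84)), crop_observation=True).shape == (84, 84)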
@@ -290,6 +293,8 @@ class Atari(EnvironmentParameters):
     desired_observation_width = 84
     reward_clipping_max = 1.0
     reward_clipping_min = -1.0
+    random_initialization_steps = 30
+    crop_observation = False  # in the original paper the observation is cropped but not in the Nature paper
 
 
 class Doom(EnvironmentParameters):
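Atari overrides the default with `random_initialization_steps = 30`, matching the DQN paper's protocol of starting every episode with a random number of no-op actions (up to 30) so the agent does not always face the same start state. A minimal sketch against the classic Gym step API of the time; the function name is hypothetical:

import random

def reset_with_noops(env, random_initialization_steps=30, noop_action=0):
    # Play a random number of no-ops after reset, as in the DQN paper.
    observation = env.reset()
    for _ in range(random.randint(1, random_initialization_steps)):
        observation, _, done, _ = env.step(noop_action)
        if done:  # the no-ops alone ended the episode; start over
            observation = env.reset()
    return observation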
@@ -355,6 +360,7 @@ class DQN(AgentParameters):
 
 class DDQN(DQN):
     type = 'DDQNAgent'
+    num_steps_between_copying_online_weights_to_target = 30000
 
 
 class DuelingDQN(DQN):
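DDQN now pins its own target-network period, which matches the tuned Double DQN setup reported in the paper (a 30,000-step target update versus DQN's 10,000). Schematically, with hypothetical names:

def maybe_sync_target(step, online_weights, target_weights,
                      num_steps_between_copying_online_weights_to_target=30000):
    # Hard-copy the online weights into the target network every N steps;
    # between copies the target stays frozen so the bootstrap targets
    # do not chase a moving network.
    if step % num_steps_between_copying_online_weights_to_target == 0:
        target_weights[:] = online_weights  # in-place copy, list or ndarray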
@@ -384,17 +390,19 @@ class QuantileRegressionDQN(DQN):
 
 class NEC(AgentParameters):
     type = 'NECAgent'
-    optimizer_type = 'RMSProp'
+    optimizer_type = 'Adam'
     input_types = {'observation': InputTypes.Observation}
     output_types = [OutputTypes.DNDQ]
     loss_weights = [1.0]
     dnd_size = 500000
     l2_norm_added_delta = 0.001
-    new_value_shift_coefficient = 0.1
+    new_value_shift_coefficient = 0.1  # alpha
     number_of_knn = 50
     n_step = 100
     bootstrap_total_return_from_old_policy = True
-    DND_key_error_threshold = 0.1
+    DND_key_error_threshold = 0
+    input_rescaler = 1.0
     num_consecutive_playing_steps = 4
 
 
 class ActorCritic(AgentParameters):
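Most of the NEC fixes land here: the optimizer switches from RMSProp to Adam, `input_rescaler = 1.0` disables the 0-1 input rescaling for NEC, and `DND_key_error_threshold` drops from 0.1 to 0 so only exact key matches update an existing entry in the differentiable neural dictionary (DND). `new_value_shift_coefficient` is the paper's alpha: when a queried key is already stored, its value moves toward the newly observed N-step return instead of a duplicate entry being appended. A sketch of that update rule, with a plain dict standing in for Coach's actual DND structure:

def update_dnd(dnd, key, new_value, new_value_shift_coefficient=0.1,
               DND_key_error_threshold=0.0):
    # NEC's tabular update: Q_i <- Q_i + alpha * (R - Q_i) when the key
    # already exists (within the error threshold); otherwise insert it.
    for stored_key in dnd:
        distance = sum((a - b) ** 2 for a, b in zip(stored_key, key))
        if distance <= DND_key_error_threshold:
            dnd[stored_key] += new_value_shift_coefficient * (new_value - dnd[stored_key])
            return
    dnd[tuple(key)] = new_value

With a threshold of 0, the squared distance must be exactly zero, i.e. the embedding already exists in the dictionary, which avoids smearing values across distinct states.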