diff --git a/benchmarks/acer/breakout_acer_16_workers.png b/benchmarks/acer/breakout_acer_16_workers.png index ed5c12f..fe6859c 100644 Binary files a/benchmarks/acer/breakout_acer_16_workers.png and b/benchmarks/acer/breakout_acer_16_workers.png differ diff --git a/benchmarks/acer/space_invaders_acer_16_workers.png b/benchmarks/acer/space_invaders_acer_16_workers.png index afe5ef2..8138037 100644 Binary files a/benchmarks/acer/space_invaders_acer_16_workers.png and b/benchmarks/acer/space_invaders_acer_16_workers.png differ diff --git a/rl_coach/agents/acer_agent.py b/rl_coach/agents/acer_agent.py index a539e8c..94e76b6 100644 --- a/rl_coach/agents/acer_agent.py +++ b/rl_coach/agents/acer_agent.py @@ -68,7 +68,7 @@ class ACERAlgorithmParameters(AlgorithmParameters): self.num_steps_between_gradient_updates = 5000 self.ratio_of_replay = 4 self.num_transitions_to_start_replay = 10000 - self.rate_for_copying_weights_to_target = 0.99 + self.rate_for_copying_weights_to_target = 0.01 self.importance_weight_truncation = 10.0 self.use_trust_region_optimization = True self.max_KL_divergence = 1.0 diff --git a/rl_coach/presets/Atari_ACER.py b/rl_coach/presets/Atari_ACER.py index 4d96d77..a3bd276 100644 --- a/rl_coach/presets/Atari_ACER.py +++ b/rl_coach/presets/Atari_ACER.py @@ -11,7 +11,7 @@ from rl_coach.memories.memory import MemoryGranularity # Graph Scheduling # #################### schedule_params = ScheduleParameters() -schedule_params.improve_steps = TrainingSteps(10000000000) +schedule_params.improve_steps = TrainingSteps(10000000) schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(100) schedule_params.evaluation_steps = EnvironmentEpisodes(3) schedule_params.heatup_steps = EnvironmentSteps(0)