From 2021490caa3555a7a704a69b254bbd5db16e285d Mon Sep 17 00:00:00 2001 From: Gal Leibovich Date: Thu, 23 Aug 2018 15:59:00 +0300 Subject: [PATCH] small adjustment to golden tests + fixes for Doom_Health_DFP and Doom_Health_Supreme_DFP --- rl_coach/presets/Doom_Health_DFP.py | 10 ++++++++-- rl_coach/presets/Doom_Health_Supreme_DFP.py | 11 +++++++++-- rl_coach/tests/golden_tests.py | 2 +- 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/rl_coach/presets/Doom_Health_DFP.py b/rl_coach/presets/Doom_Health_DFP.py index c4d9a8f..1cf358c 100644 --- a/rl_coach/presets/Doom_Health_DFP.py +++ b/rl_coach/presets/Doom_Health_DFP.py @@ -15,8 +15,12 @@ from rl_coach.environments.doom_environment import DoomEnvironmentParameters schedule_params = ScheduleParameters() schedule_params.improve_steps = EnvironmentSteps(6250000) -schedule_params.steps_between_evaluation_periods = EnvironmentSteps(62500) -schedule_params.evaluation_steps = EnvironmentSteps(6250) +# original paper evaluates according to these. But, this preset converges significantly faster - can be evaluated +# much more often. +# schedule_params.steps_between_evaluation_periods = EnvironmentSteps(62500) +# schedule_params.evaluation_steps = EnvironmentSteps(6250) +schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(5) +schedule_params.evaluation_steps = EnvironmentEpisodes(1) # There is no heatup for DFP. heatup length is determined according to batch size. See below. @@ -31,6 +35,8 @@ agent_params.exploration.epsilon_schedule = LinearSchedule(0.5, 0, 10000) agent_params.exploration.evaluation_epsilon = 0 agent_params.algorithm.goal_vector = [1] # health +# this works better than the default which is set to 8 (while running with 8 workers) +agent_params.algorithm.num_consecutive_playing_steps = EnvironmentSteps(1) # scale observation and measurements to be -0.5 <-> 0.5 agent_params.network_wrappers['main'].input_embedders_parameters['measurements'].input_rescaling['vector'] = 100. 
diff --git a/rl_coach/presets/Doom_Health_Supreme_DFP.py b/rl_coach/presets/Doom_Health_Supreme_DFP.py index f43da45..93a099a 100644 --- a/rl_coach/presets/Doom_Health_Supreme_DFP.py +++ b/rl_coach/presets/Doom_Health_Supreme_DFP.py @@ -15,8 +15,12 @@ from rl_coach.environments.doom_environment import DoomEnvironmentParameters schedule_params = ScheduleParameters() schedule_params.improve_steps = EnvironmentSteps(6250000) -schedule_params.steps_between_evaluation_periods = EnvironmentSteps(62500) -schedule_params.evaluation_steps = EnvironmentSteps(6250) +# original paper evaluates according to these. But, this preset converges significantly faster - can be evaluated +# much more often. +# schedule_params.steps_between_evaluation_periods = EnvironmentSteps(62500) +# schedule_params.evaluation_steps = EnvironmentSteps(6250) +schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(5) +schedule_params.evaluation_steps = EnvironmentEpisodes(1) # There is no heatup for DFP. heatup length is determined according to batch size. See below. @@ -31,6 +35,9 @@ agent_params.exploration.epsilon_schedule = LinearSchedule(0.5, 0, 10000) agent_params.exploration.evaluation_epsilon = 0 agent_params.algorithm.goal_vector = [1] # health +# this works better than the default which is set to 8 (while running with 8 workers) +agent_params.algorithm.num_consecutive_playing_steps = EnvironmentSteps(1) + # scale observation and measurements to be -0.5 <-> 0.5 agent_params.network_wrappers['main'].input_embedders_parameters['measurements'].input_rescaling['vector'] = 100. 
agent_params.network_wrappers['main'].input_embedders_parameters['measurements'].input_offset['vector'] = 0.5 diff --git a/rl_coach/tests/golden_tests.py b/rl_coach/tests/golden_tests.py index 3ae6f87..3c0a0b8 100644 --- a/rl_coach/tests/golden_tests.py +++ b/rl_coach/tests/golden_tests.py @@ -32,7 +32,7 @@ import time from rl_coach.logger import screen -def read_csv_paths(test_path, filename_pattern, read_csv_tries=50): +def read_csv_paths(test_path, filename_pattern, read_csv_tries=100): csv_paths = [] tries_counter = 0 while not csv_paths: