small adjustment to golden tests + fixes for Doom_Health_DFP and Doom_Health_Supreme_DFP

2026-02-14 12:55:51 +01:00 · 2018-08-23 15:59:00 +03:00
parent 3fd0bf4f0f
commit 2021490caa
3 changed files with 18 additions and 5 deletions
--- a/rl_coach/presets/Doom_Health_DFP.py
+++ b/rl_coach/presets/Doom_Health_DFP.py
@@ -15,8 +15,12 @@ from rl_coach.environments.doom_environment import DoomEnvironmentParameters
 schedule_params = ScheduleParameters()
 schedule_params.improve_steps = EnvironmentSteps(6250000)
-schedule_params.steps_between_evaluation_periods = EnvironmentSteps(62500)
+# the original paper evaluates according to the commented-out values below, but this preset converges significantly faster, so it can be evaluated
-schedule_params.evaluation_steps = EnvironmentSteps(6250)
+# much more often.
 # schedule_params.steps_between_evaluation_periods = EnvironmentSteps(62500)
 # schedule_params.evaluation_steps = EnvironmentSteps(6250)
 schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(5)
 schedule_params.evaluation_steps = EnvironmentEpisodes(1)
 # There is no heatup for DFP. heatup length is determined according to batch size. See below.
@@ -31,6 +35,8 @@ agent_params.exploration.epsilon_schedule = LinearSchedule(0.5, 0, 10000)
 agent_params.exploration.evaluation_epsilon = 0
 agent_params.algorithm.goal_vector = [1]  # health
 # this works better than the default which is set to 8 (while running with 8 workers)
 agent_params.algorithm.num_consecutive_playing_steps = EnvironmentSteps(1)
 # scale observation and measurements to be -0.5 <-> 0.5
 agent_params.network_wrappers['main'].input_embedders_parameters['measurements'].input_rescaling['vector'] = 100.
--- a/rl_coach/presets/Doom_Health_Supreme_DFP.py
+++ b/rl_coach/presets/Doom_Health_Supreme_DFP.py
@@ -15,8 +15,12 @@ from rl_coach.environments.doom_environment import DoomEnvironmentParameters
 schedule_params = ScheduleParameters()
 schedule_params.improve_steps = EnvironmentSteps(6250000)
-schedule_params.steps_between_evaluation_periods = EnvironmentSteps(62500)
+# the original paper evaluates according to the commented-out values below, but this preset converges significantly faster, so it can be evaluated
-schedule_params.evaluation_steps = EnvironmentSteps(6250)
+# much more often.
 # schedule_params.steps_between_evaluation_periods = EnvironmentSteps(62500)
 # schedule_params.evaluation_steps = EnvironmentSteps(6250)
 schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(5)
 schedule_params.evaluation_steps = EnvironmentEpisodes(1)
 # There is no heatup for DFP. heatup length is determined according to batch size. See below.
@@ -31,6 +35,9 @@ agent_params.exploration.epsilon_schedule = LinearSchedule(0.5, 0, 10000)
 agent_params.exploration.evaluation_epsilon = 0
 agent_params.algorithm.goal_vector = [1]  # health
 # this works better than the default which is set to 8 (while running with 8 workers)
 agent_params.algorithm.num_consecutive_playing_steps = EnvironmentSteps(1)
 # scale observation and measurements to be -0.5 <-> 0.5
 agent_params.network_wrappers['main'].input_embedders_parameters['measurements'].input_rescaling['vector'] = 100.
 agent_params.network_wrappers['main'].input_embedders_parameters['measurements'].input_offset['vector'] = 0.5
--- a/rl_coach/tests/golden_tests.py
+++ b/rl_coach/tests/golden_tests.py
@@ -32,7 +32,7 @@ import time
 from rl_coach.logger import screen
-def read_csv_paths(test_path, filename_pattern, read_csv_tries=50):
+def read_csv_paths(test_path, filename_pattern, read_csv_tries=100):
    csv_paths = []
    tries_counter = 0
    while not csv_paths: