mirror of
https://github.com/gryf/coach.git
synced 2025-12-18 11:40:18 +01:00
small adjustment to golden tests + fixes for Doom_Health_DFP and Doom_Health_Supreme_DFP
This commit is contained in:
@@ -15,8 +15,12 @@ from rl_coach.environments.doom_environment import DoomEnvironmentParameters
|
|||||||
|
|
||||||
schedule_params = ScheduleParameters()
|
schedule_params = ScheduleParameters()
|
||||||
schedule_params.improve_steps = EnvironmentSteps(6250000)
|
schedule_params.improve_steps = EnvironmentSteps(6250000)
|
||||||
schedule_params.steps_between_evaluation_periods = EnvironmentSteps(62500)
|
# original paper evaluates according to these. But, this preset converges significantly faster - can be evaluated
|
||||||
schedule_params.evaluation_steps = EnvironmentSteps(6250)
|
# much more often.
|
||||||
|
# schedule_params.steps_between_evaluation_periods = EnvironmentSteps(62500)
|
||||||
|
# schedule_params.evaluation_steps = EnvironmentSteps(6250)
|
||||||
|
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(5)
|
||||||
|
schedule_params.evaluation_steps = EnvironmentEpisodes(1)
|
||||||
|
|
||||||
# There is no heatup for DFP. heatup length is determined according to batch size. See below.
|
# There is no heatup for DFP. heatup length is determined according to batch size. See below.
|
||||||
|
|
||||||
@@ -31,6 +35,8 @@ agent_params.exploration.epsilon_schedule = LinearSchedule(0.5, 0, 10000)
|
|||||||
agent_params.exploration.evaluation_epsilon = 0
|
agent_params.exploration.evaluation_epsilon = 0
|
||||||
agent_params.algorithm.goal_vector = [1] # health
|
agent_params.algorithm.goal_vector = [1] # health
|
||||||
|
|
||||||
|
# this works better than the default which is set to 8 (while running with 8 workers)
|
||||||
|
agent_params.algorithm.num_consecutive_playing_steps = EnvironmentSteps(1)
|
||||||
|
|
||||||
# scale observation and measurements to be -0.5 <-> 0.5
|
# scale observation and measurements to be -0.5 <-> 0.5
|
||||||
agent_params.network_wrappers['main'].input_embedders_parameters['measurements'].input_rescaling['vector'] = 100.
|
agent_params.network_wrappers['main'].input_embedders_parameters['measurements'].input_rescaling['vector'] = 100.
|
||||||
|
|||||||
@@ -15,8 +15,12 @@ from rl_coach.environments.doom_environment import DoomEnvironmentParameters
|
|||||||
|
|
||||||
schedule_params = ScheduleParameters()
|
schedule_params = ScheduleParameters()
|
||||||
schedule_params.improve_steps = EnvironmentSteps(6250000)
|
schedule_params.improve_steps = EnvironmentSteps(6250000)
|
||||||
schedule_params.steps_between_evaluation_periods = EnvironmentSteps(62500)
|
# original paper evaluates according to these. But, this preset converges significantly faster - can be evaluated
|
||||||
schedule_params.evaluation_steps = EnvironmentSteps(6250)
|
# much more often.
|
||||||
|
# schedule_params.steps_between_evaluation_periods = EnvironmentSteps(62500)
|
||||||
|
# schedule_params.evaluation_steps = EnvironmentSteps(6250)
|
||||||
|
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(5)
|
||||||
|
schedule_params.evaluation_steps = EnvironmentEpisodes(1)
|
||||||
|
|
||||||
# There is no heatup for DFP. heatup length is determined according to batch size. See below.
|
# There is no heatup for DFP. heatup length is determined according to batch size. See below.
|
||||||
|
|
||||||
@@ -31,6 +35,9 @@ agent_params.exploration.epsilon_schedule = LinearSchedule(0.5, 0, 10000)
|
|||||||
agent_params.exploration.evaluation_epsilon = 0
|
agent_params.exploration.evaluation_epsilon = 0
|
||||||
agent_params.algorithm.goal_vector = [1] # health
|
agent_params.algorithm.goal_vector = [1] # health
|
||||||
|
|
||||||
|
# this works better than the default which is set to 8 (while running with 8 workers)
|
||||||
|
agent_params.algorithm.num_consecutive_playing_steps = EnvironmentSteps(1)
|
||||||
|
|
||||||
# scale observation and measurements to be -0.5 <-> 0.5
|
# scale observation and measurements to be -0.5 <-> 0.5
|
||||||
agent_params.network_wrappers['main'].input_embedders_parameters['measurements'].input_rescaling['vector'] = 100.
|
agent_params.network_wrappers['main'].input_embedders_parameters['measurements'].input_rescaling['vector'] = 100.
|
||||||
agent_params.network_wrappers['main'].input_embedders_parameters['measurements'].input_offset['vector'] = 0.5
|
agent_params.network_wrappers['main'].input_embedders_parameters['measurements'].input_offset['vector'] = 0.5
|
||||||
|
|||||||
@@ -32,7 +32,7 @@ import time
|
|||||||
from rl_coach.logger import screen
|
from rl_coach.logger import screen
|
||||||
|
|
||||||
|
|
||||||
def read_csv_paths(test_path, filename_pattern, read_csv_tries=50):
|
def read_csv_paths(test_path, filename_pattern, read_csv_tries=100):
|
||||||
csv_paths = []
|
csv_paths = []
|
||||||
tries_counter = 0
|
tries_counter = 0
|
||||||
while not csv_paths:
|
while not csv_paths:
|
||||||
|
|||||||
Reference in New Issue
Block a user