1
0
mirror of https://github.com/gryf/coach.git synced 2025-12-18 19:50:17 +01:00

integration test changes to reach the train part (#254)

* integration test changes to override heatup to 1000 steps + run each preset for 30 sec (to make sure we reach the train part)

* fixes to failing presets uncovered with this change + changes in the golden testing to properly test BatchRL

* fix for rainbow dqn

* fix to gym_environment (due to a change in Gym 0.12.1) + fix for rainbow DQN + a bug fix in utils.squeeze_list

* fix for NEC agent
This commit is contained in:
Gal Leibovich
2019-03-27 21:14:19 +02:00
committed by GitHub
parent 6e08c55ad5
commit 310d31c227
8 changed files with 28 additions and 17 deletions

View File

@@ -57,14 +57,16 @@ def test_preset_runs(preset):
experiment_name = ".test-" + preset
params = ["python3", "rl_coach/coach.py", "-p", preset, "-ns", "-e", experiment_name]
# override heatup steps with a small number (1000) so that the heatup stage finishes and the run reaches training
params = ["python3", "rl_coach/coach.py", "-p", preset, "-ns", "-e", experiment_name, '-cp',
'heatup_steps=EnvironmentSteps(1000)']
if level != "":
params += ["-lvl", level]
p = Popen(params)
# wait 10 seconds overhead of initialization etc.
time.sleep(10)
# wait 30 seconds to cover initialization overhead and to let heatup finish.
time.sleep(30)
return_value = p.poll()
if return_value is None:

View File

@@ -33,7 +33,7 @@ import pytest
from rl_coach.logger import screen
def read_csv_paths(test_path, filename_pattern, read_csv_tries=100):
def read_csv_paths(test_path, filename_pattern, read_csv_tries=200):
csv_paths = []
tries_counter = 0
while not csv_paths:
@@ -155,7 +155,7 @@ def test_preset_reward(preset_name, no_progress_bar=True, time_limit=60 * 60, ve
if not no_progress_bar:
print_progress(averaged_rewards, last_num_episodes, preset_validation_params, start_time, time_limit)
while csv is None or (csv['Episode #'].values[
while csv is None or (csv[csv.columns[0]].values[
-1] < preset_validation_params.max_episodes_to_achieve_reward and time.time() - start_time < time_limit):
try:
csv = pd.read_csv(csv_path)
@@ -179,10 +179,10 @@ def test_preset_reward(preset_name, no_progress_bar=True, time_limit=60 * 60, ve
if not no_progress_bar:
print_progress(averaged_rewards, last_num_episodes, preset_validation_params, start_time, time_limit)
if csv['Episode #'].shape[0] - last_num_episodes <= 0:
if csv[csv.columns[0]].shape[0] - last_num_episodes <= 0:
continue
last_num_episodes = csv['Episode #'].values[-1]
last_num_episodes = csv[csv.columns[0]].values[-1]
# check if reward is enough
if np.any(averaged_rewards >= preset_validation_params.min_reward_threshold):
@@ -213,6 +213,7 @@ def test_preset_reward(preset_name, no_progress_bar=True, time_limit=60 * 60, ve
preset_validation_params.min_reward_threshold), crash=False)
screen.error("averaged_rewards: {}".format(averaged_rewards), crash=False)
screen.error("episode number: {}".format(csv['Episode #'].values[-1]), crash=False)
screen.error("training iteration: {}".format(csv['Training Iter'].values[-1]), crash=False)
else:
screen.error("csv file never found", crash=False)
if verbose: