1
0
mirror of https://github.com/gryf/coach.git synced 2026-03-25 04:03:33 +01:00

Itaicaspi/episode reset refactoring (#105)

* reordering of the episode reset operation and allowing episodes to be stored only when they are terminated

* reordering of the episode reset operation and allowing episodes to be stored only when they are terminated

* revert tensorflow-gpu to 1.9.0 + bug fix in should_train()

* tests readme file and refactoring of policy optimization agent train function

* Update README.md

* Update README.md

* additional policy optimization train function simplifications

* Updated the traces after the reordering of the environment reset

* docker and jenkins files

* updated the traces to the ones from within the docker container

* updated traces and added control suite to the docker

* updated jenkins file with the intel proxy + updated doom basic a3c test params

* updated line breaks in jenkins file

* added a missing line break in jenkins file

* refining the trace tests' ignored presets + adding a configurable beta entropy value

* switch the order of trace and golden tests in jenkins + fix the issue of golden test processes not being killed

* updated benchmarks for dueling ddqn breakout and pong

* allowing dynamic updates to the loss weights + bug fix in episode.update_returns

* remove docker and jenkins file
This commit is contained in:
Itai Caspi
2018-09-04 15:07:54 +03:00
committed by GitHub
parent 7086492127
commit 72a1d9d426
92 changed files with 9803 additions and 9740 deletions

View File

@@ -67,24 +67,21 @@ def perform_reward_based_tests(args, preset_validation_params, preset_name):
# run the experiment in a separate thread
screen.log_title("Running test {}".format(preset_name))
log_file_name = 'test_log_{preset_name}.txt'.format(preset_name=preset_name)
cmd = (
'python3 rl_coach/coach.py '
'-p {preset_name} '
'-e {test_name} '
'-n {num_workers} '
'--seed 0 '
'-c '
'{level} '
'&> {log_file_name} '
).format(
preset_name=preset_name,
test_name=test_name,
num_workers=preset_validation_params.num_workers,
log_file_name=log_file_name,
level='-lvl ' + preset_validation_params.reward_test_level if preset_validation_params.reward_test_level else ''
)
cmd = [
'python3',
'rl_coach/coach.py',
'-p', '{preset_name}'.format(preset_name=preset_name),
'-e', '{test_name}'.format(test_name=test_name),
'-n', '{num_workers}'.format(num_workers=preset_validation_params.num_workers),
'--seed', '0',
'-c'
]
if preset_validation_params.reward_test_level:
cmd += ['-lvl', '{level}'.format(level=preset_validation_params.reward_test_level)]
p = subprocess.Popen(cmd, shell=True, executable="/bin/bash", preexec_fn=os.setsid)
stdout = open(log_file_name, 'w')
p = subprocess.Popen(cmd, stdout=stdout, stderr=stdout)
start_time = time.time()
@@ -148,7 +145,8 @@ def perform_reward_based_tests(args, preset_validation_params, preset_name):
time.sleep(1)
# kill test and print result
os.killpg(os.getpgid(p.pid), signal.SIGTERM)
# os.killpg(os.getpgid(p.pid), signal.SIGKILL)
p.kill()
screen.log('')
if test_passed:
screen.success("Passed successfully")