
add heatup step to training worker

This commit is contained in:
Zach Dwiel
2018-09-18 19:55:09 +00:00
committed by zach dwiel
parent 7c1f0dce4f
commit 04f32a0f02


@@ -10,6 +10,13 @@ from rl_coach.utils import short_dynamic_import
 # Q: specify alternative distributed memory, or should this go in the preset?
 # A: preset must define distributed memory to be used. we aren't going to take a non-distributed preset and automatically distribute it.
+def heatup(graph_manager):
+    num_steps = graph_manager.schedule_params.heatup_steps.num_steps
+    while len(graph_manager.agent_params.memory) < num_steps:
+        time.sleep(1)
+
+
 def training_worker(graph_manager, checkpoint_dir):
     """
     restore a checkpoint then perform rollouts using the restored model
@@ -22,11 +29,12 @@ def training_worker(graph_manager, checkpoint_dir):
     # save randomly initialized graph
     graph_manager.save_checkpoint()

-    # TODO: critical: wait for minimum number of rollouts in memory before training
     # TODO: Q: training steps passed into graph_manager.train ignored?
     # TODO: specify training steps between checkpoints (in preset?)
-    # TODO: replace while true with what? number of steps, convergence, time, ...
-    # TODO: low: move evaluate out of this process
+    # TODO: replace outer training loop with something general
+    # TODO: low priority: move evaluate out of this process
+
+    heatup(graph_manager)

     # training loop
     for _ in range(10):
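
To make the flow concrete, below is a self-contained sketch (not Coach's actual implementation) of how the new heatup gate slots into the training worker: save the randomly initialized graph so rollout workers can restore it, block until the shared replay memory holds the configured number of heatup transitions, then enter the training loop. Only the heatup polling logic, the attribute paths it reads, and the 10-iteration cap come from the diff; the poll_interval argument, the loop body (calling graph_manager.train() and save_checkpoint() each iteration), and the SimpleNamespace stand-ins that let the sketch run without Coach installed are illustrative assumptions.

import time
from types import SimpleNamespace


def heatup(graph_manager, poll_interval=1.0):
    """Block until the shared replay memory holds the configured heatup steps.

    Mirrors the polling loop added in this commit: rollout workers fill the
    memory elsewhere; the training worker just sleeps and re-checks.
    """
    num_steps = graph_manager.schedule_params.heatup_steps.num_steps
    while len(graph_manager.agent_params.memory) < num_steps:
        time.sleep(poll_interval)


def training_worker(graph_manager, checkpoint_dir):
    # save the randomly initialized graph so rollout workers can restore it
    # (checkpoint_dir is unused in this sketch)
    graph_manager.save_checkpoint()

    # wait for rollout workers to fill the memory before any training
    heatup(graph_manager)

    # training loop: the diff caps it at 10 iterations; the body is assumed here
    for _ in range(10):
        graph_manager.train()
        graph_manager.save_checkpoint()


# Toy wiring with SimpleNamespace stand-ins, just to exercise the control flow;
# in Coach the graph_manager would be built from the preset.
if __name__ == "__main__":
    memory = [None] * 100  # pretend rollout workers already stored 100 transitions
    gm = SimpleNamespace(
        schedule_params=SimpleNamespace(heatup_steps=SimpleNamespace(num_steps=50)),
        agent_params=SimpleNamespace(memory=memory),
        save_checkpoint=lambda: print("checkpoint saved"),
        train=lambda: print("training phase"),
    )
    training_worker(gm, checkpoint_dir="/tmp/checkpoints")

Note the design implied by the diff: the training worker never generates heatup experience itself, it only polls the memory size and sleeps, which keeps experience collection entirely on the rollout workers' side.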