1
0
mirror of https://github.com/gryf/coach.git synced 2025-12-18 03:30:19 +01:00

Adding worker logs and plumbed task_parameters to distributed coach (#130)

This commit is contained in:
Ajay Deshpande
2018-11-23 15:35:11 -08:00
committed by Balaji Subramaniam
parent 2b4c9c6774
commit 4a6c404070
5 changed files with 84 additions and 41 deletions

View File

@@ -68,21 +68,17 @@ def get_latest_checkpoint(checkpoint_dir):
rel_path = os.path.relpath(ckpt.model_checkpoint_path, checkpoint_dir)
return int(rel_path.split('_Step')[0])
return 0
def should_stop(checkpoint_dir):
    """
    report whether the file named by SyncFiles.FINISHED exists inside checkpoint_dir
    """
    # Build the path once so the existence check reads on its own line.
    finished_path = os.path.join(checkpoint_dir, SyncFiles.FINISHED.value)
    return os.path.exists(finished_path)
def rollout_worker(graph_manager, checkpoint_dir, data_store, num_workers):
def rollout_worker(graph_manager, data_store, num_workers, task_parameters):
"""
wait for first checkpoint then perform rollouts using the model
"""
wait_for_checkpoint(checkpoint_dir)
task_parameters = TaskParameters()
task_parameters.__dict__['checkpoint_restore_dir'] = checkpoint_dir
checkpoint_dir = task_parameters.checkpoint_restore_dir
wait_for_checkpoint(checkpoint_dir, data_store)
graph_manager.create_graph(task_parameters)
with graph_manager.phase_context(RunPhase.TRAIN):