1
0
mirror of https://github.com/gryf/coach.git synced 2025-12-18 11:40:18 +01:00

S3 optimization - save only the latest checkpoint. (#148)

This commit is contained in:
Balaji Subramaniam
2018-11-23 22:17:36 -08:00
committed by Ajay Deshpande
parent 13d2679af4
commit bf2036b284

View File

@@ -54,22 +54,27 @@ class S3DataStore(DataStore):
try: try:
# remove lock file if it exists # remove lock file if it exists
self.mc.remove_object(self.params.bucket_name, SyncFiles.LOCKFILE.value) self.mc.remove_object(self.params.bucket_name, SyncFiles.LOCKFILE.value)
# Acquire lock # Acquire lock
self.mc.put_object(self.params.bucket_name, SyncFiles.LOCKFILE.value, io.BytesIO(b''), 0) self.mc.put_object(self.params.bucket_name, SyncFiles.LOCKFILE.value, io.BytesIO(b''), 0)
checkpoint_file = None state_file = CheckpointStateFile(os.path.abspath(self.params.checkpoint_dir))
for root, dirs, files in os.walk(self.params.checkpoint_dir): if state_file.exists():
for filename in files: ckpt_state = state_file.read()
if filename == CheckpointStateFile.checkpoint_state_filename: checkpoint_file = None
checkpoint_file = (root, filename) for root, dirs, files in os.walk(self.params.checkpoint_dir):
continue for filename in files:
abs_name = os.path.abspath(os.path.join(root, filename)) if filename == CheckpointStateFile.checkpoint_state_filename:
rel_name = os.path.relpath(abs_name, self.params.checkpoint_dir) checkpoint_file = (root, filename)
self.mc.fput_object(self.params.bucket_name, rel_name, abs_name) continue
if filename.startswith(ckpt_state.name):
abs_name = os.path.abspath(os.path.join(root, filename))
rel_name = os.path.relpath(abs_name, self.params.checkpoint_dir)
self.mc.fput_object(self.params.bucket_name, rel_name, abs_name)
abs_name = os.path.abspath(os.path.join(checkpoint_file[0], checkpoint_file[1])) abs_name = os.path.abspath(os.path.join(checkpoint_file[0], checkpoint_file[1]))
rel_name = os.path.relpath(abs_name, self.params.checkpoint_dir) rel_name = os.path.relpath(abs_name, self.params.checkpoint_dir)
self.mc.fput_object(self.params.bucket_name, rel_name, abs_name) self.mc.fput_object(self.params.bucket_name, rel_name, abs_name)
# release lock # release lock
self.mc.remove_object(self.params.bucket_name, SyncFiles.LOCKFILE.value) self.mc.remove_object(self.params.bucket_name, SyncFiles.LOCKFILE.value)