1
0
mirror of https://github.com/gryf/coach.git synced 2025-12-18 19:50:17 +01:00

Adding checkpointing framework (#74)

* Adding checkpointing framework as well as MXNet checkpointing implementation.

- MXNet checkpoint for each network is saved in a separate file.

* Adding checkpoint restore for MXNet to graph-manager

* Add unit-test for get_checkpoint_state()

* Added match.group() to fix unit-test failing on CI

* Added ONNX export support for MXNet
This commit is contained in:
Sina Afrooze
2018-11-19 09:45:49 -08:00
committed by shadiendrawis
parent 4da56b1ff2
commit 67eb9e4c28
19 changed files with 598 additions and 29 deletions

View File

@@ -142,3 +142,42 @@ def test_hybrid_clip():
b = mx.nd.array((2,))
clipped = hybrid_clip(F=mx.nd, x=x, clip_lower=a, clip_upper=b)
assert (np.isclose(a= clipped.asnumpy(), b=(1, 1.5, 2))).all()
@pytest.mark.unit_test
def test_scoped_onxx_enable():
    """Check that blocks see ``_onnx`` as falsy normally and truthy inside ScopedOnnxEnable.

    Ten identity blocks are chained in a HybridSequential. Each block bumps a
    shared counter during its forward pass only when its ``_onnx`` attribute is
    truthy, so the counter reveals how many blocks had the flag set.
    """

    class Counter:
        """Minimal increment-only counter shared by every block in the network."""

        def __init__(self):
            self._count = 0

        def increment(self):
            self._count += 1

        @property
        def count(self):
            return self._count

    class TempBlock(gluon.HybridBlock, OnnxHandlerBlock):
        """Identity block that counts the forward passes made while ``_onnx`` is set."""

        def __init__(self, counter: Counter):
            super().__init__()
            # NOTE(review): presumably this is what defines self._onnx - confirm
            # against OnnxHandlerBlock.
            OnnxHandlerBlock.__init__(self)
            self._counter = counter

        def hybrid_forward(self, F, x, *args, **kwargs):
            if not self._onnx:
                return x
            self._counter.increment()
            return x

    num_blocks = 10
    forward_counter = Counter()
    network = gluon.nn.HybridSequential()
    for _ in range(num_blocks):
        network.add(TempBlock(forward_counter))

    # Outside the scope no block should register a call.
    network(nd.zeros((1,)))
    assert forward_counter.count == 0

    # Inside the scope every block should register exactly one call.
    with ScopedOnnxEnable(network):
        network(nd.zeros((1,)))
    assert forward_counter.count == num_blocks

View File

@@ -0,0 +1,42 @@
import pytest
from rl_coach.saver import Saver, SaverCollection
@pytest.mark.unit_test
def test_checkpoint_collection():
    """Exercise SaverCollection.add() and SaverCollection.update() with a stub saver.

    Savers that share a path are expected to be merged into one entry, so each
    stub tracks how many savers were merged into it via ``_count``.
    """

    class SaverTest(Saver):
        """Stub saver identified by ``path`` that counts merged savers."""

        def __init__(self, path):
            self._path = path
            self._count = 1

        @property
        def path(self):
            return self._path

        def merge(self, other: 'Saver'):
            assert isinstance(other, SaverTest)
            assert self.path == other.path
            self._count += other._count

    # test add
    savers = SaverCollection(SaverTest('123'))
    savers.add(SaverTest('123'))
    savers.add(SaverTest('456'))

    def check_collection(mul):
        """Assert the collection holds exactly the two expected savers.

        :param mul: multiplier applied to the baseline counts (2 for '123', 1 for '456').
        """
        # Entries are consumed as they are seen, so a duplicated or unknown
        # path fails the membership check and a missing one is left behind.
        expected_counts = {'123': 2 * mul, '456': 1 * mul}
        for c in savers:
            assert c.path in expected_counts, "invalid path"
            assert c._count == expected_counts.pop(c.path)
        # The original check never verified that every expected saver was present.
        assert not expected_counts, \
            "missing savers for paths: {}".format(sorted(expected_counts))

    check_collection(1)

    # test update: merging the collection into itself should double every count
    savers.update(savers)
    check_collection(2)

View File

@@ -0,0 +1,21 @@
import pytest
from rl_coach import utils
@pytest.mark.unit_test
def test_get_checkpoint_state_default():
    """Check get_checkpoint_state() on file names used with the default pattern.

    The expected checkpoint paths are the file names with the trailing '.ext'
    removed, listed in sorted order, with the highest-numbered one reported as
    the current checkpoint.
    """
    extension = '.ext'
    files = ['4.test.ckpt.ext', '2.test.ckpt.ext', '3.test.ckpt.ext', '1.test.ckpt.ext']
    expected_paths = [name[:-len(extension)] for name in sorted(files)]

    state = utils.get_checkpoint_state(files)

    assert state.model_checkpoint_path == '4.test.ckpt'
    assert state.all_model_checkpoint_paths == expected_paths
@pytest.mark.unit_test
def test_get_checkpoint_state_custom():
    """Check get_checkpoint_state() with a caller-supplied filename pattern.

    The prefixed file names must yield no checkpoints under the default pattern.
    With the custom pattern, the expected paths are the file names with the
    leading 'prefix.' and the trailing '.ext' removed.
    """
    prefix = 'prefix.'
    extension = '.ext'
    files = ['prefix.4.test.ckpt.ext', 'prefix.2.test.ckpt.ext', 'prefix.3.test.ckpt.ext', 'prefix.1.test.ckpt.ext']

    # Nothing should be found when relying on the default pattern.
    default_state = utils.get_checkpoint_state(files)
    assert len(default_state.all_model_checkpoint_paths) == 0

    custom_state = utils.get_checkpoint_state(files, filename_pattern=r'([0-9]+)[^0-9].*?\.ckpt')
    expected_paths = [name[len(prefix):-len(extension)] for name in sorted(files)]

    assert custom_state.model_checkpoint_path == '4.test.ckpt'
    assert custom_state.all_model_checkpoint_paths == expected_paths