1
0
mirror of https://github.com/gryf/coach.git synced 2025-12-18 11:40:18 +01:00

Adding framework for multinode tests (#149)

* Currently runs CartPole_ClippedPPO and Mujoco_ClippedPPO with inverted_pendulum level.
This commit is contained in:
Ajay Deshpande
2019-02-26 13:53:12 -08:00
committed by Balaji Subramaniam
parent b461a1b8ab
commit 2c1a9dbf20
8 changed files with 210 additions and 24 deletions

View File

@@ -166,7 +166,7 @@ class Kubernetes(Deploy):
name="nfs-pvc",
persistent_volume_claim=self.nfs_pvc
)],
-restart_policy='OnFailure'
+restart_policy='Never'
),
)
else:
@@ -185,7 +185,7 @@ class Kubernetes(Deploy):
metadata=k8sclient.V1ObjectMeta(labels={'app': name}),
spec=k8sclient.V1PodSpec(
containers=[container],
-restart_policy='OnFailure'
+restart_policy='Never'
),
)
@@ -247,7 +247,7 @@ class Kubernetes(Deploy):
name="nfs-pvc",
persistent_volume_claim=self.nfs_pvc
)],
-restart_policy='OnFailure'
+restart_policy='Never'
),
)
else:
@@ -266,7 +266,7 @@ class Kubernetes(Deploy):
metadata=k8sclient.V1ObjectMeta(labels={'app': name}),
spec=k8sclient.V1PodSpec(
containers=[container],
-restart_policy='OnFailure'
+restart_policy='Never'
)
)
@@ -316,7 +316,7 @@ class Kubernetes(Deploy):
return
for pod in pods.items:
-Process(target=self._tail_log_file, args=(pod.metadata.name, api_client, self.params.namespace, path)).start()
+Process(target=self._tail_log_file, args=(pod.metadata.name, api_client, self.params.namespace, path), daemon=True).start()
def _tail_log_file(self, pod_name, api_client, namespace, path):
if not os.path.exists(path):
@@ -348,7 +348,7 @@ class Kubernetes(Deploy):
if not pod:
return
-self.tail_log(pod.metadata.name, api_client)
+return self.tail_log(pod.metadata.name, api_client)
def tail_log(self, pod_name, corev1_api):
while True:
@@ -382,9 +382,9 @@ class Kubernetes(Deploy):
container_status.state.waiting.reason == 'CrashLoopBackOff' or \
container_status.state.waiting.reason == 'ImagePullBackOff' or \
container_status.state.waiting.reason == 'ErrImagePull':
-return
+return 1
if container_status.state.terminated is not None:
-return
+return container_status.state.terminated.exit_code
def undeploy(self):
"""