1
0
mirror of https://github.com/gryf/coach.git synced 2025-12-17 19:20:19 +01:00

Added data store backend. (#17)

* Added data store backend.
* Add NFS implementation for Kubernetes.
* Added S3 data store implementation.
* Addressed review comments.
This commit is contained in:
Balaji Subramaniam
2018-10-04 09:45:59 -07:00
committed by zach dwiel
parent 6b2de6ba6d
commit 1c238b4c60
6 changed files with 327 additions and 1 deletions

4
.gitignore vendored
View File

@@ -26,3 +26,7 @@ datasets
core core
trace_test* trace_test*
.DS_Store .DS_Store
*.swp
*.swo
.cache/
*.pyc

View File

@@ -14,3 +14,4 @@ futures==3.1.1
wxPython==4.0.1 wxPython==4.0.1
kubernetes==7.0.0 kubernetes==7.0.0
redis==2.10.6 redis==2.10.6
minio==4.0.5

View File

@@ -0,0 +1,26 @@
class DataStoreParameters(object):
    """Base configuration shared by all data store backends.

    Bundles the store type with the orchestrator type and the
    orchestrator-specific parameter mapping supplied by the caller.
    """

    def __init__(self, store_type, orchestrator_type, orchestrator_params):
        # Straight pass-through storage; subclasses add backend-specific fields.
        self.store_type, self.orchestrator_type, self.orchestrator_params = (
            store_type, orchestrator_type, orchestrator_params
        )
class DataStore(object):
    """Abstract interface that concrete data store backends implement.

    Subclasses (e.g. NFS- or S3-backed stores) override these hooks; the
    base implementations are deliberate no-ops that return None.
    """

    def __init__(self, params: DataStoreParameters):
        """Accept the store parameters; subclasses keep what they need."""

    def deploy(self) -> bool:
        """Bring up any backing infrastructure for the store."""

    def get_info(self):
        """Return backend-specific mount/location info for consumers."""

    def undeploy(self) -> bool:
        """Tear down whatever deploy() created."""

    def save_to_store(self):
        """Push local data into the store."""

    def load_from_store(self):
        """Pull data from the store to local storage."""

View File

@@ -0,0 +1,12 @@
from rl_coach.data_stores.nfs_data_store import NFSDataStore, NFSDataStoreParameters
from rl_coach.data_stores.s3_data_store import S3DataStore, S3DataStoreParameters
def get_data_store(params):
    """Factory: return the data store implementation matching *params*.

    Args:
        params: an ``NFSDataStoreParameters`` or ``S3DataStoreParameters``
            instance selecting the backend.

    Returns:
        The corresponding ``DataStore`` instance, or ``None`` when the
        parameter type is not recognized.
    """
    # isinstance() is the idiomatic type test; `type(x) == T` needlessly
    # rejects subclasses. The two parameter classes are unrelated siblings,
    # so dispatch order does not matter.
    if isinstance(params, NFSDataStoreParameters):
        return NFSDataStore(params)
    if isinstance(params, S3DataStoreParameters):
        return S3DataStore(params)
    return None

View File

@@ -0,0 +1,219 @@
from rl_coach.data_stores.data_store import DataStore, DataStoreParameters
from kubernetes import client as k8sclient
class NFSDataStoreParameters(DataStoreParameters):
    """Parameters for an NFS-backed data store on Kubernetes.

    When ``deployed`` is True the caller provides an already-running NFS
    server (``server``/``path``); otherwise the store will deploy one and
    fill the resource-name fields itself.
    """

    def __init__(self, ds_params, deployed=False, server=None, path=None):
        super().__init__(ds_params.store_type,
                         ds_params.orchestrator_type,
                         ds_params.orchestrator_params)
        orch_params = ds_params.orchestrator_params
        # Kubernetes namespace, defaulting to "default" when not supplied.
        self.namespace = (orch_params["namespace"]
                          if "namespace" in orch_params else "default")
        # Names of the Kubernetes objects; populated by NFSDataStore.deploy().
        self.name = None
        self.pvc_name = None
        self.pv_name = None
        self.svc_name = None
        self.deployed = deployed
        if deployed:
            # Pre-existing external NFS server supplied by the caller.
            self.server = server
            self.path = path
        else:
            # Server is created on demand; export path defaults to the root.
            self.server = None
            self.path = "/"
class NFSDataStore(DataStore):
    """NFS-backed data store managed through the Kubernetes API.

    Optionally deploys an in-cluster NFS server (Deployment + Service) and
    always creates a PersistentVolume/PersistentVolumeClaim pair so pods can
    mount the shared storage.

    Bug fix vs. original: every ``print("... %s ...", e)`` call passed the
    exception as a *second positional argument to print*, so the %-format
    was never applied; the messages are now properly interpolated.
    """

    def __init__(self, params: NFSDataStoreParameters):
        self.params = params

    def deploy(self) -> bool:
        """Deploy the NFS server (unless pre-existing) and its PV/PVC.

        Returns True on success, False on any Kubernetes API failure.
        NOTE(review): implicitly returns None for non-Kubernetes
        orchestrators — confirm callers only rely on truthiness.
        """
        if self.params.orchestrator_type == "kubernetes":
            if not self.params.deployed:
                if not self.deploy_k8s_nfs():
                    return False
            if not self.create_k8s_nfs_resources():
                return False
            return True

    def get_info(self):
        """Return the PVC volume source pods use to mount this store."""
        return k8sclient.V1PersistentVolumeClaimVolumeSource(
            claim_name=self.params.pvc_name
        )

    def undeploy(self) -> bool:
        """Tear down what deploy() created; mirrors its control flow."""
        if self.params.orchestrator_type == "kubernetes":
            if not self.params.deployed:
                if not self.undeploy_k8s_nfs():
                    return False
            if not self.delete_k8s_nfs_resources():
                return False
            return True

    def save_to_store(self):
        # No-op: writers use the mounted NFS volume directly.
        pass

    def load_from_store(self):
        # No-op: readers use the mounted NFS volume directly.
        pass

    def deploy_k8s_nfs(self) -> bool:
        """Create the nfs-server Deployment and Service; True on success."""
        name = "nfs-server"
        container = k8sclient.V1Container(
            name=name,
            image="k8s.gcr.io/volume-nfs:0.8",
            ports=[k8sclient.V1ContainerPort(
                name="nfs",
                container_port=2049,
                protocol="TCP"
            )]
        )
        template = k8sclient.V1PodTemplateSpec(
            metadata=k8sclient.V1ObjectMeta(labels={'app': 'nfs-server'}),
            spec=k8sclient.V1PodSpec(
                containers=[container]
            )
        )
        deployment_spec = k8sclient.V1DeploymentSpec(
            replicas=1,
            template=template,
            selector=k8sclient.V1LabelSelector(
                match_labels={'app': 'nfs-server'}
            )
        )
        deployment = k8sclient.V1Deployment(
            api_version='apps/v1',
            kind='Deployment',
            metadata=k8sclient.V1ObjectMeta(name=name, labels={'app': 'nfs-server'}),
            spec=deployment_spec
        )

        k8s_apps_v1_api_client = k8sclient.AppsV1Api()
        try:
            k8s_apps_v1_api_client.create_namespaced_deployment(self.params.namespace, deployment)
            self.params.name = name
        except k8sclient.rest.ApiException as e:
            # Fixed: interpolate the exception instead of passing it to print.
            print("Got exception: %s\n while creating nfs-server" % e)
            return False

        k8s_core_v1_api_client = k8sclient.CoreV1Api()
        svc_name = "nfs-service"
        service = k8sclient.V1Service(
            api_version='v1',
            kind='Service',
            metadata=k8sclient.V1ObjectMeta(
                name=svc_name
            ),
            spec=k8sclient.V1ServiceSpec(
                selector={'app': self.params.name},
                ports=[k8sclient.V1ServicePort(
                    protocol='TCP',
                    port=2049,
                    target_port=2049
                )]
            )
        )
        try:
            k8s_core_v1_api_client.create_namespaced_service(self.params.namespace, service)
            self.params.svc_name = svc_name
            # Cluster-internal DNS name of the service.
            self.params.server = 'nfs-service.{}.svc'.format(self.params.namespace)
        except k8sclient.rest.ApiException as e:
            print("Got exception: %s\n while creating a service for nfs-server" % e)
            return False

        return True

    def create_k8s_nfs_resources(self) -> bool:
        """Create the PV and label-matched PVC; True on success."""
        pv_name = "nfs-ckpt-pv"
        persistent_volume = k8sclient.V1PersistentVolume(
            api_version="v1",
            kind="PersistentVolume",
            metadata=k8sclient.V1ObjectMeta(
                name=pv_name,
                labels={'app': pv_name}
            ),
            spec=k8sclient.V1PersistentVolumeSpec(
                access_modes=["ReadWriteMany"],
                nfs=k8sclient.V1NFSVolumeSource(
                    path=self.params.path,
                    server=self.params.server
                ),
                capacity={'storage': '10Gi'},
                # Empty storage class => no dynamic provisioning; the PVC
                # below binds to this PV via the label selector.
                storage_class_name=""
            )
        )
        k8s_api_client = k8sclient.CoreV1Api()
        try:
            k8s_api_client.create_persistent_volume(persistent_volume)
            self.params.pv_name = pv_name
        except k8sclient.rest.ApiException as e:
            print("Got exception: %s\n while creating the NFS PV" % e)
            return False

        pvc_name = "nfs-ckpt-pvc"
        persistent_volume_claim = k8sclient.V1PersistentVolumeClaim(
            api_version="v1",
            kind="PersistentVolumeClaim",
            metadata=k8sclient.V1ObjectMeta(
                name=pvc_name
            ),
            spec=k8sclient.V1PersistentVolumeClaimSpec(
                access_modes=["ReadWriteMany"],
                resources=k8sclient.V1ResourceRequirements(
                    requests={'storage': '10Gi'}
                ),
                selector=k8sclient.V1LabelSelector(
                    match_labels={'app': self.params.pv_name}
                ),
                storage_class_name=""
            )
        )
        try:
            k8s_api_client.create_namespaced_persistent_volume_claim(self.params.namespace, persistent_volume_claim)
            self.params.pvc_name = pvc_name
        except k8sclient.rest.ApiException as e:
            print("Got exception: %s\n while creating the NFS PVC" % e)
            return False

        return True

    def undeploy_k8s_nfs(self) -> bool:
        """Delete the nfs-server Deployment and Service; True on success."""
        del_options = k8sclient.V1DeleteOptions()
        k8s_apps_v1_api_client = k8sclient.AppsV1Api()
        try:
            k8s_apps_v1_api_client.delete_namespaced_deployment(self.params.name, self.params.namespace, del_options)
        except k8sclient.rest.ApiException as e:
            print("Got exception: %s\n while deleting nfs-server" % e)
            return False

        k8s_core_v1_api_client = k8sclient.CoreV1Api()
        try:
            k8s_core_v1_api_client.delete_namespaced_service(self.params.svc_name, self.params.namespace, del_options)
        except k8sclient.rest.ApiException as e:
            print("Got exception: %s\n while deleting the service for nfs-server" % e)
            return False

        return True

    def delete_k8s_nfs_resources(self) -> bool:
        """Delete the PV and PVC created earlier; True on success."""
        del_options = k8sclient.V1DeleteOptions()
        k8s_api_client = k8sclient.CoreV1Api()
        try:
            k8s_api_client.delete_persistent_volume(self.params.pv_name, del_options)
        except k8sclient.rest.ApiException as e:
            print("Got exception: %s\n while deleting NFS PV" % e)
            return False

        try:
            k8s_api_client.delete_namespaced_persistent_volume_claim(self.params.pvc_name, self.params.namespace, del_options)
        except k8sclient.rest.ApiException as e:
            print("Got exception: %s\n while deleting NFS PVC" % e)
            return False

        return True

View File

@@ -0,0 +1,64 @@
from rl_coach.data_stores.data_store import DataStore, DataStoreParameters
from kubernetes import client as k8sclient
from minio import Minio
from minio.error import ResponseError
from configparser import ConfigParser, Error
import os
class S3DataStoreParameters(DataStoreParameters):
    """Parameters for an S3-compatible (Minio) data store.

    ``creds_file`` is an AWS-style INI credentials file; when it is None
    the store falls back to environment variables for credentials.
    """

    def __init__(self, ds_params, creds_file: str = None, end_point: str = None,
                 bucket_name: str = None, checkpoint_dir: str = None):
        super().__init__(ds_params.store_type,
                         ds_params.orchestrator_type,
                         ds_params.orchestrator_params)
        # S3 connection and layout settings.
        self.creds_file, self.end_point = creds_file, end_point
        self.bucket_name, self.checkpoint_dir = bucket_name, checkpoint_dir
class S3DataStore(DataStore):
    """Data store backed by an S3-compatible object store via the minio client.

    Checkpoint files are uploaded keyed by their path relative to
    ``checkpoint_dir`` and downloaded back to the same layout.

    Bug fixes vs. original: ``get_info`` formatted a two-placeholder string
    with a single argument (IndexError on every call); the error ``print``
    calls passed the exception as a second positional print argument so the
    %-format was never applied.
    """

    def __init__(self, params: S3DataStoreParameters):
        self.params = params
        access_key = None
        secret_key = None
        if params.creds_file:
            # AWS-style credentials file: [default] section with key id/secret.
            config = ConfigParser()
            config.read(params.creds_file)
            try:
                access_key = config.get('default', 'aws_access_key_id')
                secret_key = config.get('default', 'aws_secret_access_key')
            except Error as e:
                # Best-effort: report and continue with None credentials.
                print("Error when reading S3 credentials file: %s" % e)
        else:
            access_key = os.environ.get('ACCESS_KEY_ID')
            secret_key = os.environ.get('SECRET_ACCESS_KEY')
        self.mc = Minio(self.params.end_point, access_key=access_key, secret_key=secret_key)

    def deploy(self) -> bool:
        # Nothing to provision; the bucket is assumed to already exist.
        return True

    def get_info(self):
        """Return the store location as an ``s3://bucket/checkpoint_dir`` URI.

        Fixed: supply both arguments the format string expects (the
        original passed only ``bucket_name`` and raised IndexError).
        """
        return "s3://{}/{}".format(self.params.bucket_name, self.params.checkpoint_dir)

    def undeploy(self) -> bool:
        # Nothing to tear down.
        return True

    def save_to_store(self):
        """Upload every file under checkpoint_dir, keyed by relative path."""
        try:
            for root, dirs, files in os.walk(self.params.checkpoint_dir):
                for filename in files:
                    abs_name = os.path.abspath(os.path.join(root, filename))
                    rel_name = os.path.relpath(abs_name, self.params.checkpoint_dir)
                    self.mc.fput_object(self.params.bucket_name, rel_name, abs_name)
        except ResponseError as e:
            print("Got exception: %s\n while saving to S3" % e)

    def load_from_store(self):
        """Download every object in the bucket into checkpoint_dir."""
        try:
            objects = self.mc.list_objects_v2(self.params.bucket_name, recursive=True)
            for obj in objects:
                filename = os.path.abspath(os.path.join(self.params.checkpoint_dir, obj.object_name))
                self.mc.fget_object(obj.bucket_name, obj.object_name, filename)
        except ResponseError as e:
            print("Got exception: %s\n while loading from S3" % e)