Added data store backend. (#17)

* Added data store backend.
* Added NFS implementation for Kubernetes.
* Added S3 data store implementation.
* Addressed review comments.

committed by zach dwiel
parent 6b2de6ba6d
commit 1c238b4c60
.gitignore (vendored) | 4
@@ -26,3 +26,7 @@ datasets
 core
 trace_test*
 .DS_Store
+*.swp
+*.swo
+.cache/
+*.pyc
@@ -13,4 +13,5 @@ bokeh==0.13.0
 futures==3.1.1
 wxPython==4.0.1
 kubernetes==7.0.0
 redis==2.10.6
+minio==4.0.5
rl_coach/data_stores/data_store.py (new file) | 26
@@ -0,0 +1,26 @@
class DataStoreParameters(object):
    def __init__(self, store_type, orchestrator_type, orchestrator_params):
        self.store_type = store_type
        self.orchestrator_type = orchestrator_type
        self.orchestrator_params = orchestrator_params


class DataStore(object):
    def __init__(self, params: DataStoreParameters):
        pass

    def deploy(self) -> bool:
        pass

    def get_info(self):
        pass

    def undeploy(self) -> bool:
        pass

    def save_to_store(self):
        pass

    def load_from_store(self):
        pass
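DataStore acts as an interface-style base class: concrete backends override deploy()/undeploy() for provisioning, get_info() for describing how workers reach the store, and save_to_store()/load_from_store() for moving checkpoint files. As a minimal sketch of what a third backend could look like, here is a hypothetical LocalDataStore that mirrors a checkpoint directory to another local path; it is not part of this commit, and all names in it are illustrative.

import os
import shutil

from rl_coach.data_stores.data_store import DataStore, DataStoreParameters


class LocalDataStore(DataStore):
    """Hypothetical backend: mirror checkpoints between two local directories."""

    def __init__(self, params: DataStoreParameters, checkpoint_dir: str, store_dir: str):
        super().__init__(params)
        self.checkpoint_dir = checkpoint_dir
        self.store_dir = store_dir

    def deploy(self) -> bool:
        return True  # nothing to provision for a local directory

    def undeploy(self) -> bool:
        return True

    def get_info(self):
        return self.store_dir

    def save_to_store(self):
        # Replace the stored copy with the current checkpoint directory.
        if os.path.exists(self.store_dir):
            shutil.rmtree(self.store_dir)
        shutil.copytree(self.checkpoint_dir, self.store_dir)

    def load_from_store(self):
        # Restore the checkpoint directory from the stored copy.
        if os.path.exists(self.checkpoint_dir):
            shutil.rmtree(self.checkpoint_dir)
        shutil.copytree(self.store_dir, self.checkpoint_dir)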
rl_coach/data_stores/data_store_impl.py (new file) | 12
@@ -0,0 +1,12 @@
from rl_coach.data_stores.nfs_data_store import NFSDataStore, NFSDataStoreParameters
from rl_coach.data_stores.s3_data_store import S3DataStore, S3DataStoreParameters


def get_data_store(params):
    data_store = None
    if type(params) == NFSDataStoreParameters:
        data_store = NFSDataStore(params)
    elif type(params) == S3DataStoreParameters:
        data_store = S3DataStore(params)

    return data_store
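get_data_store() is a small factory that picks the backend from the concrete parameters type. A usage sketch follows; the endpoint, bucket, and checkpoint path are placeholders and do not come from this commit.

from rl_coach.data_stores.data_store import DataStoreParameters
from rl_coach.data_stores.data_store_impl import get_data_store
from rl_coach.data_stores.s3_data_store import S3DataStoreParameters

base_params = DataStoreParameters(store_type="s3",
                                  orchestrator_type="kubernetes",
                                  orchestrator_params={"namespace": "default"})
s3_params = S3DataStoreParameters(base_params,
                                  creds_file=None,                  # fall back to env vars
                                  end_point="s3.amazonaws.com",     # placeholder endpoint
                                  bucket_name="coach-checkpoints",  # placeholder bucket
                                  checkpoint_dir="/checkpoint")
data_store = get_data_store(s3_params)  # -> S3DataStore instance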
rl_coach/data_stores/nfs_data_store.py (new file) | 219
@@ -0,0 +1,219 @@
from rl_coach.data_stores.data_store import DataStore, DataStoreParameters
from kubernetes import client as k8sclient


class NFSDataStoreParameters(DataStoreParameters):
    def __init__(self, ds_params, deployed=False, server=None, path=None):
        super().__init__(ds_params.store_type, ds_params.orchestrator_type, ds_params.orchestrator_params)
        self.namespace = "default"
        if "namespace" in ds_params.orchestrator_params:
            self.namespace = ds_params.orchestrator_params["namespace"]
        self.name = None
        self.pvc_name = None
        self.pv_name = None
        self.svc_name = None
        self.server = None
        self.path = "/"
        self.deployed = deployed
        if deployed:
            self.server = server
            self.path = path


class NFSDataStore(DataStore):
    def __init__(self, params: NFSDataStoreParameters):
        self.params = params

    def deploy(self) -> bool:
        if self.params.orchestrator_type == "kubernetes":
            if not self.params.deployed:
                if not self.deploy_k8s_nfs():
                    return False
            if not self.create_k8s_nfs_resources():
                return False

        return True

    def get_info(self):
        return k8sclient.V1PersistentVolumeClaimVolumeSource(
            claim_name=self.params.pvc_name
        )

    def undeploy(self) -> bool:
        if self.params.orchestrator_type == "kubernetes":
            if not self.params.deployed:
                if not self.undeploy_k8s_nfs():
                    return False
            if not self.delete_k8s_nfs_resources():
                return False

        return True

    def save_to_store(self):
        pass

    def load_from_store(self):
        pass

    def deploy_k8s_nfs(self) -> bool:
        name = "nfs-server"
        container = k8sclient.V1Container(
            name=name,
            image="k8s.gcr.io/volume-nfs:0.8",
            ports=[k8sclient.V1ContainerPort(
                name="nfs",
                container_port=2049,
                protocol="TCP"
            )]
        )
        template = k8sclient.V1PodTemplateSpec(
            metadata=k8sclient.V1ObjectMeta(labels={'app': 'nfs-server'}),
            spec=k8sclient.V1PodSpec(
                containers=[container]
            )
        )
        deployment_spec = k8sclient.V1DeploymentSpec(
            replicas=1,
            template=template,
            selector=k8sclient.V1LabelSelector(
                match_labels={'app': 'nfs-server'}
            )
        )

        deployment = k8sclient.V1Deployment(
            api_version='apps/v1',
            kind='Deployment',
            metadata=k8sclient.V1ObjectMeta(name=name, labels={'app': 'nfs-server'}),
            spec=deployment_spec
        )

        k8s_apps_v1_api_client = k8sclient.AppsV1Api()
        try:
            k8s_apps_v1_api_client.create_namespaced_deployment(self.params.namespace, deployment)
            self.params.name = name
        except k8sclient.rest.ApiException as e:
            print("Got exception: %s\n while creating nfs-server" % e)
            return False

        k8s_core_v1_api_client = k8sclient.CoreV1Api()

        svc_name = "nfs-service"
        service = k8sclient.V1Service(
            api_version='v1',
            kind='Service',
            metadata=k8sclient.V1ObjectMeta(
                name=svc_name
            ),
            spec=k8sclient.V1ServiceSpec(
                selector={'app': self.params.name},
                ports=[k8sclient.V1ServicePort(
                    protocol='TCP',
                    port=2049,
                    target_port=2049
                )]
            )
        )

        try:
            k8s_core_v1_api_client.create_namespaced_service(self.params.namespace, service)
            self.params.svc_name = svc_name
            self.params.server = 'nfs-service.{}.svc'.format(self.params.namespace)
        except k8sclient.rest.ApiException as e:
            print("Got exception: %s\n while creating a service for nfs-server" % e)
            return False

        return True

    def create_k8s_nfs_resources(self) -> bool:
        pv_name = "nfs-ckpt-pv"
        persistent_volume = k8sclient.V1PersistentVolume(
            api_version="v1",
            kind="PersistentVolume",
            metadata=k8sclient.V1ObjectMeta(
                name=pv_name,
                labels={'app': pv_name}
            ),
            spec=k8sclient.V1PersistentVolumeSpec(
                access_modes=["ReadWriteMany"],
                nfs=k8sclient.V1NFSVolumeSource(
                    path=self.params.path,
                    server=self.params.server
                ),
                capacity={'storage': '10Gi'},
                storage_class_name=""
            )
        )
        k8s_api_client = k8sclient.CoreV1Api()
        try:
            k8s_api_client.create_persistent_volume(persistent_volume)
            self.params.pv_name = pv_name
        except k8sclient.rest.ApiException as e:
            print("Got exception: %s\n while creating the NFS PV" % e)
            return False

        pvc_name = "nfs-ckpt-pvc"
        persistent_volume_claim = k8sclient.V1PersistentVolumeClaim(
            api_version="v1",
            kind="PersistentVolumeClaim",
            metadata=k8sclient.V1ObjectMeta(
                name=pvc_name
            ),
            spec=k8sclient.V1PersistentVolumeClaimSpec(
                access_modes=["ReadWriteMany"],
                resources=k8sclient.V1ResourceRequirements(
                    requests={'storage': '10Gi'}
                ),
                selector=k8sclient.V1LabelSelector(
                    match_labels={'app': self.params.pv_name}
                ),
                storage_class_name=""
            )
        )

        try:
            k8s_api_client.create_namespaced_persistent_volume_claim(self.params.namespace, persistent_volume_claim)
            self.params.pvc_name = pvc_name
        except k8sclient.rest.ApiException as e:
            print("Got exception: %s\n while creating the NFS PVC" % e)
            return False

        return True

    def undeploy_k8s_nfs(self) -> bool:
        del_options = k8sclient.V1DeleteOptions()

        k8s_apps_v1_api_client = k8sclient.AppsV1Api()
        try:
            k8s_apps_v1_api_client.delete_namespaced_deployment(self.params.name, self.params.namespace, del_options)
        except k8sclient.rest.ApiException as e:
            print("Got exception: %s\n while deleting nfs-server" % e)
            return False

        k8s_core_v1_api_client = k8sclient.CoreV1Api()
        try:
            k8s_core_v1_api_client.delete_namespaced_service(self.params.svc_name, self.params.namespace, del_options)
        except k8sclient.rest.ApiException as e:
            print("Got exception: %s\n while deleting the service for nfs-server" % e)
            return False

        return True

    def delete_k8s_nfs_resources(self) -> bool:
        del_options = k8sclient.V1DeleteOptions()
        k8s_api_client = k8sclient.CoreV1Api()

        try:
            k8s_api_client.delete_persistent_volume(self.params.pv_name, del_options)
        except k8sclient.rest.ApiException as e:
            print("Got exception: %s\n while deleting NFS PV" % e)
            return False

        try:
            k8s_api_client.delete_namespaced_persistent_volume_claim(self.params.pvc_name, self.params.namespace, del_options)
        except k8sclient.rest.ApiException as e:
            print("Got exception: %s\n while deleting NFS PVC" % e)
            return False

        return True
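NFSDataStore provisions an nfs-server Deployment and Service inside the cluster, then exposes the share through a PV/PVC pair; get_info() hands back a V1PersistentVolumeClaimVolumeSource that callers can mount into worker pods. Below is a sketch of that flow, assuming a reachable cluster config; the pod, container, and mount names are illustrative and not part of this commit.

from kubernetes import client as k8sclient, config as k8sconfig
from rl_coach.data_stores.data_store import DataStoreParameters
from rl_coach.data_stores.nfs_data_store import NFSDataStore, NFSDataStoreParameters

k8sconfig.load_kube_config()  # or load_incluster_config() when running inside the cluster

base_params = DataStoreParameters(store_type="nfs",
                                  orchestrator_type="kubernetes",
                                  orchestrator_params={"namespace": "default"})
nfs_store = NFSDataStore(NFSDataStoreParameters(base_params))
nfs_store.deploy()  # creates nfs-server, nfs-service, and the PV/PVC pair

# Mount the checkpoint volume into a (hypothetical) worker pod.
volume = k8sclient.V1Volume(name="checkpoints", persistent_volume_claim=nfs_store.get_info())
container = k8sclient.V1Container(
    name="worker",
    image="busybox",
    command=["sleep", "3600"],
    volume_mounts=[k8sclient.V1VolumeMount(name="checkpoints", mount_path="/checkpoint")]
)
pod = k8sclient.V1Pod(
    metadata=k8sclient.V1ObjectMeta(name="worker"),
    spec=k8sclient.V1PodSpec(containers=[container], volumes=[volume])
)
k8sclient.CoreV1Api().create_namespaced_pod("default", pod)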
rl_coach/data_stores/s3_data_store.py (new file) | 64
@@ -0,0 +1,64 @@
from rl_coach.data_stores.data_store import DataStore, DataStoreParameters
from kubernetes import client as k8sclient
from minio import Minio
from minio.error import ResponseError
from configparser import ConfigParser, Error
import os


class S3DataStoreParameters(DataStoreParameters):
    def __init__(self, ds_params, creds_file: str = None, end_point: str = None, bucket_name: str = None,
                 checkpoint_dir: str = None):
        super().__init__(ds_params.store_type, ds_params.orchestrator_type, ds_params.orchestrator_params)
        self.creds_file = creds_file
        self.end_point = end_point
        self.bucket_name = bucket_name
        self.checkpoint_dir = checkpoint_dir


class S3DataStore(DataStore):
    def __init__(self, params: S3DataStoreParameters):
        self.params = params
        access_key = None
        secret_key = None
        if params.creds_file:
            config = ConfigParser()
            config.read(params.creds_file)
            try:
                access_key = config.get('default', 'aws_access_key_id')
                secret_key = config.get('default', 'aws_secret_access_key')
            except Error as e:
                print("Error when reading S3 credentials file: %s" % e)
        else:
            access_key = os.environ.get('ACCESS_KEY_ID')
            secret_key = os.environ.get('SECRET_ACCESS_KEY')
        self.mc = Minio(self.params.end_point, access_key=access_key, secret_key=secret_key)

    def deploy(self) -> bool:
        return True

    def get_info(self):
        return "s3://{}".format(self.params.bucket_name)

    def undeploy(self) -> bool:
        return True

    def save_to_store(self):
        try:
            for root, dirs, files in os.walk(self.params.checkpoint_dir):
                for filename in files:
                    abs_name = os.path.abspath(os.path.join(root, filename))
                    rel_name = os.path.relpath(abs_name, self.params.checkpoint_dir)
                    self.mc.fput_object(self.params.bucket_name, rel_name, abs_name)
        except ResponseError as e:
            print("Got exception: %s\n while saving to S3" % e)

    def load_from_store(self):
        try:
            objects = self.mc.list_objects_v2(self.params.bucket_name, recursive=True)
            for obj in objects:
                filename = os.path.abspath(os.path.join(self.params.checkpoint_dir, obj.object_name))
                self.mc.fget_object(obj.bucket_name, obj.object_name, filename)
        except ResponseError as e:
            print("Got exception: %s\n while loading from S3" % e)
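S3DataStore mirrors the checkpoint directory into an S3-compatible bucket through the minio client, reading credentials either from an INI-style credentials file ([default] section) or from the ACCESS_KEY_ID / SECRET_ACCESS_KEY environment variables. A round-trip sketch follows; the endpoint, bucket, and paths are placeholders, and the bucket is assumed to exist already since the store does not create it.

from rl_coach.data_stores.data_store import DataStoreParameters
from rl_coach.data_stores.s3_data_store import S3DataStore, S3DataStoreParameters

base_params = DataStoreParameters(store_type="s3",
                                  orchestrator_type="kubernetes",
                                  orchestrator_params={})
store = S3DataStore(S3DataStoreParameters(base_params,
                                          creds_file="/path/to/credentials",   # INI file with a [default] section
                                          end_point="minio.example.com:9000",  # placeholder S3-compatible endpoint
                                          bucket_name="coach-checkpoints",     # placeholder, must already exist
                                          checkpoint_dir="/checkpoint"))

store.save_to_store()    # upload every file under /checkpoint into the bucket
store.load_from_store()  # pull the bucket contents back into /checkpoint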