diff --git a/.gitignore b/.gitignore
index 12c6990..3d028c9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -26,3 +26,7 @@ datasets
 core
 trace_test*
 .DS_Store
+*.swp
+*.swo
+.cache/
+*.pyc
diff --git a/requirements.txt b/requirements.txt
index 463dd95..acd5ff7 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -13,4 +13,5 @@ bokeh==0.13.0
 futures==3.1.1
 wxPython==4.0.1
 kubernetes==7.0.0
-redis==2.10.6
\ No newline at end of file
+redis==2.10.6
+minio==4.0.5
diff --git a/rl_coach/data_stores/data_store.py b/rl_coach/data_stores/data_store.py
new file mode 100644
index 0000000..03718e1
--- /dev/null
+++ b/rl_coach/data_stores/data_store.py
@@ -0,0 +1,26 @@
+
+
+class DataStoreParameters(object):
+    def __init__(self, store_type, orchestrator_type, orchestrator_params):
+        self.store_type = store_type
+        self.orchestrator_type = orchestrator_type
+        self.orchestrator_params = orchestrator_params
+
+class DataStore(object):
+    def __init__(self, params: DataStoreParameters):
+        pass
+
+    def deploy(self) -> bool:
+        pass
+
+    def get_info(self):
+        pass
+
+    def undeploy(self) -> bool:
+        pass
+
+    def save_to_store(self):
+        pass
+
+    def load_from_store(self):
+        pass
diff --git a/rl_coach/data_stores/data_store_impl.py b/rl_coach/data_stores/data_store_impl.py
new file mode 100644
index 0000000..9727fc1
--- /dev/null
+++ b/rl_coach/data_stores/data_store_impl.py
@@ -0,0 +1,12 @@
+from rl_coach.data_stores.nfs_data_store import NFSDataStore, NFSDataStoreParameters
+from rl_coach.data_stores.s3_data_store import S3DataStore, S3DataStoreParameters
+
+
+def get_data_store(params):
+    data_store = None
+    if isinstance(params, NFSDataStoreParameters):
+        data_store = NFSDataStore(params)
+    elif isinstance(params, S3DataStoreParameters):
+        data_store = S3DataStore(params)
+
+    return data_store
diff --git a/rl_coach/data_stores/nfs_data_store.py b/rl_coach/data_stores/nfs_data_store.py
new file mode 100644
index 0000000..6946139
--- /dev/null
+++ b/rl_coach/data_stores/nfs_data_store.py
@@ -0,0 +1,217 @@
+from rl_coach.data_stores.data_store import DataStore, DataStoreParameters
+from kubernetes import client as k8sclient
+
+
+class NFSDataStoreParameters(DataStoreParameters):
+    def __init__(self, ds_params, deployed=False, server=None, path=None):
+        super().__init__(ds_params.store_type, ds_params.orchestrator_type, ds_params.orchestrator_params)
+        self.namespace = "default"
+        if "namespace" in ds_params.orchestrator_params:
+            self.namespace = ds_params.orchestrator_params["namespace"]
+        self.name = None
+        self.pvc_name = None
+        self.pv_name = None
+        self.svc_name = None
+        self.server = None
+        self.path = "/"
+        self.deployed = deployed
+        if deployed:
+            self.server = server
+            self.path = path
+
+
+class NFSDataStore(DataStore):
+    def __init__(self, params: NFSDataStoreParameters):
+        self.params = params
+
+    def deploy(self) -> bool:
+        if self.params.orchestrator_type == "kubernetes":
+            if not self.params.deployed:
+                if not self.deploy_k8s_nfs():
+                    return False
+            if not self.create_k8s_nfs_resources():
+                return False
+
+        return True
+
+    def get_info(self):
+        return k8sclient.V1PersistentVolumeClaimVolumeSource(
+            claim_name=self.params.pvc_name
+        )
+
+    def undeploy(self) -> bool:
+        if self.params.orchestrator_type == "kubernetes":
+            if not self.params.deployed:
+                if not self.undeploy_k8s_nfs():
+                    return False
+            if not self.delete_k8s_nfs_resources():
+                return False
+
+        return True
+
+    def save_to_store(self):
+        pass
+
+    def load_from_store(self):
+        pass
+
+    def deploy_k8s_nfs(self) -> bool:
+        name = "nfs-server"
+        container = k8sclient.V1Container(
+            name=name,
+            image="k8s.gcr.io/volume-nfs:0.8",
+            ports=[k8sclient.V1ContainerPort(
+                name="nfs",
+                container_port=2049,
+                protocol="TCP"
+            )]
+        )
+        template = k8sclient.V1PodTemplateSpec(
+            metadata=k8sclient.V1ObjectMeta(labels={'app': 'nfs-server'}),
+            spec=k8sclient.V1PodSpec(
+                containers=[container]
+            )
+        )
+        deployment_spec = k8sclient.V1DeploymentSpec(
+            replicas=1,
+            template=template,
+            selector=k8sclient.V1LabelSelector(
+                match_labels={'app': 'nfs-server'}
+            )
+        )
+
+        deployment = k8sclient.V1Deployment(
+            api_version='apps/v1',
+            kind='Deployment',
+            metadata=k8sclient.V1ObjectMeta(name=name, labels={'app': 'nfs-server'}),
+            spec=deployment_spec
+        )
+
+        k8s_apps_v1_api_client = k8sclient.AppsV1Api()
+        try:
+            k8s_apps_v1_api_client.create_namespaced_deployment(self.params.namespace, deployment)
+            self.params.name = name
+        except k8sclient.rest.ApiException as e:
+            print("Got exception: {} while creating nfs-server".format(e))
+            return False
+
+        k8s_core_v1_api_client = k8sclient.CoreV1Api()
+
+        svc_name = "nfs-service"
+        service = k8sclient.V1Service(
+            api_version='v1',
+            kind='Service',
+            metadata=k8sclient.V1ObjectMeta(
+                name=svc_name
+            ),
+            spec=k8sclient.V1ServiceSpec(
+                selector={'app': self.params.name},
+                ports=[k8sclient.V1ServicePort(
+                    protocol='TCP',
+                    port=2049,
+                    target_port=2049
+                )]
+            )
+        )
+
+        try:
+            k8s_core_v1_api_client.create_namespaced_service(self.params.namespace, service)
+            self.params.svc_name = svc_name
+            self.params.server = 'nfs-service.{}.svc'.format(self.params.namespace)
+        except k8sclient.rest.ApiException as e:
+            print("Got exception: {} while creating a service for nfs-server".format(e))
+            return False
+
+        return True
+
+    def create_k8s_nfs_resources(self) -> bool:
+        pv_name = "nfs-ckpt-pv"
+        persistent_volume = k8sclient.V1PersistentVolume(
+            api_version="v1",
+            kind="PersistentVolume",
+            metadata=k8sclient.V1ObjectMeta(
+                name=pv_name,
+                labels={'app': pv_name}
+            ),
+            spec=k8sclient.V1PersistentVolumeSpec(
+                access_modes=["ReadWriteMany"],
+                nfs=k8sclient.V1NFSVolumeSource(
+                    path=self.params.path,
+                    server=self.params.server
+                ),
+                capacity={'storage': '10Gi'},
+                storage_class_name=""
+            )
+        )
+        k8s_api_client = k8sclient.CoreV1Api()
+        try:
+            k8s_api_client.create_persistent_volume(persistent_volume)
+            self.params.pv_name = pv_name
+        except k8sclient.rest.ApiException as e:
+            print("Got exception: {} while creating the NFS PV".format(e))
+            return False
+
+        pvc_name = "nfs-ckpt-pvc"
+        persistent_volume_claim = k8sclient.V1PersistentVolumeClaim(
+            api_version="v1",
+            kind="PersistentVolumeClaim",
+            metadata=k8sclient.V1ObjectMeta(
+                name=pvc_name
+            ),
+            spec=k8sclient.V1PersistentVolumeClaimSpec(
+                access_modes=["ReadWriteMany"],
+                resources=k8sclient.V1ResourceRequirements(
+                    requests={'storage': '10Gi'}
+                ),
+                selector=k8sclient.V1LabelSelector(
+                    match_labels={'app': self.params.pv_name}
+                ),
+                storage_class_name=""
+            )
+        )
+
+        try:
+            k8s_api_client.create_namespaced_persistent_volume_claim(self.params.namespace, persistent_volume_claim)
+            self.params.pvc_name = pvc_name
+        except k8sclient.rest.ApiException as e:
+            print("Got exception: {} while creating the NFS PVC".format(e))
+            return False
+
+        return True
+
+    def undeploy_k8s_nfs(self) -> bool:
+        del_options = k8sclient.V1DeleteOptions()
+
+        k8s_apps_v1_api_client = k8sclient.AppsV1Api()
+        try:
+            k8s_apps_v1_api_client.delete_namespaced_deployment(self.params.name, self.params.namespace, del_options)
+        except k8sclient.rest.ApiException as e:
+            print("Got exception: {} while deleting nfs-server".format(e))
+            return False
+
+        k8s_core_v1_api_client = k8sclient.CoreV1Api()
+        try:
+            k8s_core_v1_api_client.delete_namespaced_service(self.params.svc_name, self.params.namespace, del_options)
+        except k8sclient.rest.ApiException as e:
+            print("Got exception: {} while deleting the service for nfs-server".format(e))
+            return False
+
+        return True
+
+    def delete_k8s_nfs_resources(self) -> bool:
+        del_options = k8sclient.V1DeleteOptions()
+        k8s_api_client = k8sclient.CoreV1Api()
+
+        try:
+            k8s_api_client.delete_persistent_volume(self.params.pv_name, del_options)
+        except k8sclient.rest.ApiException as e:
+            print("Got exception: {} while deleting NFS PV".format(e))
+            return False
+
+        try:
+            k8s_api_client.delete_namespaced_persistent_volume_claim(self.params.pvc_name, self.params.namespace, del_options)
+        except k8sclient.rest.ApiException as e:
+            print("Got exception: {} while deleting NFS PVC".format(e))
+            return False
+
+        return True
diff --git a/rl_coach/data_stores/s3_data_store.py b/rl_coach/data_stores/s3_data_store.py
new file mode 100644
index 0000000..1b623e2
--- /dev/null
+++ b/rl_coach/data_stores/s3_data_store.py
@@ -0,0 +1,63 @@
+from rl_coach.data_stores.data_store import DataStore, DataStoreParameters
+from minio import Minio
+from minio.error import ResponseError
+from configparser import ConfigParser, Error
+import os
+
+
+class S3DataStoreParameters(DataStoreParameters):
+    def __init__(self, ds_params, creds_file: str = None, end_point: str = None, bucket_name: str = None,
+                 checkpoint_dir: str = None):
+
+        super().__init__(ds_params.store_type, ds_params.orchestrator_type, ds_params.orchestrator_params)
+        self.creds_file = creds_file
+        self.end_point = end_point
+        self.bucket_name = bucket_name
+        self.checkpoint_dir = checkpoint_dir
+
+
+class S3DataStore(DataStore):
+    def __init__(self, params: S3DataStoreParameters):
+        self.params = params
+        access_key = None
+        secret_key = None
+        if params.creds_file:
+            config = ConfigParser()
+            config.read(params.creds_file)
+            try:
+                access_key = config.get('default', 'aws_access_key_id')
+                secret_key = config.get('default', 'aws_secret_access_key')
+            except Error as e:
+                print("Error when reading S3 credentials file: {}".format(e))
+        else:
+            access_key = os.environ.get('ACCESS_KEY_ID')
+            secret_key = os.environ.get('SECRET_ACCESS_KEY')
+        self.mc = Minio(self.params.end_point, access_key=access_key, secret_key=secret_key)
+
+    def deploy(self) -> bool:
+        return True
+
+    def get_info(self):
+        return "s3://{}".format(self.params.bucket_name)
+
+    def undeploy(self) -> bool:
+        return True
+
+    def save_to_store(self):
+        try:
+            for root, dirs, files in os.walk(self.params.checkpoint_dir):
+                for filename in files:
+                    abs_name = os.path.abspath(os.path.join(root, filename))
+                    rel_name = os.path.relpath(abs_name, self.params.checkpoint_dir)
+                    self.mc.fput_object(self.params.bucket_name, rel_name, abs_name)
+        except ResponseError as e:
+            print("Got exception: {} while saving to S3".format(e))
+
+    def load_from_store(self):
+        try:
+            objects = self.mc.list_objects_v2(self.params.bucket_name, recursive=True)
+            for obj in objects:
+                filename = os.path.abspath(os.path.join(self.params.checkpoint_dir, obj.object_name))
+                self.mc.fget_object(obj.bucket_name, obj.object_name, filename)
+        except ResponseError as e:
+            print("Got exception: {} while loading from S3".format(e))
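
A minimal usage sketch of the new interface, showing how a caller would wire the pieces in this diff together. The credentials path, endpoint, bucket name, and checkpoint directory below are illustrative placeholders, not values from this change:

from rl_coach.data_stores.data_store import DataStoreParameters
from rl_coach.data_stores.data_store_impl import get_data_store
from rl_coach.data_stores.s3_data_store import S3DataStoreParameters

# Base parameters: store type, orchestrator type, and orchestrator params.
base_params = DataStoreParameters("s3", "kubernetes", {"namespace": "default"})

# Layer S3-specific settings on top of the base parameters.
s3_params = S3DataStoreParameters(base_params,
                                  creds_file="/root/.aws/credentials",  # placeholder path
                                  end_point="s3.amazonaws.com",         # placeholder endpoint
                                  bucket_name="rl-checkpoints",         # placeholder bucket
                                  checkpoint_dir="/checkpoint")         # placeholder dir

# The factory dispatches on the parameter type (S3DataStore here;
# NFSDataStoreParameters would yield an NFSDataStore instead).
data_store = get_data_store(s3_params)
data_store.deploy()         # no-op for S3; NFS provisions a server plus PV/PVC here
data_store.save_to_store()  # walks checkpoint_dir and uploads each file to the bucket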