1
0
mirror of https://github.com/gryf/coach.git synced 2025-12-17 19:20:19 +01:00

Add RedisDataStore (#295)

* GraphManager.set_session also sets self.sess

* make sure that GraphManager.fetch_from_worker uses training phase

* remove unnecessary phase setting in training worker

* reorganize rollout worker

* provide default name to GlobalVariableSaver.__init__ since it isn't really used anyway

* allow dividing TrainingSteps and EnvironmentSteps

* add timestamps to the log

* added redis data store

* conflict merge fix
This commit is contained in:
Zach Dwiel
2019-08-28 14:15:58 -04:00
committed by shadiendrawis
parent 34e1c04f29
commit 7b0fccb041
18 changed files with 528 additions and 120 deletions

View File

@@ -1,7 +1,11 @@
# REGISTRY=gcr.io
REGISTRY=gcr.io
REGISTRY=docker.io
ORGANIZATION=nervana
IMAGE=coach
# REGISTRY=amr-registry.caas.intel.com
# ORGANIZATION=aipg
# IMAGE=coach
CONTEXT = $(realpath ..)
BUILD_ARGUMENTS=
@@ -111,16 +115,17 @@ bootstrap_kubernetes: build push
kubectl run -i --tty --attach --image=${REGISTRY}/${IMAGE} --restart=Never distributed-coach -- python3 rl_coach/orchestrators/start_training.py --preset CartPole_DQN_distributed --image ${IMAGE} -ns 10.63.249.182 -np /
stop_kubernetes:
kubectl delete service --ignore-not-found redis-service
kubectl delete pv --ignore-not-found nfs-checkpoint-pv
kubectl delete pvc --ignore-not-found nfs-checkpoint-pvc
kubectl delete deployment --ignore-not-found redis-server
kubectl get jobs | grep train | awk "{print $\1}" | xargs kubectl delete jobs
kubectl get jobs | grep worker | awk "{print $\1}" | xargs kubectl delete jobs
kubectl get deployments | grep redis-server | awk "{print $$1}" | xargs kubectl delete deployments --ignore-not-found | true
kubectl get services | grep redis-service | awk "{print $$1}" | xargs kubectl delete services --ignore-not-found | true
kubectl get jobs | grep train | awk "{print $$1}" | xargs kubectl delete jobs --ignore-not-found | true
kubectl get jobs | grep worker | awk "{print $$1}" | xargs kubectl delete jobs --ignore-not-found | true
kubernetes: stop_kubernetes
python3 ${CONTEXT}/rl_coach/orchestrators/start_training.py --preset CartPole_DQN_distributed --image ${IMAGE} -ns 10.63.249.182 -np /
distributed: build push stop_kubernetes
python3 ${CONTEXT}/rl_coach/coach.py -p Mujoco_PPO -lvl humanoid --distributed_coach --distributed_coach_config_path ${CONTEXT}/distributed-coach.config -e stop_asking --num_workers 8
push: build
${DOCKER} tag ${IMAGE} ${REGISTRY}/${ORGANIZATION}/${IMAGE}
${DOCKER} push ${REGISTRY}/${ORGANIZATION}/${IMAGE}