diff --git a/.circleci/config.yml b/.circleci/config.yml index fdac537..d966349 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -30,10 +30,12 @@ aliases: sudo curl -o /usr/local/bin/aws-iam-authenticator https://amazon-eks.s3-us-west-2.amazonaws.com/1.10.3/2018-07-26/bin/linux/amd64/aws-iam-authenticator sudo chmod a+x /usr/local/bin/aws-iam-authenticator aws eks update-kubeconfig --name coach-aws-cicd + sudo curl -o /usr/local/bin/kubectl https://storage.googleapis.com/kubernetes-release/release/$(curl -s https://storage.googleapis.com/kubernetes-release/release/stable.txt)/bin/linux/amd64/kubectl + sudo chmod a+x /usr/local/bin/kubectl version: 2 jobs: - build: + build_base: <<: *executor_prep steps: - checkout @@ -41,7 +43,7 @@ jobs: - *restore_cache - *aws_prep - run: - name: Build and push container + name: Build and push base and main container command: | REGISTRY=316971102342.dkr.ecr.us-west-2.amazonaws.com TAG=$(git describe --tags --always --dirty) @@ -53,10 +55,64 @@ jobs: docker tag ${REGISTRY}/coach-base:${TAG} coach-base:master - docker build -t ${REGISTRY}/coach:${TAG} -f docker/Dockerfile . + docker build --build-arg MUJOCO_KEY=${MUJOCO_KEY} -t ${REGISTRY}/coach:${TAG} -f docker/Dockerfile . docker push ${REGISTRY}/coach:${TAG} no_output_timeout: 30m + build_gym_env: + <<: *executor_prep + steps: + - checkout + - *remote_docker + - *restore_cache + - *aws_prep + - run: + name: Build and push gym environment container + command: | + REGISTRY=316971102342.dkr.ecr.us-west-2.amazonaws.com + TAG=$(git describe --tags --always --dirty) + docker pull ${REGISTRY}/coach-base:${MASTER_BRANCH} + docker tag ${REGISTRY}/coach-base:${MASTER_BRANCH} coach-base:master + docker build --cache-from ${REGISTRY}/coach-base:${MASTER_BRANCH} -t ${REGISTRY}/coach-gym_environment:${TAG} -f docker/Dockerfile.gym_environment . 
+ docker push ${REGISTRY}/coach-gym_environment:${TAG} + no_output_timeout: 10m + + build_doom_env: + <<: *executor_prep + steps: + - checkout + - *remote_docker + - *restore_cache + - *aws_prep + - run: + name: Build and push vizdoom environment container + command: | + REGISTRY=316971102342.dkr.ecr.us-west-2.amazonaws.com + TAG=$(git describe --tags --always --dirty) + docker pull ${REGISTRY}/coach-base:${MASTER_BRANCH} + docker tag ${REGISTRY}/coach-base:${MASTER_BRANCH} coach-base:master + docker build --cache-from ${REGISTRY}/coach-base:${MASTER_BRANCH} -t ${REGISTRY}/coach-doom_environment:${TAG} -f docker/Dockerfile.doom_environment . + docker push ${REGISTRY}/coach-doom_environment:${TAG} + no_output_timeout: 10m + + build_mujoco_env: + <<: *executor_prep + steps: + - checkout + - *remote_docker + - *restore_cache + - *aws_prep + - run: + name: Build and push mujoco environment container + command: | + REGISTRY=316971102342.dkr.ecr.us-west-2.amazonaws.com + TAG=$(git describe --tags --always --dirty) + docker pull ${REGISTRY}/coach-base:${MASTER_BRANCH} + docker tag ${REGISTRY}/coach-base:${MASTER_BRANCH} coach-base:master + docker build --cache-from ${REGISTRY}/coach-base:${MASTER_BRANCH} --build-arg MUJOCO_KEY=${MUJOCO_KEY} -t ${REGISTRY}/coach-mujoco_environment:${TAG} -f docker/Dockerfile.mujoco_environment . 
+ docker push ${REGISTRY}/coach-mujoco_environment:${TAG} + no_output_timeout: 10m + unit_tests: <<: *executor_prep steps: @@ -70,6 +126,14 @@ jobs: export AWS_ACCESS_KEY_ID=`echo ${AWS_ACCESS_KEY_ID} | base64 --decode` export AWS_SECRET_ACCESS_KEY=`echo ${AWS_SECRET_ACCESS_KEY} | base64 --decode` python3 rl_coach/tests/test_eks.py -c coach-test -bn ${CIRCLE_BUILD_NUM} -tn unit-test -tc 'make unit_tests_without_docker' -i 316971102342.dkr.ecr.us-west-2.amazonaws.com/coach:$(git describe --tags --always --dirty) -cpu 2048 -mem 4096 + - run: + name: cleanup + command: | + export AWS_ACCESS_KEY_ID=`echo ${AWS_ACCESS_KEY_ID} | base64 --decode` + export AWS_SECRET_ACCESS_KEY=`echo ${AWS_SECRET_ACCESS_KEY} | base64 --decode` + kubectl delete --all pods --namespace=unit-test-${CIRCLE_BUILD_NUM} || true + kubectl delete ns unit-test-${CIRCLE_BUILD_NUM} || true + when: always integration_tests: <<: *executor_prep @@ -84,8 +148,16 @@ jobs: export AWS_ACCESS_KEY_ID=`echo ${AWS_ACCESS_KEY_ID} | base64 --decode` export AWS_SECRET_ACCESS_KEY=`echo ${AWS_SECRET_ACCESS_KEY} | base64 --decode` python3 rl_coach/tests/test_eks.py -c coach-test -bn ${CIRCLE_BUILD_NUM} -tn integration-test -tc 'make integration_tests_without_docker' -i 316971102342.dkr.ecr.us-west-2.amazonaws.com/coach:$(git describe --tags --always --dirty) -cpu 2048 -mem 4096 + - run: + name: cleanup + command: | + export AWS_ACCESS_KEY_ID=`echo ${AWS_ACCESS_KEY_ID} | base64 --decode` + export AWS_SECRET_ACCESS_KEY=`echo ${AWS_SECRET_ACCESS_KEY} | base64 --decode` + kubectl delete --all pods --namespace=integration-test-${CIRCLE_BUILD_NUM} || true + kubectl delete ns integration-test-${CIRCLE_BUILD_NUM} || true + when: always - golden_tests: + golden_test_gym: <<: *executor_prep steps: - checkout @@ -93,13 +165,23 @@ jobs: - *restore_cache - *aws_prep - run: - name: run golden tests + name: run gym related golden tests command: | export AWS_ACCESS_KEY_ID=`echo ${AWS_ACCESS_KEY_ID} | base64 --decode` export 
AWS_SECRET_ACCESS_KEY=`echo ${AWS_SECRET_ACCESS_KEY} | base64 --decode` - python3 rl_coach/tests/test_eks.py -c coach-test -bn ${CIRCLE_BUILD_NUM} -tn golden-test -tc 'make golden_tests_without_docker' -i 316971102342.dkr.ecr.us-west-2.amazonaws.com/coach:$(git describe --tags --always --dirty) -cpu 2048 -mem 4096 + export PRESETS='CartPole_A3C,CartPole_Dueling_DDQN,CartPole_NStepQ,CartPole_DQN,CartPole_DFP,CartPole_PG,CartPole_NEC,CartPole_ClippedPPO,CartPole_PAL' + python3 rl_coach/tests/test_eks.py -c coach-test -bn ${CIRCLE_BUILD_NUM} -tn golden-test-gym -tc "export PRESETS=${PRESETS} && make golden_tests_without_docker" -i 316971102342.dkr.ecr.us-west-2.amazonaws.com/coach-gym_environment:$(git describe --tags --always --dirty) -cpu 2048 -mem 4096 + no_output_timeout: 30m + - run: + name: cleanup + command: | + export AWS_ACCESS_KEY_ID=`echo ${AWS_ACCESS_KEY_ID} | base64 --decode` + export AWS_SECRET_ACCESS_KEY=`echo ${AWS_SECRET_ACCESS_KEY} | base64 --decode` + kubectl delete --all pods --namespace=golden-test-gym-${CIRCLE_BUILD_NUM} || true + kubectl delete ns golden-test-gym-${CIRCLE_BUILD_NUM} || true + when: always - trace_tests: + golden_test_doom: <<: *executor_prep steps: - checkout @@ -107,30 +189,197 @@ jobs: - *restore_cache - *aws_prep - run: - name: run trace tests + name: run doom related golden tests command: | export AWS_ACCESS_KEY_ID=`echo ${AWS_ACCESS_KEY_ID} | base64 --decode` export AWS_SECRET_ACCESS_KEY=`echo ${AWS_SECRET_ACCESS_KEY} | base64 --decode` - python3 rl_coach/tests/test_eks.py -c coach-test -bn ${CIRCLE_BUILD_NUM} -tn trace-test -tc 'make trace_tests_without_docker' -i 316971102342.dkr.ecr.us-west-2.amazonaws.com/coach:$(git describe --tags --always --dirty) -cpu 2048 -mem 4096 + export PRESETS='Doom_Basic_DQN,Doom_Basic_A3C,Doom_Health_DFP' + python3 rl_coach/tests/test_eks.py -c coach-test -bn ${CIRCLE_BUILD_NUM} -tn golden-test-doom -tc "export PRESETS=${PRESETS} && make golden_tests_without_docker" -i 
316971102342.dkr.ecr.us-west-2.amazonaws.com/coach-doom_environment:$(git describe --tags --always --dirty) -cpu 2048 -mem 4096 + no_output_timeout: 30m + - run: + name: cleanup + command: | + export AWS_ACCESS_KEY_ID=`echo ${AWS_ACCESS_KEY_ID} | base64 --decode` + export AWS_SECRET_ACCESS_KEY=`echo ${AWS_SECRET_ACCESS_KEY} | base64 --decode` + kubectl delete --all pods --namespace=golden-test-doom-${CIRCLE_BUILD_NUM} || true + kubectl delete ns golden-test-doom-${CIRCLE_BUILD_NUM} || true + when: always + + golden_test_mujoco: + <<: *executor_prep + steps: + - checkout + - *remote_docker + - *restore_cache + - *aws_prep + - run: + name: run mujoco related golden tests + command: | + export AWS_ACCESS_KEY_ID=`echo ${AWS_ACCESS_KEY_ID} | base64 --decode` + export AWS_SECRET_ACCESS_KEY=`echo ${AWS_SECRET_ACCESS_KEY} | base64 --decode` + export PRESETS='BitFlip_DQN_HER,BitFlip_DQN,Mujoco_A3C,Mujoco_A3C_LSTM,Mujoco_PPO,Mujoco_ClippedPPO,Mujoco_DDPG' + python3 rl_coach/tests/test_eks.py -c coach-test -bn ${CIRCLE_BUILD_NUM} -tn golden-test-mujoco -tc "export PRESETS=${PRESETS} && make golden_tests_without_docker" -i 316971102342.dkr.ecr.us-west-2.amazonaws.com/coach-mujoco_environment:$(git describe --tags --always --dirty) -cpu 2048 -mem 4096 + no_output_timeout: 30m + - run: + name: cleanup + command: | + export AWS_ACCESS_KEY_ID=`echo ${AWS_ACCESS_KEY_ID} | base64 --decode` + export AWS_SECRET_ACCESS_KEY=`echo ${AWS_SECRET_ACCESS_KEY} | base64 --decode` + kubectl delete --all pods --namespace=golden-test-mujoco-${CIRCLE_BUILD_NUM} || true + kubectl delete ns golden-test-mujoco-${CIRCLE_BUILD_NUM} || true + when: always + + trace_test_gym: + <<: *executor_prep + steps: + - checkout + - *remote_docker + - *restore_cache + - *aws_prep + - run: + name: run gym related trace tests + command: | + export AWS_ACCESS_KEY_ID=`echo ${AWS_ACCESS_KEY_ID} | base64 --decode` + export AWS_SECRET_ACCESS_KEY=`echo ${AWS_SECRET_ACCESS_KEY} | base64 --decode` + export 
PRESETS='CartPole_A3C,CartPole_Dueling_DDQN,CartPole_NStepQ,CartPole_DQN,CartPole_DFP,CartPole_PG,CartPole_NEC,CartPole_ClippedPPO,CartPole_PAL' + python3 rl_coach/tests/test_eks.py -c coach-test -bn ${CIRCLE_BUILD_NUM} -tn trace-test-gym -tc "export PRESETS=${PRESETS} && make trace_tests_without_docker" -i 316971102342.dkr.ecr.us-west-2.amazonaws.com/coach-gym_environment:$(git describe --tags --always --dirty) -cpu 2048 -mem 4096 + no_output_timeout: 30m + - run: + name: cleanup + command: | + export AWS_ACCESS_KEY_ID=`echo ${AWS_ACCESS_KEY_ID} | base64 --decode` + export AWS_SECRET_ACCESS_KEY=`echo ${AWS_SECRET_ACCESS_KEY} | base64 --decode` + kubectl delete --all pods --namespace=trace-test-gym-${CIRCLE_BUILD_NUM} || true + kubectl delete ns trace-test-gym-${CIRCLE_BUILD_NUM} || true + when: always + + trace_test_doom: + <<: *executor_prep + steps: + - checkout + - *remote_docker + - *restore_cache + - *aws_prep + - run: + name: run doom related trace tests + command: | + export AWS_ACCESS_KEY_ID=`echo ${AWS_ACCESS_KEY_ID} | base64 --decode` + export AWS_SECRET_ACCESS_KEY=`echo ${AWS_SECRET_ACCESS_KEY} | base64 --decode` + export PRESETS='Doom_Basic_DQN,Doom_Basic_A3C,Doom_Health_DFP' + python3 rl_coach/tests/test_eks.py -c coach-test -bn ${CIRCLE_BUILD_NUM} -tn trace-test-doom -tc "export PRESETS=${PRESETS} && make trace_tests_without_docker" -i 316971102342.dkr.ecr.us-west-2.amazonaws.com/coach-doom_environment:$(git describe --tags --always --dirty) -cpu 2048 -mem 4096 + no_output_timeout: 30m + - run: + name: cleanup + command: | + export AWS_ACCESS_KEY_ID=`echo ${AWS_ACCESS_KEY_ID} | base64 --decode` + export AWS_SECRET_ACCESS_KEY=`echo ${AWS_SECRET_ACCESS_KEY} | base64 --decode` + kubectl delete --all pods --namespace=trace-test-doom-${CIRCLE_BUILD_NUM} || true + kubectl delete ns trace-test-doom-${CIRCLE_BUILD_NUM} || true + when: always + + trace_test_mujoco: + <<: *executor_prep + steps: + - checkout + - *remote_docker + - *restore_cache + - *aws_prep 
+ - run: + name: run mujoco related trace tests + command: | + export AWS_ACCESS_KEY_ID=`echo ${AWS_ACCESS_KEY_ID} | base64 --decode` + export AWS_SECRET_ACCESS_KEY=`echo ${AWS_SECRET_ACCESS_KEY} | base64 --decode` + export PRESETS='BitFlip_DQN_HER,BitFlip_DQN,Mujoco_A3C,Mujoco_A3C_LSTM,Mujoco_PPO,Mujoco_ClippedPPO,Mujoco_DDPG' + python3 rl_coach/tests/test_eks.py -c coach-test -bn ${CIRCLE_BUILD_NUM} -tn trace-test-mujoco -tc "export PRESETS=${PRESETS} && make trace_tests_without_docker" -i 316971102342.dkr.ecr.us-west-2.amazonaws.com/coach-mujoco_environment:$(git describe --tags --always --dirty) -cpu 2048 -mem 4096 + no_output_timeout: 30m + - run: + name: cleanup + command: | + export AWS_ACCESS_KEY_ID=`echo ${AWS_ACCESS_KEY_ID} | base64 --decode` + export AWS_SECRET_ACCESS_KEY=`echo ${AWS_SECRET_ACCESS_KEY} | base64 --decode` + kubectl delete --all pods --namespace=trace-test-mujoco-${CIRCLE_BUILD_NUM} || true + kubectl delete ns trace-test-mujoco-${CIRCLE_BUILD_NUM} || true + when: always + + container_deploy: + <<: *executor_prep + steps: + - checkout + - *remote_docker + - *restore_cache + - *aws_prep + - run: + name: Tag and push updated base and main container + command: | + REGISTRY=316971102342.dkr.ecr.us-west-2.amazonaws.com + TAG=$(git describe --tags --always --dirty) + + docker pull ${REGISTRY}/coach-base:${TAG} + docker tag ${REGISTRY}/coach-base:${TAG} coach-base:${MASTER_BRANCH} + docker push ${REGISTRY}/coach-base:${MASTER_BRANCH} + + docker pull ${REGISTRY}/coach:${TAG} + docker tag ${REGISTRY}/coach:${TAG} coach:${MASTER_BRANCH} + docker push ${REGISTRY}/coach:${MASTER_BRANCH} workflows: version: 2 build_and_test: jobs: - - build + - build_base - unit_tests: requires: - - build + - build_base - integration_tests: requires: - - build + - build_base - e2e_approval: type: approval requires: - - build - - golden_tests: + - build_base + - build_gym_env: requires: - e2e_approval - - trace_tests: + - build_doom_env: requires: - e2e_approval + - 
build_mujoco_env: + requires: + - e2e_approval + - gym_approval: + type: approval + requires: + - golden_test_gym + - doom_approval: + type: approval + requires: + - golden_test_doom + - mujoco_approval: + type: approval + requires: + - golden_test_mujoco + - golden_test_gym: + requires: + - build_gym_env + - golden_test_doom: + requires: + - build_doom_env + - golden_test_mujoco: + requires: + - build_mujoco_env + - trace_test_gym: + requires: + - gym_approval + - trace_test_doom: + requires: + - doom_approval + - trace_test_mujoco: + requires: + - mujoco_approval + - container_deploy: + requires: + - unit_tests + - integration_tests + filters: + branches: + only: 0.11.0-release diff --git a/README.md b/README.md index d03fc8c..7100caa 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,6 @@ # Coach +[![CircleCI](https://circleci.com/gh/IntelAI/coach-aws.svg?style=svg&circle-token=e2b3ca534b4956baff8b66459faf0f796117e803)](https://circleci.com/gh/IntelAI/coach-aws) [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://github.com/NervanaSystems/coach/blob/master/LICENSE) [![Docs](https://readthedocs.org/projects/carla/badge/?version=latest)](https://nervanasystems.github.io/coach/) [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.1134898.svg)](https://doi.org/10.5281/zenodo.1134898) diff --git a/docker/Dockerfile b/docker/Dockerfile index 1237e64..420ac8f 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,16 +1,32 @@ -FROM coach-base:master +FROM coach-base:master as builder + +# prep some of the more common environments +# Gym (installed with coach) +# Mujoco +RUN mkdir -p ~/.mujoco \ + && wget https://www.roboti.us/download/mjpro150_linux.zip -O mujoco.zip \ + && unzip mujoco.zip -d ~/.mujoco \ + && rm mujoco.zip +ARG MUJOCO_KEY +ENV MUJOCO_KEY=$MUJOCO_KEY +ENV LD_LIBRARY_PATH /root/.mujoco/mjpro150/bin:$LD_LIBRARY_PATH +RUN echo $MUJOCO_KEY | base64 --decode > /root/.mujoco/mjkey.txt +RUN pip3 install mujoco_py +# Vizdoom 
+RUN pip3 install vizdoom RUN mkdir /root/src COPY setup.py /root/src/. COPY requirements.txt /root/src/. +RUN pip3 install -r /root/src/requirements.txt + +FROM coach-base:master +WORKDIR /root/src +COPY --from=builder /root/.mujoco /root/.mujoco +ENV LD_LIBRARY_PATH /root/.mujoco/mjpro150/bin:$LD_LIBRARY_PATH +COPY --from=builder /root/.cache /root/.cache +COPY setup.py /root/src/. +COPY requirements.txt /root/src/. COPY README.md /root/src/. -WORKDIR /root/src -RUN pip3 install -e .[all] - -# everything above here should be cached most of the time +RUN pip3 install mujoco_py vizdoom && pip3 install -e .[all] && rm -rf /root/.cache COPY . /root/src -WORKDIR /root/src -RUN pip3 install -e .[all] - -RUN chmod 777 /root/src/docker/docker_entrypoint.sh -ENTRYPOINT ["/root/src/docker/docker_entrypoint.sh"] diff --git a/docker/Dockerfile.base b/docker/Dockerfile.base index 8659096..75cc615 100644 --- a/docker/Dockerfile.base +++ b/docker/Dockerfile.base @@ -1,16 +1,4 @@ -FROM nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04 - -# https://github.com/NVIDIA/nvidia-docker/issues/619 -RUN rm /etc/apt/sources.list.d/cuda.list -RUN apt-get update && \ - apt-get upgrade -y && \ - apt-get clean autoclean && \ - apt-get autoremove -y && apt-get update && \ - apt-get install -y python-pip && \ - apt-get clean autoclean && \ - apt-get autoremove -y -RUN pip install pip --upgrade -WORKDIR /root +FROM nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04 ################################ # Install apt-get Requirements # @@ -45,19 +33,7 @@ RUN apt-get update && \ # Install Pip Requirements # ############################ RUN pip3 install --upgrade pip -RUN pip3 install pytest -RUN pip3 install pytest-xdist - -# initial installation of coach, so that the docker build won't install everything from scratch -RUN pip3 install rl_coach>=0.10.0 && pip3 install gym[atari]==0.10.5 && \ - pip3 install mujoco_py==1.50.1.56 && pip3 install vizdoom==1.1.6 - -RUN mkdir -p ~/.mujoco \ - && wget 
https://www.roboti.us/download/mjpro150_linux.zip -O mujoco.zip \ - && unzip mujoco.zip -d ~/.mujoco \ - && rm mujoco.zip -# COPY ./mjkey.txt /root/.mujoco/ -ENV LD_LIBRARY_PATH /root/.mujoco/mjpro150/bin:$LD_LIBRARY_PATH +RUN pip3 install setuptools==39.1.0 && pip3 install pytest && pip3 install pytest-xdist RUN curl -o /usr/local/bin/patchelf https://s3-us-west-2.amazonaws.com/openai-sci-artifacts/manual-builds/patchelf_0.9_amd64.elf \ && chmod +x /usr/local/bin/patchelf diff --git a/docker/Dockerfile.build b/docker/Dockerfile.build deleted file mode 100644 index 176ea8f..0000000 --- a/docker/Dockerfile.build +++ /dev/null @@ -1,26 +0,0 @@ -FROM ubuntu:16.04 - -RUN apt-get update \ - && apt-get install -y \ - python3-pip cmake zlib1g-dev python3-tk python-opencv \ - libboost-all-dev \ - libblas-dev liblapack-dev libatlas-base-dev gfortran \ - libsdl-dev libsdl-image1.2-dev libsdl-mixer1.2-dev libsdl-ttf2.0-dev \ - libsmpeg-dev libportmidi-dev libavformat-dev libswscale-dev \ - dpkg-dev build-essential python3.5-dev libjpeg-dev libtiff-dev \ - libsdl1.2-dev libnotify-dev freeglut3 freeglut3-dev libsm-dev \ - libgtk2.0-dev libgtk-3-dev libwebkitgtk-dev libgtk-3-dev \ - libwebkitgtk-3.0-dev libgstreamer-plugins-base1.0-dev \ - libav-tools libsdl2-dev swig - -RUN pip3 install --upgrade pip - -COPY requirements.txt /coach/requirements.txt - -WORKDIR /coach - -RUN pip3 install -r requirements.txt - -COPY . /coach - -RUN pip3 install . diff --git a/docker/Dockerfile.doom_environment b/docker/Dockerfile.doom_environment new file mode 100644 index 0000000..4498e80 --- /dev/null +++ b/docker/Dockerfile.doom_environment @@ -0,0 +1,20 @@ +FROM coach-base:master as builder + +# prep vizdoom and any of its related requirements. +RUN pip3 install vizdoom + +# add coach source starting with files that could trigger +# re-build if dependencies change. +RUN mkdir /root/src +COPY setup.py /root/src/. +COPY requirements.txt /root/src/. 
+RUN pip3 install -r /root/src/requirements.txt + +FROM coach-base:master +WORKDIR /root/src +COPY --from=builder /root/.cache /root/.cache +COPY setup.py /root/src/. +COPY requirements.txt /root/src/. +COPY README.md /root/src/. +RUN pip3 install vizdoom && pip3 install -e .[all] && rm -rf /root/.cache +COPY . /root/src diff --git a/docker/Dockerfile.gym_environment b/docker/Dockerfile.gym_environment new file mode 100644 index 0000000..f667798 --- /dev/null +++ b/docker/Dockerfile.gym_environment @@ -0,0 +1,20 @@ +FROM coach-base:master as builder + +# prep gym and any of its related requirements. +RUN pip3 install gym[atari,box2d,classic_control]==0.10.5 + +# add coach source starting with files that could trigger +# re-build if dependencies change. +RUN mkdir /root/src +COPY setup.py /root/src/. +COPY requirements.txt /root/src/. +RUN pip3 install -r /root/src/requirements.txt + +FROM coach-base:master +WORKDIR /root/src +COPY --from=builder /root/.cache /root/.cache +COPY setup.py /root/src/. +COPY requirements.txt /root/src/. +COPY README.md /root/src/. +RUN pip3 install gym[atari,box2d,classic_control]==0.10.5 && pip3 install -e .[all] && rm -rf /root/.cache +COPY . /root/src diff --git a/docker/Dockerfile.mujoco_environment b/docker/Dockerfile.mujoco_environment new file mode 100644 index 0000000..1959283 --- /dev/null +++ b/docker/Dockerfile.mujoco_environment @@ -0,0 +1,31 @@ +FROM coach-base:master as builder + +# prep mujoco and any of its related requirements. +# Mujoco +RUN mkdir -p ~/.mujoco \ + && wget https://www.roboti.us/download/mjpro150_linux.zip -O mujoco.zip \ + && unzip -n mujoco.zip -d ~/.mujoco \ + && rm mujoco.zip +ARG MUJOCO_KEY +ENV MUJOCO_KEY=$MUJOCO_KEY +ENV LD_LIBRARY_PATH /root/.mujoco/mjpro150/bin:$LD_LIBRARY_PATH +RUN echo $MUJOCO_KEY | base64 --decode > /root/.mujoco/mjkey.txt +RUN pip3 install mujoco_py + +# add coach source starting with files that could trigger +# re-build if dependencies change. 
+RUN mkdir /root/src +COPY setup.py /root/src/. +COPY requirements.txt /root/src/. +RUN pip3 install -r /root/src/requirements.txt + +FROM coach-base:master +WORKDIR /root/src +COPY --from=builder /root/.mujoco /root/.mujoco +ENV LD_LIBRARY_PATH /root/.mujoco/mjpro150/bin:$LD_LIBRARY_PATH +COPY --from=builder /root/.cache /root/.cache +COPY setup.py /root/src/. +COPY requirements.txt /root/src/. +COPY README.md /root/src/. +RUN pip3 install mujoco_py && pip3 install -e .[all] && rm -rf /root/.cache +COPY . /root/src diff --git a/docker/Makefile b/docker/Makefile index c409f8a..d101b7a 100644 --- a/docker/Makefile +++ b/docker/Makefile @@ -1,8 +1,7 @@ -# REGISTRY=nervana-dockrepo01.fm.intel.com:5001/ -# REGISTRY=gcr.io/ -REGISTRY=docker.io/ -IMAGE=zdwiel/coach -# IMAGE=gcr.io/deep-greens/inference:v5 +# REGISTRY=gcr.io +REGISTRY=docker.io +ORGANIZATION=nervana +IMAGE=coach BUILD_ARGUMENTS= RUN_ARGUMENTS= @@ -15,15 +14,23 @@ ifdef https_proxy BUILD_ARGUMENTS+=--build-arg https_proxy=$(https_proxy) RUN_ARGUMENTS+=--env https_proxy=$(https_proxy) endif +ifdef MUJOCO_KEY + BUILD_ARGUMENTS+=--build-arg MUJOCO_KEY=$(MUJOCO_KEY) +endif RUN_ARGUMENTS+=--rm RUN_ARGUMENTS+=--net host RUN_ARGUMENTS+=-v /tmp/checkpoint:/checkpoint UNIT_TESTS=python3 -m pytest rl_coach/tests -m unit_test -INTEGRATION_TESTS=python3 -m pytest rl_coach/tests -m integration_test -n auto --tb=short -GOLDEN_TESTS=python3 -m pytest rl_coach/tests -m golden_test -n auto -TRACE_TESTS=python3 rl_coach/tests/trace_tests.py -prl +INTEGRATION_TESTS=python3 -m pytest rl_coach/tests -m integration_test --tb=short +ifdef PRESETS + PRESETS := -p $(PRESETS) +else + PRESETS := +endif +GOLDEN_TESTS=python3 rl_coach/tests/test_golden.py ${PRESETS} +TRACE_TESTS=python3 rl_coach/tests/trace_tests.py -prl ${PRESETS} CONTEXT = $(realpath ..) 
@@ -31,7 +38,10 @@ ifndef DOCKER DOCKER = docker endif -build: +build_base: + ${DOCKER} build -f=Dockerfile.base -t=${IMAGE}-base:master ${BUILD_ARGUMENTS} ${CONTEXT} + +build: build_base ${DOCKER} build -f=Dockerfile -t=${IMAGE} ${BUILD_ARGUMENTS} ${CONTEXT} mkdir -p /tmp/checkpoint rm -rf /tmp/checkpoint/* @@ -40,13 +50,13 @@ shell: build ${DOCKER} run ${RUN_ARGUMENTS} -it ${IMAGE} /bin/bash unit_tests: build - ${DOCKER} run ${RUN_ARGUMENTS} -it ${IMAGE} ${UNIT_TESTS} -n 8 + ${DOCKER} run ${RUN_ARGUMENTS} -it ${IMAGE} ${UNIT_TESTS} integration_tests: build ${DOCKER} run ${RUN_ARGUMENTS} -it ${IMAGE} ${INTEGRATION_TESTS} golden_tests: build - ${DOCKER} run ${RUN_ARGUMENTS} -it ${IMAGE} ${GOLDEN_TESTS} + ${DOCKER} run ${RUN_ARGUMENTS} -it ${IMAGE} ${GOLDEN_TESTS} ${PRESETS} trace_tests: build ${DOCKER} run ${RUN_ARGUMENTS} -it ${IMAGE} ${TRACE_TESTS} @@ -61,7 +71,7 @@ run_rollout_worker: build ${DOCKER} run ${RUN_ARGUMENTS} -it ${IMAGE} python3 rl_coach/rollout_worker.py --preset CartPole_DQN_distributed bootstrap_kubernetes: build push - kubectl run -i --tty --attach --image=${REGISTRY}${IMAGE} --restart=Never distributed-coach -- python3 rl_coach/orchestrators/start_training.py --preset CartPole_DQN_distributed --image ${IMAGE} -ns 10.63.249.182 -np / + kubectl run -i --tty --attach --image=${REGISTRY}/${IMAGE} --restart=Never distributed-coach -- python3 rl_coach/orchestrators/start_training.py --preset CartPole_DQN_distributed --image ${IMAGE} -ns 10.63.249.182 -np / stop_kubernetes: kubectl delete service --ignore-not-found redis-service @@ -75,8 +85,8 @@ kubernetes: stop_kubernetes python3 ${CONTEXT}/rl_coach/orchestrators/start_training.py --preset CartPole_DQN_distributed --image ${IMAGE} -ns 10.63.249.182 -np / push: build - ${DOCKER} tag ${IMAGE} ${REGISTRY}${IMAGE} - ${DOCKER} push ${REGISTRY}${IMAGE} + ${DOCKER} tag ${IMAGE} ${REGISTRY}/${ORGANIZATION}/${IMAGE} + ${DOCKER} push ${REGISTRY}/${ORGANIZATION}/${IMAGE} unit_tests_without_docker: cd .. 
&& ${UNIT_TESTS} diff --git a/docker/README.md b/docker/README.md new file mode 100644 index 0000000..b9a8e8f --- /dev/null +++ b/docker/README.md @@ -0,0 +1,27 @@ +# Container Images + +In this directory we've put together several different Dockerfiles that can be used to build +containers that have coach and other environments/dependencies installed. How to build these +and what each contains is defined below: + +## default `Dockerfile` +* `make build` to create the image +* will create a basic Coach installation along with Gym (atari), Mujoco, and Vizdoom environments. +* useful for running unit/integration tests; use `make unit_tests` to run these in the container +* `make shell` will launch this container locally, and provide a bash shell prompt. +* includes GPU support (derives from `Dockerfile.base` which is a CUDA ubuntu 16.04 derived image) + +## `Dockerfile.mujoco_environment` +* `docker build --build-arg MUJOCO_KEY=${MUJOCO_KEY} -f docker/Dockerfile.mujoco_environment .` + from the parent dir to create the image +* contains mujoco environment and Coach. +* you need to supply your own license key (base64 encoded) as an environment variable `MUJOCO_KEY` + to ensure you get the complete Mujoco environment + +## `Dockerfile.gym_environment` +* `docker build -f docker/Dockerfile.gym_environment .` from the parent dir to create the image +* contains OpenAI Gym environment (and all extras) and Coach. + +## `Dockerfile.doom_environment` +* `docker build -f docker/Dockerfile.doom_environment .` from the parent dir to create the image +* contains vizdoom environment and Coach. 
diff --git a/docker/docker_entrypoint.sh b/docker/docker_entrypoint.sh deleted file mode 100644 index feccda2..0000000 --- a/docker/docker_entrypoint.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/bin/sh -set -e - -# # download mjpro150 -# mkdir /root/.mujoco -# cd /root/.mujoco -# wget https://www.roboti.us/download/mjpro150_linux.zip -# unzip mjpro150_linux.zip - -# copy the mujoco license key into the container -# echo $MUJOCO_KEY | base64 --decode > /root/.mujoco/mjkey.txt -# export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/root/.mujoco/mjpro150/bin - -# git clone https://github.com/deepmind/dm_control.git -# pip3 install ./dm_control - -export VIZDOOM_ROOT=`pip show vizdoom 2>/dev/null | awk '/Location/{print $2}'`/vizdoom - -bash -c "$@" diff --git a/docs_raw/docs/usage.md b/docs_raw/docs/usage.md index 26d13b9..5edc860 100644 --- a/docs_raw/docs/usage.md +++ b/docs_raw/docs/usage.md @@ -12,7 +12,7 @@ This is the most common case. Just choose a preset using the `-p` flag and press ### Multi-threaded Algorithms -Multi-threaded algorithms are very common this days. +Multi-threaded algorithms are very common these days. They typically achieve the best results, and scale gracefully with the number of threads. In Coach, running such algorithms is done by selecting a suitable preset, and choosing the number of threads to run using the `-n` flag. @@ -130,4 +130,4 @@ The most up to date description can be found by using the `-h` flag. |`-et ENVIRONMENT_TYPE`, `--environment_type ENVIRONMENT_TYPE`|string|Choose an environment type class to override on top of the selected preset. 
If no preset is defined, a preset can be set from the command-line by combining settings which are set by using `--agent_type`, `--experiment_type`, `--environemnt_type`| |`-ept EXPLORATION_POLICY_TYPE`, `--exploration_policy_type EXPLORATION_POLICY_TYPE`|string|Choose an exploration policy type class to override on top of the selected preset.If no preset is defined, a preset can be set from the command-line by combining settings which are set by using `--agent_type`, `--experiment_type`, `--environemnt_type`| |`-lvl LEVEL`, `--level LEVEL` |string|Choose the level that will be played in the environment that was selected. This value will override the level parameter in the environment class.| -|`-cp CUSTOM_PARAMETER`, `--custom_parameter CUSTOM_PARAMETER`|string| Semicolon separated parameters used to override specific parameters on top of the selected preset (or on top of the command-line assembled one). Whenever a parameter value is a string, it should be inputted as `'\"string\"'`. For ex.: `"visualization.render=False;` `num_training_iterations=500;` `optimizer='rmsprop'"`| \ No newline at end of file +|`-cp CUSTOM_PARAMETER`, `--custom_parameter CUSTOM_PARAMETER`|string| Semicolon separated parameters used to override specific parameters on top of the selected preset (or on top of the command-line assembled one). Whenever a parameter value is a string, it should be inputted as `'\"string\"'`. 
For ex.: `"visualization.render=False;` `num_training_iterations=500;` `optimizer='rmsprop'"`| diff --git a/requirements.txt b/requirements.txt index 90c6785..2e1d515 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,7 +8,7 @@ PyOpenGL==3.1.0 scipy==0.19.0 scikit-image==0.13.0 box2d==2.3.2 -gym==0.10.5 +gym[atari]==0.10.5 bokeh==0.13.0 futures==3.1.1 wxPython==4.0.1 diff --git a/rl_coach/agents/mmc_agent.py b/rl_coach/agents/mmc_agent.py index 964d922..4e5fe0a 100644 --- a/rl_coach/agents/mmc_agent.py +++ b/rl_coach/agents/mmc_agent.py @@ -64,7 +64,7 @@ class MixedMonteCarloAgent(ValueOptimizationAgent): one_step_target = batch.rewards()[i] + \ (1.0 - batch.game_overs()[i]) * self.ap.algorithm.discount * \ q_st_plus_1[i][selected_actions[i]] - monte_carlo_target = total_returns()[i] + monte_carlo_target = total_returns[i] TD_targets[i, batch.actions()[i]] = (1 - self.mixing_rate) * one_step_target + \ self.mixing_rate * monte_carlo_target diff --git a/rl_coach/environments/doom_environment.py b/rl_coach/environments/doom_environment.py index d4269ba..74e55be 100644 --- a/rl_coach/environments/doom_environment.py +++ b/rl_coach/environments/doom_environment.py @@ -132,8 +132,12 @@ class DoomEnvironment(Environment): # load the emulator with the required level self.level = DoomLevel[level.upper()] local_scenarios_path = path.join(os.path.dirname(os.path.realpath(__file__)), 'doom') - self.scenarios_dir = local_scenarios_path if 'COACH_LOCAL' in level \ - else path.join(environ.get('VIZDOOM_ROOT'), 'scenarios') + if 'COACH_LOCAL' in level: + self.scenarios_dir = local_scenarios_path + elif 'VIZDOOM_ROOT' in environ: + self.scenarios_dir = path.join(environ.get('VIZDOOM_ROOT'), 'scenarios') + else: + self.scenarios_dir = path.join(os.path.dirname(os.path.realpath(vizdoom.__file__)), 'scenarios') self.game = vizdoom.DoomGame() self.game.load_config(path.join(self.scenarios_dir, self.level.value)) diff --git a/rl_coach/tests/README.md b/rl_coach/tests/README.md 
index 228829b..d07f35a 100644 --- a/rl_coach/tests/README.md +++ b/rl_coach/tests/README.md @@ -9,11 +9,12 @@ several parts, each testing the framework in different areas and strictness. * **Docker** - The docker image we supply checks Coach in terms of installation process, and verifies that all the components - are installed correctly. To build the Docke, use the command: + are installed correctly. To build the Docker image, use the command: ``` - docker build . -t coach - docker run -it coach /bin/bash + cd docker + make build_base && make build + make run ``` @@ -45,7 +46,7 @@ several parts, each testing the framework in different areas and strictness. The golden tests can be run using the following command: ``` - python3 rl_coach/tests/golden_tests.py + python3 rl_coach/tests/test_golden.py ``` * **Trace tests** - diff --git a/rl_coach/tests/test_eks.py b/rl_coach/tests/test_eks.py index f75ab04..1726713 100644 --- a/rl_coach/tests/test_eks.py +++ b/rl_coach/tests/test_eks.py @@ -37,6 +37,7 @@ class EKSHandler(): container = client.V1Container( name=self.test_name, image=self.image, + command=['/bin/bash', '-c'], args=[self.test_command], image_pull_policy='Always', working_dir=self.working_dir, diff --git a/rl_coach/tests/test_golden.py b/rl_coach/tests/test_golden.py index d2ce972..2bbcb82 100644 --- a/rl_coach/tests/test_golden.py +++ b/rl_coach/tests/test_golden.py @@ -94,14 +94,13 @@ def collect_presets(): yield preset_name -print(list(collect_presets())) @pytest.fixture(params=list(collect_presets())) def preset_name(request): return request.param @pytest.mark.golden_test -def test_preset_reward(preset_name, no_progress_bar=False, time_limit=60 * 60): +def test_preset_reward(preset_name, no_progress_bar=False, time_limit=60 * 60, verbose=False): preset_validation_params = validation_params(preset_name) win_size = 10 @@ -200,12 +199,12 @@ def test_preset_reward(preset_name, no_progress_bar=False, time_limit=60 * 60): else: if time.time() - start_time > 
time_limit: screen.error("Failed due to exceeding time limit", crash=False) - if args.verbose: + if verbose: screen.error("command exitcode: {}".format(p.returncode), crash=False) screen.error(open(log_file_name).read(), crash=False) elif csv_paths: screen.error("Failed due to insufficient reward", crash=False) - if args.verbose: + if verbose: screen.error("command exitcode: {}".format(p.returncode), crash=False) screen.error(open(log_file_name).read(), crash=False) screen.error("preset_validation_params.max_episodes_to_achieve_reward: {}".format( @@ -216,7 +215,7 @@ def test_preset_reward(preset_name, no_progress_bar=False, time_limit=60 * 60): screen.error("episode number: {}".format(csv['Episode #'].values[-1]), crash=False) else: screen.error("csv file never found", crash=False) - if args.verbose: + if verbose: screen.error("command exitcode: {}".format(p.returncode), crash=False) screen.error(open(log_file_name).read(), crash=False) @@ -227,12 +226,12 @@ def test_preset_reward(preset_name, no_progress_bar=False, time_limit=60 * 60): def main(): parser = argparse.ArgumentParser() - parser.add_argument('-p', '--preset', - help="(string) Name of a preset to run (as configured in presets.py)", + parser.add_argument('-p', '--preset', '--presets', + help="(string) Name of preset(s) to run (comma separated, and as configured in presets.py)", default=None, type=str) parser.add_argument('-ip', '--ignore_presets', - help="(string) Name of a preset(s) to ignore (comma separated, and as configured in presets.py)", + help="(string) Name of preset(s) to ignore (comma separated, and as configured in presets.py)", default=None, type=str) parser.add_argument('-v', '--verbose', @@ -251,7 +250,7 @@ def main(): args = parser.parse_args() if args.preset is not None: - presets_lists = [args.preset] + presets_lists = args.preset.split(',') else: presets_lists = all_presets() @@ -268,6 +267,7 @@ def main(): if args.stop_after_first_failure and fail_count > 0: break if preset_name not 
in presets_to_ignore: + print("Attempting to run Preset: %s" % preset_name) if not importable(preset_name): screen.error("Failed to load preset <{}>".format(preset_name), crash=False) fail_count += 1 @@ -278,7 +278,7 @@ def main(): continue test_count += 1 - test_passed = test_preset_reward(preset_name, args.no_progress_bar, args.time_limit) + test_passed = test_preset_reward(preset_name, args.no_progress_bar, args.time_limit, args.verbose) if not test_passed: fail_count += 1 diff --git a/rl_coach/tests/trace_tests.py b/rl_coach/tests/trace_tests.py index a307c5b..39095f7 100644 --- a/rl_coach/tests/trace_tests.py +++ b/rl_coach/tests/trace_tests.py @@ -168,12 +168,12 @@ def wait_and_check(args, processes, force=False): def main(): parser = argparse.ArgumentParser() - parser.add_argument('-p', '--preset', - help="(string) Name of a preset to run (as configured in presets.py)", + parser.add_argument('-p', '--preset', '--presets', + help="(string) Name of preset(s) to run (comma separated, as configured in presets.py)", default=None, type=str) parser.add_argument('-ip', '--ignore_presets', - help="(string) Name of a preset(s) to ignore (comma separated, and as configured in presets.py)", + help="(string) Name of preset(s) to ignore (comma separated, and as configured in presets.py)", default=None, type=str) parser.add_argument('-v', '--verbose', @@ -198,7 +198,7 @@ def main(): args.max_threads = 1 if args.preset is not None: - presets_lists = [args.preset] + presets_lists = args.preset.split(',') else: presets_lists = [f[:-3] for f in os.listdir(os.path.join('rl_coach', 'presets')) if f[-3:] == '.py' and not f == '__init__.py'] diff --git a/setup.py b/setup.py index 70790b2..9e6152a 100644 --- a/setup.py +++ b/setup.py @@ -48,6 +48,7 @@ with open(path.join(here, 'README.md'), encoding='utf-8') as f: long_description = f.read() install_requires = list() +extras = dict() with open(path.join(here, 'requirements.txt'), 'r') as f: for line in f: @@ -65,13 +66,10 @@ if not 
using_GPU: 'https://anaconda.org/intel/tensorflow/1.6.0/download/tensorflow-1.6.0-cp35-cp35m-linux_x86_64.whl'], shell=True) install_requires.append('tensorflow==1.6.0') + extras['mxnet'] = ['mxnet-mkl>=1.3.0'] else: install_requires.append('tensorflow-gpu==1.9.0') - -# Framework-specific dependencies. -extras = { - 'mxnet': ['mxnet-cu90mkl>=1.3.0'] -} + extras['mxnet'] = ['mxnet-cu90mkl>=1.3.0'] all_deps = [] for group_name in extras: