1
0
mirror of https://github.com/gryf/coach.git synced 2025-12-17 19:20:19 +01:00

Setup basic CI flow (#38)

Adds automated running of unit and integration tests (and, optionally, longer-running tests)
This commit is contained in:
Ajay Deshpande
2018-10-24 18:27:58 -07:00
committed by Scott Leishman
parent 2cc6abc3c4
commit 16b3e99f37
10 changed files with 408 additions and 129 deletions

136
.circleci/config.yml Normal file
View File

@@ -0,0 +1,136 @@
# CircleCI 2.0 configuration for coach CI.
# Builds and pushes the coach docker images to ECR, then runs the test suites
# on an AWS EKS cluster: unit/integration tests run automatically; golden and
# trace tests run behind a manual approval gate.
# NOTE(review): indentation reconstructed from a whitespace-mangled copy —
# structure follows standard CircleCI aliases/jobs/workflows layout.
aliases:
  # Shared executor: python image + fixed working directory, merged into each
  # job via `<<: *executor_prep`.
  - &executor_prep
    docker:
      - image: circleci/python:3.7.0-stretch
    working_directory: ~/repo
  - &remote_docker
    # ensure layers of constructed docker containers are cached for reuse between jobs.
    setup_remote_docker:
      docker_layer_caching: true
  - &restore_cache
    restore_cache:
      keys:
        - v1-dependencies-{{ checksum "requirements.txt" }}
        # fallback to using the latest cache if no exact match is found
        - v1-dependencies-
  - &save_cache
    save_cache:
      paths:
        - ./venv
      key: v1-dependencies-{{ checksum "requirements.txt" }}
  # Install aws/k8s tooling and point kubectl at the CI EKS cluster.
  # AWS credentials are stored base64-encoded in the CircleCI environment.
  - &aws_prep
    run:
      name: Prepare aws cli
      command: |
        sudo pip install awscli pytest kubernetes==8.0.0b1
        export AWS_ACCESS_KEY_ID=`echo ${AWS_ACCESS_KEY_ID} | base64 --decode`
        export AWS_SECRET_ACCESS_KEY=`echo ${AWS_SECRET_ACCESS_KEY} | base64 --decode`
        $(aws ecr get-login --no-include-email --region us-west-2)
        sudo curl -o /usr/local/bin/aws-iam-authenticator https://amazon-eks.s3-us-west-2.amazonaws.com/1.10.3/2018-07-26/bin/linux/amd64/aws-iam-authenticator
        sudo chmod a+x /usr/local/bin/aws-iam-authenticator
        aws eks update-kubeconfig --name coach-aws-cicd

version: 2
jobs:
  # Build coach-base and coach images, tag with `git describe`, push to ECR.
  build:
    <<: *executor_prep
    steps:
      - checkout
      - *remote_docker
      - *restore_cache
      - *aws_prep
      - run:
          name: Build and push container
          command: |
            REGISTRY=316971102342.dkr.ecr.us-west-2.amazonaws.com
            TAG=$(git describe --tags --always --dirty)
            docker pull ${REGISTRY}/coach-base:${MASTER_BRANCH}
            docker build --cache-from ${REGISTRY}/coach-base:${MASTER_BRANCH} -t ${REGISTRY}/coach-base:${TAG} -f docker/Dockerfile.base .
            docker push ${REGISTRY}/coach-base:${TAG}
            docker tag ${REGISTRY}/coach-base:${TAG} coach-base:master
            docker build -t ${REGISTRY}/coach:${TAG} -f docker/Dockerfile .
            docker push ${REGISTRY}/coach:${TAG}
          no_output_timeout: 30m
  # Each *_tests job launches a pod on EKS (via test_eks.py) running the
  # corresponding `make *_without_docker` target inside the freshly built image.
  unit_tests:
    <<: *executor_prep
    steps:
      - checkout
      - *remote_docker
      - *restore_cache
      - *aws_prep
      - run:
          name: run unit tests
          command: |
            export AWS_ACCESS_KEY_ID=`echo ${AWS_ACCESS_KEY_ID} | base64 --decode`
            export AWS_SECRET_ACCESS_KEY=`echo ${AWS_SECRET_ACCESS_KEY} | base64 --decode`
            python3 rl_coach/tests/test_eks.py -c coach-test -bn ${CIRCLE_BUILD_NUM} -tn unit-test -tc 'make unit_tests_without_docker' -i 316971102342.dkr.ecr.us-west-2.amazonaws.com/coach:$(git describe --tags --always --dirty) -cpu 2048 -mem 4096
  integration_tests:
    <<: *executor_prep
    steps:
      - checkout
      - *remote_docker
      - *restore_cache
      - *aws_prep
      - run:
          name: run integration tests
          command: |
            export AWS_ACCESS_KEY_ID=`echo ${AWS_ACCESS_KEY_ID} | base64 --decode`
            export AWS_SECRET_ACCESS_KEY=`echo ${AWS_SECRET_ACCESS_KEY} | base64 --decode`
            python3 rl_coach/tests/test_eks.py -c coach-test -bn ${CIRCLE_BUILD_NUM} -tn integration-test -tc 'make integration_tests_without_docker' -i 316971102342.dkr.ecr.us-west-2.amazonaws.com/coach:$(git describe --tags --always --dirty) -cpu 2048 -mem 4096
  golden_tests:
    <<: *executor_prep
    steps:
      - checkout
      - *remote_docker
      - *restore_cache
      - *aws_prep
      - run:
          name: run golden tests
          command: |
            export AWS_ACCESS_KEY_ID=`echo ${AWS_ACCESS_KEY_ID} | base64 --decode`
            export AWS_SECRET_ACCESS_KEY=`echo ${AWS_SECRET_ACCESS_KEY} | base64 --decode`
            python3 rl_coach/tests/test_eks.py -c coach-test -bn ${CIRCLE_BUILD_NUM} -tn golden-test -tc 'make golden_tests_without_docker' -i 316971102342.dkr.ecr.us-west-2.amazonaws.com/coach:$(git describe --tags --always --dirty) -cpu 2048 -mem 4096
  trace_tests:
    <<: *executor_prep
    steps:
      - checkout
      - *remote_docker
      - *restore_cache
      - *aws_prep
      - run:
          name: run trace tests
          command: |
            export AWS_ACCESS_KEY_ID=`echo ${AWS_ACCESS_KEY_ID} | base64 --decode`
            export AWS_SECRET_ACCESS_KEY=`echo ${AWS_SECRET_ACCESS_KEY} | base64 --decode`
            python3 rl_coach/tests/test_eks.py -c coach-test -bn ${CIRCLE_BUILD_NUM} -tn trace-test -tc 'make trace_tests_without_docker' -i 316971102342.dkr.ecr.us-west-2.amazonaws.com/coach:$(git describe --tags --always --dirty) -cpu 2048 -mem 4096

workflows:
  version: 2
  build_and_test:
    jobs:
      - build
      - unit_tests:
          requires:
            - build
      - integration_tests:
          requires:
            - build
      # Manual gate: golden/trace tests are long-running, require approval.
      - e2e_approval:
          type: approval
          requires:
            - build
      - golden_tests:
          requires:
            - e2e_approval
      - trace_tests:
          requires:
            - e2e_approval

2
.gitignore vendored
View File

@@ -20,12 +20,12 @@ rl_coach.egg*
contrib contrib
test_log_* test_log_*
dist dist
.DS_Store
datasets datasets
.cache .cache
.pytest_cache .pytest_cache
core core
trace_test* trace_test*
.DS_Store
*.swp *.swp
*.swo *.swo
.cache/ .cache/

View File

@@ -1,120 +1,4 @@
FROM nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04 FROM coach-base:master
# https://github.com/NVIDIA/nvidia-docker/issues/619
RUN rm /etc/apt/sources.list.d/cuda.list
RUN apt-get update && \
apt-get upgrade -y && \
apt-get clean autoclean && \
apt-get autoremove -y
RUN apt-get update && \
apt-get install -y python-pip && \
apt-get clean autoclean && \
apt-get autoremove -y
RUN pip install pip --upgrade
WORKDIR /root
################################
# Install apt-get Requirements #
################################
# General
RUN apt-get update && \
apt-get install -y python3-pip cmake zlib1g-dev python3-tk python-opencv && \
apt-get clean autoclean && \
apt-get autoremove -y
# Boost libraries
RUN apt-get update && \
apt-get install -y libboost-all-dev && \
apt-get clean autoclean && \
apt-get autoremove -y
# Scipy requirements
RUN apt-get update && \
apt-get install -y libblas-dev liblapack-dev libatlas-base-dev gfortran && \
apt-get clean autoclean && \
apt-get autoremove -y
# Pygame requirements
RUN apt-get update && \
apt-get install -y libsdl-dev libsdl-image1.2-dev libsdl-mixer1.2-dev libsdl-ttf2.0-dev && \
apt-get clean autoclean && \
apt-get autoremove -y
RUN apt-get update && \
apt-get install -y libsmpeg-dev libportmidi-dev libavformat-dev libswscale-dev && \
apt-get clean autoclean && \
apt-get autoremove -y
# Dashboard
RUN apt-get update && \
apt-get install -y dpkg-dev build-essential python3.5-dev libjpeg-dev libtiff-dev libsdl1.2-dev libnotify-dev \
freeglut3 freeglut3-dev libsm-dev libgtk2.0-dev libgtk-3-dev libwebkitgtk-dev libgtk-3-dev \
libwebkitgtk-3.0-dev libgstreamer-plugins-base1.0-dev && \
apt-get clean autoclean && \
apt-get autoremove -y
# Gym
RUN apt-get update && \
apt-get install -y libav-tools libsdl2-dev swig cmake && \
apt-get clean autoclean && \
apt-get autoremove -y
# Mujoco_py
RUN apt-get update && \
apt-get install -y curl libgl1-mesa-dev libgl1-mesa-glx libglew-dev libosmesa6-dev software-properties-common && \
apt-get clean autoclean && \
apt-get autoremove -y
# ViZDoom
RUN apt-get update && \
apt-get install -y build-essential zlib1g-dev libsdl2-dev libjpeg-dev \
nasm tar libbz2-dev libgtk2.0-dev cmake git libfluidsynth-dev libgme-dev \
libopenal-dev timidity libwildmidi-dev unzip wget && \
apt-get clean autoclean && \
apt-get autoremove -y
############################
# Install Pip Requirements #
############################
RUN pip3 install --upgrade pip
RUN pip3 install pytest
RUN pip3 install pytest-xdist
# initial installation of coach, so that the docker build won't install everything from scratch
RUN pip3 install rl_coach>=0.10.0
# install additional environments
RUN pip3 install gym[atari]==0.10.5
RUN pip3 install mujoco_py==1.50.1.56
RUN pip3 install vizdoom==1.1.6
# FROM ubuntu:16.04
#
# RUN apt-get update \
# && apt-get install -y \
# python3-pip cmake zlib1g-dev python3-tk python-opencv \
# libboost-all-dev \
# libblas-dev liblapack-dev libatlas-base-dev gfortran \
# libsdl-dev libsdl-image1.2-dev libsdl-mixer1.2-dev libsdl-ttf2.0-dev \
# libsmpeg-dev libportmidi-dev libavformat-dev libswscale-dev \
# dpkg-dev build-essential python3.5-dev libjpeg-dev libtiff-dev \
# libsdl1.2-dev libnotify-dev freeglut3 freeglut3-dev libsm-dev \
# libgtk2.0-dev libgtk-3-dev libwebkitgtk-dev libgtk-3-dev \
# libwebkitgtk-3.0-dev libgstreamer-plugins-base1.0-dev \
# libav-tools libsdl2-dev swig
#
# # installing python dependencies
# RUN pip3 install --upgrade pip
RUN apt-get update && apt-get install -y wget zip
RUN mkdir -p ~/.mujoco \
&& wget https://www.roboti.us/download/mjpro150_linux.zip -O mujoco.zip \
&& unzip mujoco.zip -d ~/.mujoco \
&& rm mujoco.zip
ENV LD_LIBRARY_PATH /root/.mujoco/mjpro150/bin:$LD_LIBRARY_PATH
RUN curl -o /usr/local/bin/patchelf https://s3-us-west-2.amazonaws.com/openai-sci-artifacts/manual-builds/patchelf_0.9_amd64.elf \
&& chmod +x /usr/local/bin/patchelf
RUN mkdir /root/src RUN mkdir /root/src
COPY setup.py /root/src/. COPY setup.py /root/src/.

63
docker/Dockerfile.base Normal file
View File

@@ -0,0 +1,63 @@
# Base image for coach CI: CUDA 9.0 + all apt/pip dependencies needed by the
# RL environments (gym, mujoco, vizdoom, pygame, dashboard). The main
# docker/Dockerfile builds on top of this to avoid reinstalling everything
# on every CI run.
FROM nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04
# https://github.com/NVIDIA/nvidia-docker/issues/619
RUN rm /etc/apt/sources.list.d/cuda.list
RUN apt-get update && \
apt-get upgrade -y && \
apt-get clean autoclean && \
apt-get autoremove -y && apt-get update && \
apt-get install -y python-pip && \
apt-get clean autoclean && \
apt-get autoremove -y
RUN pip install pip --upgrade
WORKDIR /root
################################
# Install apt-get Requirements #
################################
# General
# Single RUN layer keeps the image smaller than one layer per package group.
RUN apt-get update && \
apt-get install -y python3-pip cmake zlib1g-dev python3-tk python-opencv \
# Boost libraries
libboost-all-dev \
# Scipy requirements
libblas-dev liblapack-dev libatlas-base-dev gfortran \
# Pygame requirements
libsdl-dev libsdl-image1.2-dev libsdl-mixer1.2-dev libsdl-ttf2.0-dev \
libsmpeg-dev libportmidi-dev libavformat-dev libswscale-dev \
# Dashboard
dpkg-dev build-essential python3.5-dev libjpeg-dev libtiff-dev libsdl1.2-dev libnotify-dev \
freeglut3 freeglut3-dev libsm-dev libgtk2.0-dev libgtk-3-dev libwebkitgtk-dev libgtk-3-dev \
libwebkitgtk-3.0-dev libgstreamer-plugins-base1.0-dev \
# Gym
libav-tools libsdl2-dev swig cmake \
# Mujoco_py
curl libgl1-mesa-dev libgl1-mesa-glx libglew-dev libosmesa6-dev software-properties-common \
# ViZDoom
build-essential zlib1g-dev libsdl2-dev libjpeg-dev \
nasm tar libbz2-dev libgtk2.0-dev cmake git libfluidsynth-dev libgme-dev \
libopenal-dev timidity libwildmidi-dev unzip wget && \
apt-get clean autoclean && \
apt-get autoremove -y
############################
# Install Pip Requirements #
############################
RUN pip3 install --upgrade pip
RUN pip3 install pytest
RUN pip3 install pytest-xdist
# initial installation of coach, so that the docker build won't install everything from scratch
RUN pip3 install rl_coach>=0.10.0 && pip3 install gym[atari]==0.10.5 && \
pip3 install mujoco_py==1.50.1.56 && pip3 install vizdoom==1.1.6
# Mujoco runtime: download binaries; the license key is expected to be
# provided separately (see commented COPY below).
RUN mkdir -p ~/.mujoco \
&& wget https://www.roboti.us/download/mjpro150_linux.zip -O mujoco.zip \
&& unzip mujoco.zip -d ~/.mujoco \
&& rm mujoco.zip
# COPY ./mjkey.txt /root/.mujoco/
ENV LD_LIBRARY_PATH /root/.mujoco/mjpro150/bin:$LD_LIBRARY_PATH
# patchelf is required by mujoco_py to rewrite library rpaths.
RUN curl -o /usr/local/bin/patchelf https://s3-us-west-2.amazonaws.com/openai-sci-artifacts/manual-builds/patchelf_0.9_amd64.elf \
&& chmod +x /usr/local/bin/patchelf

View File

@@ -20,6 +20,11 @@ RUN_ARGUMENTS+=--rm
RUN_ARGUMENTS+=--net host RUN_ARGUMENTS+=--net host
RUN_ARGUMENTS+=-v /tmp/checkpoint:/checkpoint RUN_ARGUMENTS+=-v /tmp/checkpoint:/checkpoint
UNIT_TESTS=python3 -m pytest rl_coach/tests -m unit_test
INTEGRATION_TESTS=python3 -m pytest rl_coach/tests -m integration_test -n auto --tb=short
GOLDEN_TESTS=python3 -m pytest rl_coach/tests -m golden_test -n auto
TRACE_TESTS=python3 rl_coach/tests/trace_tests.py -prl
CONTEXT = $(realpath ..) CONTEXT = $(realpath ..)
ifndef DOCKER ifndef DOCKER
@@ -35,17 +40,16 @@ shell: build
${DOCKER} run ${RUN_ARGUMENTS} -it ${IMAGE} /bin/bash ${DOCKER} run ${RUN_ARGUMENTS} -it ${IMAGE} /bin/bash
unit_tests: build unit_tests: build
${DOCKER} run ${RUN_ARGUMENTS} -it ${IMAGE} python3 -m pytest rl_coach/tests -m unit_test -n 8 ${DOCKER} run ${RUN_ARGUMENTS} -it ${IMAGE} ${UNIT_TESTS} -n 8
integration_tests: build integration_tests: build
${DOCKER} run ${RUN_ARGUMENTS} -it ${IMAGE} python3 -m pytest rl_coach/tests -m integration_test -n auto --tb=short ${DOCKER} run ${RUN_ARGUMENTS} -it ${IMAGE} ${INTEGRATION_TESTS}
golden_tests: build golden_tests: build
# ${DOCKER} run ${RUN_ARGUMENTS} -it ${IMAGE} python3 rl_coach/tests/golden_tests.py ${DOCKER} run ${RUN_ARGUMENTS} -it ${IMAGE} ${GOLDEN_TESTS}
time ${DOCKER} run ${RUN_ARGUMENTS} -it ${IMAGE} python3 -m pytest rl_coach/tests -m golden_test -n auto
trace_tests: build trace_tests: build
${DOCKER} run ${RUN_ARGUMENTS} -it ${IMAGE} python3 rl_coach/tests/trace_tests.py -prl ${DOCKER} run ${RUN_ARGUMENTS} -it ${IMAGE} ${TRACE_TESTS}
run: build run: build
${DOCKER} run ${RUN_ARGUMENTS} -it ${IMAGE} ${DOCKER} run ${RUN_ARGUMENTS} -it ${IMAGE}
@@ -73,3 +77,15 @@ kubernetes: stop_kubernetes
push: build push: build
${DOCKER} tag ${IMAGE} ${REGISTRY}${IMAGE} ${DOCKER} tag ${IMAGE} ${REGISTRY}${IMAGE}
${DOCKER} push ${REGISTRY}${IMAGE} ${DOCKER} push ${REGISTRY}${IMAGE}
unit_tests_without_docker:
cd .. && ${UNIT_TESTS}
integration_tests_without_docker:
cd .. && ${INTEGRATION_TESTS}
golden_tests_without_docker:
cd .. && ${GOLDEN_TESTS}
trace_tests_without_docker:
cd .. && ${TRACE_TESTS}

View File

@@ -16,6 +16,4 @@ set -e
export VIZDOOM_ROOT=`pip show vizdoom 2>/dev/null | awk '/Location/{print $2}'`/vizdoom export VIZDOOM_ROOT=`pip show vizdoom 2>/dev/null | awk '/Location/{print $2}'`/vizdoom
cd /root/src/ bash -c "$@"
exec "$@"

View File

@@ -12,6 +12,7 @@ gym==0.10.5
bokeh==0.13.0 bokeh==0.13.0
futures==3.1.1 futures==3.1.1
wxPython==4.0.1 wxPython==4.0.1
kubernetes==7.0.0 kubernetes==8.0.0b1
redis==2.10.6 redis==2.10.6
minio==4.0.5 minio==4.0.5
pytest==3.8.2

View File

@@ -364,7 +364,6 @@ class GraphManager(object):
if self.agent_params.memory.memory_backend_params.run_type == "worker": if self.agent_params.memory.memory_backend_params.run_type == "worker":
data_store = get_data_store(self.data_store_params) data_store = get_data_store(self.data_store_params)
data_store.load_from_store() data_store.load_from_store()
# perform several steps of playing # perform several steps of playing
count_end = self.current_step_counter + steps count_end = self.current_step_counter + steps
while self.current_step_counter < count_end: while self.current_step_counter < count_end:

View File

@@ -36,7 +36,6 @@ agent_params.network_wrappers['main'].replace_mse_with_huber_loss = False
############### ###############
env_params = DoomEnvironmentParameters(level='basic') env_params = DoomEnvironmentParameters(level='basic')
######## ########
# Test # # Test #
######## ########

183
rl_coach/tests/test_eks.py Normal file
View File

@@ -0,0 +1,183 @@
import argparse
import pytest
import time
from kubernetes import client, config
class EKSHandler():
    """Runs a single CI test command as a pod on an AWS EKS cluster.

    Lifecycle: construct (creates a per-build namespace named
    '<test_name>-<build_num>' so concurrent builds don't collide), then
    deploy() the pod, print_logs() until it stops, get_return_status(),
    and finally cleanup().

    NOTE(review): indentation reconstructed from a whitespace-mangled copy;
    statement order and logic follow the original line-for-line.
    """

    def __init__(self, cluster, build_num, test_name, test_command, image, cpu, memory, working_dir):
        self.cluster = cluster
        self.build_num = build_num
        self.test_name = test_name
        self.test_command = test_command
        self.image = image
        self.cpu = cpu
        self.memory = memory
        # Uses the kubeconfig written by `aws eks update-kubeconfig` in CI.
        config.load_kube_config()
        self.namespace = '{}-{}'.format(test_name, build_num)
        self.corev1_api = client.CoreV1Api()
        self.create_namespace()
        self.working_dir = working_dir

    def create_namespace(self):
        """Create the per-build namespace; raises RuntimeError on API failure."""
        namespace = client.V1Namespace(
            api_version='v1',
            kind="Namespace",
            metadata=client.V1ObjectMeta(name=self.namespace)
        )
        try:
            self.corev1_api.create_namespace(namespace)
        except client.rest.ApiException as e:
            # Fixed typo in the original message ("namesapce").
            raise RuntimeError("Failed to create namespace. Got exception: {}".format(e))

    def deploy(self):
        """Create the test pod. Returns 0 on success, 1 on API failure."""
        container = client.V1Container(
            name=self.test_name,
            image=self.image,
            args=[self.test_command],
            image_pull_policy='Always',
            working_dir=self.working_dir,
            stdin=True,
            tty=True
        )
        pod_spec = client.V1PodSpec(
            containers=[container],
            restart_policy='Never'
        )
        pod = client.V1Pod(
            api_version="v1",
            kind="Pod",
            metadata=client.V1ObjectMeta(name=self.test_name),
            spec=pod_spec
        )
        try:
            self.corev1_api.create_namespaced_pod(self.namespace, pod)
        except client.rest.ApiException as e:
            print("Got exception: {} while creating a pod".format(e))
            return 1
        return 0

    def print_logs(self):
        """Tail the pod's logs until it terminates or enters an error state."""
        while True:
            time.sleep(10)
            # Try to tail the pod logs; best-effort — the pod may not be
            # ready yet, in which case we fall through to the status check.
            try:
                for line in self.corev1_api.read_namespaced_pod_log(
                    self.test_name, self.namespace, follow=True,
                    _preload_content=False
                ):
                    print(line.decode('utf-8'), flush=True, end='')
            except client.rest.ApiException as e:
                pass
            try:
                pod = self.corev1_api.read_namespaced_pod(self.test_name, self.namespace)
            except client.rest.ApiException as e:
                continue
            if not hasattr(pod, 'status') or not pod.status:
                continue
            if not hasattr(pod.status, 'container_statuses') or not pod.status.container_statuses:
                continue
            for container_status in pod.status.container_statuses:
                if container_status.state.waiting is not None:
                    # Unrecoverable waiting states — stop following logs.
                    if container_status.state.waiting.reason == 'Error' or \
                       container_status.state.waiting.reason == 'CrashLoopBackOff' or \
                       container_status.state.waiting.reason == 'ImagePullBackOff' or \
                       container_status.state.waiting.reason == 'ErrImagePull':
                        return
                if container_status.state.terminated is not None:
                    return

    def get_return_status(self):
        """Return the test pod's exit code (0 success, non-zero failure).

        This part will get executed if the pod is one of the following
        phases: not ready, failed or terminated. Check if the pod has
        errored out, else just try again.
        """
        # Get the pod
        try:
            pod = self.corev1_api.read_namespaced_pod(self.test_name, self.namespace)
        except client.rest.ApiException as e:
            return 1
        if not hasattr(pod, 'status') or not pod.status:
            return 0
        if not hasattr(pod.status, 'container_statuses') or not pod.status.container_statuses:
            return 0
        for container_status in pod.status.container_statuses:
            if container_status.state.waiting is not None:
                if container_status.state.waiting.reason == 'Error' or \
                   container_status.state.waiting.reason == 'CrashLoopBackOff' or \
                   container_status.state.waiting.reason == 'ImagePullBackOff' or \
                   container_status.state.waiting.reason == 'ErrImagePull':
                    return 1
            if container_status.state.terminated is not None:
                return container_status.state.terminated.exit_code
        # Originally fell through returning None (which callers treated as
        # failure via `!= 0`); make that failure path explicit.
        return 1

    def cleanup(self):
        """Best-effort deletion of the test pod and its namespace."""
        # Delete pod
        try:
            self.corev1_api.delete_namespaced_pod(self.test_name, self.namespace, client.V1DeleteOptions())
        except client.rest.ApiException as e:
            print("Got exception while deleting pod: {}".format(e))
        # Delete namespace
        try:
            self.corev1_api.delete_namespace(self.namespace, client.V1DeleteOptions())
        except client.rest.ApiException as e:
            print("Got exception while deleting namespace: {}".format(e))
if __name__ == '__main__':
    # CLI entry point used by the CircleCI jobs: deploy a test pod on EKS,
    # stream its logs, and fail (via pytest.fail) if deploy or the tests fail.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-c', '--cluster', help="(string) Name of the cluster", type=str, required=True
    )
    parser.add_argument(
        '-bn', '--build-num', help="(int) CI Build number", type=int, required=True
    )
    parser.add_argument(
        '-tn', '--test-name', help="(string) Name of the test", type=str, required=True
    )
    parser.add_argument(
        '-tc', '--test-command', help="(string) command to execute", type=str, required=True
    )
    parser.add_argument(
        '-i', '--image', help="(string) Container image", type=str, required=True
    )
    parser.add_argument(
        '-cpu', help="(string) Units of cpu to use", type=str, required=True
    )
    parser.add_argument(
        '-mem', help="(string) The amount in megabytes", type=str, required=True
    )
    parser.add_argument(
        '--working-dir', help="(string) The working dir in the container", type=str, required=False,
        default='/root/src/docker'
    )
    args = parser.parse_args()
    obj = EKSHandler(
        args.cluster, args.build_num, args.test_name, args.test_command,
        args.image, args.cpu, args.mem, args.working_dir
    )
    # Always clean up the namespace/pod, even on failure.
    if obj.deploy() != 0:
        obj.cleanup()
        pytest.fail("Failed to deploy")
    obj.print_logs()
    if obj.get_return_status() != 0:
        obj.cleanup()
        pytest.fail("Failed to run tests")
    obj.cleanup()