diff --git a/benchmarks/a3c/README.md b/benchmarks/a3c/README.md
index 3a2769a..ba3749a 100644
--- a/benchmarks/a3c/README.md
+++ b/benchmarks/a3c/README.md
@@ -6,11 +6,11 @@ The parameters used for A3C are the same parameters as described in the
### Inverted Pendulum A3C - 1/2/4/8/16 workers
```bash
-python3 coach.py -p Mujoco_A3C -lvl inverted_pendulum -n 1
-python3 coach.py -p Mujoco_A3C -lvl inverted_pendulum -n 2
-python3 coach.py -p Mujoco_A3C -lvl inverted_pendulum -n 4
-python3 coach.py -p Mujoco_A3C -lvl inverted_pendulum -n 8
-python3 coach.py -p Mujoco_A3C -lvl inverted_pendulum -n 16
+coach -p Mujoco_A3C -lvl inverted_pendulum -n 1
+coach -p Mujoco_A3C -lvl inverted_pendulum -n 2
+coach -p Mujoco_A3C -lvl inverted_pendulum -n 4
+coach -p Mujoco_A3C -lvl inverted_pendulum -n 8
+coach -p Mujoco_A3C -lvl inverted_pendulum -n 16
```
@@ -19,7 +19,7 @@ python3 coach.py -p Mujoco_A3C -lvl inverted_pendulum -n 16
### Hopper A3C - 16 workers
```bash
-python3 coach.py -p Mujoco_A3C -lvl hopper -n 16
+coach -p Mujoco_A3C -lvl hopper -n 16
```
@@ -28,7 +28,7 @@ python3 coach.py -p Mujoco_A3C -lvl hopper -n 16
### Walker2D A3C - 16 workers
```bash
-python3 coach.py -p Mujoco_A3C -lvl walker2d -n 16
+coach -p Mujoco_A3C -lvl walker2d -n 16
```
@@ -37,7 +37,7 @@ python3 coach.py -p Mujoco_A3C -lvl walker2d -n 16
### Half Cheetah A3C - 16 workers
```bash
-python3 coach.py -p Mujoco_A3C -lvl half_cheetah -n 16
+coach -p Mujoco_A3C -lvl half_cheetah -n 16
```
@@ -46,7 +46,7 @@ python3 coach.py -p Mujoco_A3C -lvl half_cheetah -n 16
### Ant A3C - 16 workers
```bash
-python3 coach.py -p Mujoco_A3C -lvl ant -n 16
+coach -p Mujoco_A3C -lvl ant -n 16
```
@@ -56,7 +56,7 @@ python3 coach.py -p Mujoco_A3C -lvl ant -n 16
### Space Invaders A3C - 16 workers
```bash
-python3 coach.py -p Atari_A3C -lvl space_invaders -n 16
+coach -p Atari_A3C -lvl space_invaders -n 16
```
diff --git a/benchmarks/bootstrapped_dqn/README.md b/benchmarks/bootstrapped_dqn/README.md
index 8a5f059..1b8dd3c 100644
--- a/benchmarks/bootstrapped_dqn/README.md
+++ b/benchmarks/bootstrapped_dqn/README.md
@@ -6,7 +6,7 @@ The parameters used for Bootstrapped DQN are the same parameters as described in
### Breakout Bootstrapped DQN - single worker
```bash
-python3 coach.py -p Atari_Bootstrapped_DQN -lvl breakout
+coach -p Atari_Bootstrapped_DQN -lvl breakout
```
@@ -15,7 +15,7 @@ python3 coach.py -p Atari_Bootstrapped_DQN -lvl breakout
### Pong Bootstrapped DQN - single worker
```bash
-python3 coach.py -p Atari_Bootstrapped_DQN -lvl pong
+coach -p Atari_Bootstrapped_DQN -lvl pong
```
@@ -24,7 +24,7 @@ python3 coach.py -p Atari_Bootstrapped_DQN -lvl pong
### Space Invaders Bootstrapped DQN - single worker
```bash
-python3 coach.py -p Atari_Bootstrapped_DQN -lvl space_invaders
+coach -p Atari_Bootstrapped_DQN -lvl space_invaders
```
diff --git a/benchmarks/clipped_ppo/README.md b/benchmarks/clipped_ppo/README.md
index 00f2766..9eedf09 100644
--- a/benchmarks/clipped_ppo/README.md
+++ b/benchmarks/clipped_ppo/README.md
@@ -6,7 +6,7 @@ The parameters used for Clipped PPO are the same parameters as described in the
### Inverted Pendulum Clipped PPO - single worker
```bash
-python3 coach.py -p Mujoco_ClippedPPO -lvl inverted_pendulum
+coach -p Mujoco_ClippedPPO -lvl inverted_pendulum
```
@@ -15,7 +15,7 @@ python3 coach.py -p Mujoco_ClippedPPO -lvl inverted_pendulum
### Inverted Double Pendulum Clipped PPO - single worker
```bash
-python3 coach.py -p Mujoco_ClippedPPO -lvl inverted_double_pendulum
+coach -p Mujoco_ClippedPPO -lvl inverted_double_pendulum
```
@@ -24,7 +24,7 @@ python3 coach.py -p Mujoco_ClippedPPO -lvl inverted_double_pendulum
### Reacher Clipped PPO - single worker
```bash
-python3 coach.py -p Mujoco_ClippedPPO -lvl reacher
+coach -p Mujoco_ClippedPPO -lvl reacher
```
@@ -33,7 +33,7 @@ python3 coach.py -p Mujoco_ClippedPPO -lvl reacher
### Hopper Clipped PPO - single worker
```bash
-python3 coach.py -p Mujoco_ClippedPPO -lvl hopper
+coach -p Mujoco_ClippedPPO -lvl hopper
```
@@ -42,7 +42,7 @@ python3 coach.py -p Mujoco_ClippedPPO -lvl hopper
### Half Cheetah Clipped PPO - single worker
```bash
-python3 coach.py -p Mujoco_ClippedPPO -lvl half_cheetah
+coach -p Mujoco_ClippedPPO -lvl half_cheetah
```
@@ -51,7 +51,7 @@ python3 coach.py -p Mujoco_ClippedPPO -lvl half_cheetah
### Walker 2D Clipped PPO - single worker
```bash
-python3 coach.py -p Mujoco_ClippedPPO -lvl walker2d
+coach -p Mujoco_ClippedPPO -lvl walker2d
```
@@ -60,7 +60,7 @@ python3 coach.py -p Mujoco_ClippedPPO -lvl walker2d
### Ant Clipped PPO - single worker
```bash
-python3 coach.py -p Mujoco_ClippedPPO -lvl ant
+coach -p Mujoco_ClippedPPO -lvl ant
```
@@ -69,7 +69,7 @@ python3 coach.py -p Mujoco_ClippedPPO -lvl ant
### Swimmer Clipped PPO - single worker
```bash
-python3 coach.py -p Mujoco_ClippedPPO -lvl swimmer
+coach -p Mujoco_ClippedPPO -lvl swimmer
```
@@ -78,7 +78,7 @@ python3 coach.py -p Mujoco_ClippedPPO -lvl swimmer
### Humanoid Clipped PPO - single worker
```bash
-python3 coach.py -p Mujoco_ClippedPPO -lvl humanoid
+coach -p Mujoco_ClippedPPO -lvl humanoid
```
diff --git a/benchmarks/ddpg/README.md b/benchmarks/ddpg/README.md
index f10fa0e..0163d09 100644
--- a/benchmarks/ddpg/README.md
+++ b/benchmarks/ddpg/README.md
@@ -6,7 +6,7 @@ The parameters used for DDPG are the same parameters as described in the [origin
### Inverted Pendulum DDPG - single worker
```bash
-python3 coach.py -p Mujoco_DDPG -lvl inverted_pendulum
+coach -p Mujoco_DDPG -lvl inverted_pendulum
```
@@ -15,7 +15,7 @@ python3 coach.py -p Mujoco_DDPG -lvl inverted_pendulum
### Inverted Double Pendulum DDPG - single worker
```bash
-python3 coach.py -p Mujoco_DDPG -lvl inverted_double_pendulum
+coach -p Mujoco_DDPG -lvl inverted_double_pendulum
```
@@ -24,7 +24,7 @@ python3 coach.py -p Mujoco_DDPG -lvl inverted_double_pendulum
### Reacher DDPG - single worker
```bash
-python3 coach.py -p Mujoco_DDPG -lvl reacher
+coach -p Mujoco_DDPG -lvl reacher
```
@@ -33,7 +33,7 @@ python3 coach.py -p Mujoco_DDPG -lvl reacher
### Hopper DDPG - single worker
```bash
-python3 coach.py -p Mujoco_DDPG -lvl hopper
+coach -p Mujoco_DDPG -lvl hopper
```
@@ -42,7 +42,7 @@ python3 coach.py -p Mujoco_DDPG -lvl hopper
### Half Cheetah DDPG - single worker
```bash
-python3 coach.py -p Mujoco_DDPG -lvl half_cheetah
+coach -p Mujoco_DDPG -lvl half_cheetah
```
@@ -51,7 +51,7 @@ python3 coach.py -p Mujoco_DDPG -lvl half_cheetah
### Walker 2D DDPG - single worker
```bash
-python3 coach.py -p Mujoco_DDPG -lvl walker2d
+coach -p Mujoco_DDPG -lvl walker2d
```
@@ -60,7 +60,7 @@ python3 coach.py -p Mujoco_DDPG -lvl walker2d
### Ant DDPG - single worker
```bash
-python3 coach.py -p Mujoco_DDPG -lvl ant
+coach -p Mujoco_DDPG -lvl ant
```
@@ -69,7 +69,7 @@ python3 coach.py -p Mujoco_DDPG -lvl ant
### Swimmer DDPG - single worker
```bash
-python3 coach.py -p Mujoco_DDPG -lvl swimmer
+coach -p Mujoco_DDPG -lvl swimmer
```
@@ -78,7 +78,7 @@ python3 coach.py -p Mujoco_DDPG -lvl swimmer
### Humanoid DDPG - single worker
```bash
-python3 coach.py -p Mujoco_DDPG -lvl humanoid
+coach -p Mujoco_DDPG -lvl humanoid
```
diff --git a/benchmarks/ddpg_her/README.md b/benchmarks/ddpg_her/README.md
index 6dfdc57..10756ef 100644
--- a/benchmarks/ddpg_her/README.md
+++ b/benchmarks/ddpg_her/README.md
@@ -6,7 +6,7 @@ The parameters used for DDPG HER are the same parameters as described in the [fo
### Fetch Reach DDPG HER - single worker
```bash
-python3 coach.py -p Fetch_DDPG_HER_baselines -lvl reach
+coach -p Fetch_DDPG_HER_baselines -lvl reach
```
@@ -15,7 +15,7 @@ python3 coach.py -p Fetch_DDPG_HER_baselines -lvl reach
### Fetch Push DDPG HER - 8 workers
```bash
-python3 coach.py -p Fetch_DDPG_HER_baselines -lvl push -n 8
+coach -p Fetch_DDPG_HER_baselines -lvl push -n 8
```
@@ -24,7 +24,7 @@ python3 coach.py -p Fetch_DDPG_HER_baselines -lvl push -n 8
### Fetch Slide DDPG HER - 8 workers
```bash
-python3 coach.py -p Fetch_DDPG_HER_baselines -lvl slide -n 8
+coach -p Fetch_DDPG_HER_baselines -lvl slide -n 8
```
@@ -33,7 +33,7 @@ python3 coach.py -p Fetch_DDPG_HER_baselines -lvl slide -n 8
### Fetch Pick And Place DDPG HER - 8 workers
```bash
-python3 coach.py -p Fetch_DDPG_HER -lvl pick_and_place -n 8
+coach -p Fetch_DDPG_HER -lvl pick_and_place -n 8
```
diff --git a/benchmarks/dfp/README.md b/benchmarks/dfp/README.md
index 01ed6ae..4259d7f 100644
--- a/benchmarks/dfp/README.md
+++ b/benchmarks/dfp/README.md
@@ -6,7 +6,7 @@ The parameters used for DFP are the same parameters as described in the [origina
### Doom Basic DFP - 8 workers
```bash
-python3 coach.py -p Doom_Basic_DFP -n 8
+coach -p Doom_Basic_DFP -n 8
```
@@ -15,7 +15,7 @@ python3 coach.py -p Doom_Basic_DFP -n 8
### Doom Health (D1: Basic) DFP - 8 workers
```bash
-python3 coach.py -p Doom_Health_DFP -n 8
+coach -p Doom_Health_DFP -n 8
```
@@ -25,7 +25,7 @@ python3 coach.py -p Doom_Health_DFP -n 8
### Doom Health Supreme (D2: Navigation) DFP - 8 workers
```bash
-python3 coach.py -p Doom_Health_Supreme_DFP -n 8
+coach -p Doom_Health_Supreme_DFP -n 8
```
diff --git a/benchmarks/dqn/README.md b/benchmarks/dqn/README.md
index d617aa3..c39697f 100644
--- a/benchmarks/dqn/README.md
+++ b/benchmarks/dqn/README.md
@@ -6,7 +6,7 @@ The parameters used for DQN are the same parameters as described in the [origina
### Breakout DQN - single worker
```bash
-python3 coach.py -p Atari_DQN -lvl breakout
+coach -p Atari_DQN -lvl breakout
```
@@ -14,7 +14,7 @@ python3 coach.py -p Atari_DQN -lvl breakout
### Pong DQN - single worker
```bash
-python3 coach.py -p Atari_DQN -lvl pong
+coach -p Atari_DQN -lvl pong
```
@@ -22,7 +22,7 @@ python3 coach.py -p Atari_DQN -lvl pong
### Space Invaders DQN - single worker
```bash
-python3 coach.py -p Atari_DQN -lvl space_invaders
+coach -p Atari_DQN -lvl space_invaders
```
diff --git a/benchmarks/dueling_ddqn/README.md b/benchmarks/dueling_ddqn/README.md
index 449e5af..9aeac30 100644
--- a/benchmarks/dueling_ddqn/README.md
+++ b/benchmarks/dueling_ddqn/README.md
@@ -3,12 +3,33 @@
Each experiment uses 3 seeds and is trained for 10k environment steps.
The parameters used for Dueling DDQN are the same parameters as described in the [original paper](https://arxiv.org/abs/1511.06581).
+### Pong Dueling DDQN - single worker
+
+```bash
+coach -p Atari_Dueling_DDQN -lvl pong
+```
+
+
+
+
### Breakout Dueling DDQN - single worker
```bash
-python3 coach.py -p Atari_Dueling_DDQN -lvl breakout
+coach -p Atari_Dueling_DDQN -lvl breakout
```
+### Space Invaders Dueling DDQN - single worker
+
+```bash
+coach -p Atari_Dueling_DDQN -lvl space_invaders
+```
+
+
+
+
+
+
+
diff --git a/benchmarks/dueling_ddqn/breakout_dueling_ddqn.png b/benchmarks/dueling_ddqn/breakout_dueling_ddqn.png
index 10fdd69..833e77f 100644
Binary files a/benchmarks/dueling_ddqn/breakout_dueling_ddqn.png and b/benchmarks/dueling_ddqn/breakout_dueling_ddqn.png differ
diff --git a/benchmarks/dueling_ddqn/pong_dueling_ddqn.png b/benchmarks/dueling_ddqn/pong_dueling_ddqn.png
new file mode 100644
index 0000000..23c9a7d
Binary files /dev/null and b/benchmarks/dueling_ddqn/pong_dueling_ddqn.png differ
diff --git a/benchmarks/dueling_ddqn/space_invaders_dueling_ddqn.png b/benchmarks/dueling_ddqn/space_invaders_dueling_ddqn.png
new file mode 100644
index 0000000..f0712b1
Binary files /dev/null and b/benchmarks/dueling_ddqn/space_invaders_dueling_ddqn.png differ
diff --git a/benchmarks/dueling_ddqn_with_per/README.md b/benchmarks/dueling_ddqn_with_per/README.md
index 6cc83be..0a69e4d 100644
--- a/benchmarks/dueling_ddqn_with_per/README.md
+++ b/benchmarks/dueling_ddqn_with_per/README.md
@@ -6,7 +6,7 @@ The parameters used for Dueling DDQN with PER are the same parameters as describ
### Breakout Dueling DDQN with PER - single worker
```bash
-python3 coach.py -p Atari_Dueling_DDQN_with_PER_OpenAI -lvl breakout
+coach -p Atari_Dueling_DDQN_with_PER_OpenAI -lvl breakout
```
@@ -15,7 +15,7 @@ python3 coach.py -p Atari_Dueling_DDQN_with_PER_OpenAI -lvl breakout
### Pong Dueling DDQN with PER - single worker
```bash
-python3 coach.py -p Atari_Dueling_DDQN_with_PER_OpenAI -lvl pong
+coach -p Atari_Dueling_DDQN_with_PER_OpenAI -lvl pong
```
@@ -24,7 +24,7 @@ python3 coach.py -p Atari_Dueling_DDQN_with_PER_OpenAI -lvl pong
### Space Invaders Dueling DDQN with PER - single worker
```bash
-python3 coach.py -p Atari_Dueling_DDQN_with_PER_OpenAI -lvl space_invaders
+coach -p Atari_Dueling_DDQN_with_PER_OpenAI -lvl space_invaders
```
diff --git a/benchmarks/qr_dqn/README.md b/benchmarks/qr_dqn/README.md
index e5f558c..1b1dda6 100644
--- a/benchmarks/qr_dqn/README.md
+++ b/benchmarks/qr_dqn/README.md
@@ -6,7 +6,7 @@ The parameters used for QR-DQN are the same parameters as described in the [orig
### Breakout QR-DQN - single worker
```bash
-python3 coach.py -p Atari_QR_DQN -lvl breakout
+coach -p Atari_QR_DQN -lvl breakout
```
@@ -15,7 +15,7 @@ python3 coach.py -p Atari_QR_DQN -lvl breakout
### Pong QR-DQN - single worker
```bash
-python3 coach.py -p Atari_QR_DQN -lvl pong
+coach -p Atari_QR_DQN -lvl pong
```
diff --git a/setup.py b/setup.py
index 0f15490..018259b 100644
--- a/setup.py
+++ b/setup.py
@@ -50,7 +50,7 @@ with open(path.join(here, 'README.md'), encoding='utf-8') as f:
install_requires=[
'annoy==1.8.3', 'Pillow==4.3.0', 'matplotlib==2.0.2', 'numpy==1.14.5', 'pandas==0.22.0',
'pygame==1.9.3', 'PyOpenGL==3.1.0', 'scipy==0.19.0', 'scikit-image==0.13.0',
- 'box2d==2.3.2', 'gym==0.10.5', 'gym[atari]==0.10.5', 'bokeh==0.13.0', 'futures==3.1.1', 'wxPython==4.0.1']
+ 'box2d==2.3.2', 'gym==0.10.5', 'bokeh==0.13.0', 'futures==3.1.1', 'wxPython==4.0.1']
# check if system has CUDA enabled GPU
p = subprocess.Popen(['command -v nvidia-smi'], stdout=subprocess.PIPE, shell=True)
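
With `gym[atari]==0.10.5` no longer listed in `install_requires`, the Atari presets used throughout the benchmark READMEs above (`Atari_DQN`, `Atari_A3C`, `Atari_Dueling_DDQN`, ...) presumably rely on the extra being installed by hand. A minimal sketch, assuming the same pinned gym version and a pip-installed `coach` entry point:

```bash
# Assumed manual step, not part of this patch: pull in the Atari extra that
# install_requires no longer covers (quotes keep the shell from globbing []).
pip install 'gym[atari]==0.10.5'

# An Atari benchmark from the READMEs above should then run as usual:
coach -p Atari_DQN -lvl breakout
```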