diff --git a/README.md b/README.md
index 5088a34..af2c9bf 100644
--- a/README.md
+++ b/README.md
@@ -17,7 +17,12 @@ Training an agent to solve an environment is as easy as running:
coach -p CartPole_DQN -r
```
-
+
+
+
+
+
+
Blog posts from the Intel® AI website:
* [Release 0.8.0](https://ai.intel.com/reinforcement-learning-coach-intel/) (initial release)
diff --git a/benchmarks/README.md b/benchmarks/README.md
index 0c00442..cd89030 100644
--- a/benchmarks/README.md
+++ b/benchmarks/README.md
@@ -25,10 +25,11 @@ The environments that were used for testing include:
 *Not training*
+
| |**Status** |**Environments**|**Comments**|
| ----------------------- |:--------------------------------------------------------:|:--------------:|:--------:|
-|**[DQN](dqn)** |  |Atari | Pong is not training |
-|**[Dueling DDQN](dueling_ddqn)**|  |Atari | Pong is not training |
+|**[DQN](dqn)** |  |Atari | |
+|**[Dueling DDQN](dueling_ddqn)**|  |Atari | |
|**[Dueling DDQN with PER](dueling_ddqn_with_per)**|  |Atari | |
|**[Bootstrapped DQN](bootstrapped_dqn)**|  |Atari | |
|**[QR-DQN](qr_dqn)** |  |Atari | |
diff --git a/img/ant.gif b/img/ant.gif
new file mode 100644
index 0000000..9e328fd
Binary files /dev/null and b/img/ant.gif differ
diff --git a/img/doom_health.gif b/img/doom_health.gif
new file mode 100644
index 0000000..a8072b3
Binary files /dev/null and b/img/doom_health.gif differ
diff --git a/img/fetch_slide.gif b/img/fetch_slide.gif
new file mode 100644
index 0000000..45b3466
Binary files /dev/null and b/img/fetch_slide.gif differ
diff --git a/img/minitaur.gif b/img/minitaur.gif
new file mode 100644
index 0000000..201ce35
Binary files /dev/null and b/img/minitaur.gif differ
diff --git a/img/pendulum.gif b/img/pendulum.gif
new file mode 100644
index 0000000..551e1fd
Binary files /dev/null and b/img/pendulum.gif differ
diff --git a/img/starcraft.gif b/img/starcraft.gif
new file mode 100644
index 0000000..01c7232
Binary files /dev/null and b/img/starcraft.gif differ
diff --git a/rl_coach/graph_managers/graph_manager.py b/rl_coach/graph_managers/graph_manager.py
index f398be4..083f26e 100644
--- a/rl_coach/graph_managers/graph_manager.py
+++ b/rl_coach/graph_managers/graph_manager.py
@@ -142,6 +142,9 @@ class GraphManager(object):
config = tf.ConfigProto()
config.allow_soft_placement = True # allow placing ops on cpu if they are not fit for gpu
config.gpu_options.allow_growth = True # allow the gpu memory allocated for the worker to grow if needed
+ config.gpu_options.per_process_gpu_memory_fraction = 0.2 # cap the gpu memory available to each worker process at 20%
+ config.intra_op_parallelism_threads = 1 # use a single thread within each op
+ config.inter_op_parallelism_threads = 1 # run independent ops sequentially rather than in parallel
from rl_coach.architectures.tensorflow_components.distributed_tf_utils import create_and_start_parameters_server, \
create_cluster_spec, create_worker_server_and_device
@@ -169,6 +172,8 @@ class GraphManager(object):
config.allow_soft_placement = True # allow placing ops on cpu if they are not fit for gpu
config.gpu_options.allow_growth = True # allow the gpu memory allocated for the worker to grow if needed
# config.gpu_options.per_process_gpu_memory_fraction = 0.2
+ config.intra_op_parallelism_threads = 1 # use a single thread within each op
+ config.inter_op_parallelism_threads = 1 # run independent ops sequentially rather than in parallel
if isinstance(task_parameters, DistributedTaskParameters):
# the distributed tensorflow setting
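
For reference, a minimal sketch (not part of the patch; TensorFlow 1.x API assumed) of how the session options touched above combine when a worker session is built:

```python
import tensorflow as tf

# Sketch only: the session config options set in GraphManager, gathered in one place.
config = tf.ConfigProto()
config.allow_soft_placement = True                        # fall back to CPU for ops without a GPU kernel
config.gpu_options.allow_growth = True                    # grow GPU memory on demand instead of reserving it all
config.gpu_options.per_process_gpu_memory_fraction = 0.2  # cap each worker process at 20% of GPU memory
config.intra_op_parallelism_threads = 1                   # one thread inside each op
config.inter_op_parallelism_threads = 1                   # run independent ops sequentially

sess = tf.Session(config=config)
```

Capping the per-process memory fraction alongside `allow_growth` is what lets several single-threaded workers share one GPU in the distributed setting configured here.
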
diff --git a/rl_coach/presets/Atari_DQN.py b/rl_coach/presets/Atari_DQN.py
index 989f6b5..103f1a9 100644
--- a/rl_coach/presets/Atari_DQN.py
+++ b/rl_coach/presets/Atari_DQN.py
@@ -20,7 +20,8 @@ schedule_params.heatup_steps = EnvironmentSteps(50000)
# Agent #
#########
agent_params = DQNAgentParameters()
-agent_params.network_wrappers['main'].learning_rate = 0.00025
+# since we are using Adam instead of RMSProp, we adjust the learning rate as well
+agent_params.network_wrappers['main'].learning_rate = 0.0001
###############
# Environment #
diff --git a/rl_coach/presets/Atari_Dueling_DDQN.py b/rl_coach/presets/Atari_Dueling_DDQN.py
index cc72655..9d3fcdd 100644
--- a/rl_coach/presets/Atari_Dueling_DDQN.py
+++ b/rl_coach/presets/Atari_Dueling_DDQN.py
@@ -23,7 +23,9 @@ schedule_params.heatup_steps = EnvironmentSteps(50000)
# Agent #
#########
agent_params = DDQNAgentParameters()
-agent_params.network_wrappers['main'].learning_rate = 0.00025
+
+# since we are using Adam instead of RMSProp, we adjust the learning rate as well
+agent_params.network_wrappers['main'].learning_rate = 0.0001
agent_params.network_wrappers['main'].middleware_parameters.scheme = MiddlewareScheme.Empty
agent_params.network_wrappers['main'].heads_parameters = [DuelingQHeadParameters()]
agent_params.network_wrappers['main'].rescale_gradient_from_head_by_factor = [1/math.sqrt(2)]
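
Both presets replace the classic DQN learning rate of 0.00025, which was tuned for RMSProp, with 0.0001 under Adam. As an illustration only (the `optimizer_type` field and the import path are assumptions, not taken from this patch), reverting a preset to the original pairing could look roughly like this:

```python
from rl_coach.agents.ddqn_agent import DDQNAgentParameters  # assumed import path

agent_params = DDQNAgentParameters()
# Illustration: restore the RMSProp + 0.00025 combination used by the original DQN setup.
# 'optimizer_type' is assumed to be the network-wrapper field that selects the optimizer.
agent_params.network_wrappers['main'].optimizer_type = 'RMSProp'
agent_params.network_wrappers['main'].learning_rate = 0.00025
```
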