diff --git a/README.md b/README.md
index 5088a34..af2c9bf 100644
--- a/README.md
+++ b/README.md
@@ -17,7 +17,12 @@ Training an agent to solve an environment is as easy as running:
 coach -p CartPole_DQN -r
 ```
 
-<img src="img/doom_deathmatch.gif" alt="Doom Deathmatch"/> <img src="img/carla.gif" alt="CARLA"/> <img src="img/montezuma.gif" alt="MontezumaRevenge"/>
+<img src="img/fetch_slide.gif" alt="Fetch Slide"/> <img src="img/pendulum.gif" alt="Pendulum"/> <img src="img/starcraft.gif" alt="Starcraft"/>
+<br>
+<img src="img/doom_deathmatch.gif" alt="Doom Deathmatch"/> <img src="img/carla.gif" alt="CARLA"/> <img src="img/montezuma.gif" alt="MontezumaRevenge"/>
+<br>
+<img src="img/doom_health.gif" alt="Doom Health Gathering"/> <img src="img/minitaur.gif" alt="PyBullet Minitaur"/> <img src="img/ant.gif" alt="Gym Extensions Ant"/>
+<br>
 
 Blog posts from the Intel® AI website:
 * [Release 0.8.0](https://ai.intel.com/reinforcement-learning-coach-intel/) (initial release)
diff --git a/benchmarks/README.md b/benchmarks/README.md
index 0c00442..cd89030 100644
--- a/benchmarks/README.md
+++ b/benchmarks/README.md
@@ -25,10 +25,11 @@ The environments that were used for testing include:
 
 ![#FF4040](https://placehold.it/15/FF4040/000000?text=+) *Not training*
 
+
 | |**Status** |**Environments**|**Comments**|
 | ----------------------- |:--------------------------------------------------------:|:--------------:|:--------:|
-|**[DQN](dqn)** | ![#ceffad](https://placehold.it/15/ceffad/000000?text=+) |Atari | Pong is not training |
-|**[Dueling DDQN](dueling_ddqn)**| ![#ceffad](https://placehold.it/15/ceffad/000000?text=+) |Atari | Pong is not training |
+|**[DQN](dqn)** | ![#2E8B57](https://placehold.it/15/2E8B57/000000?text=+) |Atari | |
+|**[Dueling DDQN](dueling_ddqn)**| ![#2E8B57](https://placehold.it/15/2E8B57/000000?text=+) |Atari | |
 |**[Dueling DDQN with PER](dueling_ddqn_with_per)**| ![#2E8B57](https://placehold.it/15/2E8B57/000000?text=+) |Atari | |
 |**[Bootstrapped DQN](bootstrapped_dqn)**| ![#2E8B57](https://placehold.it/15/2E8B57/000000?text=+) |Atari | |
 |**[QR-DQN](qr_dqn)** | ![#2E8B57](https://placehold.it/15/2E8B57/000000?text=+) |Atari | |
diff --git a/img/ant.gif b/img/ant.gif
new file mode 100644
index 0000000..9e328fd
Binary files /dev/null and b/img/ant.gif differ
diff --git a/img/doom_health.gif b/img/doom_health.gif
new file mode 100644
index 0000000..a8072b3
Binary files /dev/null and b/img/doom_health.gif differ
diff --git a/img/fetch_slide.gif b/img/fetch_slide.gif
new file mode 100644
index 0000000..45b3466
Binary files /dev/null and b/img/fetch_slide.gif differ
diff --git a/img/minitaur.gif b/img/minitaur.gif
new file mode 100644
index 0000000..201ce35
Binary files /dev/null and b/img/minitaur.gif differ
diff --git a/img/pendulum.gif b/img/pendulum.gif
new file mode 100644
index 0000000..551e1fd
Binary files /dev/null and b/img/pendulum.gif differ
diff --git a/img/starcraft.gif b/img/starcraft.gif
new file mode 100644
index 0000000..01c7232
Binary files /dev/null and b/img/starcraft.gif differ
diff --git a/rl_coach/graph_managers/graph_manager.py b/rl_coach/graph_managers/graph_manager.py
index f398be4..083f26e 100644
--- a/rl_coach/graph_managers/graph_manager.py
+++ b/rl_coach/graph_managers/graph_manager.py
@@ -142,6 +142,9 @@ class GraphManager(object):
             config = tf.ConfigProto()
             config.allow_soft_placement = True  # allow placing ops on cpu if they are not fit for gpu
             config.gpu_options.allow_growth = True  # allow the gpu memory allocated for the worker to grow if needed
+            config.gpu_options.per_process_gpu_memory_fraction = 0.2
+            config.intra_op_parallelism_threads = 1
+            config.inter_op_parallelism_threads = 1
 
             from rl_coach.architectures.tensorflow_components.distributed_tf_utils import create_and_start_parameters_server, \
                 create_cluster_spec, create_worker_server_and_device
@@ -169,6 +172,8 @@ class GraphManager(object):
         config.allow_soft_placement = True  # allow placing ops on cpu if they are not fit for gpu
         config.gpu_options.allow_growth = True  # allow the gpu memory allocated for the worker to grow if needed
         # config.gpu_options.per_process_gpu_memory_fraction = 0.2
+        config.intra_op_parallelism_threads = 1
+        config.inter_op_parallelism_threads = 1
 
         if isinstance(task_parameters, DistributedTaskParameters):
             # the distributed tensorflow setting
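For context on the graph_manager.py hunks above: the new ConfigProto fields cap each TensorFlow worker at a fixed slice of GPU memory and pin it to single-threaded op execution, which matters when several Coach workers share one machine. Below is a minimal, self-contained sketch in plain TensorFlow 1.x of what the resulting session configuration amounts to; it is an illustration of these settings, not Coach's own session-creation code.

```python
import tensorflow as tf  # TensorFlow 1.x API, as used by Coach at this point

# Session config mirroring the settings added in graph_manager.py.
config = tf.ConfigProto()
config.allow_soft_placement = True                         # fall back to CPU for ops without a GPU kernel
config.gpu_options.allow_growth = True                      # allocate GPU memory incrementally
config.gpu_options.per_process_gpu_memory_fraction = 0.2    # cap this process at ~20% of GPU memory
config.intra_op_parallelism_threads = 1                     # one thread inside each op
config.inter_op_parallelism_threads = 1                     # one thread scheduling ops

# A worker would then open its session with this config, so several
# workers can share a single GPU and a limited number of CPU cores.
sess = tf.Session(config=config)
```

Capping the per-process fraction alongside allow_growth keeps any single worker from claiming the whole device when multiple workers are launched in parallel.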
diff --git a/rl_coach/presets/Atari_DQN.py b/rl_coach/presets/Atari_DQN.py
index 989f6b5..103f1a9 100644
--- a/rl_coach/presets/Atari_DQN.py
+++ b/rl_coach/presets/Atari_DQN.py
@@ -20,7 +20,8 @@ schedule_params.heatup_steps = EnvironmentSteps(50000)
 # Agent #
 #########
 agent_params = DQNAgentParameters()
-agent_params.network_wrappers['main'].learning_rate = 0.00025
+# since we are using Adam instead of RMSProp, we adjust the learning rate as well
+agent_params.network_wrappers['main'].learning_rate = 0.0001
 
 ###############
 # Environment #
diff --git a/rl_coach/presets/Atari_Dueling_DDQN.py b/rl_coach/presets/Atari_Dueling_DDQN.py
index cc72655..9d3fcdd 100644
--- a/rl_coach/presets/Atari_Dueling_DDQN.py
+++ b/rl_coach/presets/Atari_Dueling_DDQN.py
@@ -23,7 +23,9 @@ schedule_params.heatup_steps = EnvironmentSteps(50000)
 # Agent #
 #########
 agent_params = DDQNAgentParameters()
-agent_params.network_wrappers['main'].learning_rate = 0.00025
+
+# since we are using Adam instead of RMSProp, we adjust the learning rate as well
+agent_params.network_wrappers['main'].learning_rate = 0.0001
 agent_params.network_wrappers['main'].middleware_parameters.scheme = MiddlewareScheme.Empty
 agent_params.network_wrappers['main'].heads_parameters = [DuelingQHeadParameters()]
 agent_params.network_wrappers['main'].rescale_gradient_from_head_by_factor = [1/math.sqrt(2)]
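The comment added to both presets explains the learning-rate change: these presets train with Adam rather than the RMSProp setup from the original DQN paper, and Adam is usually run with a smaller step size, hence 0.0001 instead of 0.00025. As a rough illustration in plain TensorFlow 1.x (not the code path Coach uses internally to build its optimizers), the two configurations correspond to:

```python
import tensorflow as tf  # TensorFlow 1.x, illustration only

# Old preset value: RMSProp with the DQN-paper learning rate.
rmsprop = tf.train.RMSPropOptimizer(learning_rate=0.00025)

# New preset value: Adam with the smaller learning rate set in the updated presets.
adam = tf.train.AdamOptimizer(learning_rate=0.0001)
```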