updated gifs in README + fix for multiworker crashes + improved Atari DQN and Dueling DDQN presets
@@ -17,7 +17,12 @@ Training an agent to solve an environment is as easy as running:
 coach -p CartPole_DQN -r
 ```

-<img src="img/doom_deathmatch.gif" alt="Doom Deathmatch" width="267" height="200"/> <img src="img/carla.gif" alt="CARLA" width="284" height="200"/> <img src="img/montezuma.gif" alt="MontezumaRevenge" width="152" height="200"/>
+<img src="img/fetch_slide.gif" alt="Fetch Slide"/> <img src="img/pendulum.gif" alt="Pendulum"/> <img src="img/starcraft.gif" alt="Starcraft"/>
+<br>
+<img src="img/doom_deathmatch.gif" alt="Doom Deathmatch"/> <img src="img/carla.gif" alt="CARLA"/> <img src="img/montezuma.gif" alt="MontezumaRevenge"/>
+<br>
+<img src="img/doom_health.gif" alt="Doom Health Gathering"/><img src="img/minitaur.gif" alt="PyBullet Minitaur"/> <img src="img/ant.gif" alt="Gym Extensions Ant"/>
+<br><br>

 Blog posts from the Intel® AI website:

 * [Release 0.8.0](https://ai.intel.com/reinforcement-learning-coach-intel/) (initial release)
@@ -25,10 +25,11 @@ The environments that were used for testing include:
  *Not training*

 | |**Status** |**Environments**|**Comments**|
 | ----------------------- |:--------------------------------------------------------:|:--------------:|:--------:|
-|**[DQN](dqn)** |  |Atari | Pong is not training |
+|**[DQN](dqn)** |  |Atari | |
-|**[Dueling DDQN](dueling_ddqn)**|  |Atari | Pong is not training |
+|**[Dueling DDQN](dueling_ddqn)**|  |Atari | |
 |**[Dueling DDQN with PER](dueling_ddqn_with_per)**|  |Atari | |
 |**[Bootstrapped DQN](bootstrapped_dqn)**|  |Atari | |
 |**[QR-DQN](qr_dqn)** |  |Atari | |
BIN img/ant.gif (new binary file, 1.5 MiB)
BIN img/doom_health.gif (new binary file, 3.8 MiB)
BIN img/fetch_slide.gif (new binary file, 712 KiB)
BIN img/minitaur.gif (new binary file, 846 KiB)
BIN img/pendulum.gif (new binary file, 754 KiB)
BIN img/starcraft.gif (new binary file, 3.4 MiB)
@@ -142,6 +142,9 @@ class GraphManager(object):
 config = tf.ConfigProto()
 config.allow_soft_placement = True  # allow placing ops on cpu if they are not fit for gpu
 config.gpu_options.allow_growth = True  # allow the gpu memory allocated for the worker to grow if needed
+config.gpu_options.per_process_gpu_memory_fraction = 0.2
+config.intra_op_parallelism_threads = 1
+config.inter_op_parallelism_threads = 1

 from rl_coach.architectures.tensorflow_components.distributed_tf_utils import create_and_start_parameters_server, \
     create_cluster_spec, create_worker_server_and_device
@@ -169,6 +172,8 @@ class GraphManager(object):
 config.allow_soft_placement = True  # allow placing ops on cpu if they are not fit for gpu
 config.gpu_options.allow_growth = True  # allow the gpu memory allocated for the worker to grow if needed
 # config.gpu_options.per_process_gpu_memory_fraction = 0.2
+config.intra_op_parallelism_threads = 1
+config.inter_op_parallelism_threads = 1

 if isinstance(task_parameters, DistributedTaskParameters):
     # the distributed tensorflow setting
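The added lines above are the multiworker crash fix named in the commit title: each worker is capped at a fraction of GPU memory and restricted to single-threaded op execution, so several workers can share one GPU and a modest number of CPU cores. Below is a minimal TensorFlow 1.x sketch of the same session settings outside Coach; the helper name `make_worker_session` is illustrative and not part of the repository:

```python
import tensorflow as tf  # TF 1.x API (tf.ConfigProto / tf.Session)

def make_worker_session(gpu_memory_fraction=0.2):
    config = tf.ConfigProto()
    config.allow_soft_placement = True       # fall back to CPU for ops without a GPU kernel
    config.gpu_options.allow_growth = True   # allocate GPU memory lazily instead of all at once
    # cap this process so several workers can share a single GPU
    config.gpu_options.per_process_gpu_memory_fraction = gpu_memory_fraction
    # one thread within an op and one between ops, to avoid oversubscribing CPU cores
    config.intra_op_parallelism_threads = 1
    config.inter_op_parallelism_threads = 1
    return tf.Session(config=config)
```

With `allow_growth` combined with the fraction cap, each worker starts small and can only grow up to its share of the device, which is what keeps multiple concurrent workers from exhausting GPU memory.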
@@ -20,7 +20,8 @@ schedule_params.heatup_steps = EnvironmentSteps(50000)
 # Agent #
 #########
 agent_params = DQNAgentParameters()
-agent_params.network_wrappers['main'].learning_rate = 0.00025
+# since we are using Adam instead of RMSProp, we adjust the learning rate as well
+agent_params.network_wrappers['main'].learning_rate = 0.0001

 ###############
 # Environment #
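The comment added in this preset explains the new value: the classic DQN hyperparameters assume RMSProp with a learning rate of 0.00025, while these presets train with Adam, which typically works better with a smaller step size. A rough TensorFlow 1.x illustration of the two optimizer setups; the RMSProp arguments shown are an approximate mapping of the commonly cited DQN values, given only for comparison:

```python
import tensorflow as tf  # TF 1.x optimizers

# classic DQN setup: RMSProp with roughly the Nature-paper hyperparameters
rmsprop = tf.train.RMSPropOptimizer(learning_rate=0.00025, decay=0.95, epsilon=0.01)

# when training with Adam instead, a smaller learning rate is the usual choice
adam = tf.train.AdamOptimizer(learning_rate=0.0001)
```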
@@ -23,7 +23,9 @@ schedule_params.heatup_steps = EnvironmentSteps(50000)
 # Agent #
 #########
 agent_params = DDQNAgentParameters()
-agent_params.network_wrappers['main'].learning_rate = 0.00025
+
+# since we are using Adam instead of RMSProp, we adjust the learning rate as well
+agent_params.network_wrappers['main'].learning_rate = 0.0001
 agent_params.network_wrappers['main'].middleware_parameters.scheme = MiddlewareScheme.Empty
 agent_params.network_wrappers['main'].heads_parameters = [DuelingQHeadParameters()]
 agent_params.network_wrappers['main'].rescale_gradient_from_head_by_factor = [1/math.sqrt(2)]
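For context on the last three lines of this preset: the agent keeps Double-DQN updates but attaches a dueling Q-head, which splits the network output into a state value V(s) and per-action advantages A(s, a) and recombines them, and the 1/sqrt(2) factor rescales the gradient flowing back from the head, as suggested in the Dueling DQN paper (Wang et al., 2016) where two streams feed the shared layers. A small illustration of the aggregation such a head performs (not Coach's internal code):

```python
import numpy as np

def dueling_q_values(value, advantages):
    """Combine V(s) and A(s, a) as Q(s, a) = V(s) + A(s, a) - mean_a' A(s, a')."""
    advantages = np.asarray(advantages, dtype=np.float64)
    return value + advantages - advantages.mean()

# the relative ranking of actions comes from the advantages, the overall level from V(s)
print(dueling_q_values(1.0, [1.0, 2.0, 3.0]))  # -> [0. 1. 2.]
```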