From f12857a8c78ea6ddd315126dd2f42e1e03c72c71 Mon Sep 17 00:00:00 2001
From: Gal Leibovich
Date: Wed, 5 Dec 2018 23:16:16 +0200
Subject: [PATCH] Docs changes - fixing blogpost links, removing importing all
 exploration policies (#139)

* updated docs

* removing imports for all exploration policies in __init__ + setting the
  right blog-post link

* small cleanups
---
 README.md                                                               |  2 +-
 docs/_modules/rl_coach/agents/agent.html                                |  2 -
 docs/_modules/rl_coach/agents/dqn_agent.html                            |  3 +-
 docs/_modules/rl_coach/base_parameters.html                             |  3 +
 docs/_modules/rl_coach/data_stores/nfs_data_store.html                  | 19 +++++-
 docs/_modules/rl_coach/data_stores/s3_data_store.html                   | 19 +++++-
 docs/_modules/rl_coach/exploration_policies/additive_noise.html         |  2 +-
 docs/_modules/rl_coach/exploration_policies/boltzmann.html              |  2 +-
 docs/_modules/rl_coach/exploration_policies/bootstrapped.html           |  2 +-
 docs/_modules/rl_coach/exploration_policies/categorical.html            |  2 +-
 docs/_modules/rl_coach/exploration_policies/continuous_entropy.html     |  2 +-
 docs/_modules/rl_coach/exploration_policies/e_greedy.html               |  2 +-
 docs/_modules/rl_coach/exploration_policies/exploration_policy.html     | 10 +--
 docs/_modules/rl_coach/exploration_policies/greedy.html                 |  2 +-
 docs/_modules/rl_coach/exploration_policies/ou_process.html             |  2 +-
 docs/_modules/rl_coach/exploration_policies/parameter_noise.html        |  5 +-
 docs/_modules/rl_coach/exploration_policies/truncated_normal.html       |  2 +-
 docs/_modules/rl_coach/exploration_policies/ucb.html                    |  2 +-
 docs/_modules/rl_coach/memories/backend/redis.html                      | 18 ++++-
 docs/_modules/rl_coach/memories/non_episodic/transition_collection.html | 19 +++++-
 docs/_modules/rl_coach/orchestrators/kubernetes_orchestrator.html       | 19 +++++-
 docs/_sources/components/exploration_policies/index.rst.txt             | 24 +++----
 docs/_sources/index.rst.txt                                             |  2 +-
 docs/components/exploration_policies/index.html                         | 64 +++++++++---------
 docs/genindex.html                                                      | 32 ++++-----
 docs/index.html                                                         |  2 +-
 docs/objects.inv                                                        | Bin 3515 -> 3571 bytes
 docs/searchindex.js                                                     |  2 +-
 docs_raw/source/components/exploration_policies/index.rst               | 24 +++----
 docs_raw/source/index.rst                                               |  2 +-
 rl_coach/agents/agent.py                                                |  2 -
 rl_coach/coach.py                                                       |  3 +-
 rl_coach/exploration_policies/__init__.py                               | 55 ---------------
 33 files changed, 191 insertions(+), 160 deletions(-)

diff --git a/README.md b/README.md
index b341cbb..f93cddd 100644
--- a/README.md
+++ b/README.md
@@ -28,7 +28,7 @@ Blog posts from the Intel® AI website:
 * [Release 0.8.0](https://ai.intel.com/reinforcement-learning-coach-intel/) (initial release)
 * [Release 0.9.0](https://ai.intel.com/reinforcement-learning-coach-carla-qr-dqn/)
 * [Release 0.10.0](https://ai.intel.com/introducing-reinforcement-learning-coach-0-10-0/)
-* Release 0.11 (current release)
+* [Release 0.11.0](https://ai.intel.com/rl-coach-data-science-at-scale) (current release)
 
 Contacting the Coach development team is also possible through the email [coach@intel.com](coach@intel.com)
diff --git a/docs/_modules/rl_coach/agents/agent.html b/docs/_modules/rl_coach/agents/agent.html
index 4c84795..896a832 100644
--- a/docs/_modules/rl_coach/agents/agent.html
+++ b/docs/_modules/rl_coach/agents/agent.html
@@ -195,13 +195,11 @@
 #
 import copy
-import os
 import random
 from collections import OrderedDict
 from typing import Dict, List, Union, Tuple
 
 import numpy as np
-from pandas import read_pickle
 from six.moves import range
 
 from rl_coach.agents.agent_interface import AgentInterface
diff --git a/docs/_modules/rl_coach/agents/dqn_agent.html b/docs/_modules/rl_coach/agents/dqn_agent.html
index 7e99453..b956527 100644
--- a/docs/_modules/rl_coach/agents/dqn_agent.html
+++ b/docs/_modules/rl_coach/agents/dqn_agent.html
@@ -215,7 +215,8 @@
         super().__init__()
         self.num_steps_between_copying_online_weights_to_target = EnvironmentSteps(10000)
         self.num_consecutive_playing_steps = EnvironmentSteps(4)
-        self.discount = 0.99
+        self.discount = 0.99
+        self.supports_parameter_noise = True
 
 
 class DQNNetworkParameters(NetworkParameters):
diff --git a/docs/_modules/rl_coach/base_parameters.html b/docs/_modules/rl_coach/base_parameters.html
index f595114..9a0297f 100644
--- a/docs/_modules/rl_coach/base_parameters.html
+++ b/docs/_modules/rl_coach/base_parameters.html
@@ -391,6 +391,9 @@
         # Should the workers wait for full episode
         self.act_for_full_episodes = False
 
+        # Support for parameter noise
+        self.supports_parameter_noise = False
+
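The new `supports_parameter_noise` flag on `AlgorithmParameters` replaces the hard-coded `isinstance(agent_params, DQNAgentParameters)` check (see the parameter_noise diff further down). A minimal sketch of how an agent could opt in under this scheme; the subclass name here is hypothetical:

```python
# Hypothetical sketch: opting an agent in to the ParameterNoise exploration
# policy after this change. Any AlgorithmParameters subclass that sets the
# new flag becomes eligible, instead of only DQNAgentParameters.
from rl_coach.base_parameters import AlgorithmParameters


class MyAlgorithmParameters(AlgorithmParameters):
    def __init__(self):
        super().__init__()
        # defaults to False in base_parameters; set True to allow ParameterNoise
        self.supports_parameter_noise = True
```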
[docs]class PresetValidationParameters(Parameters):
    def __init__(self,
diff --git a/docs/_modules/rl_coach/data_stores/nfs_data_store.html b/docs/_modules/rl_coach/data_stores/nfs_data_store.html
index ec92028..d86c06a 100644
--- a/docs/_modules/rl_coach/data_stores/nfs_data_store.html
+++ b/docs/_modules/rl_coach/data_stores/nfs_data_store.html
@@ -178,7 +178,24 @@

Source code for rl_coach.data_stores.nfs_data_store

-import uuid
+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+import uuid
 
 from rl_coach.data_stores.data_store import DataStore, DataStoreParameters
 
diff --git a/docs/_modules/rl_coach/data_stores/s3_data_store.html b/docs/_modules/rl_coach/data_stores/s3_data_store.html
index 40aba1d..dc9fd7e 100644
--- a/docs/_modules/rl_coach/data_stores/s3_data_store.html
+++ b/docs/_modules/rl_coach/data_stores/s3_data_store.html
@@ -178,7 +178,24 @@
            

Source code for rl_coach.data_stores.s3_data_store

-from rl_coach.data_stores.data_store import DataStore, DataStoreParameters
+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+from rl_coach.data_stores.data_store import DataStore, DataStoreParameters
 from minio import Minio
 from minio.error import ResponseError
 from configparser import ConfigParser, Error
diff --git a/docs/_modules/rl_coach/exploration_policies/additive_noise.html b/docs/_modules/rl_coach/exploration_policies/additive_noise.html
index 83c73ff..1bd8dca 100644
--- a/docs/_modules/rl_coach/exploration_policies/additive_noise.html
+++ b/docs/_modules/rl_coach/exploration_policies/additive_noise.html
@@ -216,7 +216,7 @@
         return 'rl_coach.exploration_policies.additive_noise:AdditiveNoise'
 
 
-[docs]class AdditiveNoise(ExplorationPolicy):
+[docs]class AdditiveNoise(ExplorationPolicy):
     """
     AdditiveNoise is an exploration policy intended for continuous action spaces. It takes the action from the agent
     and adds a Gaussian distributed noise to it. The amount of noise added to the action follows the noise amount that
diff --git a/docs/_modules/rl_coach/exploration_policies/boltzmann.html b/docs/_modules/rl_coach/exploration_policies/boltzmann.html
index ad34b34..a71de9d 100644
--- a/docs/_modules/rl_coach/exploration_policies/boltzmann.html
+++ b/docs/_modules/rl_coach/exploration_policies/boltzmann.html
@@ -215,7 +215,7 @@
-[docs]class Boltzmann(ExplorationPolicy):
+[docs]class Boltzmann(ExplorationPolicy):
     """
     The Boltzmann exploration policy is intended for discrete action spaces. It assumes that each of the possible
     actions has some value assigned to it (such as the Q value), and uses a softmax function to convert these values
diff --git a/docs/_modules/rl_coach/exploration_policies/bootstrapped.html b/docs/_modules/rl_coach/exploration_policies/bootstrapped.html
index 35058a4..ea3ac97 100644
--- a/docs/_modules/rl_coach/exploration_policies/bootstrapped.html
+++ b/docs/_modules/rl_coach/exploration_policies/bootstrapped.html
@@ -218,7 +218,7 @@
     return 'rl_coach.exploration_policies.bootstrapped:Bootstrapped'
 
-[docs]class Bootstrapped(EGreedy):
+[docs]class Bootstrapped(EGreedy):
     """
     Bootstrapped exploration policy is currently only used for discrete action spaces along with the
     Bootstrapped DQN agent. It assumes that there is an ensemble of network heads, where each one predicts the
diff --git a/docs/_modules/rl_coach/exploration_policies/categorical.html b/docs/_modules/rl_coach/exploration_policies/categorical.html
index edfcf2a..18925ee 100644
--- a/docs/_modules/rl_coach/exploration_policies/categorical.html
+++ b/docs/_modules/rl_coach/exploration_policies/categorical.html
@@ -209,7 +209,7 @@
     return 'rl_coach.exploration_policies.categorical:Categorical'
 
-[docs]class Categorical(ExplorationPolicy):
+[docs]class Categorical(ExplorationPolicy):
     """
     Categorical exploration policy is intended for discrete action spaces. It expects the action values to
     represent a probability distribution over the action, from which a single action will be sampled.
diff --git a/docs/_modules/rl_coach/exploration_policies/continuous_entropy.html b/docs/_modules/rl_coach/exploration_policies/continuous_entropy.html
index 6fb3c16..39ac379 100644
--- a/docs/_modules/rl_coach/exploration_policies/continuous_entropy.html
+++ b/docs/_modules/rl_coach/exploration_policies/continuous_entropy.html
@@ -203,7 +203,7 @@
     return 'rl_coach.exploration_policies.continuous_entropy:ContinuousEntropy'
 
-[docs]class ContinuousEntropy(AdditiveNoise):
+[docs]class ContinuousEntropy(AdditiveNoise):
     """
     Continuous entropy is an exploration policy that is actually implemented as part of the network.
     The exploration policy class is only a placeholder for choosing this policy. The exploration policy is
diff --git a/docs/_modules/rl_coach/exploration_policies/e_greedy.html b/docs/_modules/rl_coach/exploration_policies/e_greedy.html
index deecf18..2e88e22 100644
--- a/docs/_modules/rl_coach/exploration_policies/e_greedy.html
+++ b/docs/_modules/rl_coach/exploration_policies/e_greedy.html
@@ -222,7 +222,7 @@
     return 'rl_coach.exploration_policies.e_greedy:EGreedy'
 
-[docs]class EGreedy(ExplorationPolicy):
+[docs]class EGreedy(ExplorationPolicy):
     """
     e-greedy is an exploration policy that is intended for both discrete and continuous action spaces.
diff --git a/docs/_modules/rl_coach/exploration_policies/exploration_policy.html b/docs/_modules/rl_coach/exploration_policies/exploration_policy.html
index e8b56bd..bef11d5 100644
--- a/docs/_modules/rl_coach/exploration_policies/exploration_policy.html
+++ b/docs/_modules/rl_coach/exploration_policies/exploration_policy.html
@@ -210,7 +210,7 @@
     return 'rl_coach.exploration_policies.exploration_policy:ExplorationPolicy'
 
-[docs]class ExplorationPolicy(object):
+[docs]class ExplorationPolicy(object):
     """
     An exploration policy takes the predicted actions or action values from the agent, and selects the action to
     actually apply to the environment using some predefined algorithm.
@@ -222,14 +222,14 @@
         self.phase = RunPhase.HEATUP
         self.action_space = action_space
 
-[docs]    def reset(self):
+[docs]    def reset(self):
         """
         Used for resetting the exploration policy parameters when needed
         :return: None
         """
         pass
 
-[docs]    def get_action(self, action_values: List[ActionType]) -> ActionType:
+[docs]    def get_action(self, action_values: List[ActionType]) -> ActionType:
         """
         Given a list of values corresponding to each action, choose one action according to the exploration policy
@@ -243,7 +243,7 @@
         else:
             raise ValueError("The get_action function should be overridden in the inheriting exploration class")
 
-[docs]    def change_phase(self, phase):
+[docs]    def change_phase(self, phase):
         """
         Change between running phases of the algorithm
         :param phase: Either Heatup or Train
@@ -251,7 +251,7 @@
         """
         self.phase = phase
 
-[docs]    def requires_action_values(self) -> bool:
+[docs]    def requires_action_values(self) -> bool:
         """
         Allows exploration policies to define if they require the action values for the current step. This can save
         up a lot of computation. For example in e-greedy, if the random value generated is smaller
diff --git a/docs/_modules/rl_coach/exploration_policies/greedy.html b/docs/_modules/rl_coach/exploration_policies/greedy.html
index fe0d0fd..8bcfca2 100644
--- a/docs/_modules/rl_coach/exploration_policies/greedy.html
+++ b/docs/_modules/rl_coach/exploration_policies/greedy.html
@@ -209,7 +209,7 @@
     return 'rl_coach.exploration_policies.greedy:Greedy'
 
-[docs]class Greedy(ExplorationPolicy):
+[docs]class Greedy(ExplorationPolicy):
     """
     The Greedy exploration policy is intended for both discrete and continuous action spaces.
     For discrete action spaces, it always selects the action with the maximum value, as given by the agent.
diff --git a/docs/_modules/rl_coach/exploration_policies/ou_process.html b/docs/_modules/rl_coach/exploration_policies/ou_process.html
index 0df44f5..15e0dcb 100644
--- a/docs/_modules/rl_coach/exploration_policies/ou_process.html
+++ b/docs/_modules/rl_coach/exploration_policies/ou_process.html
@@ -219,7 +219,7 @@
 # Ornstein-Uhlenbeck process
 
-[docs]class OUProcess(ExplorationPolicy):
+[docs]class OUProcess(ExplorationPolicy):
     """
     OUProcess exploration policy is intended for continuous action spaces, and selects the action according to
     an Ornstein-Uhlenbeck process. The Ornstein-Uhlenbeck process implements the action as a Gaussian process, where
diff --git a/docs/_modules/rl_coach/exploration_policies/parameter_noise.html b/docs/_modules/rl_coach/exploration_policies/parameter_noise.html
index dcd0aea..fd5b326 100644
--- a/docs/_modules/rl_coach/exploration_policies/parameter_noise.html
+++ b/docs/_modules/rl_coach/exploration_policies/parameter_noise.html
@@ -210,7 +210,8 @@
 class ParameterNoiseParameters(ExplorationParameters):
     def __init__(self, agent_params: AgentParameters):
         super().__init__()
-        if not isinstance(agent_params, DQNAgentParameters):
+
+        if not agent_params.algorithm.supports_parameter_noise:
             raise ValueError("Currently only DQN variants are supported for using an exploration type of "
                              "ParameterNoise.")
 
@@ -221,7 +222,7 @@
     return 'rl_coach.exploration_policies.parameter_noise:ParameterNoise'
 
-[docs]class ParameterNoise(ExplorationPolicy):
+[docs]class ParameterNoise(ExplorationPolicy):
     """
     The ParameterNoise exploration policy is intended for both discrete and continuous action spaces.
     It applies the exploration policy by replacing all the dense network layers with noisy layers.
diff --git a/docs/_modules/rl_coach/exploration_policies/truncated_normal.html b/docs/_modules/rl_coach/exploration_policies/truncated_normal.html
index 04a6205..11b9bfc 100644
--- a/docs/_modules/rl_coach/exploration_policies/truncated_normal.html
+++ b/docs/_modules/rl_coach/exploration_policies/truncated_normal.html
@@ -218,7 +218,7 @@
     return 'rl_coach.exploration_policies.truncated_normal:TruncatedNormal'
 
-[docs]class TruncatedNormal(ExplorationPolicy):
+[docs]class TruncatedNormal(ExplorationPolicy):
     """
     The TruncatedNormal exploration policy is intended for continuous action spaces. It samples the action from
     a normal distribution, where the mean action is given by the agent, and the standard deviation can be given in t
diff --git a/docs/_modules/rl_coach/exploration_policies/ucb.html b/docs/_modules/rl_coach/exploration_policies/ucb.html
index 88b0978..562dcba 100644
--- a/docs/_modules/rl_coach/exploration_policies/ucb.html
+++ b/docs/_modules/rl_coach/exploration_policies/ucb.html
@@ -222,7 +222,7 @@
     return 'rl_coach.exploration_policies.ucb:UCB'
 
-[docs]class UCB(EGreedy):
+[docs]class UCB(EGreedy):
     """
     UCB exploration policy is following the upper confidence bound heuristic to sample actions in discrete action
     spaces. It assumes that there are multiple network heads that are predicting action values, and that the standard deviation
diff --git a/docs/_modules/rl_coach/memories/backend/redis.html b/docs/_modules/rl_coach/memories/backend/redis.html
index 842700d..b00fab0 100644
--- a/docs/_modules/rl_coach/memories/backend/redis.html
+++ b/docs/_modules/rl_coach/memories/backend/redis.html
@@ -178,7 +178,23 @@

Source code for rl_coach.memories.backend.redis

-
+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
 import redis
 import pickle
 import uuid
diff --git a/docs/_modules/rl_coach/memories/non_episodic/transition_collection.html b/docs/_modules/rl_coach/memories/non_episodic/transition_collection.html
index cb6bf56..0fcd72b 100644
--- a/docs/_modules/rl_coach/memories/non_episodic/transition_collection.html
+++ b/docs/_modules/rl_coach/memories/non_episodic/transition_collection.html
@@ -178,7 +178,24 @@
            

Source code for rl_coach.memories.non_episodic.transition_collection

-from rl_coach.core_types import Transition
+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+from rl_coach.core_types import Transition
 
 
 
[docs]class TransitionCollection(object):
diff --git a/docs/_modules/rl_coach/orchestrators/kubernetes_orchestrator.html b/docs/_modules/rl_coach/orchestrators/kubernetes_orchestrator.html
index 83db11f..9932e81 100644
--- a/docs/_modules/rl_coach/orchestrators/kubernetes_orchestrator.html
+++ b/docs/_modules/rl_coach/orchestrators/kubernetes_orchestrator.html
@@ -178,7 +178,24 @@

Source code for rl_coach.orchestrators.kubernetes_orchestrator

-import os
+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+import os
 import uuid
 import json
 import time
diff --git a/docs/_sources/components/exploration_policies/index.rst.txt b/docs/_sources/components/exploration_policies/index.rst.txt
index 10b6c77..3d56dcc 100644
--- a/docs/_sources/components/exploration_policies/index.rst.txt
+++ b/docs/_sources/components/exploration_policies/index.rst.txt
@@ -38,50 +38,50 @@ spaces.
 
 ExplorationPolicy
 -----------------
-.. autoclass:: rl_coach.exploration_policies.ExplorationPolicy
+.. autoclass:: rl_coach.exploration_policies.exploration_policy.ExplorationPolicy
    :members:
    :inherited-members:
 
 AdditiveNoise
 -------------
-.. autoclass:: rl_coach.exploration_policies.AdditiveNoise
+.. autoclass:: rl_coach.exploration_policies.additive_noise.AdditiveNoise
 
 Boltzmann
 ---------
-.. autoclass:: rl_coach.exploration_policies.Boltzmann
+.. autoclass:: rl_coach.exploration_policies.boltzmann.Boltzmann
 
 Bootstrapped
 ------------
-.. autoclass:: rl_coach.exploration_policies.Bootstrapped
+.. autoclass:: rl_coach.exploration_policies.bootstrapped.Bootstrapped
 
 Categorical
 -----------
-.. autoclass:: rl_coach.exploration_policies.Categorical
+.. autoclass:: rl_coach.exploration_policies.categorical.Categorical
 
 ContinuousEntropy
 -----------------
-.. autoclass:: rl_coach.exploration_policies.ContinuousEntropy
+.. autoclass:: rl_coach.exploration_policies.continuous_entropy.ContinuousEntropy
 
 EGreedy
 -------
-.. autoclass:: rl_coach.exploration_policies.EGreedy
+.. autoclass:: rl_coach.exploration_policies.e_greedy.EGreedy
 
 Greedy
 ------
-.. autoclass:: rl_coach.exploration_policies.Greedy
+.. autoclass:: rl_coach.exploration_policies.greedy.Greedy
 
 OUProcess
 ---------
-.. autoclass:: rl_coach.exploration_policies.OUProcess
+.. autoclass:: rl_coach.exploration_policies.ou_process.OUProcess
 
 ParameterNoise
 --------------
-.. autoclass:: rl_coach.exploration_policies.ParameterNoise
+.. autoclass:: rl_coach.exploration_policies.parameter_noise.ParameterNoise
 
 TruncatedNormal
 ---------------
-.. autoclass:: rl_coach.exploration_policies.TruncatedNormal
+.. autoclass:: rl_coach.exploration_policies.truncated_normal.TruncatedNormal
 
 UCB
 ---
-.. autoclass:: rl_coach.exploration_policies.UCB
\ No newline at end of file
+.. autoclass:: rl_coach.exploration_policies.ucb.UCB
\ No newline at end of file
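For preset authors, the practical effect of removing the re-exports from `rl_coach/exploration_policies/__init__.py` (mirrored by the autoclass path changes above) is that exploration policies must now be imported from their defining modules. A short sketch, assuming the stock `EGreedyParameters` and `LinearSchedule` helpers:

```python
# Sketch of the import-path change this commit implies.

# before (stops working once the __init__ re-exports are removed):
# from rl_coach.exploration_policies import EGreedyParameters

# after: import from the defining module
from rl_coach.exploration_policies.e_greedy import EGreedyParameters
from rl_coach.schedules import LinearSchedule

# e.g. decay epsilon linearly from 1.0 to 0.01 over 10000 steps
exploration = EGreedyParameters()
exploration.epsilon_schedule = LinearSchedule(1.0, 0.01, 10000)
```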
diff --git a/docs/_sources/index.rst.txt b/docs/_sources/index.rst.txt
index ca786ee..16c7024 100644
--- a/docs/_sources/index.rst.txt
+++ b/docs/_sources/index.rst.txt
@@ -25,7 +25,7 @@ Blog posts from the Intel® AI website:
 
 * `Release 0.10.0 <https://ai.intel.com/introducing-reinforcement-learning-coach-0-10-0/>`_
 
-* `Release 0.11.0 `_ (current release)
+* `Release 0.11.0 <https://ai.intel.com/rl-coach-data-science-at-scale>`_ (current release)
 
 You can find more details in the `GitHub repository <https://github.com/NervanaSystems/coach>`_.
 
diff --git a/docs/components/exploration_policies/index.html b/docs/components/exploration_policies/index.html
index f9d658a..388bbc8 100644
--- a/docs/components/exploration_policies/index.html
+++ b/docs/components/exploration_policies/index.html
@@ -264,8 +264,8 @@ spaces.

ExplorationPolicy

-
-class rl_coach.exploration_policies.ExplorationPolicy(action_space: rl_coach.spaces.ActionSpace)[source]
+
+class rl_coach.exploration_policies.exploration_policy.ExplorationPolicy(action_space: rl_coach.spaces.ActionSpace)[source]

An exploration policy takes the predicted actions or action values from the agent, and selects the action to actually apply to the environment using some predefined algorithm.

@@ -277,16 +277,16 @@ actually apply to the environment using some predefined algorithm.

-
-change_phase(phase)[source]
+
+change_phase(phase)[source]

Change between running phases of the algorithm :param phase: Either Heatup or Train :return: none

-
-get_action(action_values: List[Union[int, float, numpy.ndarray, List]]) → Union[int, float, numpy.ndarray, List][source]
+
+get_action(action_values: List[Union[int, float, numpy.ndarray, List]]) → Union[int, float, numpy.ndarray, List][source]

Given a list of values corresponding to each action, choose one action according to the exploration policy
:param action_values: A list of action values
@@ -294,8 +294,8 @@ choose one actions according to the exploration policy

-
-requires_action_values() → bool[source]
+
+requires_action_values() → bool[source]

Allows exploration policies to define if they require the action values for the current step. This can save
up a lot of computation. For example in e-greedy, if the random value generated is smaller
than epsilon, the action is completely random, and the action values don’t need to be calculated
@@ -303,8 +303,8 @@ than epsilon, the action is completely random, and the action values don’t nee

-
-reset()[source]
+
+reset()[source]

Used for resetting the exploration policy parameters when needed :return: None
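A hedged sketch of subclassing the interface above; the policy class itself is invented for illustration, and only get_action is overridden:

```python
# Illustrative ExplorationPolicy subclass (not part of Coach): greedy
# selection that breaks ties between equally-valued actions at random.
from typing import List

import numpy as np

from rl_coach.core_types import ActionType
from rl_coach.exploration_policies.exploration_policy import ExplorationPolicy


class RandomTieBreakGreedy(ExplorationPolicy):
    def get_action(self, action_values: List[ActionType]) -> ActionType:
        values = np.asarray(action_values)
        best = np.flatnonzero(values == values.max())  # indices of all maxima
        return int(np.random.choice(best))
```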

@@ -315,8 +315,8 @@ than epsilon, the action is completely random, and the action values don’t nee

AdditiveNoise

-
-class rl_coach.exploration_policies.AdditiveNoise(action_space: rl_coach.spaces.ActionSpace, noise_percentage_schedule: rl_coach.schedules.Schedule, evaluation_noise_percentage: float)[source]
+
+class rl_coach.exploration_policies.additive_noise.AdditiveNoise(action_space: rl_coach.spaces.ActionSpace, noise_percentage_schedule: rl_coach.schedules.Schedule, evaluation_noise_percentage: float)[source]

AdditiveNoise is an exploration policy intended for continuous action spaces. It takes the action from the agent
and adds a Gaussian distributed noise to it. The amount of noise added to the action follows the noise amount
that can be given in two different ways:
@@ -343,8 +343,8 @@ of the action space
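A minimal numpy sketch of the additive-noise idea described above, not the Coach implementation; `action_range` and `noise_percentage` are illustrative names:

```python
import numpy as np


def additive_noise_action(action: np.ndarray,
                          action_range: np.ndarray,
                          noise_percentage: float) -> np.ndarray:
    # standard deviation is a fraction of the total action-space range
    std = noise_percentage * action_range
    return action + np.random.normal(0.0, std)
```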

Boltzmann

-
-class rl_coach.exploration_policies.Boltzmann(action_space: rl_coach.spaces.ActionSpace, temperature_schedule: rl_coach.schedules.Schedule)[source]
+
+class rl_coach.exploration_policies.boltzmann.Boltzmann(action_space: rl_coach.spaces.ActionSpace, temperature_schedule: rl_coach.schedules.Schedule)[source]

The Boltzmann exploration policy is intended for discrete action spaces. It assumes that each of the possible
actions has some value assigned to it (such as the Q value), and uses a softmax function to convert these values
into a distribution over the actions. It then samples the action for playing out of the calculated distribution.
@@ -367,8 +367,8 @@ An additional temperature schedule can be given by the user, and will control th
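A sketch of Boltzmann sampling over Q values with a temperature, as described above; a high temperature flattens the distribution, a low one sharpens it toward greedy:

```python
import numpy as np


def boltzmann_action(q_values: np.ndarray, temperature: float) -> int:
    logits = q_values / temperature
    probs = np.exp(logits - logits.max())  # subtract max for numerical stability
    probs /= probs.sum()
    return int(np.random.choice(len(q_values), p=probs))
```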

Bootstrapped

-
-class rl_coach.exploration_policies.Bootstrapped(action_space: rl_coach.spaces.ActionSpace, epsilon_schedule: rl_coach.schedules.Schedule, evaluation_epsilon: float, architecture_num_q_heads: int, continuous_exploration_policy_parameters: rl_coach.exploration_policies.exploration_policy.ExplorationParameters = <rl_coach.exploration_policies.additive_noise.AdditiveNoiseParameters object>)[source]
+
+class rl_coach.exploration_policies.bootstrapped.Bootstrapped(action_space: rl_coach.spaces.ActionSpace, epsilon_schedule: rl_coach.schedules.Schedule, evaluation_epsilon: float, architecture_num_q_heads: int, continuous_exploration_policy_parameters: rl_coach.exploration_policies.exploration_policy.ExplorationParameters = <rl_coach.exploration_policies.additive_noise.AdditiveNoiseParameters object>)[source]

Bootstrapped exploration policy is currently only used for discrete action spaces along with the Bootstrapped
DQN agent. It assumes that there is an ensemble of network heads, where each one predicts the values for all
the possible actions. For each episode, a single head is selected to lead the agent, according
@@ -401,8 +401,8 @@ if the e-greedy is used for a continuous policy
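A sketch of the bootstrapped-heads idea: one head is picked per episode and leads the agent greedily for the whole episode. `num_heads` and `q_per_head` are illustrative stand-ins for the ensemble described above:

```python
import numpy as np


class BootstrappedHeadSelector:
    def __init__(self, num_heads: int):
        self.num_heads = num_heads
        self.current_head = 0

    def on_episode_start(self) -> None:
        # a single head is (re)selected to lead the agent each episode
        self.current_head = np.random.randint(self.num_heads)

    def get_action(self, q_per_head: np.ndarray) -> int:
        # q_per_head has shape (num_heads, num_actions)
        return int(np.argmax(q_per_head[self.current_head]))
```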

Categorical

-
-class rl_coach.exploration_policies.Categorical(action_space: rl_coach.spaces.ActionSpace)[source]
+
+class rl_coach.exploration_policies.categorical.Categorical(action_space: rl_coach.spaces.ActionSpace)[source]

Categorical exploration policy is intended for discrete action spaces. It expects the action values to represent
a probability distribution over the action, from which a single action will be sampled. In evaluation, the
action that has the highest probability will be selected. This is particularly useful for
@@ -421,8 +421,8 @@ actor-critic schemes, where the actors output is a probability distribution over
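A sketch of categorical sampling from action probabilities (e.g. an actor-critic policy head), with the greedy argmax used in evaluation:

```python
import numpy as np


def categorical_action(action_probs: np.ndarray, evaluation: bool = False) -> int:
    if evaluation:
        return int(np.argmax(action_probs))       # most probable action
    return int(np.random.choice(len(action_probs), p=action_probs))
```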

ContinuousEntropy

-
-class rl_coach.exploration_policies.ContinuousEntropy(action_space: rl_coach.spaces.ActionSpace, noise_percentage_schedule: rl_coach.schedules.Schedule, evaluation_noise_percentage: float)[source]
+
+class rl_coach.exploration_policies.continuous_entropy.ContinuousEntropy(action_space: rl_coach.spaces.ActionSpace, noise_percentage_schedule: rl_coach.schedules.Schedule, evaluation_noise_percentage: float)[source]

Continuous entropy is an exploration policy that is actually implemented as part of the network.
The exploration policy class is only a placeholder for choosing this policy. The exploration policy is
implemented by adding a regularization factor to the network loss, which regularizes the entropy of the action.
@@ -453,8 +453,8 @@ of the action space
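Since the policy class is only a placeholder, the actual mechanism lives in the loss. A conceptual sketch of entropy regularization for a diagonal Gaussian policy head, where `beta` (illustrative name) weighs the entropy bonus:

```python
import numpy as np


def gaussian_entropy(std: np.ndarray) -> float:
    # differential entropy of a diagonal Gaussian: 0.5 * log(2*pi*e*sigma^2) per dim
    return float(np.sum(0.5 * np.log(2.0 * np.pi * np.e * std ** 2)))


def regularized_loss(policy_loss: float, std: np.ndarray, beta: float) -> float:
    # subtracting the entropy term encourages the policy to stay stochastic
    return policy_loss - beta * gaussian_entropy(std)
```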

EGreedy

-
-class rl_coach.exploration_policies.EGreedy(action_space: rl_coach.spaces.ActionSpace, epsilon_schedule: rl_coach.schedules.Schedule, evaluation_epsilon: float, continuous_exploration_policy_parameters: rl_coach.exploration_policies.exploration_policy.ExplorationParameters = <rl_coach.exploration_policies.additive_noise.AdditiveNoiseParameters object>)[source]
+
+class rl_coach.exploration_policies.e_greedy.EGreedy(action_space: rl_coach.spaces.ActionSpace, epsilon_schedule: rl_coach.schedules.Schedule, evaluation_epsilon: float, continuous_exploration_policy_parameters: rl_coach.exploration_policies.exploration_policy.ExplorationParameters = <rl_coach.exploration_policies.additive_noise.AdditiveNoiseParameters object>)[source]

e-greedy is an exploration policy that is intended for both discrete and continuous action spaces.

For discrete action spaces, it assumes that each action is assigned a value, and it selects the action with the
highest value with probability 1 - epsilon. Otherwise, it selects an action sampled uniformly out of all the
possible actions.
@@ -485,8 +485,8 @@ if the e-greedy is used for a continuous policy
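A sketch of the discrete e-greedy rule just described: a uniformly random action with probability epsilon, the greedy action otherwise:

```python
import numpy as np


def e_greedy_action(q_values: np.ndarray, epsilon: float) -> int:
    if np.random.rand() < epsilon:
        return int(np.random.randint(len(q_values)))  # explore
    return int(np.argmax(q_values))                   # exploit
```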

Greedy

-
-class rl_coach.exploration_policies.Greedy(action_space: rl_coach.spaces.ActionSpace)[source]
+
+class rl_coach.exploration_policies.greedy.Greedy(action_space: rl_coach.spaces.ActionSpace)[source]

The Greedy exploration policy is intended for both discrete and continuous action spaces. For discrete action spaces, it always selects the action with the maximum value, as given by the agent. For continuous action spaces, it always return the exact action, as it was given by the agent.
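For discrete spaces, greedy selection reduces to an argmax over the agent's action values, as in this sketch:

```python
import numpy as np


def greedy_action(action_values: np.ndarray) -> int:
    return int(np.argmax(action_values))
```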

@@ -504,8 +504,8 @@ For continuous action spaces, it always return the exact action, as it was given

OUProcess

-
-class rl_coach.exploration_policies.OUProcess(action_space: rl_coach.spaces.ActionSpace, mu: float = 0, theta: float = 0.15, sigma: float = 0.2, dt: float = 0.01)[source]
+
+class rl_coach.exploration_policies.ou_process.OUProcess(action_space: rl_coach.spaces.ActionSpace, mu: float = 0, theta: float = 0.15, sigma: float = 0.2, dt: float = 0.01)[source]

OUProcess exploration policy is intended for continuous action spaces, and selects the action according to an Ornstein-Uhlenbeck process. The Ornstein-Uhlenbeck process implements the action as a Gaussian process, where the samples are correlated between consequent time steps.
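A sketch of one Ornstein-Uhlenbeck step using the constructor defaults shown above (mu=0, theta=0.15, sigma=0.2, dt=0.01); successive samples are correlated, which suits continuous control:

```python
import numpy as np


def ou_step(state: np.ndarray, mu: float = 0.0, theta: float = 0.15,
            sigma: float = 0.2, dt: float = 0.01) -> np.ndarray:
    # mean-reverting drift toward mu plus scaled Gaussian diffusion
    noise = np.random.normal(size=state.shape)
    return state + theta * (mu - state) * dt + sigma * np.sqrt(dt) * noise
```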

@@ -523,8 +523,8 @@ the samples are correlated between consequent time steps.

ParameterNoise

-
-class rl_coach.exploration_policies.ParameterNoise(network_params: Dict[str, rl_coach.base_parameters.NetworkParameters], action_space: rl_coach.spaces.ActionSpace)[source]
+
+class rl_coach.exploration_policies.parameter_noise.ParameterNoise(network_params: Dict[str, rl_coach.base_parameters.NetworkParameters], action_space: rl_coach.spaces.ActionSpace)[source]

The ParameterNoise exploration policy is intended for both discrete and continuous action spaces. It applies
the exploration policy by replacing all the dense network layers with noisy layers. The noisy layers have both
weight means and weight standard deviations, and for each forward pass of the network
@@ -545,8 +545,8 @@ values.
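A conceptual numpy sketch of a noisy dense layer as described above (not Coach's TensorFlow implementation): weights have learned means and standard deviations, and fresh noise is drawn on each forward pass:

```python
import numpy as np


def noisy_dense_forward(x, w_mu, w_sigma, b_mu, b_sigma):
    # sample weights and biases around their learned means
    w = w_mu + w_sigma * np.random.normal(size=w_mu.shape)
    b = b_mu + b_sigma * np.random.normal(size=b_mu.shape)
    return x @ w + b
```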

TruncatedNormal

-
-class rl_coach.exploration_policies.TruncatedNormal(action_space: rl_coach.spaces.ActionSpace, noise_percentage_schedule: rl_coach.schedules.Schedule, evaluation_noise_percentage: float, clip_low: float, clip_high: float)[source]
+
+class rl_coach.exploration_policies.truncated_normal.TruncatedNormal(action_space: rl_coach.spaces.ActionSpace, noise_percentage_schedule: rl_coach.schedules.Schedule, evaluation_noise_percentage: float, clip_low: float, clip_high: float)[source]

The TruncatedNormal exploration policy is intended for continuous action spaces. It samples the action from
a normal distribution, where the mean action is given by the agent, and the standard deviation can be given
in two different ways:
@@ -575,8 +575,8 @@ of the action space
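A sketch of sampling from a truncated normal around the agent's mean action using scipy; `clip_low`/`clip_high` match the constructor arguments above:

```python
from scipy.stats import truncnorm


def truncated_normal_action(mean: float, std: float,
                            clip_low: float, clip_high: float) -> float:
    # truncation bounds expressed in standard-normal units
    a = (clip_low - mean) / std
    b = (clip_high - mean) / std
    return float(truncnorm.rvs(a, b, loc=mean, scale=std))
```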

UCB

-
-class rl_coach.exploration_policies.UCB(action_space: rl_coach.spaces.ActionSpace, epsilon_schedule: rl_coach.schedules.Schedule, evaluation_epsilon: float, architecture_num_q_heads: int, lamb: int, continuous_exploration_policy_parameters: rl_coach.exploration_policies.exploration_policy.ExplorationParameters = <rl_coach.exploration_policies.additive_noise.AdditiveNoiseParameters object>)[source]
+
+class rl_coach.exploration_policies.ucb.UCB(action_space: rl_coach.spaces.ActionSpace, epsilon_schedule: rl_coach.schedules.Schedule, evaluation_epsilon: float, architecture_num_q_heads: int, lamb: int, continuous_exploration_policy_parameters: rl_coach.exploration_policies.exploration_policy.ExplorationParameters = <rl_coach.exploration_policies.additive_noise.AdditiveNoiseParameters object>)[source]

UCB exploration policy is following the upper confidence bound heuristic to sample actions in discrete action
spaces. It assumes that there are multiple network heads that are predicting action values, and that the
standard deviation between the heads predictions represents the uncertainty of the agent in each of the actions.
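A sketch of the UCB rule just described: the mean head prediction plus an uncertainty bonus proportional to the standard deviation across heads, weighed by lambda:

```python
import numpy as np


def ucb_action(q_per_head: np.ndarray, lamb: float) -> int:
    # q_per_head has shape (num_heads, num_actions)
    mean = q_per_head.mean(axis=0)
    std = q_per_head.std(axis=0)
    return int(np.argmax(mean + lamb * std))
```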
diff --git a/docs/genindex.html b/docs/genindex.html
index 8198172..e4ca940 100644
--- a/docs/genindex.html
+++ b/docs/genindex.html
@@ -226,7 +226,7 @@
  • ActorCriticAlgorithmParameters (class in rl_coach.agents.actor_critic_agent)
-  • AdditiveNoise (class in rl_coach.exploration_policies)
+  • AdditiveNoise (class in rl_coach.exploration_policies.additive_noise)
  • CarlaEnvironment (class in rl_coach.environments.carla_environment)
-  • Categorical (class in rl_coach.exploration_policies)
+  • Categorical (class in rl_coach.exploration_policies.categorical)
  • CategoricalDQNAlgorithmParameters (class in rl_coach.agents.categorical_dqn_agent)
-  • change_phase() (rl_coach.exploration_policies.ExplorationPolicy method)
+  • change_phase() (rl_coach.exploration_policies.exploration_policy.ExplorationPolicy method)
  • choose_action() (rl_coach.agents.agent.Agent method)
@@ -328,7 +328,7 @@
  • construct() (rl_coach.architectures.architecture.Architecture static method)
-  • ContinuousEntropy (class in rl_coach.exploration_policies)
+  • ContinuousEntropy (class in rl_coach.exploration_policies.continuous_entropy)
  • ControlSuiteEnvironment (class in rl_coach.environments.control_suite_environment)
@@ -368,7 +368,7 @@

    E

@@ -416,7 +416,7 @@
-  • OUProcess (class in rl_coach.exploration_policies)
+  • OUProcess (class in rl_coach.exploration_policies.ou_process)
@@ -640,7 +640,7 @@
  • parallel_prediction() (rl_coach.architectures.network_wrapper.NetworkWrapper method)
-  • ParameterNoise (class in rl_coach.exploration_policies)
+  • ParameterNoise (class in rl_coach.exploration_policies.parameter_noise)
  • parent (rl_coach.agents.agent.Agent attribute)
@@ -714,9 +714,9 @@
  • render() (rl_coach.environments.environment.Environment method)
-  • requires_action_values() (rl_coach.exploration_policies.ExplorationPolicy method)
+  • requires_action_values() (rl_coach.exploration_policies.exploration_policy.ExplorationPolicy method)
-  • reset() (rl_coach.exploration_policies.ExplorationPolicy method)
+  • reset() (rl_coach.exploration_policies.exploration_policy.ExplorationPolicy method)
  • reset_accumulated_gradients() (rl_coach.architectures.architecture.Architecture method)
@@ -870,7 +870,7 @@
  • TransitionCollection (class in rl_coach.memories.non_episodic)
-  • TruncatedNormal (class in rl_coach.exploration_policies)
+  • TruncatedNormal (class in rl_coach.exploration_policies.truncated_normal)
@@ -878,7 +878,7 @@

U

You can find more details in the GitHub repository.

diff --git a/docs/objects.inv b/docs/objects.inv
index b89806e0077b8615925f45085f97c5d6881eaf89..4c3b3497bb462a57f67ddc73cbea16f61a4c8131 100644
GIT binary patch
[base85-encoded binary delta omitted]
diff --git a/docs/searchindex.js b/docs/searchindex.js
index f7e02fe..5feb194 100644
--- a/docs/searchindex.js
+++ b/docs/searchindex.js
@@ -1 +1 @@
[minified Search.setIndex payload omitted; truncated in source]
save_sec:0,child:23,chmod:41,choic:[35,41],choos:[3,15,20,27,28,30,34,35,38,40,47,48,49],choose_act:[3,35,38,48],chosen:[3,20,27,30,35,48],chunk:10,cil:47,cil_ag:2,cilalgorithmparamet:2,classic_control:41,clean:[26,35,41],cli:41,clip:[3,7,10,23,29,34,47],clip_action_to_spac:34,clip_critic_target:7,clip_gradi:23,clip_high:27,clip_likelihood_ratio_using_epsilon:[6,10],clip_low:27,clip_max:29,clip_min:29,clipbyglobalnorm:23,clipped_ppo_ag:6,clippedppoalgorithmparamet:6,clipping_high:29,clipping_low:29,clone:[3,47],close:26,cmake:41,coach:[0,3,23,25,26,27,28,32,33,35,38,42,43,44,47,49],code:[36,38,47],coeffici:[6,10,23,27,31],collect:[3,6,9,10,17,23,24,31,38,43,46,48,49],collect_sav:[3,23,48],color:29,com:41,combin:[22,40,46,47],comma:0,command:[38,41,49],common:[35,37,41,49],commun:39,compar:[0,10,15,47],complet:[24,27,38],complex:[23,28,38,40,47,49],compon:[3,12,22,23,27,33,35,38,46,48,49],composit:[3,48],compositeag:[3,48],comput:[23,27],concat:23,concentr:38,condit:[0,3],confid:27,config:[26,49],configur:[3,5,9,35,41,48],confus:38,connect:23,connectionist:9,consecut:[7,19],consequ:[17,27],consid:[5,30,37],consist:[7,26,29,30,34,38,44],constantli:49,constantschedul:31,constrain:30,construct:[23,31],consumpt:29,contain:[0,1,2,3,11,23,24,26,36,38,48,49],content:41,contin:39,continu:[1,2,5,7,8,9,18,27,28,30,34,43],continuous_exploration_policy_paramet:27,contribut:[4,46],control:[2,3,5,6,7,10,23,27,29,37,44,46,47,48],control_suite_environ:26,controlsuiteenviron:26,conveni:[37,49],converg:9,convers:28,convert:[3,24,27,29,34,38,40,48],convolut:[23,40],coordin:30,copi:[7,11,12,13,14,16,17,18,20,21,22,23,41],core:[3,46,48],core_typ:[3,24,26,34,48],correct:[3,47],correctli:23,correl:27,correpond:24,correspond:[2,3,4,12,13,23,24,27,29,34,36,48],could:[3,23,34,41,48],count:16,countabl:30,counter:[3,48],counterpart:40,cpu:[0,23],crd:49,creat:[3,17,23,29,36,48,49],create_network:[3,48],create_target_network:23,creation:[3,48],credenti:41,critic:[3,6,7,10,27,40,47],crop:[29,30],crop_high:29,crop_low:29,cross:[1,12,22],csv:0,ctrl:37,cuda:41,cudnn7:41,curl:41,curr_stat:[3,35,48],current:[0,1,2,3,4,6,7,8,9,10,11,13,14,16,18,19,20,21,23,24,26,27,29,30,34,35,38,46,47,48],custom:[26,27,34,35,38],custom_reward_threshold:26,cycl:38,dai:49,dashboard:[0,3,41,46,48],data:[0,9,17,23,31,38,39,41,43,46,47,49],data_stor:[25,41],dataset:[6,10,47,49],date:[19,40,47,49],dcp:[41,49],ddpg:47,ddpg_agent:7,ddpgalgorithmparamet:7,ddqn:[16,20,47],deal:47,debug:[0,37,46],decai:[5,6,10,23],decid:[0,3,4,26,35,48],decis:[3,48],decod:41,dedic:23,deep:[0,3,5,11,13,15,17,18,22,48],deepmind:44,def:[35,36],default_act:34,default_input_filt:36,default_output_filt:36,defin:[0,3,5,6,9,10,17,19,20,23,24,26,27,29,30,31,34,35,36,38,39,40,43,44,48,49],definit:[3,23,26,34,36,38,48],delai:47,delta:[12,19,22],demonstr:[1,2,49],dens:27,densiti:16,depend:[0,3,23,29,31,34,36,41,43,47,48],deploi:[33,39],depth:26,descend:47,describ:[3,12,21,29,31,35,38,41,48],descript:[3,30,34,42,49],design:[38,41,46],desir:[30,35],destabil:9,detail:[3,24,42,44,46,49],determin:[2,3,19,24,31,48],determinist:[3,47],dev:41,develop:[38,43],deviat:[9,10,27,29,37],devic:23,dfp:47,dfp_agent:4,dfpalgorithmparamet:4,dict:[3,4,23,24,26,27,34,48],dict_siz:31,dictat:4,dictionari:[2,3,23,24,26,31,34,35,48],did:26,differ:[0,1,2,3,4,5,6,9,10,11,15,23,26,27,29,34,35,36,37,39,40,46,47,48],differenti:15,difficult:[37,43],difficulti:49,dimens:[24,26,29,30],dimension:[10,30],dir:[3,48,49],direct:[3,26,48],directli:[3,5,38,40,48],directori:[0,23,35,37,41,49],disabl:49,disable_fog:26,disap
pear:26,disassembl:47,discard:[24,29],discount:[7,9,10,16,19,20,22,23,24,47],discret:[1,2,4,6,10,11,12,13,14,15,16,17,19,20,21,22,27,28,29,30,34,38],disentangl:38,disk:0,displai:[0,37],distanc:34,distance_from_go:34,distance_metr:34,distancemetr:34,distil:[3,48],distribut:[3,5,9,10,12,21,22,23,25,27,32,33,34,40,46,47,48,49],distributed_coach:39,distributed_coach_synchronization_typ:39,distributedcoachsynchronizationtyp:39,divereg:[6,10],diverg:[6,10,22],dnd:[0,19,47],dnd_key_error_threshold:19,dnd_size:19,do_action_hindsight:31,doc:41,docker:41,dockerfil:41,document:44,doe:[11,23,29],doesn:39,doing:[6,10,28],domain:40,don:[4,27,37,47],done:[0,3,6,9,10,26,29,36,48,49],doom:[26,36,41,44],doom_basic_bc:49,doom_basic_dqn:49,doom_environ:[26,36,49],doomenviron:[26,36],doomenvironmentparamet:[36,49],doominputfilt:36,doomlevel:26,doomoutputfilt:36,doubl:[3,16,22],down:[23,26],download:41,dpkg:41,dqn:[3,16,17,22,26,27,29,30,38,40,47],dqn_agent:[14,48],dqnagent:48,dqnalgorithmparamet:14,drive:[2,26,44,46],driving_benchmark:26,due:29,duel:[3,22],dump:[0,3,48],dump_csv:0,dump_gif:0,dump_in_episode_sign:0,dump_mp4:0,dump_one_value_per_episod:[3,48],dump_one_value_per_step:[3,48],dump_parameters_document:0,dump_signals_to_csv_every_x_episod:0,dure:[3,6,9,10,11,19,27,37,38,48,49],dynam:[37,43,47],each:[0,1,2,3,4,5,6,9,10,11,13,14,15,17,19,20,21,23,24,26,27,28,29,30,31,34,35,37,38,39,40,41,43,47,48],eas:37,easi:[36,37,46],easier:40,easili:[27,49],echo:41,effect:[0,3,6,17,29,38,48],effici:[38,47],either:[0,3,5,17,23,27,34,37,40,49],element:[3,11,23,29,34],elf:41,embbed:23,embed:[3,19,23,48],embedd:[23,40],embedding_merger_typ:23,embeddingmergertyp:23,empti:24,emul:[3,48],emulate_act_on_train:[3,48],emulate_observe_on_train:[3,48],enabl:[23,40,49],encod:[29,34],encourag:[18,20,38],end:[2,3,9,22,24,26,29,48,49],enforc:30,engin:[26,44],enough:[4,19],ensembl:[27,47],ensur:23,enter:[3,48,49],entir:[10,16,19,22,27,30,38],entri:[19,38],entropi:[1,5,6,9,10,12,22,27],enumer:34,env:[24,41],env_param:36,env_respons:[3,48],enviorn:26,environ:[0,3,4,15,23,24,27,28,29,30,34,35,38,41,43,45,46,48],environmentparamet:[26,36],envrespons:[0,3,26,48],episod:[0,3,4,5,9,10,11,16,17,22,26,27,35,36,37,38,39,48,49],episode_max_tim:26,episodic_hindsight_experience_replai:31,epoch:6,epsilon:[6,27,31],epsilon_schedul:27,equal:2,equat:[7,13,14,17,21],error:[23,47],escap:49,especi:15,essenti:[17,23,30,36,38,41],estim:[5,6,10,11,16,20,27],estimate_state_value_using_ga:[5,6,10],eta:[6,10],etc:[0,3,23,26,28,34,35,44,48],evalu:[0,3,23,24,27,38,48],evaluate_onli:0,evaluation_epsilon:27,evaluation_noise_percentag:27,even:[15,23,26,36,37,38,47],everi:[0,5,7,9,11,12,13,14,16,17,18,20,21,22,49],exact:[19,27,43],exactli:23,exampl:[2,3,4,23,24,26,27,28,29,30,34,35,36,38,40,48,49],except:[17,24],execut:[24,37,38],exhibit:[3,35,48],exist:[19,23],exit:[3,48],expand_dim:24,expect:[0,3,27,43,48],experi:[0,7,10,22,26,31,32,37,38,39,41,46,47,49],experiment_path:[0,26],experiment_suit:26,experimentsuit:26,expert:[1,2,24,47],exploit:[27,38],explor:[3,4,5,6,7,8,10,11,16,18,19,35,38,46,47],exploration_polici:27,explorationparamet:[3,27,35],exponenti:[6,10,22,23],expor:3,export_onnx_graph:0,expos:[37,40,46],extend:[26,27,44],extens:[26,44],extent:49,extern:0,extra:[23,24,40],extract:[3,18,19,24,29,34,37,38,48],factor:[7,9,10,20,22,23,24,27,29],faithfulli:37,fake:34,fals:[0,3,7,23,24,26,27,30,31,34,36,48],far:[10,29,38,43],faster:[15,47],featur:[7,26,40,46,47],feature_minimap_maps_to_us:26,feature_screen_maps_to_us:26,fetch:[23,24],fetched_tensor:23,few:[9,11
,12,13,14,16,20,21,22,27,36],field:[43,46],file:[0,3,35,38,48,49],fill:[24,36],filter:[0,3,46,48],find:[13,37,44,46],finish:[19,49],finit:30,first:[0,7,10,11,19,21,22,23,24,29,38,40],fit:34,flag:[0,3,23,24,26,48],flexibl:39,flicker:26,flow:[28,46],follow:[2,3,5,7,9,12,13,14,17,18,19,21,22,23,24,26,27,31,35,36,41,43,47,48],footprint:29,forc:[23,26,30,36],force_cpu:23,force_environment_reset:[26,36],force_int_bin:30,forced_attention_s:34,form:[4,17,34,47],format:35,formul:5,forward:[23,27],found:[3,42,49],frac:[6,12,22],fraction:[6,10],frame:[0,26],frame_skip:26,framework:[0,3,23,35,46,48],framework_typ:0,free:[26,44],freeglut3:41,from:[0,1,2,3,4,5,6,7,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,26,27,28,29,30,32,34,35,36,37,38,39,40,41,43,44,46,48,49],full:[3,9,16,30,48],fulldiscreteactionspacemap:30,fulli:23,func:[3,48],futur:[0,3,9,24,47],future_measurements_weight:4,gae:[5,6,10],gae_lambda:[5,6,10],game:[3,24,26,44,46,48,49],game_ov:24,gamma:[5,7,11,12,13,14,16,17,18,19,20,22],gap:[20,47],gather:39,gaussian:[10,27],gener:[0,5,6,10,11,23,26,27,31,34,35,41,49],general_network:35,get:[3,4,6,7,8,9,10,11,13,14,16,18,20,23,24,26,27,34,38,40,41,43,48],get_act:27,get_action_from_us:26,get_available_kei:26,get_first_transit:24,get_goal:26,get_last_env_respons:26,get_last_transit:24,get_output_head:35,get_predict:[3,48],get_random_act:26,get_rendered_imag:[26,36],get_reward_for_goal_and_st:34,get_state_embed:[3,48],get_transit:24,get_transitions_attribut:24,get_variable_valu:23,get_weight:23,gfortran:41,gif:0,git:41,github:[36,41,43,46],given:[0,1,2,3,4,5,7,9,10,23,24,26,27,29,30,31,34,35,38,48],given_weight:23,global:[3,23,40,48],global_network:23,glx:41,goal:[1,2,3,4,23,24,26,31,38,40,47,48],goal_from_st:34,goal_nam:34,goal_spac:26,goal_vector:4,goals_spac:31,goalsspac:[31,34],goaltorewardconvers:34,going:28,good:[36,37],gpu:[0,23],gracefulli:49,gradient:[3,5,6,10,17,19,23,35,47,48],gradientclippingmethod:23,gradients_clipping_method:23,granular:31,graph:0,graphmanag:38,grayscal:[29,34],greedili:38,group:37,grow:22,guidelin:47,gym:[41,44],gym_environ:[26,49],gymenviron:26,gymenvironmentparamet:36,hac:47,had:43,hand:[15,29,38,47],handl:4,handle_episode_end:[3,26,48],handling_targets_after_episode_end:4,handlingtargetsafterepisodeend:4,hard:[37,47],harder:37,has:[0,3,15,19,20,24,27,29,38,40,43,47,48],has_glob:23,has_target:23,hat:[6,12,22],have:[0,3,4,23,26,27,29,30,31,38,40,43,48],head:[1,2,3,5,9,11,15,18,19,23,27,35,40,48],headparamet:23,heads_paramet:23,health_gath:26,heatup:[27,38],help:[20,24,37,38,47],here:[36,38],heurist:[10,27],hide:40,hierarch:[34,38],hierarchi:[3,38,47,48],high:[7,10,29,30,34,37],high_i:34,high_kl_penalty_coeffici:10,high_x:34,higher:10,highest:[5,9,20,27,29,30,34],highli:[0,36,47],hindsight:[8,31,47],hindsight_goal_selection_method:31,hindsight_transitions_per_regular_transit:31,hindsightgoalselectionmethod:31,hold:[11,23,24,31,37,38,40],horizont:[41,46,49],host:41,hostnam:0,hot:34,how:[4,6,10,27,39,41,47,49],hrl:31,html:41,http:[17,31,41],hub:41,huber:21,huber_loss_interv:21,human:[0,26],human_control:26,hyper:[35,43],hyperparamet:35,ident:23,identifi:[23,34],ignor:26,imag:[0,23,26,29,30,34,36,40,49],image1:41,imit:[3,24,42,47],impact:23,implement:[3,6,10,23,25,26,27,31,35,36,39,43,47,49],impli:49,implment:33,importance_weight:23,importantli:38,improv:[5,15,22,26,38,47],includ:[0,3,4,26,28,29,33,40,44,48,49],increas:[10,20,29,47],increment:[3,48],index:[0,2,24,26,29,30,31],indic:34,inf:[29,34],infer:[3,23,26,48],infinit:47,info:[3,11,24,34,36,48],info_as_list:24,info
rm:[3,4,17,24,26,28,37,38,41,44,48],inherit:[3,35,36],init_environment_dependent_modul:[3,48],initi:[3,4,10,20,23,24,35,38,46,48],initial_feed_dict:23,initial_kl_coeffici:10,innov:47,input:[1,2,3,4,7,11,13,14,16,18,19,20,23,28,34,38,40,48],input_embedders_paramet:23,input_high:29,input_low:29,input_space_high:30,input_space_low:30,inputembedderparamet:23,inputfilt:38,insert:[19,24],inspect:0,instal:[41,49],instanc:[3,32,34,40],instanti:[3,26,38],instead:[0,3,6,17,20,23,29,30,38,47,48],instruct:49,intact:[11,43],integ:[0,29,30],integr:[36,38,39,46],intel:46,intend:[9,23,27,38],interact:[24,38,39,46,49],interest:[23,37],interfac:[26,37,39,44],intermedi:19,intern:[3,9,17,23,24,28,38,48,49],interpol:29,intersect:47,interv:21,intrins:24,intro:46,introduc:47,invers:[26,44],invok:38,involv:35,is_empti:24,is_point_in_space_shap:34,item:24,iter:[3,5,7,10,15,23,48],its:[0,3,12,22,23,24,27,34,38,41,47,48,49],itself:[23,34,49],job:0,job_typ:0,joint:26,json:0,jump:[4,30],jupyt:35,just:[3,10,20,22,36,38,40,48,49],kapa:21,keep:[14,24,29,49],kei:[2,19,23,24,26,31,35,37,41,49],key_error_threshold:31,key_width:31,keyboard:[26,49],keyword:23,kl_coeffici:23,kl_coefficient_ph:23,know:[3,47,48,49],knowledg:[3,38,48],known:[24,37,43,47],kubeconfig:33,kubernet:41,kubernetes_orchestr:33,kubernetesparamet:33,kwarg:[23,26],l2_norm_added_delta:19,l2_regular:23,lack:37,lamb:27,lambda:[5,6,10,27],lane:2,larg:[27,29,44],larger:23,last:[4,10,19,24,26,29],last_env_respons:26,lastli:38,later:[0,3,23,48,49],latest:[17,19,38,41],layer:[23,27,31,38,40],lazi:[24,29],lazystack:29,lbfg:23,ld_library_path:41,lead:27,learn:[0,3,4,5,7,8,9,11,12,13,14,15,18,21,22,23,24,26,27,29,37,38,40,42,43,44,47,48],learn_from_batch:[3,35,38,48],learner:23,learning_r:[23,31],learning_rate_decay_r:23,learning_rate_decay_step:23,least:[40,47],leav:[10,11],left:[2,47],length:[4,5,6,10,17,19,23,24],less:[15,47],level:[0,3,23,26,36,48,49],levelmanag:[3,38,48],levelselect:26,libatla:41,libav:41,libavformat:41,libbla:41,libboost:41,libbz2:41,libfluidsynth:41,libgl1:41,libglew:41,libgm:41,libgstream:41,libgtk2:41,libgtk:41,libjpeg:41,liblapack:41,libnotifi:41,libopen:41,libosmesa6:41,libportmidi:41,librari:[26,41,44],libsdl1:41,libsdl2:41,libsdl:41,libsm:41,libsmpeg:41,libswscal:41,libtiff:41,libwebkitgtk:41,libwildmidi:41,like:[26,34,38,40,41,47],likelihood:[6,10],line:[3,38,48,49],linear:30,linearboxtoboxmap:30,linearli:30,list:[0,3,4,23,24,26,27,29,30,34,35,48,49],load:[0,37,39,49],load_memory_from_file_path:49,local:[3,40,41,48],locat:[21,24,29,47],log:[0,3,5,9,48],log_to_screen:[3,48],logger:[0,3,48],look:[36,41],loop:38,loss:[1,2,3,6,9,10,12,13,14,21,22,23,27,35,40,48],lot:[27,37,43,47],low:[7,10,29,30,34],low_i:34,low_x:34,lower:[0,31,38],lowest:[29,30,34],lstm:40,lumin:29,lvert:[12,22],lvl:49,mai:[0,23,42,49],main:[3,35,38,40,42,48,49],mainli:39,major:27,make:[0,3,23,26,35,37,41,43,47,48],manag:[3,23,39,41,48],mandatori:[34,36,40],mani:[3,15,42,43],manner:[10,16,17,20,29,38],manual:41,map:[3,23,26,28,29,30,34,35,48],mark:24,markdown:48,mask:[11,30],masked_target_space_high:30,masked_target_space_low:30,master:[3,38,41,48],match:[2,19,23,34],mathbb:5,mathop:5,max:[5,12,17,22,29],max_a:[11,14,19,20],max_action_valu:24,max_episodes_to_achieve_reward:0,max_fps_for_human_control:0,max_over_num_fram:26,max_simultaneous_selected_act:34,max_siz:31,max_spe:26,maxim:[4,13],maximum:[0,12,14,19,20,24,26,27,29,31],mean:[0,2,6,7,8,9,10,18,23,27,29,30,34,37,47],meant:40,measur:[3,4,23,26,29,34,36,47,48],measurements_nam:34,mechan:[28,39,43,49],memor:47,memori:[3
,22,24,29,35,38,39,41,46,47,48],memory_backend:41,memorygranular:31,memoryparamet:[3,35],merg:[23,26],mesa:41,method:[0,5,6,10,17,23,29,31],metric:[0,34,37],middlewar:[19,23,40],middleware_paramet:23,middlewareparamet:23,midpoint:21,might:[3,9,26,35,40,48],min:[6,12,20,22],min_reward_threshold:0,mind:49,minim:[2,4,12],minimap_s:26,minimum:[0,6,29],mix:[3,6,10,19,20,47],mixedmontecarloalgorithmparamet:16,mixer1:41,mixtur:[16,23],mjkei:41,mjpro150:41,mjpro150_linux:41,mkdir:41,mmc:[16,47],mmc_agent:16,mode:[20,23,25,32,33,38,39,41,49],model:[0,16,18,23,46,49],modif:47,modul:[3,35,38,39,48],modular:[35,38,40,46],monitor:39,mont:[3,20],monte_carlo_mixing_r:[16,20],more:[3,7,17,23,29,35,37,38,40,41,46,48,49],moreov:37,most:[3,9,19,23,24,27,40,43,47,48,49],mostli:[29,38],motiv:38,move:[6,10,29,37,43],mp4:0,mse:[2,13,14,21],much:[6,10,38,47],mujoco:[26,30,36,41,44],mujoco_kei:41,mujoco_pi:41,multi:[10,23,34,40],multiarrai:[3,48],multidimension:34,multipl:[4,6,10,17,23,26,27,29,30,31,34,37,38,43,46,49],multipli:[4,9,23,29],multiselect:30,multitask:[26,44],must:[23,29,43],mxnet:49,n_step:[19,22,24,31],n_step_discounted_reward:24,n_step_q_ag:17,nabla:7,nabla_:7,nabla_a:7,naf:47,naf_ag:18,nafalgorithmparamet:18,name:[3,23,24,26,29,34,35,41,48,49],namespac:33,nasm:41,nativ:[0,26,36,44],native_rend:0,navig:3,ndarrai:[3,23,24,26,27,29,30,34,36,48],nearest:19,neat:37,nec:[0,47],nec_ag:19,necalgorithmparamet:19,necessari:[3,19,23,48],necessarili:29,need:[0,3,22,23,26,27,34,35,38,43,47,48,49],neg:[4,29],neighbor:19,neon_compon:35,nervanasystem:41,network:[0,3,23,27,35,38,43,46,47,48,49],network_input_tupl:23,network_nam:[3,48],network_param:27,network_paramet:23,network_wrapp:[3,23,48],networkparamet:[3,23,27,35],networkwrapp:[3,48],neural:[3,16,23,40,43],never:23,new_value_shift_coeffici:[19,31],new_weight:23,newli:[20,36,47],next:[3,7,13,14,18,20,21,24,26,38,48,49],next_stat:24,nfs_data_stor:25,nfsdatastoreparamet:25,nice:49,no_accumul:23,node:[23,40],nois:[7,8,18,27,38],noise_percentage_schedul:27,noisi:[9,22,27],non_episod:31,none:[0,3,6,7,10,23,24,26,27,29,30,34,36,48],norm:23,norm_unclipped_grad:23,norm_unclippsed_grad:23,normal:[3,4,9,27,28,29,34],note:[19,23,27,48],notebook:35,notic:[23,47],notori:[37,43,47],now:[6,36],nstepqalgorithmparamet:17,nth:22,num_act:[19,31,34],num_bins_per_dimens:30,num_class:31,num_consecutive_playing_step:[3,7,48],num_consecutive_training_step:[3,48],num_gpu:0,num_neighbor:31,num_predicted_steps_ahead:4,num_speedup_step:26,num_steps_between_copying_online_weights_to_target:[7,17],num_steps_between_gradient_upd:[5,9,17],num_task:0,num_training_task:0,num_work:0,number:[0,2,4,5,7,9,11,12,17,19,21,22,23,24,26,27,29,30,31,37,44,49],number_of_knn:19,numpi:[3,23,24,26,27,29,30,34,36,48],nvidia:41,object:[0,3,22,23,26,27,29,31,38,48],observ:[0,3,4,10,23,24,26,28,36,38,48],observation_reduction_by_sub_parts_name_filt:29,observation_rescale_size_by_factor_filt:29,observation_rescale_to_size_filt:29,observation_space_s:23,observation_space_typ:26,observation_stat:29,observation_typ:26,observationspac:34,observationspacetyp:26,observationtyp:26,obtain:[3,48],off:[39,47],offer:[26,44],often:[37,38,40],old:[6,10,23,47],old_weight:23,onc:[0,6,9,10,11,12,13,14,16,17,20,21,22,23,34,49],one:[0,3,15,19,20,23,24,26,27,28,31,34,36,37,40,47,48],ones:[36,47],onli:[0,3,4,5,6,9,10,11,12,14,15,17,19,21,22,23,24,26,27,29,30,36,38,47,48,49],onlin:[7,11,12,13,14,16,17,18,19,20,21,22,23,38,40],online_network:23,onnx:[0,23],onto:28,open:[0,26,44],openai:[41,44],opencv:41,oper:[20,23,29],optim:[3,4,23
,42],optimization_epoch:6,optimizer_epsilon:23,optimizer_typ:23,option:[9,23,26,30,34,35,37,39,40,49],orchestr:[39,41,46],order:[0,3,5,6,7,9,10,13,14,15,17,18,19,20,21,23,24,28,29,30,37,38,40,43,47,48],org:[17,31],origin:[17,29,30,43],ornstein:[7,8,27],other:[0,2,9,15,20,23,26,28,29,31,37,38,47],otherwis:[10,11,23,26,27,34],our:6,out:[2,13,14,27,28,30,37,41,46,47,49],outcom:[27,38],output:[0,4,7,11,12,18,19,23,27,28,29,34,35,40],output_0_0:23,output_observation_spac:29,outputfilt:38,outsid:[4,27],over:[3,6,9,10,17,19,22,23,24,27,29,30,37,38,47,48],overestim:7,overfit:10,overhead:0,overlai:37,override_existing_kei:31,overriden:35,overview:38,overwhelm:38,overwritten:23,own:[23,35],p_j:[12,22],page:[3,43],pair:[0,34],pal:[20,47],pal_ag:20,pal_alpha:20,palalgorithmparamet:20,paper:[5,9,12,17,19,21,26,31,43],parallel:[23,37,40],parallel_predict:23,param:[3,23,24,25,26,27,32,33,35,36,48],paramet:[2,3,4,5,6,7,9,10,12,16,17,19,20,21,22,23,24,25,26,27,29,30,31,32,33,34,35,36,43,46,48,49],parameters_server_host:0,parent:[3,23,48],parent_path_suffix:[3,23,48],parmet:3,pars:38,part:[0,11,23,24,27,29,30,39,40,43,47],part_nam:29,partial:30,partialdiscreteactionspacemap:30,particular:4,particularli:[26,27,34,43,47],pass:[0,4,7,8,18,19,23,26,27,28,36,37,38,40,49],patamet:19,patchelf:41,patchelf_0:41,path:[0,3,23,35,36,41,48,49],pattern:38,pdf:31,penal:[6,7,10],penalti:10,pendulum_hac:36,pendulum_with_go:36,pendulumwithgo:36,per:[0,3,4,34,35,38,48],percentag:27,percentil:27,perceptron:40,perform:[0,3,23,24,29,31,36,37,38,47,48],period:[40,49],persist:3,persistent_advantage_learn:20,perspect:12,phase:[3,6,7,8,10,23,26,27,38,48],phi:[12,22],physic:[26,44],pi_:6,pick:26,pickl:49,pip3:41,pip:41,pixel:26,place:[30,37,38],placehold:[23,27],plai:[0,3,9,11,13,14,17,27,35,37,48],plain:40,planarmap:26,planarmapsobservationspac:29,platform:[26,44],pleas:[17,43],plu:23,plugin:41,point:[29,34,38,39],polici:[1,3,4,5,8,11,17,18,19,25,35,38,39,40,41,42,46,47],policy_gradient_rescal:[5,6,9,10],policy_gradients_ag:9,policygradientalgorithmparamet:9,policygradientrescal:[5,6,9,10],policyoptimizationag:35,popul:38,popular:[26,44],port:0,posit:[4,29],possibl:[2,3,4,19,27,30,34,37,40,46,47,48,49],post:[28,46],post_training_command:[3,48],power:[26,44],ppo:[6,10,47],ppo_ag:10,ppoalgorithmparamet:10,pre:[7,27,28],predefin:[11,20,27,49],predict:[1,2,3,5,6,7,10,11,12,13,14,20,21,22,23,27,40,47,48],prediction_typ:[3,48],predictiontyp:[3,48],prefect:47,prefer:23,prefix:[3,48],prep:41,prepar:[3,48],prepare_batch_for_infer:[3,48],present:[15,19,26,29,47],preset:[0,5,35,36,38,39,41,49],press:[37,49],prevent:[7,10,38],previou:29,previous:[10,23],print:[0,3,49],print_networks_summari:0,priorit:[22,31],prioriti:[22,31],privat:34,probabilit:5,probabl:[3,5,9,11,12,22,24,27,35,47,48],process:[0,3,7,8,23,27,28,29,30,35,37,38,40,43,46,48],produc:23,progress:23,project:[12,22],propag:6,propagate_updates_to_dnd:19,properti:[23,31,35,36,41],proport:31,provid:[23,39],proxi:38,proxim:3,pub:[32,33,41],publish:43,purpos:[0,3,9],pursuit:2,pybullet:[26,44],pygam:[0,41],pytest:41,python3:41,python:[26,31,35,41,44,46],qr_dqn_agent:21,qualiti:26,quantil:[3,47],quantileregressiondqnalgorithmparamet:21,queri:[19,23,38,47],question:47,quit:37,r_i:[5,17],r_t:[4,6,22],rainbow:[3,35,47],rainbow_ag:35,rainbow_dqn_ag:22,rainbowag:35,rainbowagentparamet:35,rainbowalgorithmparamet:35,rainbowdqnalgorithmparamet:22,rainbowexplorationparamet:35,rainbowmemoryparamet:35,rainbownetworkparamet:35,rais:[3,24,48],ramp:[35,38],random:[0,17,26,27,34,38,43],random_initializatio
n_step:26,randomli:[24,38],rang:[6,7,10,12,22,26,27,29,30,34,47],rare:19,rate:[0,16,19,23,26,40],rate_for_copying_weights_to_target:7,rather:[4,37],ratio:[6,10,16,29],raw:[26,44],reach:[0,10,34],read:25,readabl:38,readm:41,real:3,reason:[29,43],rebuild_on_every_upd:31,receiv:[23,24],recent:[3,22,23,47,48],recommend:36,redi:[32,33,41],redispubsub:41,redispubsubmemorybackendparamet:32,reduc:[1,2,9,10,20,23,29,38,47],reduct:29,reduction_method:29,reductionmethod:29,redund:29,refer:[2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,39,41],referenc:3,regard:[3,48],regist:[3,48],register_sign:[3,48],registri:41,regress:[2,3,47],regula:[6,10],regular:[5,6,9,10,17,19,23,27,30,31,47],regularli:23,reinforc:[3,5,7,8,9,12,13,14,15,17,20,21,22,26,27,37,38,40,42,43,44,47],rel:27,relat:[23,41],relationship:47,releas:[46,47],relev:[3,11,27,29,48],remov:29,render:[0,3,26,36],reorder:29,repeat:[26,38],replac:[27,29,31,41],replace_mse_with_huber_loss:23,replai:[1,2,3,7,11,12,13,14,17,19,20,21,22,31,38,47,48,49],replay_buff:49,replicated_devic:23,repo:36,repositori:46,repres:[0,6,10,12,22,23,24,26,27,30,34,49],represent:40,reproduc:[38,43],request:[3,23,48],requir:[3,23,25,27,29,37,40,41,47,48],requires_action_valu:27,rescal:[4,5,6,9,10,23,28,29],rescale_factor:29,rescaleinterpolationtyp:29,rescaling_interpolation_typ:29,research:[26,43,44],reset:[3,19,23,26,27,36,48],reset_accumulated_gradi:23,reset_evaluation_st:[3,48],reset_gradi:23,reset_internal_st:[3,26,48],resourc:[39,41],respect:[7,24,26],respons:[3,24,26,38,48],rest:[23,24,30,41],restart:36,restor:[0,3,48],restore_checkpoint:[3,48],result:[3,4,12,13,14,15,21,22,23,29,30,43,47,48,49],retriev:[19,31],return_additional_data:31,reus:38,reusabl:40,reward:[0,1,2,3,4,7,9,16,17,22,23,24,26,28,34,36,37,38,47,48],reward_test_level:0,reward_typ:34,rgb:[26,29,34],rho:7,right:[2,3,27,30,37,47,48],rl_coach:[0,1,2,3,4,5,6,7,9,10,12,14,16,17,18,19,20,21,22,23,24,25,26,27,29,30,31,32,33,34,36,41,48,49],rms_prop_optimizer_decai:23,rmsprop:23,roboschool:[26,44],robot:[26,34,44,46],roboti:41,rollout:[3,25,32,33,39,41,48,49],root:[37,41],rule:[7,11],run:[0,3,4,7,9,10,11,13,14,19,20,23,26,27,29,48,49],run_pre_network_filter_for_infer:[3,48],runphas:[3,48],runtim:41,rvert:[12,22],s3_bucket_nam:41,s3_creds_fil:41,s3_data_stor:25,s3_end_point:41,s3datastoreparamet:25,s_t:[4,5,7,11,12,13,14,16,17,18,20,22],sai:47,same:[3,4,6,9,16,17,20,23,26,30,31,37,40,43,47,48],sampl:[1,2,3,5,7,9,10,11,12,13,14,16,17,20,21,22,23,27,31,34,38,41,48],sample_with_info:34,satur:7,save:[0,3,22,23,27,41,48,49],save_checkpoint:[3,48],saver:[3,23,48],savercollect:[3,23,48],scale:[4,9,23,29,37,41,46,49],scale_down_gradients_by_number_of_workers_for_sync_train:23,scale_measurements_target:4,scaler:23,schedul:[6,27,31,38,39,41,49],scheme:[5,27,38,47],schulman:10,sci:41,scienc:43,scipi:[29,41],scope:23,scratch:47,scratchpad:0,screen:[3,26,36,49],screen_siz:26,script:38,second:[0,23,37,47,49],section:[41,42,44],see:[3,26,29,41,43,44,47,48,49],seed:[0,26,43],seen:[4,19,20,26,29,38,43,47],segment:[26,34],select:[5,11,19,23,24,27,29,30,34,36,37,38,46,49],self:[3,23,35,36,48],send:[36,40],separ:[0,3,15,29,30,40,42,47],separate_actions_for_throttle_and_brak:26,seper:9,sequenti:[4,24,31],serv:[6,9,40],server:0,server_height:26,server_width:26,sess:[3,23,48],session:[3,23,48],set:[0,2,3,4,5,6,7,10,12,13,14,16,19,20,22,23,24,26,27,29,30,34,35,39,43,44,46,47,48,49],set_environment_paramet:[3,48],set_goal:26,set_incoming_direct:[3,48],set_is_train:23,set_sess:[3,48],set_variable_valu:23,set_weight:23,setup:[3,41
,48],setup_logg:[3,48],setuptool:41,sever:[0,3,6,9,10,11,23,26,27,29,35,36,37,38,40,44,47,48,49],shape:[23,29,34],share:[0,3,23,31,40,48],shared_memory_scratchpad:0,shared_optim:23,shift:[30,38],shine:37,should:[0,3,4,6,10,11,17,20,23,24,26,29,31,34,35,36,39,48,49],should_dump:0,shouldn:11,show:43,shown:43,shuffl:24,side:[3,48],sigma:27,signal:[3,38,48],signal_nam:[3,48],significantli:15,similar:[6,15,17,24,26,30,47],simpl:[9,31,35,36,40,46,47,49],simplest:47,simplif:47,simplifi:[6,37,40],simul:[26,36,44,49],simultan:6,sinc:[3,6,7,9,17,19,20,22,23,27,29,48],singl:[3,4,5,6,10,11,15,16,17,23,24,26,27,30,34,37,38,40,48],size:[23,24,27,29,30,31,34],skill:47,skip:[26,38],slave:[3,48],slice:24,slow:[23,49],slower:[0,15,23],slowli:7,small:[6,19,31],smaller:27,smooth:37,soft:[7,10,18],softmax:27,softwar:41,solut:47,solv:[29,36,44,46],some:[0,3,10,23,24,27,29,35,36,37,40,43,47,48,49],sort:21,sourc:[0,1,2,3,4,5,6,7,9,10,12,14,16,17,18,19,20,21,22,23,24,25,26,27,29,30,31,32,33,34,36,41,44,48],space:[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,26,27,28,29,30,31,38,46,48],spacesdefinit:[3,23,48],spatial:47,spawn:[39,41],special:15,specif:[0,3,11,15,19,23,24,35,38,49],specifi:[0,23,26,27,29,36,39,49],speed:[23,29,47],speedup:49,spread:[29,30],squar:29,squeeze_list:23,squeeze_output:23,src:41,stabil:[17,23,47],stabl:[40,47],stack:[3,28,29,34,48],stack_siz:[23,29],stacking_axi:29,stage:40,stai:43,standard:[6,9,10,11,27,29,37],starcraft2_environ:26,starcraft2environ:26,starcraft:[34,44],starcraftobservationtyp:26,start:[3,7,10,15,20,24,29,30,36,41,48],state:[1,2,3,4,5,6,7,8,9,10,11,13,14,15,16,17,18,19,20,21,22,23,24,26,29,31,34,35,36,38,40,42,47,48],state_key_with_the_class_index:[2,31],state_spac:26,state_valu:24,statist:[3,9,29,46,48],stdev:27,steep:27,step:[0,3,4,5,6,7,9,10,11,12,13,14,16,18,19,20,21,22,23,24,26,27,29,35,36,37,38,47,48,49],stepmethod:[7,17],stochast:38,stop:[0,26],store:[0,3,19,22,24,26,29,31,37,38,39,41,46,48,49],store_transitions_only_when_episodes_are_termin:22,str:[0,2,3,4,17,23,24,26,27,29,30,34,48],strategi:[26,44],stream:[15,39],strict:43,string:[0,23,26],structur:[0,3,24,31,35,38,48],stuff:23,style:27,sub:[30,31,32,33,34,35,38,41,49],sub_spac:34,subset:[37,43,47],subtract:20,succeed:26,success:[0,26,47],suffer:37,suffici:24,suffix:[3,23,48],suggest:35,suit:[0,44],suitabl:[39,49],sum:[4,6,9,16,23,24],sum_:[5,12,16,17,19,22],summari:[0,3,48],supervis:47,suppli:[3,48],support:[0,3,23,26,27,37,40,41,42,44,46,49],sure:[0,41,43],surrog:6,swig:41,swingup:26,symbol:23,sync:[3,23,38,39,48],synchron:[0,23,38,40],t_max:[9,17],tag:41,take:[0,9,10,15,19,20,23,26,27,28,36,37,38],taken:[1,2,4,5,6,7,10,12,15,19,20,21,22,23,24,26,27],tanh:7,tar:41,target:[0,1,2,3,4,5,6,7,10,11,12,13,14,16,17,18,19,20,21,22,23,26,29,30,34,35,38,40,48],target_act:30,target_kl_diverg:10,target_network:23,target_success_r:26,targets_horizon:17,task:[0,1,2,26,29,35,37,44],task_index:0,techniqu:[6,10,46,47],technolog:39,teh:23,temperatur:27,temperature_schedul:27,tensor:[3,23,48],tensorboard:0,tensorflow:[0,3,23,48,49],tensorflow_support:23,term:[6,10,34],termin:[3,7,24,38,48],test:[0,3,5,7,8,9,10,23,35,43,46,49],test_using_a_trace_test:0,textrm:38,than:[0,3,10,23,27,37,40,48],thei:[3,19,20,23,27,37,38,39,47,48,49],them:[4,5,9,17,23,24,26,29,34,36,37,40],therefor:[0,7,23,28,47],theta:[6,7,12,22,27],theta_:6,thi:[0,3,4,5,6,7,9,10,11,15,17,19,22,23,24,26,27,28,29,30,31,32,34,35,36,37,38,39,40,41,43,47,48,49],thing:37,those:[0,3,7,11,13,14,15,19,24,27,30,38,40,42,47,48],thousand:[10,11,12,13,14,16,20,21,2
2],thread:23,three:[3,39,40,41,42],threshold:[10,19,29],through:[0,3,4,7,8,9,10,11,19,20,23,35,36,38,40,48],tild:7,time:[0,4,20,23,27,30,31,37,40,47],time_limit:36,timestep:[4,9],timid:41,tmp:0,togeth:[3,17,24,38,48],toggl:37,too:10,tool:[37,41,47],top:[23,26,28,29,31,36,37,47],torqu:26,total:[0,3,9,10,16,19,20,24,31,35,37,47,48],total_loss:23,total_return:24,trace:0,trace_max_env_step:0,trace_test_level:0,tradeoff:27,train:[0,3,15,23,27,32,33,35,36,37,38,39,40,43,46,47,48],train_and_sync_network:23,train_on_batch:23,trainer:[25,39],transfer:[26,32,44],transit:[1,2,3,4,5,7,9,10,12,13,14,17,19,20,21,22,31,35,38,39,48],transition_idx:24,tri:47,trick:43,tricki:37,trigger:[26,41],ttf2:41,tune:27,tupl:[1,2,3,7,23,24,26,31,34,35],turn:[2,47],tutori:[35,36],tweak:[3,48],two:[7,9,17,23,26,27,28,29,30,34,36,39,40,49],txt:41,type:[0,3,9,15,23,26,29,34,35,38,40,46,47,48,49],typic:[6,10,23,47,49],ubuntu16:41,uhlenbeck:[7,8,27],uint8:29,unbound:34,uncertain:27,uncertainti:27,unchang:10,unclip:[3,35,48],uncorrel:17,undeploi:39,under:[3,23,35,49],underbrac:5,understand:49,unifi:6,uniformli:[26,27,30,34],union:[3,24,26,27,30,34,48],uniqu:23,unit:37,unlik:10,unmask:30,unnecessari:0,unshar:[3,48],unsign:29,unspecifi:23,unstabl:[37,43],until:[0,9,10,19,22,27],unus:23,unzip:41,updat:[3,6,7,9,10,11,12,13,14,15,17,18,19,20,21,22,23,24,27,35,36,37,38,40,41,47,48],update_discounted_reward:24,update_log:[3,48],update_online_network:23,update_step_in_episode_log:[3,48],update_target_network:23,update_transition_before_adding_to_replay_buff:[3,48],upgrad:41,upon:[3,5,35,48],upper:27,usag:[30,46],use:[0,1,2,3,4,5,7,8,9,11,13,14,18,23,24,25,26,27,29,30,31,34,35,36,38,40,41,46,47,48,49],use_accumulated_reward_as_measur:4,use_cpu:0,use_full_action_spac:26,use_kl_regular:[6,10],use_non_zero_discount_for_terminal_st:7,use_separate_networks_per_head:23,use_target_network_for_evalu:7,used:[0,2,3,5,6,7,9,10,11,12,16,17,18,19,20,21,23,26,27,29,30,31,32,33,35,36,38,39,40,43,48,49],useful:[0,3,4,22,23,27,29,34,43,47,48,49],user:[23,26,27,37,38,41],userguid:41,uses:[0,1,6,10,15,24,25,27,33,38,39,41,43,47,49],using:[0,3,5,6,7,9,10,13,14,16,17,18,19,20,22,23,25,26,27,29,32,35,36,37,39,44,47,48,49],usr:41,usual:[29,38],util:[3,37,48],v_max:12,v_min:12,val:[3,34,48],val_matches_space_definit:34,valid:[0,34],valu:[0,2,3,4,5,6,7,10,11,12,13,14,15,17,18,19,20,22,23,24,26,27,29,30,31,34,35,38,40,41,42,47,48],valuabl:37,value_targets_mix_fract:[6,10],valueexcept:[3,48],valueoptimizationag:35,van:4,vari:40,variabl:[23,26,41],variable_scop:23,varianc:[9,27,37],variant:[27,31,47],variou:[3,24,31,46],vector:[3,4,7,8,10,11,23,26,29,34,36,40,47,48],vectorobservationspac:29,verbos:26,veri:[0,6,7,9,15,19,37,47,49],version:[6,10,24],versu:23,vertic:23,via:[2,11],video:[0,3,26],video_dump_method:0,view:37,viewabl:[3,48],visit:43,visual:[0,3,26,44,46],visualization_paramet:26,visualizationparamet:[3,26],vizdoom:[41,44],vote:27,wai:[3,6,10,27,30,36,38,40,46,48,49],wait:[5,23,39],walk:36,want:[3,4,22,23,24,29,30,31,48],warn:[27,29,30],wasn:24,weather_id:26,websit:[26,46],weight:[4,5,6,7,10,11,12,13,14,16,17,18,19,20,21,22,23,27,38,40,47],well:[19,23,27,34,47],went:10,were:[4,12,13,14,15,19,21,22,23,24,30,43],west:41,wget:41,what:[10,47],when:[0,3,4,5,6,7,8,9,10,19,23,24,25,26,27,29,32,33,35,36,37,48,49],whenev:39,where:[2,3,4,5,6,10,11,12,15,17,19,20,22,23,24,26,27,29,30,34,37,47,48],which:[0,1,2,3,5,6,7,9,10,11,15,17,18,19,20,21,23,24,25,26,27,29,31,32,33,34,35,36,37,38,39,40,42,43,44,46,47,48,49],who:38,why:[37,38],window:[29,30],wise:29,within:[0,6
,10,18,27,34,37],without:[5,10,30,31,37,47,49],won:[4,23],wont:23,work:[3,17,23,27,29,30,37,38,47,48,49],workaround:0,workdir:41,worker:[0,3,17,23,25,29,31,32,33,37,39,40,41,47,48,49],worker_devic:23,worker_host:0,wors:47,would:[23,41,47],wrap:[26,29,38,44],wrapper:[3,23,24,26,34,40,48],write:[0,3,48],written:[3,22,25,48],www:41,xdist:41,y_t:[7,11,13,14,16,18,19,20],year:47,yet:[15,36],you:[4,29,31,35,36,41,46,49],your:[35,36,41,49],yuv:29,z_i:[12,22],z_j:[12,22],zero:[2,13,14],zip:41,zlib1g:41},titles:["Additional Parameters","Behavioral Cloning","Conditional Imitation Learning","Agents","Direct Future Prediction","Actor-Critic","Clipped Proximal Policy Optimization","Deep Deterministic Policy Gradient","Hierarchical Actor Critic","Policy Gradient","Proximal Policy Optimization","Bootstrapped DQN","Categorical DQN","Double DQN","Deep Q Networks","Dueling DQN","Mixed Monte Carlo","N-Step Q Learning","Normalized Advantage Functions","Neural Episodic Control","Persistent Advantage Learning","Quantile Regression DQN","Rainbow","Architectures","Core Types","Data Stores","Environments","Exploration Policies","Filters","Input Filters","Output Filters","Memories","Memory Backends","Orchestrators","Spaces","Adding a New Agent","Adding a New Environment","Coach Dashboard","Control Flow","Distributed Coach - Horizontal Scale-Out","Network Design","Usage - Distributed Coach","Algorithms","Benchmarks","Environments","Features","Reinforcement Learning Coach","Selecting an Algorithm","test","Usage"],titleterms:{"final":19,"function":18,"new":[35,36],"switch":49,Adding:[35,36],Using:36,across:47,action:[4,5,6,7,8,9,10,11,18,19,30,34,47],actioninfo:24,actor:[5,8],addit:[0,49],additivenois:27,advantag:[18,20],agent:[3,35,38,49],algorithm:[1,2,4,5,6,7,8,9,10,11,12,13,14,16,17,18,19,20,21,22,42,47,49],api:36,architectur:23,attentionactionspac:34,backend:32,balancedexperiencereplai:31,batch:24,behavior:1,benchmark:43,between:49,blizzard:26,boltzmann:27,bootstrap:[11,27],boxactionspac:34,build:41,can:47,carla:26,carlo:16,categor:[12,27],choos:[4,5,6,7,8,9,10,11,18,19],clip:6,clone:[1,41],coach:[36,37,39,41,46],collect:47,compar:37,compoundactionspac:34,condit:2,config:41,contain:41,continu:[6,10,47],continuousentropi:27,control:[19,26,38],copi:40,core:24,creat:41,critic:[5,8],dashboard:37,data:25,deep:[7,14,49],deepmind:26,demonstr:47,descript:[1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22],design:40,determinist:7,direct:4,discret:[5,9,47],discreteactionspac:34,distribut:[39,41],distributedtaskparamet:0,doe:47,doubl:13,dqn:[11,12,13,15,21],duel:15,dump:49,egreedi:27,environ:[26,36,44,47,49],envrespons:24,episod:[19,24,31],episodicexperiencereplai:31,episodichindsightexperiencereplai:31,episodichrlhindsightexperiencereplai:31,evalu:49,experiencereplai:31,explor:27,explorationpolici:27,featur:45,file:41,filter:[28,29,30],flag:49,flow:38,framework:49,from:47,futur:4,gener:15,gif:49,goal:34,gradient:[7,9],graph:38,greedi:27,gym:[26,36],have:47,hierarch:8,horizont:39,human:[47,49],imag:41,imageobservationspac:34,imit:[2,49],implement:41,input:29,interfac:41,keep:40,kubernet:33,learn:[2,17,20,46,49],level:38,manag:38,memori:[31,32],mix:16,mont:16,more:47,multi:49,multipl:47,multiselectactionspac:34,network:[1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,40],networkwrapp:23,neural:19,nfsdatastor:25,node:[47,49],non:31,normal:18,observ:[29,34],observationclippingfilt:29,observationcropfilt:29,observationmoveaxisfilt:29,observationnormalizationfilt:29,observationreductionbysubpartsnamefilt:29,observatio
nrescalesizebyfactorfilt:29,observationrescaletosizefilt:29,observationrgbtoyfilt:29,observationsqueezefilt:29,observationstackingfilt:29,observationtouint8filt:29,openai:[26,36],optim:[6,10],orchestr:33,ouprocess:27,out:39,output:30,pain:47,parallel:47,paramet:0,parameternois:27,persist:20,plai:49,planarmapsobservationspac:34,polici:[6,7,9,10,27],predict:4,prerequisit:41,presetvalidationparamet:0,prioritizedexperiencereplai:31,process:47,proxim:[6,10],push:41,qdnd:31,quantil:21,rainbow:22,redispubsubbackend:32,regress:21,reinforc:46,render:49,repositori:41,reward:29,rewardclippingfilt:29,rewardnormalizationfilt:29,rewardrescalefilt:29,run:[37,41],s3datastor:25,sampl:47,scale:39,select:47,signal:37,simul:47,singl:49,singleepisodebuff:31,solv:47,space:[34,47],starcraft:26,statist:37,step:17,store:[11,25],structur:[1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22],suit:26,support:39,sync:40,synchron:39,task:47,taskparamet:0,test:48,thread:49,through:49,track:37,train:[1,2,4,5,6,7,8,9,10,11,12,13,14,16,17,18,19,20,21,22,49],transit:[11,24],transitioncollect:31,truncatednorm:27,type:[24,39],ucb:27,usag:[41,49],vectorobservationspac:34,visual:[37,49],visualizationparamet:0,vizdoom:26,you:47,your:47}}) \ No newline at end of file +Search.setIndex({docnames:["components/additional_parameters","components/agents/imitation/bc","components/agents/imitation/cil","components/agents/index","components/agents/other/dfp","components/agents/policy_optimization/ac","components/agents/policy_optimization/cppo","components/agents/policy_optimization/ddpg","components/agents/policy_optimization/hac","components/agents/policy_optimization/pg","components/agents/policy_optimization/ppo","components/agents/value_optimization/bs_dqn","components/agents/value_optimization/categorical_dqn","components/agents/value_optimization/double_dqn","components/agents/value_optimization/dqn","components/agents/value_optimization/dueling_dqn","components/agents/value_optimization/mmc","components/agents/value_optimization/n_step","components/agents/value_optimization/naf","components/agents/value_optimization/nec","components/agents/value_optimization/pal","components/agents/value_optimization/qr_dqn","components/agents/value_optimization/rainbow","components/architectures/index","components/core_types","components/data_stores/index","components/environments/index","components/exploration_policies/index","components/filters/index","components/filters/input_filters","components/filters/output_filters","components/memories/index","components/memory_backends/index","components/orchestrators/index","components/spaces","contributing/add_agent","contributing/add_env","dashboard","design/control_flow","design/horizontal_scaling","design/network","dist_usage","features/algorithms","features/benchmarks","features/environments","features/index","index","selecting_an_algorithm","test","usage"],envversion:{"sphinx.domains.c":1,"sphinx.domains.changeset":1,"sphinx.domains.cpp":1,"sphinx.domains.javascript":1,"sphinx.domains.math":2,"sphinx.domains.python":1,"sphinx.domains.rst":1,"sphinx.domains.std":1,"sphinx.ext.todo":1,"sphinx.ext.viewcode":1,sphinx:55},filenames:["components/additional_parameters.rst","components/agents/imitation/bc.rst","components/agents/imitation/cil.rst","components/agents/index.rst","components/agents/other/dfp.rst","components/agents/policy_optimization/ac.rst","components/agents/policy_optimization/cppo.rst","components/agents/policy_optimization/ddpg.rst","components/agents/policy_optimization/hac.rst","co
mponents/agents/policy_optimization/pg.rst","components/agents/policy_optimization/ppo.rst","components/agents/value_optimization/bs_dqn.rst","components/agents/value_optimization/categorical_dqn.rst","components/agents/value_optimization/double_dqn.rst","components/agents/value_optimization/dqn.rst","components/agents/value_optimization/dueling_dqn.rst","components/agents/value_optimization/mmc.rst","components/agents/value_optimization/n_step.rst","components/agents/value_optimization/naf.rst","components/agents/value_optimization/nec.rst","components/agents/value_optimization/pal.rst","components/agents/value_optimization/qr_dqn.rst","components/agents/value_optimization/rainbow.rst","components/architectures/index.rst","components/core_types.rst","components/data_stores/index.rst","components/environments/index.rst","components/exploration_policies/index.rst","components/filters/index.rst","components/filters/input_filters.rst","components/filters/output_filters.rst","components/memories/index.rst","components/memory_backends/index.rst","components/orchestrators/index.rst","components/spaces.rst","contributing/add_agent.rst","contributing/add_env.rst","dashboard.rst","design/control_flow.rst","design/horizontal_scaling.rst","design/network.rst","dist_usage.rst","features/algorithms.rst","features/benchmarks.rst","features/environments.rst","features/index.rst","index.rst","selecting_an_algorithm.rst","test.rst","usage.rst"],objects:{"rl_coach.agents.actor_critic_agent":{ActorCriticAlgorithmParameters:[5,0,1,""]},"rl_coach.agents.agent":{Agent:[3,0,1,""]},"rl_coach.agents.agent.Agent":{act:[3,1,1,""],call_memory:[3,1,1,""],choose_action:[3,1,1,""],collect_savers:[3,1,1,""],create_networks:[3,1,1,""],emulate_act_on_trainer:[3,1,1,""],emulate_observe_on_trainer:[3,1,1,""],get_predictions:[3,1,1,""],get_state_embedding:[3,1,1,""],handle_episode_ended:[3,1,1,""],init_environment_dependent_modules:[3,1,1,""],learn_from_batch:[3,1,1,""],log_to_screen:[3,1,1,""],observe:[3,1,1,""],parent:[3,2,1,""],phase:[3,2,1,""],post_training_commands:[3,1,1,""],prepare_batch_for_inference:[3,1,1,""],register_signal:[3,1,1,""],reset_evaluation_state:[3,1,1,""],reset_internal_state:[3,1,1,""],restore_checkpoint:[3,1,1,""],run_pre_network_filter_for_inference:[3,1,1,""],save_checkpoint:[3,1,1,""],set_environment_parameters:[3,1,1,""],set_incoming_directive:[3,1,1,""],set_session:[3,1,1,""],setup_logger:[3,1,1,""],sync:[3,1,1,""],train:[3,1,1,""],update_log:[3,1,1,""],update_step_in_episode_log:[3,1,1,""],update_transition_before_adding_to_replay_buffer:[3,1,1,""]},"rl_coach.agents.bc_agent":{BCAlgorithmParameters:[1,0,1,""]},"rl_coach.agents.categorical_dqn_agent":{CategoricalDQNAlgorithmParameters:[12,0,1,""]},"rl_coach.agents.cil_agent":{CILAlgorithmParameters:[2,0,1,""]},"rl_coach.agents.clipped_ppo_agent":{ClippedPPOAlgorithmParameters:[6,0,1,""]},"rl_coach.agents.ddpg_agent":{DDPGAlgorithmParameters:[7,0,1,""]},"rl_coach.agents.dfp_agent":{DFPAlgorithmParameters:[4,0,1,""]},"rl_coach.agents.dqn_agent":{DQNAgent:[48,0,1,""],DQNAlgorithmParameters:[14,0,1,""]},"rl_coach.agents.dqn_agent.DQNAgent":{act:[48,1,1,""],call_memory:[48,1,1,""],choose_action:[48,1,1,""],collect_savers:[48,1,1,""],create_networks:[48,1,1,""],emulate_act_on_trainer:[48,1,1,""],emulate_observe_on_trainer:[48,1,1,""],get_predictions:[48,1,1,""],get_state_embedding:[48,1,1,""],handle_episode_ended:[48,1,1,""],init_environment_dependent_modules:[48,1,1,""],learn_from_batch:[48,1,1,""],log_to_screen:[48,1,1,""],observe:[48,1,1,""],parent:
[48,2,1,""],phase:[48,2,1,""],post_training_commands:[48,1,1,""],prepare_batch_for_inference:[48,1,1,""],register_signal:[48,1,1,""],reset_evaluation_state:[48,1,1,""],reset_internal_state:[48,1,1,""],restore_checkpoint:[48,1,1,""],run_pre_network_filter_for_inference:[48,1,1,""],save_checkpoint:[48,1,1,""],set_environment_parameters:[48,1,1,""],set_incoming_directive:[48,1,1,""],set_session:[48,1,1,""],setup_logger:[48,1,1,""],sync:[48,1,1,""],train:[48,1,1,""],update_log:[48,1,1,""],update_step_in_episode_log:[48,1,1,""],update_transition_before_adding_to_replay_buffer:[48,1,1,""]},"rl_coach.agents.mmc_agent":{MixedMonteCarloAlgorithmParameters:[16,0,1,""]},"rl_coach.agents.n_step_q_agent":{NStepQAlgorithmParameters:[17,0,1,""]},"rl_coach.agents.naf_agent":{NAFAlgorithmParameters:[18,0,1,""]},"rl_coach.agents.nec_agent":{NECAlgorithmParameters:[19,0,1,""]},"rl_coach.agents.pal_agent":{PALAlgorithmParameters:[20,0,1,""]},"rl_coach.agents.policy_gradients_agent":{PolicyGradientAlgorithmParameters:[9,0,1,""]},"rl_coach.agents.ppo_agent":{PPOAlgorithmParameters:[10,0,1,""]},"rl_coach.agents.qr_dqn_agent":{QuantileRegressionDQNAlgorithmParameters:[21,0,1,""]},"rl_coach.agents.rainbow_dqn_agent":{RainbowDQNAlgorithmParameters:[22,0,1,""]},"rl_coach.architectures.architecture":{Architecture:[23,0,1,""]},"rl_coach.architectures.architecture.Architecture":{accumulate_gradients:[23,1,1,""],apply_and_reset_gradients:[23,1,1,""],apply_gradients:[23,1,1,""],collect_savers:[23,1,1,""],construct:[23,3,1,""],get_variable_value:[23,1,1,""],get_weights:[23,1,1,""],parallel_predict:[23,3,1,""],predict:[23,1,1,""],reset_accumulated_gradients:[23,1,1,""],set_variable_value:[23,1,1,""],set_weights:[23,1,1,""],train_on_batch:[23,1,1,""]},"rl_coach.architectures.network_wrapper":{NetworkWrapper:[23,0,1,""]},"rl_coach.architectures.network_wrapper.NetworkWrapper":{apply_gradients_and_sync_networks:[23,1,1,""],apply_gradients_to_global_network:[23,1,1,""],apply_gradients_to_online_network:[23,1,1,""],collect_savers:[23,1,1,""],parallel_prediction:[23,1,1,""],set_is_training:[23,1,1,""],sync:[23,1,1,""],train_and_sync_networks:[23,1,1,""],update_online_network:[23,1,1,""],update_target_network:[23,1,1,""]},"rl_coach.base_parameters":{AgentParameters:[3,0,1,""],DistributedTaskParameters:[0,0,1,""],NetworkParameters:[23,0,1,""],PresetValidationParameters:[0,0,1,""],TaskParameters:[0,0,1,""],VisualizationParameters:[0,0,1,""]},"rl_coach.core_types":{ActionInfo:[24,0,1,""],Batch:[24,0,1,""],EnvResponse:[24,0,1,""],Episode:[24,0,1,""],Transition:[24,0,1,""]},"rl_coach.core_types.Batch":{actions:[24,1,1,""],game_overs:[24,1,1,""],goals:[24,1,1,""],info:[24,1,1,""],info_as_list:[24,1,1,""],n_step_discounted_rewards:[24,1,1,""],next_states:[24,1,1,""],rewards:[24,1,1,""],shuffle:[24,1,1,""],size:[24,2,1,""],slice:[24,1,1,""],states:[24,1,1,""]},"rl_coach.core_types.Episode":{get_first_transition:[24,1,1,""],get_last_transition:[24,1,1,""],get_transition:[24,1,1,""],get_transitions_attribute:[24,1,1,""],insert:[24,1,1,""],is_empty:[24,1,1,""],length:[24,1,1,""],update_discounted_rewards:[24,1,1,""]},"rl_coach.data_stores.nfs_data_store":{NFSDataStore:[25,0,1,""]},"rl_coach.data_stores.s3_data_store":{S3DataStore:[25,0,1,""]},"rl_coach.environments.carla_environment":{CarlaEnvironment:[26,0,1,""]},"rl_coach.environments.control_suite_environment":{ControlSuiteEnvironment:[26,0,1,""]},"rl_coach.environments.doom_environment":{DoomEnvironment:[26,0,1,""]},"rl_coach.environments.environment":{Environment:[26,0,1,""]},"rl_coach.
environments.environment.Environment":{action_space:[26,2,1,""],close:[26,1,1,""],get_action_from_user:[26,1,1,""],get_available_keys:[26,1,1,""],get_goal:[26,1,1,""],get_random_action:[26,1,1,""],get_rendered_image:[26,1,1,""],goal_space:[26,2,1,""],handle_episode_ended:[26,1,1,""],last_env_response:[26,2,1,""],phase:[26,2,1,""],render:[26,1,1,""],reset_internal_state:[26,1,1,""],set_goal:[26,1,1,""],state_space:[26,2,1,""],step:[26,1,1,""]},"rl_coach.environments.gym_environment":{GymEnvironment:[26,0,1,""]},"rl_coach.environments.starcraft2_environment":{StarCraft2Environment:[26,0,1,""]},"rl_coach.exploration_policies.additive_noise":{AdditiveNoise:[27,0,1,""]},"rl_coach.exploration_policies.boltzmann":{Boltzmann:[27,0,1,""]},"rl_coach.exploration_policies.bootstrapped":{Bootstrapped:[27,0,1,""]},"rl_coach.exploration_policies.categorical":{Categorical:[27,0,1,""]},"rl_coach.exploration_policies.continuous_entropy":{ContinuousEntropy:[27,0,1,""]},"rl_coach.exploration_policies.e_greedy":{EGreedy:[27,0,1,""]},"rl_coach.exploration_policies.exploration_policy":{ExplorationPolicy:[27,0,1,""]},"rl_coach.exploration_policies.exploration_policy.ExplorationPolicy":{change_phase:[27,1,1,""],get_action:[27,1,1,""],requires_action_values:[27,1,1,""],reset:[27,1,1,""]},"rl_coach.exploration_policies.greedy":{Greedy:[27,0,1,""]},"rl_coach.exploration_policies.ou_process":{OUProcess:[27,0,1,""]},"rl_coach.exploration_policies.parameter_noise":{ParameterNoise:[27,0,1,""]},"rl_coach.exploration_policies.truncated_normal":{TruncatedNormal:[27,0,1,""]},"rl_coach.exploration_policies.ucb":{UCB:[27,0,1,""]},"rl_coach.filters.action":{AttentionDiscretization:[30,0,1,""],BoxDiscretization:[30,0,1,""],BoxMasking:[30,0,1,""],FullDiscreteActionSpaceMap:[30,0,1,""],LinearBoxToBoxMap:[30,0,1,""],PartialDiscreteActionSpaceMap:[30,0,1,""]},"rl_coach.filters.observation":{ObservationClippingFilter:[29,0,1,""],ObservationCropFilter:[29,0,1,""],ObservationMoveAxisFilter:[29,0,1,""],ObservationNormalizationFilter:[29,0,1,""],ObservationRGBToYFilter:[29,0,1,""],ObservationReductionBySubPartsNameFilter:[29,0,1,""],ObservationRescaleSizeByFactorFilter:[29,0,1,""],ObservationRescaleToSizeFilter:[29,0,1,""],ObservationSqueezeFilter:[29,0,1,""],ObservationStackingFilter:[29,0,1,""],ObservationToUInt8Filter:[29,0,1,""]},"rl_coach.filters.reward":{RewardClippingFilter:[29,0,1,""],RewardNormalizationFilter:[29,0,1,""],RewardRescaleFilter:[29,0,1,""]},"rl_coach.memories.backend.redis":{RedisPubSubBackend:[32,0,1,""]},"rl_coach.memories.episodic":{EpisodicExperienceReplay:[31,0,1,""],EpisodicHRLHindsightExperienceReplay:[31,0,1,""],EpisodicHindsightExperienceReplay:[31,0,1,""],SingleEpisodeBuffer:[31,0,1,""]},"rl_coach.memories.non_episodic":{BalancedExperienceReplay:[31,0,1,""],ExperienceReplay:[31,0,1,""],PrioritizedExperienceReplay:[31,0,1,""],QDND:[31,0,1,""],TransitionCollection:[31,0,1,""]},"rl_coach.orchestrators.kubernetes_orchestrator":{Kubernetes:[33,0,1,""]},"rl_coach.spaces":{ActionSpace:[34,0,1,""],AttentionActionSpace:[34,0,1,""],BoxActionSpace:[34,0,1,""],CompoundActionSpace:[34,0,1,""],DiscreteActionSpace:[34,0,1,""],GoalsSpace:[34,0,1,""],ImageObservationSpace:[34,0,1,""],MultiSelectActionSpace:[34,0,1,""],ObservationSpace:[34,0,1,""],PlanarMapsObservationSpace:[34,0,1,""],Space:[34,0,1,""],VectorObservationSpace:[34,0,1,""]},"rl_coach.spaces.ActionSpace":{clip_action_to_space:[34,1,1,""],is_point_in_space_shape:[34,1,1,""],sample:[34,1,1,""],sample_with_info:[34,1,1,""],val_matches_space_definition:[34,1,1,""]}
,"rl_coach.spaces.GoalsSpace":{DistanceMetric:[34,0,1,""],clip_action_to_space:[34,1,1,""],distance_from_goal:[34,1,1,""],get_reward_for_goal_and_state:[34,1,1,""],goal_from_state:[34,1,1,""],is_point_in_space_shape:[34,1,1,""],sample:[34,1,1,""],sample_with_info:[34,1,1,""],val_matches_space_definition:[34,1,1,""]},"rl_coach.spaces.ObservationSpace":{is_point_in_space_shape:[34,1,1,""],sample:[34,1,1,""],val_matches_space_definition:[34,1,1,""]},"rl_coach.spaces.Space":{is_point_in_space_shape:[34,1,1,""],sample:[34,1,1,""],val_matches_space_definition:[34,1,1,""]}},objnames:{"0":["py","class","Python class"],"1":["py","method","Python method"],"2":["py","attribute","Python attribute"],"3":["py","staticmethod","Python static method"]},objtypes:{"0":"py:class","1":"py:method","2":"py:attribute","3":"py:staticmethod"},terms:{"100x100":30,"160x160":29,"1_0":[12,22],"1st":27,"20x20":30,"210x160":29,"2nd":27,"50k":38,"9_amd64":41,"\u03b3cdot":14,"abstract":[35,39],"boolean":[3,24,34,48],"break":37,"case":[0,3,5,19,23,24,27,34,47,48,49],"class":[0,1,2,3,4,5,6,7,9,10,12,14,16,17,18,19,20,21,22,23,24,25,26,27,29,30,31,32,33,34,35,36,38,42,48],"default":[0,27,49],"enum":[23,26,34],"export":[0,23,41],"final":[7,13,14,16,20,38],"float":[3,4,5,6,7,9,10,12,16,19,20,21,23,24,26,27,29,30,31,34,35,48],"function":[0,1,3,6,7,10,23,26,27,34,35,36,38,40,48],"import":[15,27,31,36,47,49],"int":[0,3,4,5,6,9,12,17,19,21,22,24,26,27,29,30,31,34,48],"long":40,"new":[0,3,6,7,10,19,20,23,24,30,38,39,46,47,48],"return":[0,3,7,9,10,11,16,19,20,22,23,24,26,27,29,31,34,35,36,38,47,48],"short":[0,38],"static":23,"super":[35,36],"switch":37,"true":[0,3,4,5,6,7,10,19,20,22,23,24,26,27,30,31,34,48],"try":[4,43,47],"while":[0,5,7,8,9,10,23,26,37,40,47,49],AWS:41,Adding:[15,46],And:[36,47],But:[37,47],Doing:47,For:[0,1,2,3,4,6,9,11,12,13,14,17,19,20,23,24,26,27,28,29,30,34,35,36,38,39,40,41,43,48,49],Has:23,Its:48,NFS:[25,41],One:[21,49],That:37,The:[0,1,2,3,4,5,6,7,9,10,11,12,16,17,18,19,20,21,22,23,24,25,26,27,29,30,31,32,33,34,35,37,38,39,40,41,43,44,46,47,48,49],Then:[4,6,7,11,18,20],There:[6,10,23,27,28,35,36,40,49],These:[1,2,3,21,26,33,39,40,41],Use:[1,2,7,18,19],Used:27,Uses:47,Using:[7,11,13,14,41],Will:23,With:[27,46],__init__:[26,35,36],_index:[5,17],_render:36,_restart_environment_episod:36,_take_act:36,_update_st:36,a2c:47,a3c:[9,17,37,47],a_i:19,a_t:[4,5,7,11,12,13,14,16,17,18,20,22],a_valu:5,abl:[30,47],about:[3,24,38,48,49],abov:[7,23,38],abs:[17,31],absolut:27,acceler:18,accept:26,access:[23,35,41],accord:[0,3,4,5,7,11,17,23,24,27,34,37,38,40,48],accordingli:[19,34,38,49],account:[4,6,10,19,20,27],accumul:[3,4,5,9,17,19,22,23,29,47,48],accumulate_gradi:23,accumulated_gradi:23,accur:47,achiev:[0,4,6,26,29,31,34,43,47,49],across:[9,16,37],act:[3,4,7,11,21,34,35,38,48],action:[1,2,3,12,13,14,15,16,17,20,21,22,23,24,26,27,28,31,35,36,38,40,48],action_idx:36,action_intrinsic_reward:24,action_penalti:7,action_prob:24,action_spac:[26,27],action_space_s:23,action_valu:[24,27],actioninfo:[3,34,38,48],actionspac:[27,34],actiontyp:36,activ:[7,23],actor:[3,6,7,10,27,40,47],actor_critic_ag:5,actorcriticag:35,actorcriticalgorithmparamet:5,actual:[4,5,12,13,14,21,22,27,30,31],adam:[6,23],adam_optimizer_beta1:23,adam_optimizer_beta2:23,adapt:[6,10],add:[7,8,18,24,27,29,36,38,41],add_rendered_image_to_env_respons:0,added:[0,4,6,9,10,19,27,31,35],adding:[3,10,27,35,48],addit:[3,23,24,26,27,29,31,34,36,37,38,40,46,48],addition:[23,26,29,35,36,38,43,44,49],additional_fetch:23,additional_simulator_paramet:[26,36],additionali:37,ad
ditive_nois:27,additivenoiseparamet:27,advanc:[22,46],advantag:[3,5,6,10,15,27],affect:[0,11,23],aforement:[13,14,20],after:[0,3,7,9,10,17,18,20,22,23,24,26,29,34,48,49],again:27,agent:[0,1,2,4,5,6,7,9,10,12,14,16,17,18,19,20,21,22,23,24,26,27,28,29,30,34,36,37,40,42,43,46,47,48],agent_param:39,agent_paramet:[3,23,48],agentparamet:[3,23,35],aggreg:38,ahead:[4,47],aim:27,algorithm:[3,24,27,35,37,38,39,43,45,46,48],algorithmparamet:[3,35],all:[0,3,9,11,19,20,23,24,26,27,29,30,34,35,36,37,38,39,40,41,44,48,49],allow:[0,3,4,15,23,24,26,27,28,29,30,31,37,38,39,40,46,47,48,49],allow_brak:26,allow_duplicates_in_batch_sampl:31,allow_no_action_to_be_select:34,along:[19,26,27,44],alpha:[16,20,31],alreadi:[19,24,36,47],also:[5,6,19,20,23,26,34,35,37,43,47,49],altern:[26,36,44],alwai:[23,27,30],amazon:41,amazonaw:41,amount:[7,9,16,20,27,38,47],analysi:37,analyz:37,ani:[3,23,24,26,30,31,35,38,39,40,41,48],anoth:[3,15,23,28,48],answer:47,api:[26,40,44,46],appear:[3,48],appli:[0,3,5,7,9,17,23,24,27,29,47,48],applic:47,apply_and_reset_gradi:23,apply_gradi:23,apply_gradients_and_sync_network:23,apply_gradients_every_x_episod:[5,9,17],apply_gradients_to_global_network:23,apply_gradients_to_online_network:23,apply_stop_condit:0,appropri:41,approx:7,approxim:[40,47],apt:41,arbitrari:29,architectur:[3,15,35,46,48],architecture_num_q_head:27,area:30,arg:[3,23,41,48],argmax_a:[13,16,20],argument:[3,12,22,23,26,34,38,48],around:[23,24,40],arrai:[3,23,24,26,29,34,36,48],art:[3,42],artifact:41,artifici:31,arxiv:[17,31],aspect:[27,29,37],assign:[0,2,5,23,27],assign_kl_coeffici:23,assign_op:23,assum:[24,27,29,31,47],async:[23,39],async_train:23,asynchron:[5,17,23],atari:[14,26,29,41,49],atari_a3c:49,atari_dqn:49,ath:15,atom:[12,21,22],attach:26,attend:30,attent:30,attentionactionspac:30,attentiondiscret:30,attribut:24,attribute_nam:24,author:[26,43,44],auto_select_all_armi:26,autoclean:41,automat:[23,49],autonom:[26,44,46],autoremov:41,auxiliari:[26,44],avail:[4,23,24,26,37,39,41,46,47,49],averag:[6,10,23,37,38],aws:41,axes:[29,37],axi:[29,37],axis_origin:29,axis_target:29,back:[6,39],backend:[23,39,41,46,49],background:49,backpropag:19,backward:23,balanc:2,band:37,base1:41,base64:41,base:[6,10,16,18,20,26,31,35,38,41,44,47],base_paramet:[0,3,23,26,27],baselin:47,basic:[9,24,39,49],batch:[1,2,3,4,5,7,9,10,11,12,13,14,15,17,20,21,22,23,31,35,38,48],batch_siz:23,bc_agent:1,bcalgorithmparamet:1,becaus:38,becom:[7,39],been:[15,24,29,43,47],befor:[3,5,10,22,23,24,29,38,39,40,41,47,48],begin:[0,4,38],behav:34,behavior:[3,29,31,35,43,47,48,49],being:[3,35,46,47,48],bellman:[12,21,22],benchmark:[37,45,46,47],best:[47,49],beta1:23,beta2:23,beta:[7,9,31],beta_entropi:[5,6,9,10],better:[15,47],between:[0,1,2,3,6,7,9,10,12,16,17,19,21,22,23,24,26,27,30,31,34,35,37,38,40,46,47],bfg:[6,10],big:[10,12,22],bilinear:29,bin:[30,41],binari:11,bind:23,binomi:11,bit:29,blizzard:44,blob:[26,29],block:46,blog:46,boilerpl:38,bolling:37,bool:[0,3,4,5,6,7,10,19,20,22,23,24,26,27,31,34,48],boost:[41,47],bootstrap:[3,5,6,7,10,16,17,19,20,22,24,47],bootstrap_total_return_from_old_polici:[19,24],both:[3,6,23,26,27,30,47,48],bound:[6,10,12,22,27,34,47],box2d:41,box:[27,30,34],boxactionspac:30,boxdiscret:30,boxmask:30,breakout:49,breakoutdeterminist:[26,49],bring:10,bucket:41,buffer:[1,2,3,11,12,13,14,17,19,20,21,22,31,38,47,48,49],build:[28,46,47],builder:41,built:[35,38],button:[37,49],c51:12,cach:41,calcul:[3,4,5,6,7,9,10,11,12,13,14,16,17,19,20,21,22,23,24,27,31,35,48],call:[0,3,9,17,23,24,26,38,48],call_memori:[3,48],callabl:34,camera:[26,36],c
amera_height:26,camera_width:26,cameratyp:[26,36],can:[0,2,3,5,6,7,10,20,23,24,26,27,28,29,30,34,35,36,37,38,40,44,46,48,49],cannot:[3,48],carla:[29,44],carla_environ:26,carlaenviron:26,carlaenvironmentparamet:26,carlo:[3,20],cartpol:[26,36],cartpole_a3c:49,cartpole_clippedppo:[41,49],cartpole_dqn:49,categor:[3,5,47],categori:[28,29],categorical_dqn_ag:12,categoricaldqnalgorithmparamet:12,caus:[29,37],cdot:[5,6,7,9,11,12,13,14,16,18,20,22],central:[23,37],chain:7,challeng:38,chang:[0,3,6,7,10,11,15,17,20,27,38,41,48],change_phas:27,channel:[26,29],channels_axi:34,check:[0,3,24,34,48],checkpoint:[0,3,23,25,39,41,48,49],checkpoint_dir:[3,48],checkpoint_prefix:[3,48],checkpoint_restore_dir:[0,49],checkpoint_save_dir:0,checkpoint_save_sec:0,child:23,chmod:41,choic:[35,41],choos:[3,15,20,27,28,30,34,35,38,40,47,48,49],choose_act:[3,35,38,48],chosen:[3,20,27,30,35,48],chunk:10,cil:47,cil_ag:2,cilalgorithmparamet:2,classic_control:41,clean:[26,35,41],cli:41,clip:[3,7,10,23,29,34,47],clip_action_to_spac:34,clip_critic_target:7,clip_gradi:23,clip_high:27,clip_likelihood_ratio_using_epsilon:[6,10],clip_low:27,clip_max:29,clip_min:29,clipbyglobalnorm:23,clipped_ppo_ag:6,clippedppoalgorithmparamet:6,clipping_high:29,clipping_low:29,clone:[3,47],close:26,cmake:41,coach:[0,3,23,25,26,27,28,32,33,35,38,42,43,44,47,49],code:[36,38,47],coeffici:[6,10,23,27,31],collect:[3,6,9,10,17,23,24,31,38,43,46,48,49],collect_sav:[3,23,48],color:29,com:41,combin:[22,40,46,47],comma:0,command:[38,41,49],common:[35,37,41,49],commun:39,compar:[0,10,15,47],complet:[24,27,38],complex:[23,28,38,40,47,49],compon:[3,12,22,23,27,33,35,38,46,48,49],composit:[3,48],compositeag:[3,48],comput:[23,27],concat:23,concentr:38,condit:[0,3],confid:27,config:[26,49],configur:[3,5,9,35,41,48],confus:38,connect:23,connectionist:9,consecut:[7,19],consequ:[17,27],consid:[5,30,37],consist:[7,26,29,30,34,38,44],constantli:49,constantschedul:31,constrain:30,construct:[23,31],consumpt:29,contain:[0,1,2,3,11,23,24,26,36,38,48,49],content:41,contin:39,continu:[1,2,5,7,8,9,18,27,28,30,34,43],continuous_entropi:27,continuous_exploration_policy_paramet:27,contribut:[4,46],control:[2,3,5,6,7,10,23,27,29,37,44,46,47,48],control_suite_environ:26,controlsuiteenviron:26,conveni:[37,49],converg:9,convers:28,convert:[3,24,27,29,34,38,40,48],convolut:[23,40],coordin:30,copi:[7,11,12,13,14,16,17,18,20,21,22,23,41],core:[3,46,48],core_typ:[3,24,26,34,48],correct:[3,47],correctli:23,correl:27,correpond:24,correspond:[2,3,4,12,13,23,24,27,29,34,36,48],could:[3,23,34,41,48],count:16,countabl:30,counter:[3,48],counterpart:40,cpu:[0,23],crd:49,creat:[3,17,23,29,36,48,49],create_network:[3,48],create_target_network:23,creation:[3,48],credenti:41,critic:[3,6,7,10,27,40,47],crop:[29,30],crop_high:29,crop_low:29,cross:[1,12,22],csv:0,ctrl:37,cuda:41,cudnn7:41,curl:41,curr_stat:[3,35,48],current:[0,1,2,3,4,6,7,8,9,10,11,13,14,16,18,19,20,21,23,24,26,27,29,30,34,35,38,46,47,48],custom:[26,27,34,35,38],custom_reward_threshold:26,cycl:38,dai:49,dashboard:[0,3,41,46,48],data:[0,9,17,23,31,38,39,41,43,46,47,49],data_stor:[25,41],dataset:[6,10,47,49],date:[19,40,47,49],dcp:[41,49],ddpg:47,ddpg_agent:7,ddpgalgorithmparamet:7,ddqn:[16,20,47],deal:47,debug:[0,37,46],decai:[5,6,10,23],decid:[0,3,4,26,35,48],decis:[3,48],decod:41,dedic:23,deep:[0,3,5,11,13,15,17,18,22,48],deepmind:44,def:[35,36],default_act:34,default_input_filt:36,default_output_filt:36,defin:[0,3,5,6,9,10,17,19,20,23,24,26,27,29,30,31,34,35,36,38,39,40,43,44,48,49],definit:[3,23,26,34,36,38,48],delai:47,delta:[12,
19,22],demonstr:[1,2,49],dens:27,densiti:16,depend:[0,3,23,29,31,34,36,41,43,47,48],deploi:[33,39],depth:26,descend:47,describ:[3,12,21,29,31,35,38,41,48],descript:[3,30,34,42,49],design:[38,41,46],desir:[30,35],destabil:9,detail:[3,24,42,44,46,49],determin:[2,3,19,24,31,48],determinist:[3,47],dev:41,develop:[38,43],deviat:[9,10,27,29,37],devic:23,dfp:47,dfp_agent:4,dfpalgorithmparamet:4,dict:[3,4,23,24,26,27,34,48],dict_siz:31,dictat:4,dictionari:[2,3,23,24,26,31,34,35,48],did:26,differ:[0,1,2,3,4,5,6,9,10,11,15,23,26,27,29,34,35,36,37,39,40,46,47,48],differenti:15,difficult:[37,43],difficulti:49,dimens:[24,26,29,30],dimension:[10,30],dir:[3,48,49],direct:[3,26,48],directli:[3,5,38,40,48],directori:[0,23,35,37,41,49],disabl:49,disable_fog:26,disappear:26,disassembl:47,discard:[24,29],discount:[7,9,10,16,19,20,22,23,24,47],discret:[1,2,4,6,10,11,12,13,14,15,16,17,19,20,21,22,27,28,29,30,34,38],disentangl:38,disk:0,displai:[0,37],distanc:34,distance_from_go:34,distance_metr:34,distancemetr:34,distil:[3,48],distribut:[3,5,9,10,12,21,22,23,25,27,32,33,34,40,46,47,48,49],distributed_coach:39,distributed_coach_synchronization_typ:39,distributedcoachsynchronizationtyp:39,divereg:[6,10],diverg:[6,10,22],dnd:[0,19,47],dnd_key_error_threshold:19,dnd_size:19,do_action_hindsight:31,doc:41,docker:41,dockerfil:41,document:44,doe:[11,23,29],doesn:39,doing:[6,10,28],domain:40,don:[4,27,37,47],done:[0,3,6,9,10,26,29,36,48,49],doom:[26,36,41,44],doom_basic_bc:49,doom_basic_dqn:49,doom_environ:[26,36,49],doomenviron:[26,36],doomenvironmentparamet:[36,49],doominputfilt:36,doomlevel:26,doomoutputfilt:36,doubl:[3,16,22],down:[23,26],download:41,dpkg:41,dqn:[3,16,17,22,26,27,29,30,38,40,47],dqn_agent:[14,48],dqnagent:48,dqnalgorithmparamet:14,drive:[2,26,44,46],driving_benchmark:26,due:29,duel:[3,22],dump:[0,3,48],dump_csv:0,dump_gif:0,dump_in_episode_sign:0,dump_mp4:0,dump_one_value_per_episod:[3,48],dump_one_value_per_step:[3,48],dump_parameters_document:0,dump_signals_to_csv_every_x_episod:0,dure:[3,6,9,10,11,19,27,37,38,48,49],dynam:[37,43,47],e_greedi:27,each:[0,1,2,3,4,5,6,9,10,11,13,14,15,17,19,20,21,23,24,26,27,28,29,30,31,34,35,37,38,39,40,41,43,47,48],eas:37,easi:[36,37,46],easier:40,easili:[27,49],echo:41,effect:[0,3,6,17,29,38,48],effici:[38,47],either:[0,3,5,17,23,27,34,37,40,49],element:[3,11,23,29,34],elf:41,embbed:23,embed:[3,19,23,48],embedd:[23,40],embedding_merger_typ:23,embeddingmergertyp:23,empti:24,emul:[3,48],emulate_act_on_train:[3,48],emulate_observe_on_train:[3,48],enabl:[23,40,49],encod:[29,34],encourag:[18,20,38],end:[2,3,9,22,24,26,29,48,49],enforc:30,engin:[26,44],enough:[4,19],ensembl:[27,47],ensur:23,enter:[3,48,49],entir:[10,16,19,22,27,30,38],entri:[19,38],entropi:[1,5,6,9,10,12,22,27],enumer:34,env:[24,41],env_param:36,env_respons:[3,48],enviorn:26,environ:[0,3,4,15,23,24,27,28,29,30,34,35,38,41,43,45,46,48],environmentparamet:[26,36],envrespons:[0,3,26,48],episod:[0,3,4,5,9,10,11,16,17,22,26,27,35,36,37,38,39,48,49],episode_max_tim:26,episodic_hindsight_experience_replai:31,epoch:6,epsilon:[6,27,31],epsilon_schedul:27,equal:2,equat:[7,13,14,17,21],error:[23,47],escap:49,especi:15,essenti:[17,23,30,36,38,41],estim:[5,6,10,11,16,20,27],estimate_state_value_using_ga:[5,6,10],eta:[6,10],etc:[0,3,23,26,28,34,35,44,48],evalu:[0,3,23,24,27,38,48],evaluate_onli:0,evaluation_epsilon:27,evaluation_noise_percentag:27,even:[15,23,26,36,37,38,47],everi:[0,5,7,9,11,12,13,14,16,17,18,20,21,22,49],exact:[19,27,43],exactli:23,exampl:[2,3,4,23,24,26,27,28,29,30,34,35,36,38,40,48,49],except:[17,
24],execut:[24,37,38],exhibit:[3,35,48],exist:[19,23],exit:[3,48],expand_dim:24,expect:[0,3,27,43,48],experi:[0,7,10,22,26,31,32,37,38,39,41,46,47,49],experiment_path:[0,26],experiment_suit:26,experimentsuit:26,expert:[1,2,24,47],exploit:[27,38],explor:[3,4,5,6,7,8,10,11,16,18,19,35,38,46,47],exploration_polici:27,explorationparamet:[3,27,35],exponenti:[6,10,22,23],expor:3,export_onnx_graph:0,expos:[37,40,46],extend:[26,27,44],extens:[26,44],extent:49,extern:0,extra:[23,24,40],extract:[3,18,19,24,29,34,37,38,48],factor:[7,9,10,20,22,23,24,27,29],faithfulli:37,fake:34,fals:[0,3,7,23,24,26,27,30,31,34,36,48],far:[10,29,38,43],faster:[15,47],featur:[7,26,40,46,47],feature_minimap_maps_to_us:26,feature_screen_maps_to_us:26,fetch:[23,24],fetched_tensor:23,few:[9,11,12,13,14,16,20,21,22,27,36],field:[43,46],file:[0,3,35,38,48,49],fill:[24,36],filter:[0,3,46,48],find:[13,37,44,46],finish:[19,49],finit:30,first:[0,7,10,11,19,21,22,23,24,29,38,40],fit:34,flag:[0,3,23,24,26,48],flexibl:39,flicker:26,flow:[28,46],follow:[2,3,5,7,9,12,13,14,17,18,19,21,22,23,24,26,27,31,35,36,41,43,47,48],footprint:29,forc:[23,26,30,36],force_cpu:23,force_environment_reset:[26,36],force_int_bin:30,forced_attention_s:34,form:[4,17,34,47],format:35,formul:5,forward:[23,27],found:[3,42,49],frac:[6,12,22],fraction:[6,10],frame:[0,26],frame_skip:26,framework:[0,3,23,35,46,48],framework_typ:0,free:[26,44],freeglut3:41,from:[0,1,2,3,4,5,6,7,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,26,27,28,29,30,32,34,35,36,37,38,39,40,41,43,44,46,48,49],full:[3,9,16,30,48],fulldiscreteactionspacemap:30,fulli:23,func:[3,48],futur:[0,3,9,24,47],future_measurements_weight:4,gae:[5,6,10],gae_lambda:[5,6,10],game:[3,24,26,44,46,48,49],game_ov:24,gamma:[5,7,11,12,13,14,16,17,18,19,20,22],gap:[20,47],gather:39,gaussian:[10,27],gener:[0,5,6,10,11,23,26,27,31,34,35,41,49],general_network:35,get:[3,4,6,7,8,9,10,11,13,14,16,18,20,23,24,26,27,34,38,40,41,43,48],get_act:27,get_action_from_us:26,get_available_kei:26,get_first_transit:24,get_goal:26,get_last_env_respons:26,get_last_transit:24,get_output_head:35,get_predict:[3,48],get_random_act:26,get_rendered_imag:[26,36],get_reward_for_goal_and_st:34,get_state_embed:[3,48],get_transit:24,get_transitions_attribut:24,get_variable_valu:23,get_weight:23,gfortran:41,gif:0,git:41,github:[36,41,43,46],given:[0,1,2,3,4,5,7,9,10,23,24,26,27,29,30,31,34,35,38,48],given_weight:23,global:[3,23,40,48],global_network:23,glx:41,goal:[1,2,3,4,23,24,26,31,38,40,47,48],goal_from_st:34,goal_nam:34,goal_spac:26,goal_vector:4,goals_spac:31,goalsspac:[31,34],goaltorewardconvers:34,going:28,good:[36,37],gpu:[0,23],gracefulli:49,gradient:[3,5,6,10,17,19,23,35,47,48],gradientclippingmethod:23,gradients_clipping_method:23,granular:31,graph:0,graphmanag:38,grayscal:[29,34],greedili:38,group:37,grow:22,guidelin:47,gym:[41,44],gym_environ:[26,49],gymenviron:26,gymenvironmentparamet:36,hac:47,had:43,hand:[15,29,38,47],handl:4,handle_episode_end:[3,26,48],handling_targets_after_episode_end:4,handlingtargetsafterepisodeend:4,hard:[37,47],harder:37,has:[0,3,15,19,20,24,27,29,38,40,43,47,48],has_glob:23,has_target:23,hat:[6,12,22],have:[0,3,4,23,26,27,29,30,31,38,40,43,48],head:[1,2,3,5,9,11,15,18,19,23,27,35,40,48],headparamet:23,heads_paramet:23,health_gath:26,heatup:[27,38],help:[20,24,37,38,47],here:[36,38],heurist:[10,27],hide:40,hierarch:[34,38],hierarchi:[3,38,47,48],high:[7,10,29,30,34,37],high_i:34,high_kl_penalty_coeffici:10,high_x:34,higher:10,highest:[5,9,20,27,29,30,34],highli:[0,36,47],hindsight:[8,31,47],hindsight_go
al_selection_method:31,hindsight_transitions_per_regular_transit:31,hindsightgoalselectionmethod:31,hold:[11,23,24,31,37,38,40],horizont:[41,46,49],host:41,hostnam:0,hot:34,how:[4,6,10,27,39,41,47,49],hrl:31,html:41,http:[17,31,41],hub:41,huber:21,huber_loss_interv:21,human:[0,26],human_control:26,hyper:[35,43],hyperparamet:35,ident:23,identifi:[23,34],ignor:26,imag:[0,23,26,29,30,34,36,40,49],image1:41,imit:[3,24,42,47],impact:23,implement:[3,6,10,23,25,26,27,31,35,36,39,43,47,49],impli:49,implment:33,importance_weight:23,importantli:38,improv:[5,15,22,26,38,47],includ:[0,3,4,26,28,29,33,40,44,48,49],increas:[10,20,29,47],increment:[3,48],index:[0,2,24,26,29,30,31],indic:34,inf:[29,34],infer:[3,23,26,48],infinit:47,info:[3,11,24,34,36,48],info_as_list:24,inform:[3,4,17,24,26,28,37,38,41,44,48],inherit:[3,35,36],init_environment_dependent_modul:[3,48],initi:[3,4,10,20,23,24,35,38,46,48],initial_feed_dict:23,initial_kl_coeffici:10,innov:47,input:[1,2,3,4,7,11,13,14,16,18,19,20,23,28,34,38,40,48],input_embedders_paramet:23,input_high:29,input_low:29,input_space_high:30,input_space_low:30,inputembedderparamet:23,inputfilt:38,insert:[19,24],inspect:0,instal:[41,49],instanc:[3,32,34,40],instanti:[3,26,38],instead:[0,3,6,17,20,23,29,30,38,47,48],instruct:49,intact:[11,43],integ:[0,29,30],integr:[36,38,39,46],intel:46,intend:[9,23,27,38],interact:[24,38,39,46,49],interest:[23,37],interfac:[26,37,39,44],intermedi:19,intern:[3,9,17,23,24,28,38,48,49],interpol:29,intersect:47,interv:21,intrins:24,intro:46,introduc:47,invers:[26,44],invok:38,involv:35,is_empti:24,is_point_in_space_shap:34,item:24,iter:[3,5,7,10,15,23,48],its:[0,3,12,22,23,24,27,34,38,41,47,48,49],itself:[23,34,49],job:0,job_typ:0,joint:26,json:0,jump:[4,30],jupyt:35,just:[3,10,20,22,36,38,40,48,49],kapa:21,keep:[14,24,29,49],kei:[2,19,23,24,26,31,35,37,41,49],key_error_threshold:31,key_width:31,keyboard:[26,49],keyword:23,kl_coeffici:23,kl_coefficient_ph:23,know:[3,47,48,49],knowledg:[3,38,48],known:[24,37,43,47],kubeconfig:33,kubernet:41,kubernetes_orchestr:33,kubernetesparamet:33,kwarg:[23,26],l2_norm_added_delta:19,l2_regular:23,lack:37,lamb:27,lambda:[5,6,10,27],lane:2,larg:[27,29,44],larger:23,last:[4,10,19,24,26,29],last_env_respons:26,lastli:38,later:[0,3,23,48,49],latest:[17,19,38,41],layer:[23,27,31,38,40],lazi:[24,29],lazystack:29,lbfg:23,ld_library_path:41,lead:27,learn:[0,3,4,5,7,8,9,11,12,13,14,15,18,21,22,23,24,26,27,29,37,38,40,42,43,44,47,48],learn_from_batch:[3,35,38,48],learner:23,learning_r:[23,31],learning_rate_decay_r:23,learning_rate_decay_step:23,least:[40,47],leav:[10,11],left:[2,47],length:[4,5,6,10,17,19,23,24],less:[15,47],level:[0,3,23,26,36,48,49],levelmanag:[3,38,48],levelselect:26,libatla:41,libav:41,libavformat:41,libbla:41,libboost:41,libbz2:41,libfluidsynth:41,libgl1:41,libglew:41,libgm:41,libgstream:41,libgtk2:41,libgtk:41,libjpeg:41,liblapack:41,libnotifi:41,libopen:41,libosmesa6:41,libportmidi:41,librari:[26,41,44],libsdl1:41,libsdl2:41,libsdl:41,libsm:41,libsmpeg:41,libswscal:41,libtiff:41,libwebkitgtk:41,libwildmidi:41,like:[26,34,38,40,41,47],likelihood:[6,10],line:[3,38,48,49],linear:30,linearboxtoboxmap:30,linearli:30,list:[0,3,4,23,24,26,27,29,30,34,35,48,49],load:[0,37,39,49],load_memory_from_file_path:49,local:[3,40,41,48],locat:[21,24,29,47],log:[0,3,5,9,48],log_to_screen:[3,48],logger:[0,3,48],look:[36,41],loop:38,loss:[1,2,3,6,9,10,12,13,14,21,22,23,27,35,40,48],lot:[27,37,43,47],low:[7,10,29,30,34],low_i:34,low_x:34,lower:[0,31,38],lowest:[29,30,34],lstm:40,lumin:29,lvert:[12,22],lvl:49
,mai:[0,23,42,49],main:[3,35,38,40,42,48,49],mainli:39,major:27,make:[0,3,23,26,35,37,41,43,47,48],manag:[3,23,39,41,48],mandatori:[34,36,40],mani:[3,15,42,43],manner:[10,16,17,20,29,38],manual:41,map:[3,23,26,28,29,30,34,35,48],mark:24,markdown:48,mask:[11,30],masked_target_space_high:30,masked_target_space_low:30,master:[3,38,41,48],match:[2,19,23,34],mathbb:5,mathop:5,max:[5,12,17,22,29],max_a:[11,14,19,20],max_action_valu:24,max_episodes_to_achieve_reward:0,max_fps_for_human_control:0,max_over_num_fram:26,max_simultaneous_selected_act:34,max_siz:31,max_spe:26,maxim:[4,13],maximum:[0,12,14,19,20,24,26,27,29,31],mean:[0,2,6,7,8,9,10,18,23,27,29,30,34,37,47],meant:40,measur:[3,4,23,26,29,34,36,47,48],measurements_nam:34,mechan:[28,39,43,49],memor:47,memori:[3,22,24,29,35,38,39,41,46,47,48],memory_backend:41,memorygranular:31,memoryparamet:[3,35],merg:[23,26],mesa:41,method:[0,5,6,10,17,23,29,31],metric:[0,34,37],middlewar:[19,23,40],middleware_paramet:23,middlewareparamet:23,midpoint:21,might:[3,9,26,35,40,48],min:[6,12,20,22],min_reward_threshold:0,mind:49,minim:[2,4,12],minimap_s:26,minimum:[0,6,29],mix:[3,6,10,19,20,47],mixedmontecarloalgorithmparamet:16,mixer1:41,mixtur:[16,23],mjkei:41,mjpro150:41,mjpro150_linux:41,mkdir:41,mmc:[16,47],mmc_agent:16,mode:[20,23,25,32,33,38,39,41,49],model:[0,16,18,23,46,49],modif:47,modul:[3,35,38,39,48],modular:[35,38,40,46],monitor:39,mont:[3,20],monte_carlo_mixing_r:[16,20],more:[3,7,17,23,29,35,37,38,40,41,46,48,49],moreov:37,most:[3,9,19,23,24,27,40,43,47,48,49],mostli:[29,38],motiv:38,move:[6,10,29,37,43],mp4:0,mse:[2,13,14,21],much:[6,10,38,47],mujoco:[26,30,36,41,44],mujoco_kei:41,mujoco_pi:41,multi:[10,23,34,40],multiarrai:[3,48],multidimension:34,multipl:[4,6,10,17,23,26,27,29,30,31,34,37,38,43,46,49],multipli:[4,9,23,29],multiselect:30,multitask:[26,44],must:[23,29,43],mxnet:49,n_step:[19,22,24,31],n_step_discounted_reward:24,n_step_q_ag:17,nabla:7,nabla_:7,nabla_a:7,naf:47,naf_ag:18,nafalgorithmparamet:18,name:[3,23,24,26,29,34,35,41,48,49],namespac:33,nasm:41,nativ:[0,26,36,44],native_rend:0,navig:3,ndarrai:[3,23,24,26,27,29,30,34,36,48],nearest:19,neat:37,nec:[0,47],nec_ag:19,necalgorithmparamet:19,necessari:[3,19,23,48],necessarili:29,need:[0,3,22,23,26,27,34,35,38,43,47,48,49],neg:[4,29],neighbor:19,neon_compon:35,nervanasystem:41,network:[0,3,23,27,35,38,43,46,47,48,49],network_input_tupl:23,network_nam:[3,48],network_param:27,network_paramet:23,network_wrapp:[3,23,48],networkparamet:[3,23,27,35],networkwrapp:[3,48],neural:[3,16,23,40,43],never:23,new_value_shift_coeffici:[19,31],new_weight:23,newli:[20,36,47],next:[3,7,13,14,18,20,21,24,26,38,48,49],next_stat:24,nfs_data_stor:25,nfsdatastoreparamet:25,nice:49,no_accumul:23,node:[23,40],nois:[7,8,18,27,38],noise_percentage_schedul:27,noisi:[9,22,27],non_episod:31,none:[0,3,6,7,10,23,24,26,27,29,30,34,36,48],norm:23,norm_unclipped_grad:23,norm_unclippsed_grad:23,normal:[3,4,9,27,28,29,34],note:[19,23,27,48],notebook:35,notic:[23,47],notori:[37,43,47],now:[6,36],nstepqalgorithmparamet:17,nth:22,num_act:[19,31,34],num_bins_per_dimens:30,num_class:31,num_consecutive_playing_step:[3,7,48],num_consecutive_training_step:[3,48],num_gpu:0,num_neighbor:31,num_predicted_steps_ahead:4,num_speedup_step:26,num_steps_between_copying_online_weights_to_target:[7,17],num_steps_between_gradient_upd:[5,9,17],num_task:0,num_training_task:0,num_work:0,number:[0,2,4,5,7,9,11,12,17,19,21,22,23,24,26,27,29,30,31,37,44,49],number_of_knn:19,numpi:[3,23,24,26,27,29,30,34,36,48],nvidia:41,object:[0,3,22,23,26,27,2
9,31,38,48],observ:[0,3,4,10,23,24,26,28,36,38,48],observation_reduction_by_sub_parts_name_filt:29,observation_rescale_size_by_factor_filt:29,observation_rescale_to_size_filt:29,observation_space_s:23,observation_space_typ:26,observation_stat:29,observation_typ:26,observationspac:34,observationspacetyp:26,observationtyp:26,obtain:[3,48],off:[39,47],offer:[26,44],often:[37,38,40],old:[6,10,23,47],old_weight:23,onc:[0,6,9,10,11,12,13,14,16,17,20,21,22,23,34,49],one:[0,3,15,19,20,23,24,26,27,28,31,34,36,37,40,47,48],ones:[36,47],onli:[0,3,4,5,6,9,10,11,12,14,15,17,19,21,22,23,24,26,27,29,30,36,38,47,48,49],onlin:[7,11,12,13,14,16,17,18,19,20,21,22,23,38,40],online_network:23,onnx:[0,23],onto:28,open:[0,26,44],openai:[41,44],opencv:41,oper:[20,23,29],optim:[3,4,23,42],optimization_epoch:6,optimizer_epsilon:23,optimizer_typ:23,option:[9,23,26,30,34,35,37,39,40,49],orchestr:[39,41,46],order:[0,3,5,6,7,9,10,13,14,15,17,18,19,20,21,23,24,28,29,30,37,38,40,43,47,48],org:[17,31],origin:[17,29,30,43],ornstein:[7,8,27],other:[0,2,9,15,20,23,26,28,29,31,37,38,47],otherwis:[10,11,23,26,27,34],ou_process:27,our:6,out:[2,13,14,27,28,30,37,41,46,47,49],outcom:[27,38],output:[0,4,7,11,12,18,19,23,27,28,29,34,35,40],output_0_0:23,output_observation_spac:29,outputfilt:38,outsid:[4,27],over:[3,6,9,10,17,19,22,23,24,27,29,30,37,38,47,48],overestim:7,overfit:10,overhead:0,overlai:37,override_existing_kei:31,overriden:35,overview:38,overwhelm:38,overwritten:23,own:[23,35],p_j:[12,22],page:[3,43],pair:[0,34],pal:[20,47],pal_ag:20,pal_alpha:20,palalgorithmparamet:20,paper:[5,9,12,17,19,21,26,31,43],parallel:[23,37,40],parallel_predict:23,param:[3,23,24,25,26,27,32,33,35,36,48],paramet:[2,3,4,5,6,7,9,10,12,16,17,19,20,21,22,23,24,25,26,27,29,30,31,32,33,34,35,36,43,46,48,49],parameter_nois:27,parameters_server_host:0,parent:[3,23,48],parent_path_suffix:[3,23,48],parmet:3,pars:38,part:[0,11,23,24,27,29,30,39,40,43,47],part_nam:29,partial:30,partialdiscreteactionspacemap:30,particular:4,particularli:[26,27,34,43,47],pass:[0,4,7,8,18,19,23,26,27,28,36,37,38,40,49],patamet:19,patchelf:41,patchelf_0:41,path:[0,3,23,35,36,41,48,49],pattern:38,pdf:31,penal:[6,7,10],penalti:10,pendulum_hac:36,pendulum_with_go:36,pendulumwithgo:36,per:[0,3,4,34,35,38,48],percentag:27,percentil:27,perceptron:40,perform:[0,3,23,24,29,31,36,37,38,47,48],period:[40,49],persist:3,persistent_advantage_learn:20,perspect:12,phase:[3,6,7,8,10,23,26,27,38,48],phi:[12,22],physic:[26,44],pi_:6,pick:26,pickl:49,pip3:41,pip:41,pixel:26,place:[30,37,38],placehold:[23,27],plai:[0,3,9,11,13,14,17,27,35,37,48],plain:40,planarmap:26,planarmapsobservationspac:29,platform:[26,44],pleas:[17,43],plu:23,plugin:41,point:[29,34,38,39],polici:[1,3,4,5,8,11,17,18,19,25,35,38,39,40,41,42,46,47],policy_gradient_rescal:[5,6,9,10],policy_gradients_ag:9,policygradientalgorithmparamet:9,policygradientrescal:[5,6,9,10],policyoptimizationag:35,popul:38,popular:[26,44],port:0,posit:[4,29],possibl:[2,3,4,19,27,30,34,37,40,46,47,48,49],post:[28,46],post_training_command:[3,48],power:[26,44],ppo:[6,10,47],ppo_ag:10,ppoalgorithmparamet:10,pre:[7,27,28],predefin:[11,20,27,49],predict:[1,2,3,5,6,7,10,11,12,13,14,20,21,22,23,27,40,47,48],prediction_typ:[3,48],predictiontyp:[3,48],prefect:47,prefer:23,prefix:[3,48],prep:41,prepar:[3,48],prepare_batch_for_infer:[3,48],present:[15,19,26,29,47],preset:[0,5,35,36,38,39,41,49],press:[37,49],prevent:[7,10,38],previou:29,previous:[10,23],print:[0,3,49],print_networks_summari:0,priorit:[22,31],prioriti:[22,31],privat:34,probabilit:5,probabl:[3,
5,9,11,12,22,24,27,35,47,48],process:[0,3,7,8,23,27,28,29,30,35,37,38,40,43,46,48],produc:23,progress:23,project:[12,22],propag:6,propagate_updates_to_dnd:19,properti:[23,31,35,36,41],proport:31,provid:[23,39],proxi:38,proxim:3,pub:[32,33,41],publish:43,purpos:[0,3,9],pursuit:2,pybullet:[26,44],pygam:[0,41],pytest:41,python3:41,python:[26,31,35,41,44,46],qr_dqn_agent:21,qualiti:26,quantil:[3,47],quantileregressiondqnalgorithmparamet:21,queri:[19,23,38,47],question:47,quit:37,r_i:[5,17],r_t:[4,6,22],rainbow:[3,35,47],rainbow_ag:35,rainbow_dqn_ag:22,rainbowag:35,rainbowagentparamet:35,rainbowalgorithmparamet:35,rainbowdqnalgorithmparamet:22,rainbowexplorationparamet:35,rainbowmemoryparamet:35,rainbownetworkparamet:35,rais:[3,24,48],ramp:[35,38],random:[0,17,26,27,34,38,43],random_initialization_step:26,randomli:[24,38],rang:[6,7,10,12,22,26,27,29,30,34,47],rare:19,rate:[0,16,19,23,26,40],rate_for_copying_weights_to_target:7,rather:[4,37],ratio:[6,10,16,29],raw:[26,44],reach:[0,10,34],read:25,readabl:38,readm:41,real:3,reason:[29,43],rebuild_on_every_upd:31,receiv:[23,24],recent:[3,22,23,47,48],recommend:36,redi:[32,33,41],redispubsub:41,redispubsubmemorybackendparamet:32,reduc:[1,2,9,10,20,23,29,38,47],reduct:29,reduction_method:29,reductionmethod:29,redund:29,refer:[2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,39,41],referenc:3,regard:[3,48],regist:[3,48],register_sign:[3,48],registri:41,regress:[2,3,47],regula:[6,10],regular:[5,6,9,10,17,19,23,27,30,31,47],regularli:23,reinforc:[3,5,7,8,9,12,13,14,15,17,20,21,22,26,27,37,38,40,42,43,44,47],rel:27,relat:[23,41],relationship:47,releas:[46,47],relev:[3,11,27,29,48],remov:29,render:[0,3,26,36],reorder:29,repeat:[26,38],replac:[27,29,31,41],replace_mse_with_huber_loss:23,replai:[1,2,3,7,11,12,13,14,17,19,20,21,22,31,38,47,48,49],replay_buff:49,replicated_devic:23,repo:36,repositori:46,repres:[0,6,10,12,22,23,24,26,27,30,34,49],represent:40,reproduc:[38,43],request:[3,23,48],requir:[3,23,25,27,29,37,40,41,47,48],requires_action_valu:27,rescal:[4,5,6,9,10,23,28,29],rescale_factor:29,rescaleinterpolationtyp:29,rescaling_interpolation_typ:29,research:[26,43,44],reset:[3,19,23,26,27,36,48],reset_accumulated_gradi:23,reset_evaluation_st:[3,48],reset_gradi:23,reset_internal_st:[3,26,48],resourc:[39,41],respect:[7,24,26],respons:[3,24,26,38,48],rest:[23,24,30,41],restart:36,restor:[0,3,48],restore_checkpoint:[3,48],result:[3,4,12,13,14,15,21,22,23,29,30,43,47,48,49],retriev:[19,31],return_additional_data:31,reus:38,reusabl:40,reward:[0,1,2,3,4,7,9,16,17,22,23,24,26,28,34,36,37,38,47,48],reward_test_level:0,reward_typ:34,rgb:[26,29,34],rho:7,right:[2,3,27,30,37,47,48],rl_coach:[0,1,2,3,4,5,6,7,9,10,12,14,16,17,18,19,20,21,22,23,24,25,26,27,29,30,31,32,33,34,36,41,48,49],rms_prop_optimizer_decai:23,rmsprop:23,roboschool:[26,44],robot:[26,34,44,46],roboti:41,rollout:[3,25,32,33,39,41,48,49],root:[37,41],rule:[7,11],run:[0,3,4,7,9,10,11,13,14,19,20,23,26,27,29,48,49],run_pre_network_filter_for_infer:[3,48],runphas:[3,48],runtim:41,rvert:[12,22],s3_bucket_nam:41,s3_creds_fil:41,s3_data_stor:25,s3_end_point:41,s3datastoreparamet:25,s_t:[4,5,7,11,12,13,14,16,17,18,20,22],sai:47,same:[3,4,6,9,16,17,20,23,26,30,31,37,40,43,47,48],sampl:[1,2,3,5,7,9,10,11,12,13,14,16,17,20,21,22,23,27,31,34,38,41,48],sample_with_info:34,satur:7,save:[0,3,22,23,27,41,48,49],save_checkpoint:[3,48],saver:[3,23,48],savercollect:[3,23,48],scale:[4,9,23,29,37,41,46,49],scale_down_gradients_by_number_of_workers_for_sync_train:23,scale_measurements_target:4,scaler:23,schedul:[6,2
7,31,38,39,41,49],scheme:[5,27,38,47],schulman:10,sci:41,scienc:43,scipi:[29,41],scope:23,scratch:47,scratchpad:0,screen:[3,26,36,49],screen_siz:26,script:38,second:[0,23,37,47,49],section:[41,42,44],see:[3,26,29,41,43,44,47,48,49],seed:[0,26,43],seen:[4,19,20,26,29,38,43,47],segment:[26,34],select:[5,11,19,23,24,27,29,30,34,36,37,38,46,49],self:[3,23,35,36,48],send:[36,40],separ:[0,3,15,29,30,40,42,47],separate_actions_for_throttle_and_brak:26,seper:9,sequenti:[4,24,31],serv:[6,9,40],server:0,server_height:26,server_width:26,sess:[3,23,48],session:[3,23,48],set:[0,2,3,4,5,6,7,10,12,13,14,16,19,20,22,23,24,26,27,29,30,34,35,39,43,44,46,47,48,49],set_environment_paramet:[3,48],set_goal:26,set_incoming_direct:[3,48],set_is_train:23,set_sess:[3,48],set_variable_valu:23,set_weight:23,setup:[3,41,48],setup_logg:[3,48],setuptool:41,sever:[0,3,6,9,10,11,23,26,27,29,35,36,37,38,40,44,47,48,49],shape:[23,29,34],share:[0,3,23,31,40,48],shared_memory_scratchpad:0,shared_optim:23,shift:[30,38],shine:37,should:[0,3,4,6,10,11,17,20,23,24,26,29,31,34,35,36,39,48,49],should_dump:0,shouldn:11,show:43,shown:43,shuffl:24,side:[3,48],sigma:27,signal:[3,38,48],signal_nam:[3,48],significantli:15,similar:[6,15,17,24,26,30,47],simpl:[9,31,35,36,40,46,47,49],simplest:47,simplif:47,simplifi:[6,37,40],simul:[26,36,44,49],simultan:6,sinc:[3,6,7,9,17,19,20,22,23,27,29,48],singl:[3,4,5,6,10,11,15,16,17,23,24,26,27,30,34,37,38,40,48],size:[23,24,27,29,30,31,34],skill:47,skip:[26,38],slave:[3,48],slice:24,slow:[23,49],slower:[0,15,23],slowli:7,small:[6,19,31],smaller:27,smooth:37,soft:[7,10,18],softmax:27,softwar:41,solut:47,solv:[29,36,44,46],some:[0,3,10,23,24,27,29,35,36,37,40,43,47,48,49],sort:21,sourc:[0,1,2,3,4,5,6,7,9,10,12,14,16,17,18,19,20,21,22,23,24,25,26,27,29,30,31,32,33,34,36,41,44,48],space:[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,26,27,28,29,30,31,38,46,48],spacesdefinit:[3,23,48],spatial:47,spawn:[39,41],special:15,specif:[0,3,11,15,19,23,24,35,38,49],specifi:[0,23,26,27,29,36,39,49],speed:[23,29,47],speedup:49,spread:[29,30],squar:29,squeeze_list:23,squeeze_output:23,src:41,stabil:[17,23,47],stabl:[40,47],stack:[3,28,29,34,48],stack_siz:[23,29],stacking_axi:29,stage:40,stai:43,standard:[6,9,10,11,27,29,37],starcraft2_environ:26,starcraft2environ:26,starcraft:[34,44],starcraftobservationtyp:26,start:[3,7,10,15,20,24,29,30,36,41,48],state:[1,2,3,4,5,6,7,8,9,10,11,13,14,15,16,17,18,19,20,21,22,23,24,26,29,31,34,35,36,38,40,42,47,48],state_key_with_the_class_index:[2,31],state_spac:26,state_valu:24,statist:[3,9,29,46,48],stdev:27,steep:27,step:[0,3,4,5,6,7,9,10,11,12,13,14,16,18,19,20,21,22,23,24,26,27,29,35,36,37,38,47,48,49],stepmethod:[7,17],stochast:38,stop:[0,26],store:[0,3,19,22,24,26,29,31,37,38,39,41,46,48,49],store_transitions_only_when_episodes_are_termin:22,str:[0,2,3,4,17,23,24,26,27,29,30,34,48],strategi:[26,44],stream:[15,39],strict:43,string:[0,23,26],structur:[0,3,24,31,35,38,48],stuff:23,style:27,sub:[30,31,32,33,34,35,38,41,49],sub_spac:34,subset:[37,43,47],subtract:20,succeed:26,success:[0,26,47],suffer:37,suffici:24,suffix:[3,23,48],suggest:35,suit:[0,44],suitabl:[39,49],sum:[4,6,9,16,23,24],sum_:[5,12,16,17,19,22],summari:[0,3,48],supervis:47,suppli:[3,48],support:[0,3,23,26,27,37,40,41,42,44,46,49],sure:[0,41,43],surrog:6,swig:41,swingup:26,symbol:23,sync:[3,23,38,39,48],synchron:[0,23,38,40],t_max:[9,17],tag:41,take:[0,9,10,15,19,20,23,26,27,28,36,37,38],taken:[1,2,4,5,6,7,10,12,15,19,20,21,22,23,24,26,27],tanh:7,tar:41,target:[0,1,2,3,4,5,6,7,10,11,12,13,14,16,17,1
8,19,20,21,22,23,26,29,30,34,35,38,40,48],target_act:30,target_kl_diverg:10,target_network:23,target_success_r:26,targets_horizon:17,task:[0,1,2,26,29,35,37,44],task_index:0,techniqu:[6,10,46,47],technolog:39,teh:23,temperatur:27,temperature_schedul:27,tensor:[3,23,48],tensorboard:0,tensorflow:[0,3,23,48,49],tensorflow_support:23,term:[6,10,34],termin:[3,7,24,38,48],test:[0,3,5,7,8,9,10,23,35,43,46,49],test_using_a_trace_test:0,textrm:38,than:[0,3,10,23,27,37,40,48],thei:[3,19,20,23,27,37,38,39,47,48,49],them:[4,5,9,17,23,24,26,29,34,36,37,40],therefor:[0,7,23,28,47],theta:[6,7,12,22,27],theta_:6,thi:[0,3,4,5,6,7,9,10,11,15,17,19,22,23,24,26,27,28,29,30,31,32,34,35,36,37,38,39,40,41,43,47,48,49],thing:37,those:[0,3,7,11,13,14,15,19,24,27,30,38,40,42,47,48],thousand:[10,11,12,13,14,16,20,21,22],thread:23,three:[3,39,40,41,42],threshold:[10,19,29],through:[0,3,4,7,8,9,10,11,19,20,23,35,36,38,40,48],tild:7,time:[0,4,20,23,27,30,31,37,40,47],time_limit:36,timestep:[4,9],timid:41,tmp:0,togeth:[3,17,24,38,48],toggl:37,too:10,tool:[37,41,47],top:[23,26,28,29,31,36,37,47],torqu:26,total:[0,3,9,10,16,19,20,24,31,35,37,47,48],total_loss:23,total_return:24,trace:0,trace_max_env_step:0,trace_test_level:0,tradeoff:27,train:[0,3,15,23,27,32,33,35,36,37,38,39,40,43,46,47,48],train_and_sync_network:23,train_on_batch:23,trainer:[25,39],transfer:[26,32,44],transit:[1,2,3,4,5,7,9,10,12,13,14,17,19,20,21,22,31,35,38,39,48],transition_idx:24,tri:47,trick:43,tricki:37,trigger:[26,41],truncated_norm:27,ttf2:41,tune:27,tupl:[1,2,3,7,23,24,26,31,34,35],turn:[2,47],tutori:[35,36],tweak:[3,48],two:[7,9,17,23,26,27,28,29,30,34,36,39,40,49],txt:41,type:[0,3,9,15,23,26,29,34,35,38,40,46,47,48,49],typic:[6,10,23,47,49],ubuntu16:41,uhlenbeck:[7,8,27],uint8:29,unbound:34,uncertain:27,uncertainti:27,unchang:10,unclip:[3,35,48],uncorrel:17,undeploi:39,under:[3,23,35,49],underbrac:5,understand:49,unifi:6,uniformli:[26,27,30,34],union:[3,24,26,27,30,34,48],uniqu:23,unit:37,unlik:10,unmask:30,unnecessari:0,unshar:[3,48],unsign:29,unspecifi:23,unstabl:[37,43],until:[0,9,10,19,22,27],unus:23,unzip:41,updat:[3,6,7,9,10,11,12,13,14,15,17,18,19,20,21,22,23,24,27,35,36,37,38,40,41,47,48],update_discounted_reward:24,update_log:[3,48],update_online_network:23,update_step_in_episode_log:[3,48],update_target_network:23,update_transition_before_adding_to_replay_buff:[3,48],upgrad:41,upon:[3,5,35,48],upper:27,usag:[30,46],use:[0,1,2,3,4,5,7,8,9,11,13,14,18,23,24,25,26,27,29,30,31,34,35,36,38,40,41,46,47,48,49],use_accumulated_reward_as_measur:4,use_cpu:0,use_full_action_spac:26,use_kl_regular:[6,10],use_non_zero_discount_for_terminal_st:7,use_separate_networks_per_head:23,use_target_network_for_evalu:7,used:[0,2,3,5,6,7,9,10,11,12,16,17,18,19,20,21,23,26,27,29,30,31,32,33,35,36,38,39,40,43,48,49],useful:[0,3,4,22,23,27,29,34,43,47,48,49],user:[23,26,27,37,38,41],userguid:41,uses:[0,1,6,10,15,24,25,27,33,38,39,41,43,47,49],using:[0,3,5,6,7,9,10,13,14,16,17,18,19,20,22,23,25,26,27,29,32,35,36,37,39,44,47,48,49],usr:41,usual:[29,38],util:[3,37,48],v_max:12,v_min:12,val:[3,34,48],val_matches_space_definit:34,valid:[0,34],valu:[0,2,3,4,5,6,7,10,11,12,13,14,15,17,18,19,20,22,23,24,26,27,29,30,31,34,35,38,40,41,42,47,48],valuabl:37,value_targets_mix_fract:[6,10],valueexcept:[3,48],valueoptimizationag:35,van:4,vari:40,variabl:[23,26,41],variable_scop:23,varianc:[9,27,37],variant:[27,31,47],variou:[3,24,31,46],vector:[3,4,7,8,10,11,23,26,29,34,36,40,47,48],vectorobservationspac:29,verbos:26,veri:[0,6,7,9,15,19,37,47,49],version:[6,10,24],versu:23,v
ertic:23,via:[2,11],video:[0,3,26],video_dump_method:0,view:37,viewabl:[3,48],visit:43,visual:[0,3,26,44,46],visualization_paramet:26,visualizationparamet:[3,26],vizdoom:[41,44],vote:27,wai:[3,6,10,27,30,36,38,40,46,48,49],wait:[5,23,39],walk:36,want:[3,4,22,23,24,29,30,31,48],warn:[27,29,30],wasn:24,weather_id:26,websit:[26,46],weight:[4,5,6,7,10,11,12,13,14,16,17,18,19,20,21,22,23,27,38,40,47],well:[19,23,27,34,47],went:10,were:[4,12,13,14,15,19,21,22,23,24,30,43],west:41,wget:41,what:[10,47],when:[0,3,4,5,6,7,8,9,10,19,23,24,25,26,27,29,32,33,35,36,37,48,49],whenev:39,where:[2,3,4,5,6,10,11,12,15,17,19,20,22,23,24,26,27,29,30,34,37,47,48],which:[0,1,2,3,5,6,7,9,10,11,15,17,18,19,20,21,23,24,25,26,27,29,31,32,33,34,35,36,37,38,39,40,42,43,44,46,47,48,49],who:38,why:[37,38],window:[29,30],wise:29,within:[0,6,10,18,27,34,37],without:[5,10,30,31,37,47,49],won:[4,23],wont:23,work:[3,17,23,27,29,30,37,38,47,48,49],workaround:0,workdir:41,worker:[0,3,17,23,25,29,31,32,33,37,39,40,41,47,48,49],worker_devic:23,worker_host:0,wors:47,would:[23,41,47],wrap:[26,29,38,44],wrapper:[3,23,24,26,34,40,48],write:[0,3,48],written:[3,22,25,48],www:41,xdist:41,y_t:[7,11,13,14,16,18,19,20],year:47,yet:[15,36],you:[4,29,31,35,36,41,46,49],your:[35,36,41,49],yuv:29,z_i:[12,22],z_j:[12,22],zero:[2,13,14],zip:41,zlib1g:41},titles:["Additional Parameters","Behavioral Cloning","Conditional Imitation Learning","Agents","Direct Future Prediction","Actor-Critic","Clipped Proximal Policy Optimization","Deep Deterministic Policy Gradient","Hierarchical Actor Critic","Policy Gradient","Proximal Policy Optimization","Bootstrapped DQN","Categorical DQN","Double DQN","Deep Q Networks","Dueling DQN","Mixed Monte Carlo","N-Step Q Learning","Normalized Advantage Functions","Neural Episodic Control","Persistent Advantage Learning","Quantile Regression DQN","Rainbow","Architectures","Core Types","Data Stores","Environments","Exploration Policies","Filters","Input Filters","Output Filters","Memories","Memory Backends","Orchestrators","Spaces","Adding a New Agent","Adding a New Environment","Coach Dashboard","Control Flow","Distributed Coach - Horizontal Scale-Out","Network Design","Usage - Distributed Coach","Algorithms","Benchmarks","Environments","Features","Reinforcement Learning Coach","Selecting an 
Algorithm","test","Usage"],titleterms:{"final":19,"function":18,"new":[35,36],"switch":49,Adding:[35,36],Using:36,across:47,action:[4,5,6,7,8,9,10,11,18,19,30,34,47],actioninfo:24,actor:[5,8],addit:[0,49],additivenois:27,advantag:[18,20],agent:[3,35,38,49],algorithm:[1,2,4,5,6,7,8,9,10,11,12,13,14,16,17,18,19,20,21,22,42,47,49],api:36,architectur:23,attentionactionspac:34,backend:32,balancedexperiencereplai:31,batch:24,behavior:1,benchmark:43,between:49,blizzard:26,boltzmann:27,bootstrap:[11,27],boxactionspac:34,build:41,can:47,carla:26,carlo:16,categor:[12,27],choos:[4,5,6,7,8,9,10,11,18,19],clip:6,clone:[1,41],coach:[36,37,39,41,46],collect:47,compar:37,compoundactionspac:34,condit:2,config:41,contain:41,continu:[6,10,47],continuousentropi:27,control:[19,26,38],copi:40,core:24,creat:41,critic:[5,8],dashboard:37,data:25,deep:[7,14,49],deepmind:26,demonstr:47,descript:[1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22],design:40,determinist:7,direct:4,discret:[5,9,47],discreteactionspac:34,distribut:[39,41],distributedtaskparamet:0,doe:47,doubl:13,dqn:[11,12,13,15,21],duel:15,dump:49,egreedi:27,environ:[26,36,44,47,49],envrespons:24,episod:[19,24,31],episodicexperiencereplai:31,episodichindsightexperiencereplai:31,episodichrlhindsightexperiencereplai:31,evalu:49,experiencereplai:31,explor:27,explorationpolici:27,featur:45,file:41,filter:[28,29,30],flag:49,flow:38,framework:49,from:47,futur:4,gener:15,gif:49,goal:34,gradient:[7,9],graph:38,greedi:27,gym:[26,36],have:47,hierarch:8,horizont:39,human:[47,49],imag:41,imageobservationspac:34,imit:[2,49],implement:41,input:29,interfac:41,keep:40,kubernet:33,learn:[2,17,20,46,49],level:38,manag:38,memori:[31,32],mix:16,mont:16,more:47,multi:49,multipl:47,multiselectactionspac:34,network:[1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,40],networkwrapp:23,neural:19,nfsdatastor:25,node:[47,49],non:31,normal:18,observ:[29,34],observationclippingfilt:29,observationcropfilt:29,observationmoveaxisfilt:29,observationnormalizationfilt:29,observationreductionbysubpartsnamefilt:29,observationrescalesizebyfactorfilt:29,observationrescaletosizefilt:29,observationrgbtoyfilt:29,observationsqueezefilt:29,observationstackingfilt:29,observationtouint8filt:29,openai:[26,36],optim:[6,10],orchestr:33,ouprocess:27,out:39,output:30,pain:47,parallel:47,paramet:0,parameternois:27,persist:20,plai:49,planarmapsobservationspac:34,polici:[6,7,9,10,27],predict:4,prerequisit:41,presetvalidationparamet:0,prioritizedexperiencereplai:31,process:47,proxim:[6,10],push:41,qdnd:31,quantil:21,rainbow:22,redispubsubbackend:32,regress:21,reinforc:46,render:49,repositori:41,reward:29,rewardclippingfilt:29,rewardnormalizationfilt:29,rewardrescalefilt:29,run:[37,41],s3datastor:25,sampl:47,scale:39,select:47,signal:37,simul:47,singl:49,singleepisodebuff:31,solv:47,space:[34,47],starcraft:26,statist:37,step:17,store:[11,25],structur:[1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22],suit:26,support:39,sync:40,synchron:39,task:47,taskparamet:0,test:48,thread:49,through:49,track:37,train:[1,2,4,5,6,7,8,9,10,11,12,13,14,16,17,18,19,20,21,22,49],transit:[11,24],transitioncollect:31,truncatednorm:27,type:[24,39],ucb:27,usag:[41,49],vectorobservationspac:34,visual:[37,49],visualizationparamet:0,vizdoom:26,you:47,your:47}}) \ No newline at end of file diff --git a/docs_raw/source/components/exploration_policies/index.rst b/docs_raw/source/components/exploration_policies/index.rst index 10b6c77..3d56dcc 100644 --- a/docs_raw/source/components/exploration_policies/index.rst +++ 
b/docs_raw/source/components/exploration_policies/index.rst
@@ -38,50 +38,50 @@ spaces.
 
 ExplorationPolicy
 -----------------
-.. autoclass:: rl_coach.exploration_policies.ExplorationPolicy
+.. autoclass:: rl_coach.exploration_policies.exploration_policy.ExplorationPolicy
    :members:
    :inherited-members:
 
 AdditiveNoise
 -------------
-.. autoclass:: rl_coach.exploration_policies.AdditiveNoise
+.. autoclass:: rl_coach.exploration_policies.additive_noise.AdditiveNoise
 
 Boltzmann
 ---------
-.. autoclass:: rl_coach.exploration_policies.Boltzmann
+.. autoclass:: rl_coach.exploration_policies.boltzmann.Boltzmann
 
 Bootstrapped
 ------------
-.. autoclass:: rl_coach.exploration_policies.Bootstrapped
+.. autoclass:: rl_coach.exploration_policies.bootstrapped.Bootstrapped
 
 Categorical
 -----------
-.. autoclass:: rl_coach.exploration_policies.Categorical
+.. autoclass:: rl_coach.exploration_policies.categorical.Categorical
 
 ContinuousEntropy
 -----------------
-.. autoclass:: rl_coach.exploration_policies.ContinuousEntropy
+.. autoclass:: rl_coach.exploration_policies.continuous_entropy.ContinuousEntropy
 
 EGreedy
 -------
-.. autoclass:: rl_coach.exploration_policies.EGreedy
+.. autoclass:: rl_coach.exploration_policies.e_greedy.EGreedy
 
 Greedy
 ------
-.. autoclass:: rl_coach.exploration_policies.Greedy
+.. autoclass:: rl_coach.exploration_policies.greedy.Greedy
 
 OUProcess
 ---------
-.. autoclass:: rl_coach.exploration_policies.OUProcess
+.. autoclass:: rl_coach.exploration_policies.ou_process.OUProcess
 
 ParameterNoise
 --------------
-.. autoclass:: rl_coach.exploration_policies.ParameterNoise
+.. autoclass:: rl_coach.exploration_policies.parameter_noise.ParameterNoise
 
 TruncatedNormal
 ---------------
-.. autoclass:: rl_coach.exploration_policies.TruncatedNormal
+.. autoclass:: rl_coach.exploration_policies.truncated_normal.TruncatedNormal
 
 UCB
 ---
-.. autoclass:: rl_coach.exploration_policies.UCB
\ No newline at end of file
+.. autoclass:: rl_coach.exploration_policies.ucb.UCB
\ No newline at end of file
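Note on the hunk above: Sphinx autodoc can only document what it can import, so once the package-level re-exports are removed (see the rl_coach/exploration_policies/__init__.py deletion at the end of this patch) each autoclass directive must name the class's defining module. A quick sanity check of the new dotted paths, sketched here under the assumption that rl_coach is installed; the probe is illustrative only and not part of the patch:

import importlib

# A few of the dotted paths used by the updated autoclass directives.
# Illustrative subset, not the full list.
AUTOCLASS_TARGETS = [
    "rl_coach.exploration_policies.exploration_policy.ExplorationPolicy",
    "rl_coach.exploration_policies.additive_noise.AdditiveNoise",
    "rl_coach.exploration_policies.e_greedy.EGreedy",
    "rl_coach.exploration_policies.ucb.UCB",
]

for target in AUTOCLASS_TARGETS:
    module_path, class_name = target.rsplit(".", 1)
    # import_module + getattr mirrors what autodoc does during a docs
    # build: if either step fails, the directive would fail too.
    module = importlib.import_module(module_path)
    getattr(module, class_name)
print("all autoclass targets import cleanly")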
diff --git a/docs_raw/source/index.rst b/docs_raw/source/index.rst
index ca786ee..16c7024 100644
--- a/docs_raw/source/index.rst
+++ b/docs_raw/source/index.rst
@@ -25,7 +25,7 @@ Blog posts from the Intel® AI website:
 
 * `Release 0.10.0 `_
-* `Release 0.11.0 `_ (current release)
+* `Release 0.11.0 `_ (current release)
 
 You can find more details in the `GitHub repository `_.
diff --git a/rl_coach/agents/agent.py b/rl_coach/agents/agent.py
index 21be6be..53dc4c3 100644
--- a/rl_coach/agents/agent.py
+++ b/rl_coach/agents/agent.py
@@ -15,13 +15,11 @@
 #
 
 import copy
-import os
 import random
 from collections import OrderedDict
 from typing import Dict, List, Union, Tuple
 
 import numpy as np
-from pandas import read_pickle
 from six.moves import range
 
 from rl_coach.agents.agent_interface import AgentInterface
diff --git a/rl_coach/coach.py b/rl_coach/coach.py
index 33f83ba..a3ded7e 100644
--- a/rl_coach/coach.py
+++ b/rl_coach/coach.py
@@ -35,7 +35,6 @@ from multiprocessing.managers import BaseManager
 import subprocess
 from rl_coach.graph_managers.graph_manager import HumanPlayScheduleParameters, GraphManager
 from rl_coach.utils import list_all_presets, short_dynamic_import, get_open_port, SharedMemoryScratchPad, get_base_dir
-from rl_coach.agents.human_agent import HumanAgentParameters
 from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
 from rl_coach.environments.environment import SingleLevelSelection
 from rl_coach.memories.backend.redis import RedisPubSubMemoryBackendParameters
@@ -229,6 +228,8 @@ class CoachLauncher(object):
 
         # for human play we need to create a custom graph manager
         if args.play:
+            from rl_coach.agents.human_agent import HumanAgentParameters
+
             env_params = short_dynamic_import(args.environment_type, ignore_module_case=True)()
             env_params.human_control = True
             schedule_params = HumanPlayScheduleParameters()
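Note on the coach.py hunks above: moving the HumanAgentParameters import inside the `if args.play:` branch is the standard deferred-import pattern. The human-agent module (and whatever it imports transitively) is loaded only on the one code path that needs it, so every other invocation of coach.py starts faster and cannot break on that module's import. A minimal sketch of the pattern; create_agent_params is a hypothetical helper, not code from this patch:

from types import SimpleNamespace


def create_agent_params(args):
    """Build agent parameters, paying for optional imports only on demand."""
    if args.play:
        # Deferred import: resolved on first call, not at module load time.
        from rl_coach.agents.human_agent import HumanAgentParameters
        return HumanAgentParameters()
    return None


# Only this call (play=True) triggers the human_agent import.
params = create_agent_params(SimpleNamespace(play=True))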
diff --git a/rl_coach/exploration_policies/__init__.py b/rl_coach/exploration_policies/__init__.py
index 922390f..e69de29 100644
--- a/rl_coach/exploration_policies/__init__.py
+++ b/rl_coach/exploration_policies/__init__.py
@@ -1,55 +0,0 @@
-#
-# Copyright (c) 2017 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from .additive_noise import AdditiveNoiseParameters, AdditiveNoise
-from .boltzmann import BoltzmannParameters, Boltzmann
-from .bootstrapped import BootstrappedParameters, Bootstrapped
-from .categorical import CategoricalParameters, Categorical
-from .continuous_entropy import ContinuousEntropyParameters, ContinuousEntropy
-from .e_greedy import EGreedyParameters, EGreedy
-from .exploration_policy import ExplorationParameters, ExplorationPolicy
-from .greedy import GreedyParameters, Greedy
-from .ou_process import OUProcessParameters, OUProcess
-from .parameter_noise import ParameterNoiseParameters, ParameterNoise
-from .truncated_normal import TruncatedNormalParameters, TruncatedNormal
-from .ucb import UCBParameters, UCB
-
-__all__ = [
-    'AdditiveNoiseParameters',
-    'AdditiveNoise',
-    'BoltzmannParameters',
-    'Boltzmann',
-    'BootstrappedParameters',
-    'Bootstrapped',
-    'CategoricalParameters',
-    'Categorical',
-    'ContinuousEntropyParameters',
-    'ContinuousEntropy',
-    'EGreedyParameters',
-    'EGreedy',
-    'ExplorationParameters',
-    'ExplorationPolicy',
-    'GreedyParameters',
-    'Greedy',
-    'OUProcessParameters',
-    'OUProcess',
-    'ParameterNoiseParameters',
-    'ParameterNoise',
-    'TruncatedNormalParameters',
-    'TruncatedNormal',
-    'UCBParameters',
-    'UCB'
-]
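Note on the deletion above: emptying rl_coach/exploration_policies/__init__.py is a breaking change for code that relied on the package-level re-exports. Every exploration policy still exists, but it must now be imported from its defining module, matching the updated autoclass paths earlier in this patch. A before/after sketch, assuming code written against the previous release:

# Before this patch (worked via the package-level re-exports):
#   from rl_coach.exploration_policies import EGreedy, EGreedyParameters

# After this patch, import from the defining module instead:
from rl_coach.exploration_policies.e_greedy import EGreedy, EGreedyParameters

# Construction is unchanged; only the import path moved.
exploration_params = EGreedyParameters()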