Source code for rl_coach.data_stores.nfs_data_store
-import uuid
+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+import uuid
from rl_coach.data_stores.data_store import DataStore, DataStoreParameters
diff --git a/docs/_modules/rl_coach/data_stores/s3_data_store.html b/docs/_modules/rl_coach/data_stores/s3_data_store.html
index 40aba1d..dc9fd7e 100644
--- a/docs/_modules/rl_coach/data_stores/s3_data_store.html
+++ b/docs/_modules/rl_coach/data_stores/s3_data_store.html
@@ -178,7 +178,24 @@
Source code for rl_coach.data_stores.s3_data_store
-from rl_coach.data_stores.data_store import DataStore, DataStoreParameters
+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+from rl_coach.data_stores.data_store import DataStore, DataStoreParameters
from minio import Minio
from minio.error import ResponseError
from configparser import ConfigParser, Error
diff --git a/docs/_modules/rl_coach/exploration_policies/additive_noise.html b/docs/_modules/rl_coach/exploration_policies/additive_noise.html
index 83c73ff..1bd8dca 100644
--- a/docs/_modules/rl_coach/exploration_policies/additive_noise.html
+++ b/docs/_modules/rl_coach/exploration_policies/additive_noise.html
@@ -216,7 +216,7 @@
return 'rl_coach.exploration_policies.additive_noise:AdditiveNoise'
-[docs]class AdditiveNoise(ExplorationPolicy):
+[docs]class AdditiveNoise(ExplorationPolicy):
"""
AdditiveNoise is an exploration policy intended for continuous action spaces. It takes the action from the agent
and adds Gaussian-distributed noise to it. The amount of noise added to the action follows the noise amount that
diff --git a/docs/_modules/rl_coach/exploration_policies/boltzmann.html b/docs/_modules/rl_coach/exploration_policies/boltzmann.html
index ad34b34..a71de9d 100644
--- a/docs/_modules/rl_coach/exploration_policies/boltzmann.html
+++ b/docs/_modules/rl_coach/exploration_policies/boltzmann.html
@@ -215,7 +215,7 @@
-[docs]class Boltzmann(ExplorationPolicy):
+[docs]class Boltzmann(ExplorationPolicy):
"""
The Boltzmann exploration policy is intended for discrete action spaces. It assumes that each of the possible
actions has some value assigned to it (such as the Q value), and uses a softmax function to convert these values
diff --git a/docs/_modules/rl_coach/exploration_policies/bootstrapped.html b/docs/_modules/rl_coach/exploration_policies/bootstrapped.html
index 35058a4..ea3ac97 100644
--- a/docs/_modules/rl_coach/exploration_policies/bootstrapped.html
+++ b/docs/_modules/rl_coach/exploration_policies/bootstrapped.html
@@ -218,7 +218,7 @@
return 'rl_coach.exploration_policies.bootstrapped:Bootstrapped'
-[docs]class Bootstrapped(EGreedy):
+[docs]class Bootstrapped(EGreedy):
"""
Bootstrapped exploration policy is currently only used for discrete action spaces along with the
Bootstrapped DQN agent. It assumes that there is an ensemble of network heads, where each one predicts the
diff --git a/docs/_modules/rl_coach/exploration_policies/categorical.html b/docs/_modules/rl_coach/exploration_policies/categorical.html
index edfcf2a..18925ee 100644
--- a/docs/_modules/rl_coach/exploration_policies/categorical.html
+++ b/docs/_modules/rl_coach/exploration_policies/categorical.html
@@ -209,7 +209,7 @@
return 'rl_coach.exploration_policies.categorical:Categorical'
-[docs]class Categorical(ExplorationPolicy):
+[docs]class Categorical(ExplorationPolicy):
"""
Categorical exploration policy is intended for discrete action spaces. It expects the action values to
represent a probability distribution over the actions, from which a single action will be sampled.
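
A minimal sketch of this sampling rule (illustrative, not Coach's code); as the rendered docs later in this diff note, evaluation takes the most probable action instead of sampling:

import numpy as np

def categorical_action(action_probs, evaluation=False):
    # Training: sample from the policy's own distribution.
    # Evaluation: act greedily on the most probable action.
    if evaluation:
        return int(np.argmax(action_probs))
    return int(np.random.choice(len(action_probs), p=action_probs))

print(categorical_action(np.array([0.7, 0.2, 0.1])))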
diff --git a/docs/_modules/rl_coach/exploration_policies/continuous_entropy.html b/docs/_modules/rl_coach/exploration_policies/continuous_entropy.html
index 6fb3c16..39ac379 100644
--- a/docs/_modules/rl_coach/exploration_policies/continuous_entropy.html
+++ b/docs/_modules/rl_coach/exploration_policies/continuous_entropy.html
@@ -203,7 +203,7 @@
return 'rl_coach.exploration_policies.continuous_entropy:ContinuousEntropy'
-[docs]class ContinuousEntropy(AdditiveNoise):
+[docs]class ContinuousEntropy(AdditiveNoise):
"""
Continuous entropy is an exploration policy that is actually implemented as part of the network.
The exploration policy class is only a placeholder for choosing this policy. The exploration policy is
diff --git a/docs/_modules/rl_coach/exploration_policies/e_greedy.html b/docs/_modules/rl_coach/exploration_policies/e_greedy.html
index deecf18..2e88e22 100644
--- a/docs/_modules/rl_coach/exploration_policies/e_greedy.html
+++ b/docs/_modules/rl_coach/exploration_policies/e_greedy.html
@@ -222,7 +222,7 @@
return 'rl_coach.exploration_policies.e_greedy:EGreedy'
-[docs]class EGreedy(ExplorationPolicy):
+[docs]class EGreedy(ExplorationPolicy):
"""
e-greedy is an exploration policy that is intended for both discrete and continuous action spaces.
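
The discrete case reduces to a few lines; this is an illustrative sketch, with epsilon standing in for the current value of the epsilon_schedule in the class signature:

import numpy as np

def e_greedy_action(q_values, epsilon):
    # With probability epsilon explore uniformly, otherwise exploit.
    if np.random.rand() < epsilon:
        return int(np.random.randint(len(q_values)))
    return int(np.argmax(q_values))

print(e_greedy_action(np.array([0.1, 0.9, 0.3]), epsilon=0.1))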
diff --git a/docs/_modules/rl_coach/exploration_policies/exploration_policy.html b/docs/_modules/rl_coach/exploration_policies/exploration_policy.html
index e8b56bd..bef11d5 100644
--- a/docs/_modules/rl_coach/exploration_policies/exploration_policy.html
+++ b/docs/_modules/rl_coach/exploration_policies/exploration_policy.html
@@ -210,7 +210,7 @@
return 'rl_coach.exploration_policies.exploration_policy:ExplorationPolicy'
-[docs]class ExplorationPolicy(object):
+[docs]class ExplorationPolicy(object):
"""
An exploration policy takes the predicted actions or action values from the agent, and selects the action to
actually apply to the environment using some predefined algorithm.
@@ -222,14 +222,14 @@
self.phase = RunPhase.HEATUP
self.action_space = action_space
-[docs] def reset(self):
+[docs] def reset(self):
"""
Used for resetting the exploration policy parameters when needed
:return: None
"""
pass
-[docs] def get_action(self, action_values: List[ActionType]) -> ActionType:
+[docs] def get_action(self, action_values: List[ActionType]) -> ActionType:
"""
Given a list of values corresponding to each action,
choose one action according to the exploration policy
@@ -243,7 +243,7 @@
else:
raise ValueError("The get_action function should be overridden in the inheriting exploration class")
-[docs] def change_phase(self, phase):
+[docs] def change_phase(self, phase):
"""
Change between running phases of the algorithm
:param phase: Either Heatup or Train
@@ -251,7 +251,7 @@
"""
self.phase = phase
-[docs] def requires_action_values(self) -> bool:
+[docs] def requires_action_values(self) -> bool:
"""
Allows exploration policies to define if they require the action values for the current step.
This can save a lot of computation. For example, in e-greedy, if the random value generated is smaller
diff --git a/docs/_modules/rl_coach/exploration_policies/greedy.html b/docs/_modules/rl_coach/exploration_policies/greedy.html
index fe0d0fd..8bcfca2 100644
--- a/docs/_modules/rl_coach/exploration_policies/greedy.html
+++ b/docs/_modules/rl_coach/exploration_policies/greedy.html
@@ -209,7 +209,7 @@
return 'rl_coach.exploration_policies.greedy:Greedy'
-[docs]class Greedy(ExplorationPolicy):
+[docs]class Greedy(ExplorationPolicy):
"""
The Greedy exploration policy is intended for both discrete and continuous action spaces.
For discrete action spaces, it always selects the action with the maximum value, as given by the agent.
diff --git a/docs/_modules/rl_coach/exploration_policies/ou_process.html b/docs/_modules/rl_coach/exploration_policies/ou_process.html
index 0df44f5..15e0dcb 100644
--- a/docs/_modules/rl_coach/exploration_policies/ou_process.html
+++ b/docs/_modules/rl_coach/exploration_policies/ou_process.html
@@ -219,7 +219,7 @@
# Ornstein-Uhlenbeck process
-[docs]class OUProcess(ExplorationPolicy):
+[docs]class OUProcess(ExplorationPolicy):
"""
OUProcess exploration policy is intended for continuous action spaces, and selects the action according to
an Ornstein-Uhlenbeck process. The Ornstein-Uhlenbeck process implements the action as a Gaussian process, where
diff --git a/docs/_modules/rl_coach/exploration_policies/parameter_noise.html b/docs/_modules/rl_coach/exploration_policies/parameter_noise.html
index dcd0aea..fd5b326 100644
--- a/docs/_modules/rl_coach/exploration_policies/parameter_noise.html
+++ b/docs/_modules/rl_coach/exploration_policies/parameter_noise.html
@@ -210,7 +210,8 @@
class ParameterNoiseParameters(ExplorationParameters):
def __init__(self, agent_params: AgentParameters):
super().__init__()
- if not isinstance(agent_params, DQNAgentParameters):
+
+ if not agent_params.algorithm.supports_parameter_noise:
raise ValueError("Currently only DQN variants are supported for using an exploration type of "
"ParameterNoise.")
@@ -221,7 +222,7 @@
return 'rl_coach.exploration_policies.parameter_noise:ParameterNoise'
-[docs]class ParameterNoise(ExplorationPolicy):
+[docs]class ParameterNoise(ExplorationPolicy):
"""
The ParameterNoise exploration policy is intended for both discrete and continuous action spaces.
It applies the exploration policy by replacing all the dense network layers with noisy layers.
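
A NumPy sketch of such a noisy dense layer (learned weight means and standard deviations, fresh noise on every forward pass). This illustrates the idea only; it is not Coach's layer, and the initial sigma value is an assumption:

import numpy as np

class NoisyDenseSketch:
    def __init__(self, in_dim, out_dim):
        # Each weight gets a learned mean and a learned standard deviation.
        self.w_mu = np.random.randn(in_dim, out_dim) * 0.1
        self.w_sigma = np.full((in_dim, out_dim), 0.017)

    def forward(self, x):
        # Fresh noise per forward pass perturbs the weights themselves,
        # so exploration happens in parameter space rather than action space.
        w = self.w_mu + self.w_sigma * np.random.randn(*self.w_mu.shape)
        return x @ w

layer = NoisyDenseSketch(4, 2)
print(layer.forward(np.ones(4)))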
diff --git a/docs/_modules/rl_coach/exploration_policies/truncated_normal.html b/docs/_modules/rl_coach/exploration_policies/truncated_normal.html
index 04a6205..11b9bfc 100644
--- a/docs/_modules/rl_coach/exploration_policies/truncated_normal.html
+++ b/docs/_modules/rl_coach/exploration_policies/truncated_normal.html
@@ -218,7 +218,7 @@
return 'rl_coach.exploration_policies.truncated_normal:TruncatedNormal'
-[docs]class TruncatedNormal(ExplorationPolicy):
+[docs]class TruncatedNormal(ExplorationPolicy):
"""
The TruncatedNormal exploration policy is intended for continuous action spaces. It samples the action from a
normal distribution, where the mean action is given by the agent, and the standard deviation can be given in
two different ways:
diff --git a/docs/_modules/rl_coach/exploration_policies/ucb.html b/docs/_modules/rl_coach/exploration_policies/ucb.html
index 88b0978..562dcba 100644
--- a/docs/_modules/rl_coach/exploration_policies/ucb.html
+++ b/docs/_modules/rl_coach/exploration_policies/ucb.html
@@ -222,7 +222,7 @@
return 'rl_coach.exploration_policies.ucb:UCB'
-[docs]class UCB(EGreedy):
+[docs]class UCB(EGreedy):
"""
The UCB exploration policy follows the upper confidence bound heuristic to sample actions in discrete action spaces.
It assumes that there are multiple network heads that are predicting action values, and that the standard deviation
diff --git a/docs/_modules/rl_coach/memories/backend/redis.html b/docs/_modules/rl_coach/memories/backend/redis.html
index 842700d..b00fab0 100644
--- a/docs/_modules/rl_coach/memories/backend/redis.html
+++ b/docs/_modules/rl_coach/memories/backend/redis.html
@@ -178,7 +178,23 @@
Source code for rl_coach.memories.backend.redis
-
+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
import redis
import pickle
import uuid
diff --git a/docs/_modules/rl_coach/memories/non_episodic/transition_collection.html b/docs/_modules/rl_coach/memories/non_episodic/transition_collection.html
index cb6bf56..0fcd72b 100644
--- a/docs/_modules/rl_coach/memories/non_episodic/transition_collection.html
+++ b/docs/_modules/rl_coach/memories/non_episodic/transition_collection.html
@@ -178,7 +178,24 @@
Source code for rl_coach.memories.non_episodic.transition_collection
-from rl_coach.core_types import Transition
+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+from rl_coach.core_types import Transition
[docs]class TransitionCollection(object):
diff --git a/docs/_modules/rl_coach/orchestrators/kubernetes_orchestrator.html b/docs/_modules/rl_coach/orchestrators/kubernetes_orchestrator.html
index 83db11f..9932e81 100644
--- a/docs/_modules/rl_coach/orchestrators/kubernetes_orchestrator.html
+++ b/docs/_modules/rl_coach/orchestrators/kubernetes_orchestrator.html
@@ -178,7 +178,24 @@
Source code for rl_coach.orchestrators.kubernetes_orchestrator
-import os
+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+import os
import uuid
import json
import time
diff --git a/docs/_sources/components/exploration_policies/index.rst.txt b/docs/_sources/components/exploration_policies/index.rst.txt
index 10b6c77..3d56dcc 100644
--- a/docs/_sources/components/exploration_policies/index.rst.txt
+++ b/docs/_sources/components/exploration_policies/index.rst.txt
@@ -38,50 +38,50 @@ spaces.
ExplorationPolicy
-----------------
-.. autoclass:: rl_coach.exploration_policies.ExplorationPolicy
+.. autoclass:: rl_coach.exploration_policies.exploration_policy.ExplorationPolicy
:members:
:inherited-members:
AdditiveNoise
-------------
-.. autoclass:: rl_coach.exploration_policies.AdditiveNoise
+.. autoclass:: rl_coach.exploration_policies.additive_noise.AdditiveNoise
Boltzmann
---------
-.. autoclass:: rl_coach.exploration_policies.Boltzmann
+.. autoclass:: rl_coach.exploration_policies.boltzmann.Boltzmann
Bootstrapped
------------
-.. autoclass:: rl_coach.exploration_policies.Bootstrapped
+.. autoclass:: rl_coach.exploration_policies.bootstrapped.Bootstrapped
Categorical
-----------
-.. autoclass:: rl_coach.exploration_policies.Categorical
+.. autoclass:: rl_coach.exploration_policies.categorical.Categorical
ContinuousEntropy
-----------------
-.. autoclass:: rl_coach.exploration_policies.ContinuousEntropy
+.. autoclass:: rl_coach.exploration_policies.continuous_entropy.ContinuousEntropy
EGreedy
-------
-.. autoclass:: rl_coach.exploration_policies.EGreedy
+.. autoclass:: rl_coach.exploration_policies.e_greedy.EGreedy
Greedy
------
-.. autoclass:: rl_coach.exploration_policies.Greedy
+.. autoclass:: rl_coach.exploration_policies.greedy.Greedy
OUProcess
---------
-.. autoclass:: rl_coach.exploration_policies.OUProcess
+.. autoclass:: rl_coach.exploration_policies.ou_process.OUProcess
ParameterNoise
--------------
-.. autoclass:: rl_coach.exploration_policies.ParameterNoise
+.. autoclass:: rl_coach.exploration_policies.parameter_noise.ParameterNoise
TruncatedNormal
---------------
-.. autoclass:: rl_coach.exploration_policies.TruncatedNormal
+.. autoclass:: rl_coach.exploration_policies.truncated_normal.TruncatedNormal
UCB
---
-.. autoclass:: rl_coach.exploration_policies.UCB
\ No newline at end of file
+.. autoclass:: rl_coach.exploration_policies.ucb.UCB
\ No newline at end of file
diff --git a/docs/_sources/index.rst.txt b/docs/_sources/index.rst.txt
index ca786ee..16c7024 100644
--- a/docs/_sources/index.rst.txt
+++ b/docs/_sources/index.rst.txt
@@ -25,7 +25,7 @@ Blog posts from the Intel® AI website:
* `Release 0.10.0 `_
-* `Release 0.11.0 `_ (current release)
+* `Release 0.11.0 `_ (current release)
You can find more details in the `GitHub repository `_.
diff --git a/docs/components/exploration_policies/index.html b/docs/components/exploration_policies/index.html
index f9d658a..388bbc8 100644
--- a/docs/components/exploration_policies/index.html
+++ b/docs/components/exploration_policies/index.html
@@ -264,8 +264,8 @@ spaces.
ExplorationPolicy¶
--
-class
rl_coach.exploration_policies.ExplorationPolicy(action_space: rl_coach.spaces.ActionSpace)[source]¶
+-
+class
rl_coach.exploration_policies.exploration_policy.ExplorationPolicy(action_space: rl_coach.spaces.ActionSpace)[source]¶
An exploration policy takes the predicted actions or action values from the agent, and selects the action to
actually apply to the environment using some predefined algorithm.
@@ -277,16 +277,16 @@ actually apply to the environment using some predefined algorithm.
--
-
change_phase(phase)[source]¶
+-
+
change_phase(phase)[source]¶
Change between running phases of the algorithm
:param phase: Either Heatup or Train
:return: None
--
-
get_action(action_values: List[Union[int, float, numpy.ndarray, List]]) → Union[int, float, numpy.ndarray, List][source]¶
+-
+
get_action(action_values: List[Union[int, float, numpy.ndarray, List]]) → Union[int, float, numpy.ndarray, List][source]¶
Given a list of values corresponding to each action,
choose one action according to the exploration policy
:param action_values: A list of action values
@@ -294,8 +294,8 @@ choose one actions according to the exploration policy
--
-
requires_action_values() → bool[source]¶
+-
+
requires_action_values() → bool[source]¶
Allows exploration policies to define if they require the action values for the current step.
This can save a lot of computation. For example, in e-greedy, if the random value generated is smaller
than epsilon, the action is completely random, and the action values don’t need to be calculated
@@ -303,8 +303,8 @@ than epsilon, the action is completely random, and the action values don’t nee
--
-
reset()[source]¶
+-
+
reset()[source]¶
Used for resetting the exploration policy parameters when needed
:return: None
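
Taken together, these four methods form the entire contract of an exploration policy. The standalone sketch below mirrors that contract without inheriting from Coach's class (the class name, the epsilon default, and the demo values are illustrative); it shows how requires_action_values lets the agent skip computing action values when the step will be random anyway:

import numpy as np
from typing import List

class EpsilonGreedySketch:
    """Illustrative only: mirrors the ExplorationPolicy contract above."""

    def __init__(self, num_actions: int, epsilon: float = 0.05):
        self.num_actions = num_actions
        self.epsilon = epsilon
        self.phase = None
        self._explore = False

    def reset(self) -> None:
        pass  # no per-episode state to reset in this sketch

    def change_phase(self, phase) -> None:
        self.phase = phase

    def requires_action_values(self) -> bool:
        # Decide up front whether this step explores; if it does, the
        # caller can skip the forward pass that produces action values.
        self._explore = np.random.rand() < self.epsilon
        return not self._explore

    def get_action(self, action_values: List[float]) -> int:
        if self._explore:
            return int(np.random.randint(self.num_actions))
        return int(np.argmax(action_values))

policy = EpsilonGreedySketch(num_actions=4)
values = [0.1, 0.9, 0.3, 0.2] if policy.requires_action_values() else []
print(policy.get_action(values))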
@@ -315,8 +315,8 @@ than epsilon, the action is completely random, and the action values don’t nee
AdditiveNoise¶
--
-class
rl_coach.exploration_policies.AdditiveNoise(action_space: rl_coach.spaces.ActionSpace, noise_percentage_schedule: rl_coach.schedules.Schedule, evaluation_noise_percentage: float)[source]¶
+-
+class
rl_coach.exploration_policies.additive_noise.AdditiveNoise(action_space: rl_coach.spaces.ActionSpace, noise_percentage_schedule: rl_coach.schedules.Schedule, evaluation_noise_percentage: float)[source]¶
AdditiveNoise is an exploration policy intended for continuous action spaces. It takes the action from the agent
and adds Gaussian-distributed noise to it. The amount of noise added to the action follows the noise amount that
can be given in two different ways:
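
However the noise percentage ends up being specified, the mechanics reduce to the following sketch: Gaussian noise whose standard deviation is the given percentage of the action-space range. The function name and arguments are illustrative, not Coach's API:

import numpy as np

def additive_noise_action(agent_action, low, high, noise_percentage):
    # The noise standard deviation is a percentage of the action range,
    # so the same percentage means the same relative perturbation in
    # every dimension of the action space.
    noise = np.random.normal(loc=0.0, scale=noise_percentage * (high - low))
    return np.clip(agent_action + noise, low, high)

# Example: a 2D continuous action with 5% exploration noise.
print(additive_noise_action(np.array([0.2, -0.4]),
                            low=np.array([-1.0, -1.0]),
                            high=np.array([1.0, 1.0]),
                            noise_percentage=0.05))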
@@ -343,8 +343,8 @@ of the action space
Boltzmann¶
--
-class
rl_coach.exploration_policies.Boltzmann(action_space: rl_coach.spaces.ActionSpace, temperature_schedule: rl_coach.schedules.Schedule)[source]¶
+-
+class
rl_coach.exploration_policies.boltzmann.Boltzmann(action_space: rl_coach.spaces.ActionSpace, temperature_schedule: rl_coach.schedules.Schedule)[source]¶
The Boltzmann exploration policy is intended for discrete action spaces. It assumes that each of the possible
actions has some value assigned to it (such as the Q value), and uses a softmax function to convert these values
into a distribution over the actions. It then samples the action to play from the calculated distribution.
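
A minimal NumPy sketch of this softmax sampling (illustrative, not Coach's implementation); the temperature would come from the temperature_schedule in the signature above, and higher temperatures flatten the distribution toward uniform exploration:

import numpy as np

def boltzmann_action(q_values, temperature):
    # Softmax over temperature-scaled values, shifted by the max for
    # numerical stability, then a categorical sample.
    scaled = q_values / temperature
    scaled = scaled - scaled.max()
    probs = np.exp(scaled) / np.exp(scaled).sum()
    return int(np.random.choice(len(q_values), p=probs))

print(boltzmann_action(np.array([1.0, 2.0, 0.5]), temperature=0.5))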
@@ -367,8 +367,8 @@ An additional temperature schedule can be given by the user, and will control th
Bootstrapped¶
--
-class
rl_coach.exploration_policies.Bootstrapped(action_space: rl_coach.spaces.ActionSpace, epsilon_schedule: rl_coach.schedules.Schedule, evaluation_epsilon: float, architecture_num_q_heads: int, continuous_exploration_policy_parameters: rl_coach.exploration_policies.exploration_policy.ExplorationParameters = <rl_coach.exploration_policies.additive_noise.AdditiveNoiseParameters object>)[source]¶
+-
+class
rl_coach.exploration_policies.bootstrapped.Bootstrapped(action_space: rl_coach.spaces.ActionSpace, epsilon_schedule: rl_coach.schedules.Schedule, evaluation_epsilon: float, architecture_num_q_heads: int, continuous_exploration_policy_parameters: rl_coach.exploration_policies.exploration_policy.ExplorationParameters = <rl_coach.exploration_policies.additive_noise.AdditiveNoiseParameters object>)[source]¶
Bootstrapped exploration policy is currently only used for discrete action spaces along with the
Bootstrapped DQN agent. It assumes that there is an ensemble of network heads, where each one predicts the
values for all the possible actions. For each episode, a single head is selected to lead the agent, according
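
A hedged sketch of the head-per-episode mechanism, assuming uniform head selection (illustrative names; architecture_num_q_heads in the signature above corresponds to num_heads here):

import numpy as np

num_heads = 10

def select_head():
    # At the start of each episode, one head is chosen to lead the agent.
    return int(np.random.randint(num_heads))

def bootstrapped_action(all_head_q_values, head):
    # Greedy action according to the leading head's value predictions.
    return int(np.argmax(all_head_q_values[head]))

q = np.random.randn(num_heads, 4)   # fake predictions: 10 heads, 4 actions
print(bootstrapped_action(q, select_head()))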
@@ -401,8 +401,8 @@ if the e-greedy is used for a continuous policy
Categorical¶
--
-class
rl_coach.exploration_policies.Categorical(action_space: rl_coach.spaces.ActionSpace)[source]¶
+-
+class
rl_coach.exploration_policies.categorical.Categorical(action_space: rl_coach.spaces.ActionSpace)[source]¶
Categorical exploration policy is intended for discrete action spaces. It expects the action values to
represent a probability distribution over the actions, from which a single action will be sampled.
In evaluation, the action that has the highest probability will be selected. This is particularly useful for
actor-critic schemes, where the actor's output is a probability distribution over
ContinuousEntropy¶
--
-class
rl_coach.exploration_policies.ContinuousEntropy(action_space: rl_coach.spaces.ActionSpace, noise_percentage_schedule: rl_coach.schedules.Schedule, evaluation_noise_percentage: float)[source]¶
+-
+class
rl_coach.exploration_policies.continuous_entropy.ContinuousEntropy(action_space: rl_coach.spaces.ActionSpace, noise_percentage_schedule: rl_coach.schedules.Schedule, evaluation_noise_percentage: float)[source]¶
Continuous entropy is an exploration policy that is actually implemented as part of the network.
The exploration policy class is only a placeholder for choosing this policy. The exploration policy is
implemented by adding a regularization factor to the network loss, which regularizes the entropy of the action.
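
A hedged sketch of such an entropy regularizer for a diagonal Gaussian policy (policy_loss, sigma, and beta are placeholder names; Coach exposes a comparable beta_entropy coefficient on the relevant agents):

import numpy as np

def regularized_loss(policy_loss, sigma, beta):
    # Differential entropy of a diagonal Gaussian: sum of
    # 0.5 * ln(2 * pi * e * sigma^2) per action dimension. Subtracting
    # it from the loss rewards the network for staying stochastic.
    entropy = np.sum(0.5 * np.log(2.0 * np.pi * np.e * sigma ** 2))
    return policy_loss - beta * entropy

print(regularized_loss(policy_loss=1.3, sigma=np.array([0.2, 0.5]), beta=0.01))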
@@ -453,8 +453,8 @@ of the action space
EGreedy¶
--
-class
rl_coach.exploration_policies.EGreedy(action_space: rl_coach.spaces.ActionSpace, epsilon_schedule: rl_coach.schedules.Schedule, evaluation_epsilon: float, continuous_exploration_policy_parameters: rl_coach.exploration_policies.exploration_policy.ExplorationParameters = <rl_coach.exploration_policies.additive_noise.AdditiveNoiseParameters object>)[source]¶
+-
+class
rl_coach.exploration_policies.e_greedy.EGreedy(action_space: rl_coach.spaces.ActionSpace, epsilon_schedule: rl_coach.schedules.Schedule, evaluation_epsilon: float, continuous_exploration_policy_parameters: rl_coach.exploration_policies.exploration_policy.ExplorationParameters = <rl_coach.exploration_policies.additive_noise.AdditiveNoiseParameters object>)[source]¶
e-greedy is an exploration policy that is intended for both discrete and continuous action spaces.
For discrete action spaces, it assumes that each action is assigned a value, and it selects the action with the
highest value with probability 1 - epsilon. Otherwise, it selects an action sampled uniformly out of all the
@@ -485,8 +485,8 @@ if the e-greedy is used for a continuous policy
Greedy¶
--
-class
rl_coach.exploration_policies.Greedy(action_space: rl_coach.spaces.ActionSpace)[source]¶
+-
+class
rl_coach.exploration_policies.greedy.Greedy(action_space: rl_coach.spaces.ActionSpace)[source]¶
The Greedy exploration policy is intended for both discrete and continuous action spaces.
For discrete action spaces, it always selects the action with the maximum value, as given by the agent.
For continuous action spaces, it always returns the exact action, as it was given by the agent.
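
Both cases in one illustrative sketch (not Coach's implementation):

import numpy as np

def greedy_action(action_values, discrete):
    # Discrete: pick the highest-valued action.
    # Continuous: pass the agent's action through unchanged.
    return int(np.argmax(action_values)) if discrete else action_values

print(greedy_action(np.array([0.2, 1.5, -0.3]), discrete=True))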
@@ -504,8 +504,8 @@ For continuous action spaces, it always return the exact action, as it was given
OUProcess¶
--
-class
rl_coach.exploration_policies.OUProcess(action_space: rl_coach.spaces.ActionSpace, mu: float = 0, theta: float = 0.15, sigma: float = 0.2, dt: float = 0.01)[source]¶
+-
+class
rl_coach.exploration_policies.ou_process.OUProcess(action_space: rl_coach.spaces.ActionSpace, mu: float = 0, theta: float = 0.15, sigma: float = 0.2, dt: float = 0.01)[source]¶
OUProcess exploration policy is intended for continuous action spaces, and selects the action according to
an Ornstein-Uhlenbeck process. The Ornstein-Uhlenbeck process implements the action as a Gaussian process, where
the samples are correlated between consecutive time steps.
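
A one-step Euler discretization sketch of the process, with the defaults taken from the signature above (mu=0, theta=0.15, sigma=0.2, dt=0.01); the function name is illustrative:

import numpy as np

def ou_step(state, mu=0.0, theta=0.15, sigma=0.2, dt=0.01):
    # The noise state is pulled back toward mu at rate theta, so
    # consecutive samples are correlated rather than independent.
    return state + theta * (mu - state) * dt + sigma * np.sqrt(dt) * np.random.randn(*state.shape)

noise = np.zeros(2)
for _ in range(3):
    noise = ou_step(noise)
    print(noise)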
@@ -523,8 +523,8 @@ the samples are correlated between consequent time steps.
ParameterNoise¶
--
-class
rl_coach.exploration_policies.ParameterNoise(network_params: Dict[str, rl_coach.base_parameters.NetworkParameters], action_space: rl_coach.spaces.ActionSpace)[source]¶
+-
+class
rl_coach.exploration_policies.parameter_noise.ParameterNoise(network_params: Dict[str, rl_coach.base_parameters.NetworkParameters], action_space: rl_coach.spaces.ActionSpace)[source]¶
The ParameterNoise exploration policy is intended for both discrete and continuous action spaces.
It applies the exploration policy by replacing all the dense network layers with noisy layers.
The noisy layers have both weight means and weight standard deviations, and for each forward pass of the network
@@ -545,8 +545,8 @@ values.
TruncatedNormal¶
--
-class
rl_coach.exploration_policies.TruncatedNormal(action_space: rl_coach.spaces.ActionSpace, noise_percentage_schedule: rl_coach.schedules.Schedule, evaluation_noise_percentage: float, clip_low: float, clip_high: float)[source]¶
+-
+class
rl_coach.exploration_policies.truncated_normal.TruncatedNormal(action_space: rl_coach.spaces.ActionSpace, noise_percentage_schedule: rl_coach.schedules.Schedule, evaluation_noise_percentage: float, clip_low: float, clip_high: float)[source]¶
The TruncatedNormal exploration policy is intended for continuous action spaces. It samples the action from a
normal distribution, where the mean action is given by the agent, and the standard deviation can be given in
two different ways:
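
Whichever way the standard deviation is specified, the draw itself can be sketched with naive rejection sampling (illustrative, and less efficient than a dedicated truncated-normal sampler); clip_low and clip_high mirror the constructor arguments above:

import numpy as np

def truncated_normal_action(mean, stddev, clip_low, clip_high):
    # Resample until the action falls inside [clip_low, clip_high].
    while True:
        action = np.random.normal(mean, stddev)
        if np.all(action >= clip_low) and np.all(action <= clip_high):
            return action

print(truncated_normal_action(np.array([0.0]), np.array([0.3]), -1.0, 1.0))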
@@ -575,8 +575,8 @@ of the action space
UCB¶
--
-class
rl_coach.exploration_policies.UCB(action_space: rl_coach.spaces.ActionSpace, epsilon_schedule: rl_coach.schedules.Schedule, evaluation_epsilon: float, architecture_num_q_heads: int, lamb: int, continuous_exploration_policy_parameters: rl_coach.exploration_policies.exploration_policy.ExplorationParameters = <rl_coach.exploration_policies.additive_noise.AdditiveNoiseParameters object>)[source]¶
+-
+class
rl_coach.exploration_policies.ucb.UCB(action_space: rl_coach.spaces.ActionSpace, epsilon_schedule: rl_coach.schedules.Schedule, evaluation_epsilon: float, architecture_num_q_heads: int, lamb: int, continuous_exploration_policy_parameters: rl_coach.exploration_policies.exploration_policy.ExplorationParameters = <rl_coach.exploration_policies.additive_noise.AdditiveNoiseParameters object>)[source]¶
The UCB exploration policy follows the upper confidence bound heuristic to sample actions in discrete action spaces.
It assumes that there are multiple network heads that are predicting action values, and that the standard deviation
between the heads' predictions represents the uncertainty of the agent in each of the actions.
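
A sketch of the resulting action rule: the mean over heads plus lamb standard deviations, with lamb as in the constructor signature above (illustrative code, not Coach's implementation):

import numpy as np

def ucb_action(all_head_q_values, lamb):
    # Optimism in the face of uncertainty: uncertainty is estimated as
    # the standard deviation of the ensemble heads' predictions.
    mean = all_head_q_values.mean(axis=0)
    std = all_head_q_values.std(axis=0)
    return int(np.argmax(mean + lamb * std))

q = np.random.randn(10, 4)   # fake predictions: 10 heads, 4 actions
print(ucb_action(q, lamb=0.1))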
diff --git a/docs/genindex.html b/docs/genindex.html
index 8198172..e4ca940 100644
--- a/docs/genindex.html
+++ b/docs/genindex.html
@@ -226,7 +226,7 @@
- ActorCriticAlgorithmParameters (class in rl_coach.agents.actor_critic_agent)
- - AdditiveNoise (class in rl_coach.exploration_policies)
+
- AdditiveNoise (class in rl_coach.exploration_policies.additive_noise)
- - Bootstrapped (class in rl_coach.exploration_policies)
+
- Bootstrapped (class in rl_coach.exploration_policies.bootstrapped)
- BoxActionSpace (class in rl_coach.spaces)
@@ -288,11 +288,11 @@
- CarlaEnvironment (class in rl_coach.environments.carla_environment)
- - Categorical (class in rl_coach.exploration_policies)
+
- Categorical (class in rl_coach.exploration_policies.categorical)
- CategoricalDQNAlgorithmParameters (class in rl_coach.agents.categorical_dqn_agent)
- - change_phase() (rl_coach.exploration_policies.ExplorationPolicy method)
+
- change_phase() (rl_coach.exploration_policies.exploration_policy.ExplorationPolicy method)
- choose_action() (rl_coach.agents.agent.Agent method)
@@ -328,7 +328,7 @@
- construct() (rl_coach.architectures.architecture.Architecture static method)
- - ContinuousEntropy (class in rl_coach.exploration_policies)
+
- ContinuousEntropy (class in rl_coach.exploration_policies.continuous_entropy)
- ControlSuiteEnvironment (class in rl_coach.environments.control_suite_environment)
@@ -368,7 +368,7 @@
E
- game_overs() (rl_coach.core_types.Batch method)
- - get_action() (rl_coach.exploration_policies.ExplorationPolicy method)
+
- get_action() (rl_coach.exploration_policies.exploration_policy.ExplorationPolicy method)
- get_action_from_user() (rl_coach.environments.environment.Environment method)
@@ -466,7 +466,7 @@
- GoalsSpace.DistanceMetric (class in rl_coach.spaces)
- - Greedy (class in rl_coach.exploration_policies)
+
- Greedy (class in rl_coach.exploration_policies.greedy)
- GymEnvironment (class in rl_coach.environments.gym_environment)
@@ -626,7 +626,7 @@
- (rl_coach.agents.dqn_agent.DQNAgent method)
- - OUProcess (class in rl_coach.exploration_policies)
+
- OUProcess (class in rl_coach.exploration_policies.ou_process)
@@ -640,7 +640,7 @@
- parallel_prediction() (rl_coach.architectures.network_wrapper.NetworkWrapper method)
- - ParameterNoise (class in rl_coach.exploration_policies)
+
- ParameterNoise (class in rl_coach.exploration_policies.parameter_noise)
- parent (rl_coach.agents.agent.Agent attribute)
@@ -714,9 +714,9 @@
- render() (rl_coach.environments.environment.Environment method)
- - requires_action_values() (rl_coach.exploration_policies.ExplorationPolicy method)
+
- requires_action_values() (rl_coach.exploration_policies.exploration_policy.ExplorationPolicy method)
- - reset() (rl_coach.exploration_policies.ExplorationPolicy method)
+
- reset() (rl_coach.exploration_policies.exploration_policy.ExplorationPolicy method)
- reset_accumulated_gradients() (rl_coach.architectures.architecture.Architecture method)
@@ -870,7 +870,7 @@
- TransitionCollection (class in rl_coach.memories.non_episodic)
- - TruncatedNormal (class in rl_coach.exploration_policies)
+
- TruncatedNormal (class in rl_coach.exploration_policies.truncated_normal)
@@ -878,7 +878,7 @@
U
- - UCB (class in rl_coach.exploration_policies)
+
- UCB (class in rl_coach.exploration_policies.ucb)
- update_discounted_rewards() (rl_coach.core_types.Episode method)
diff --git a/docs/index.html b/docs/index.html
index 90e8c90..a82e4d0 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -194,7 +194,7 @@ Coach collects statistics from the training process and supports advanced visual
- Release 0.8.0 (initial release)
- Release 0.9.0
- Release 0.10.0
-- Release 0.11.0 (current release)
+- Release 0.11.0 (current release)
You can find more details in the GitHub repository.
diff --git a/docs/objects.inv b/docs/objects.inv
index b89806e..4c3b349 100644
Binary files a/docs/objects.inv and b/docs/objects.inv differ
diff --git a/docs/searchindex.js b/docs/searchindex.js
index f7e02fe..5feb194 100644
--- a/docs/searchindex.js
+++ b/docs/searchindex.js
rm:[3,4,17,24,26,28,37,38,41,44,48],inherit:[3,35,36],init_environment_dependent_modul:[3,48],initi:[3,4,10,20,23,24,35,38,46,48],initial_feed_dict:23,initial_kl_coeffici:10,innov:47,input:[1,2,3,4,7,11,13,14,16,18,19,20,23,28,34,38,40,48],input_embedders_paramet:23,input_high:29,input_low:29,input_space_high:30,input_space_low:30,inputembedderparamet:23,inputfilt:38,insert:[19,24],inspect:0,instal:[41,49],instanc:[3,32,34,40],instanti:[3,26,38],instead:[0,3,6,17,20,23,29,30,38,47,48],instruct:49,intact:[11,43],integ:[0,29,30],integr:[36,38,39,46],intel:46,intend:[9,23,27,38],interact:[24,38,39,46,49],interest:[23,37],interfac:[26,37,39,44],intermedi:19,intern:[3,9,17,23,24,28,38,48,49],interpol:29,intersect:47,interv:21,intrins:24,intro:46,introduc:47,invers:[26,44],invok:38,involv:35,is_empti:24,is_point_in_space_shap:34,item:24,iter:[3,5,7,10,15,23,48],its:[0,3,12,22,23,24,27,34,38,41,47,48,49],itself:[23,34,49],job:0,job_typ:0,joint:26,json:0,jump:[4,30],jupyt:35,just:[3,10,20,22,36,38,40,48,49],kapa:21,keep:[14,24,29,49],kei:[2,19,23,24,26,31,35,37,41,49],key_error_threshold:31,key_width:31,keyboard:[26,49],keyword:23,kl_coeffici:23,kl_coefficient_ph:23,know:[3,47,48,49],knowledg:[3,38,48],known:[24,37,43,47],kubeconfig:33,kubernet:41,kubernetes_orchestr:33,kubernetesparamet:33,kwarg:[23,26],l2_norm_added_delta:19,l2_regular:23,lack:37,lamb:27,lambda:[5,6,10,27],lane:2,larg:[27,29,44],larger:23,last:[4,10,19,24,26,29],last_env_respons:26,lastli:38,later:[0,3,23,48,49],latest:[17,19,38,41],layer:[23,27,31,38,40],lazi:[24,29],lazystack:29,lbfg:23,ld_library_path:41,lead:27,learn:[0,3,4,5,7,8,9,11,12,13,14,15,18,21,22,23,24,26,27,29,37,38,40,42,43,44,47,48],learn_from_batch:[3,35,38,48],learner:23,learning_r:[23,31],learning_rate_decay_r:23,learning_rate_decay_step:23,least:[40,47],leav:[10,11],left:[2,47],length:[4,5,6,10,17,19,23,24],less:[15,47],level:[0,3,23,26,36,48,49],levelmanag:[3,38,48],levelselect:26,libatla:41,libav:41,libavformat:41,libbla:41,libboost:41,libbz2:41,libfluidsynth:41,libgl1:41,libglew:41,libgm:41,libgstream:41,libgtk2:41,libgtk:41,libjpeg:41,liblapack:41,libnotifi:41,libopen:41,libosmesa6:41,libportmidi:41,librari:[26,41,44],libsdl1:41,libsdl2:41,libsdl:41,libsm:41,libsmpeg:41,libswscal:41,libtiff:41,libwebkitgtk:41,libwildmidi:41,like:[26,34,38,40,41,47],likelihood:[6,10],line:[3,38,48,49],linear:30,linearboxtoboxmap:30,linearli:30,list:[0,3,4,23,24,26,27,29,30,34,35,48,49],load:[0,37,39,49],load_memory_from_file_path:49,local:[3,40,41,48],locat:[21,24,29,47],log:[0,3,5,9,48],log_to_screen:[3,48],logger:[0,3,48],look:[36,41],loop:38,loss:[1,2,3,6,9,10,12,13,14,21,22,23,27,35,40,48],lot:[27,37,43,47],low:[7,10,29,30,34],low_i:34,low_x:34,lower:[0,31,38],lowest:[29,30,34],lstm:40,lumin:29,lvert:[12,22],lvl:49,mai:[0,23,42,49],main:[3,35,38,40,42,48,49],mainli:39,major:27,make:[0,3,23,26,35,37,41,43,47,48],manag:[3,23,39,41,48],mandatori:[34,36,40],mani:[3,15,42,43],manner:[10,16,17,20,29,38],manual:41,map:[3,23,26,28,29,30,34,35,48],mark:24,markdown:48,mask:[11,30],masked_target_space_high:30,masked_target_space_low:30,master:[3,38,41,48],match:[2,19,23,34],mathbb:5,mathop:5,max:[5,12,17,22,29],max_a:[11,14,19,20],max_action_valu:24,max_episodes_to_achieve_reward:0,max_fps_for_human_control:0,max_over_num_fram:26,max_simultaneous_selected_act:34,max_siz:31,max_spe:26,maxim:[4,13],maximum:[0,12,14,19,20,24,26,27,29,31],mean:[0,2,6,7,8,9,10,18,23,27,29,30,34,37,47],meant:40,measur:[3,4,23,26,29,34,36,47,48],measurements_nam:34,mechan:[28,39,43,49],memor:47,memori:[3
,22,24,29,35,38,39,41,46,47,48],memory_backend:41,memorygranular:31,memoryparamet:[3,35],merg:[23,26],mesa:41,method:[0,5,6,10,17,23,29,31],metric:[0,34,37],middlewar:[19,23,40],middleware_paramet:23,middlewareparamet:23,midpoint:21,might:[3,9,26,35,40,48],min:[6,12,20,22],min_reward_threshold:0,mind:49,minim:[2,4,12],minimap_s:26,minimum:[0,6,29],mix:[3,6,10,19,20,47],mixedmontecarloalgorithmparamet:16,mixer1:41,mixtur:[16,23],mjkei:41,mjpro150:41,mjpro150_linux:41,mkdir:41,mmc:[16,47],mmc_agent:16,mode:[20,23,25,32,33,38,39,41,49],model:[0,16,18,23,46,49],modif:47,modul:[3,35,38,39,48],modular:[35,38,40,46],monitor:39,mont:[3,20],monte_carlo_mixing_r:[16,20],more:[3,7,17,23,29,35,37,38,40,41,46,48,49],moreov:37,most:[3,9,19,23,24,27,40,43,47,48,49],mostli:[29,38],motiv:38,move:[6,10,29,37,43],mp4:0,mse:[2,13,14,21],much:[6,10,38,47],mujoco:[26,30,36,41,44],mujoco_kei:41,mujoco_pi:41,multi:[10,23,34,40],multiarrai:[3,48],multidimension:34,multipl:[4,6,10,17,23,26,27,29,30,31,34,37,38,43,46,49],multipli:[4,9,23,29],multiselect:30,multitask:[26,44],must:[23,29,43],mxnet:49,n_step:[19,22,24,31],n_step_discounted_reward:24,n_step_q_ag:17,nabla:7,nabla_:7,nabla_a:7,naf:47,naf_ag:18,nafalgorithmparamet:18,name:[3,23,24,26,29,34,35,41,48,49],namespac:33,nasm:41,nativ:[0,26,36,44],native_rend:0,navig:3,ndarrai:[3,23,24,26,27,29,30,34,36,48],nearest:19,neat:37,nec:[0,47],nec_ag:19,necalgorithmparamet:19,necessari:[3,19,23,48],necessarili:29,need:[0,3,22,23,26,27,34,35,38,43,47,48,49],neg:[4,29],neighbor:19,neon_compon:35,nervanasystem:41,network:[0,3,23,27,35,38,43,46,47,48,49],network_input_tupl:23,network_nam:[3,48],network_param:27,network_paramet:23,network_wrapp:[3,23,48],networkparamet:[3,23,27,35],networkwrapp:[3,48],neural:[3,16,23,40,43],never:23,new_value_shift_coeffici:[19,31],new_weight:23,newli:[20,36,47],next:[3,7,13,14,18,20,21,24,26,38,48,49],next_stat:24,nfs_data_stor:25,nfsdatastoreparamet:25,nice:49,no_accumul:23,node:[23,40],nois:[7,8,18,27,38],noise_percentage_schedul:27,noisi:[9,22,27],non_episod:31,none:[0,3,6,7,10,23,24,26,27,29,30,34,36,48],norm:23,norm_unclipped_grad:23,norm_unclippsed_grad:23,normal:[3,4,9,27,28,29,34],note:[19,23,27,48],notebook:35,notic:[23,47],notori:[37,43,47],now:[6,36],nstepqalgorithmparamet:17,nth:22,num_act:[19,31,34],num_bins_per_dimens:30,num_class:31,num_consecutive_playing_step:[3,7,48],num_consecutive_training_step:[3,48],num_gpu:0,num_neighbor:31,num_predicted_steps_ahead:4,num_speedup_step:26,num_steps_between_copying_online_weights_to_target:[7,17],num_steps_between_gradient_upd:[5,9,17],num_task:0,num_training_task:0,num_work:0,number:[0,2,4,5,7,9,11,12,17,19,21,22,23,24,26,27,29,30,31,37,44,49],number_of_knn:19,numpi:[3,23,24,26,27,29,30,34,36,48],nvidia:41,object:[0,3,22,23,26,27,29,31,38,48],observ:[0,3,4,10,23,24,26,28,36,38,48],observation_reduction_by_sub_parts_name_filt:29,observation_rescale_size_by_factor_filt:29,observation_rescale_to_size_filt:29,observation_space_s:23,observation_space_typ:26,observation_stat:29,observation_typ:26,observationspac:34,observationspacetyp:26,observationtyp:26,obtain:[3,48],off:[39,47],offer:[26,44],often:[37,38,40],old:[6,10,23,47],old_weight:23,onc:[0,6,9,10,11,12,13,14,16,17,20,21,22,23,34,49],one:[0,3,15,19,20,23,24,26,27,28,31,34,36,37,40,47,48],ones:[36,47],onli:[0,3,4,5,6,9,10,11,12,14,15,17,19,21,22,23,24,26,27,29,30,36,38,47,48,49],onlin:[7,11,12,13,14,16,17,18,19,20,21,22,23,38,40],online_network:23,onnx:[0,23],onto:28,open:[0,26,44],openai:[41,44],opencv:41,oper:[20,23,29],optim:[3,4,23
,42],optimization_epoch:6,optimizer_epsilon:23,optimizer_typ:23,option:[9,23,26,30,34,35,37,39,40,49],orchestr:[39,41,46],order:[0,3,5,6,7,9,10,13,14,15,17,18,19,20,21,23,24,28,29,30,37,38,40,43,47,48],org:[17,31],origin:[17,29,30,43],ornstein:[7,8,27],other:[0,2,9,15,20,23,26,28,29,31,37,38,47],otherwis:[10,11,23,26,27,34],our:6,out:[2,13,14,27,28,30,37,41,46,47,49],outcom:[27,38],output:[0,4,7,11,12,18,19,23,27,28,29,34,35,40],output_0_0:23,output_observation_spac:29,outputfilt:38,outsid:[4,27],over:[3,6,9,10,17,19,22,23,24,27,29,30,37,38,47,48],overestim:7,overfit:10,overhead:0,overlai:37,override_existing_kei:31,overriden:35,overview:38,overwhelm:38,overwritten:23,own:[23,35],p_j:[12,22],page:[3,43],pair:[0,34],pal:[20,47],pal_ag:20,pal_alpha:20,palalgorithmparamet:20,paper:[5,9,12,17,19,21,26,31,43],parallel:[23,37,40],parallel_predict:23,param:[3,23,24,25,26,27,32,33,35,36,48],paramet:[2,3,4,5,6,7,9,10,12,16,17,19,20,21,22,23,24,25,26,27,29,30,31,32,33,34,35,36,43,46,48,49],parameters_server_host:0,parent:[3,23,48],parent_path_suffix:[3,23,48],parmet:3,pars:38,part:[0,11,23,24,27,29,30,39,40,43,47],part_nam:29,partial:30,partialdiscreteactionspacemap:30,particular:4,particularli:[26,27,34,43,47],pass:[0,4,7,8,18,19,23,26,27,28,36,37,38,40,49],patamet:19,patchelf:41,patchelf_0:41,path:[0,3,23,35,36,41,48,49],pattern:38,pdf:31,penal:[6,7,10],penalti:10,pendulum_hac:36,pendulum_with_go:36,pendulumwithgo:36,per:[0,3,4,34,35,38,48],percentag:27,percentil:27,perceptron:40,perform:[0,3,23,24,29,31,36,37,38,47,48],period:[40,49],persist:3,persistent_advantage_learn:20,perspect:12,phase:[3,6,7,8,10,23,26,27,38,48],phi:[12,22],physic:[26,44],pi_:6,pick:26,pickl:49,pip3:41,pip:41,pixel:26,place:[30,37,38],placehold:[23,27],plai:[0,3,9,11,13,14,17,27,35,37,48],plain:40,planarmap:26,planarmapsobservationspac:29,platform:[26,44],pleas:[17,43],plu:23,plugin:41,point:[29,34,38,39],polici:[1,3,4,5,8,11,17,18,19,25,35,38,39,40,41,42,46,47],policy_gradient_rescal:[5,6,9,10],policy_gradients_ag:9,policygradientalgorithmparamet:9,policygradientrescal:[5,6,9,10],policyoptimizationag:35,popul:38,popular:[26,44],port:0,posit:[4,29],possibl:[2,3,4,19,27,30,34,37,40,46,47,48,49],post:[28,46],post_training_command:[3,48],power:[26,44],ppo:[6,10,47],ppo_ag:10,ppoalgorithmparamet:10,pre:[7,27,28],predefin:[11,20,27,49],predict:[1,2,3,5,6,7,10,11,12,13,14,20,21,22,23,27,40,47,48],prediction_typ:[3,48],predictiontyp:[3,48],prefect:47,prefer:23,prefix:[3,48],prep:41,prepar:[3,48],prepare_batch_for_infer:[3,48],present:[15,19,26,29,47],preset:[0,5,35,36,38,39,41,49],press:[37,49],prevent:[7,10,38],previou:29,previous:[10,23],print:[0,3,49],print_networks_summari:0,priorit:[22,31],prioriti:[22,31],privat:34,probabilit:5,probabl:[3,5,9,11,12,22,24,27,35,47,48],process:[0,3,7,8,23,27,28,29,30,35,37,38,40,43,46,48],produc:23,progress:23,project:[12,22],propag:6,propagate_updates_to_dnd:19,properti:[23,31,35,36,41],proport:31,provid:[23,39],proxi:38,proxim:3,pub:[32,33,41],publish:43,purpos:[0,3,9],pursuit:2,pybullet:[26,44],pygam:[0,41],pytest:41,python3:41,python:[26,31,35,41,44,46],qr_dqn_agent:21,qualiti:26,quantil:[3,47],quantileregressiondqnalgorithmparamet:21,queri:[19,23,38,47],question:47,quit:37,r_i:[5,17],r_t:[4,6,22],rainbow:[3,35,47],rainbow_ag:35,rainbow_dqn_ag:22,rainbowag:35,rainbowagentparamet:35,rainbowalgorithmparamet:35,rainbowdqnalgorithmparamet:22,rainbowexplorationparamet:35,rainbowmemoryparamet:35,rainbownetworkparamet:35,rais:[3,24,48],ramp:[35,38],random:[0,17,26,27,34,38,43],random_initializatio
n_step:26,randomli:[24,38],rang:[6,7,10,12,22,26,27,29,30,34,47],rare:19,rate:[0,16,19,23,26,40],rate_for_copying_weights_to_target:7,rather:[4,37],ratio:[6,10,16,29],raw:[26,44],reach:[0,10,34],read:25,readabl:38,readm:41,real:3,reason:[29,43],rebuild_on_every_upd:31,receiv:[23,24],recent:[3,22,23,47,48],recommend:36,redi:[32,33,41],redispubsub:41,redispubsubmemorybackendparamet:32,reduc:[1,2,9,10,20,23,29,38,47],reduct:29,reduction_method:29,reductionmethod:29,redund:29,refer:[2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,39,41],referenc:3,regard:[3,48],regist:[3,48],register_sign:[3,48],registri:41,regress:[2,3,47],regula:[6,10],regular:[5,6,9,10,17,19,23,27,30,31,47],regularli:23,reinforc:[3,5,7,8,9,12,13,14,15,17,20,21,22,26,27,37,38,40,42,43,44,47],rel:27,relat:[23,41],relationship:47,releas:[46,47],relev:[3,11,27,29,48],remov:29,render:[0,3,26,36],reorder:29,repeat:[26,38],replac:[27,29,31,41],replace_mse_with_huber_loss:23,replai:[1,2,3,7,11,12,13,14,17,19,20,21,22,31,38,47,48,49],replay_buff:49,replicated_devic:23,repo:36,repositori:46,repres:[0,6,10,12,22,23,24,26,27,30,34,49],represent:40,reproduc:[38,43],request:[3,23,48],requir:[3,23,25,27,29,37,40,41,47,48],requires_action_valu:27,rescal:[4,5,6,9,10,23,28,29],rescale_factor:29,rescaleinterpolationtyp:29,rescaling_interpolation_typ:29,research:[26,43,44],reset:[3,19,23,26,27,36,48],reset_accumulated_gradi:23,reset_evaluation_st:[3,48],reset_gradi:23,reset_internal_st:[3,26,48],resourc:[39,41],respect:[7,24,26],respons:[3,24,26,38,48],rest:[23,24,30,41],restart:36,restor:[0,3,48],restore_checkpoint:[3,48],result:[3,4,12,13,14,15,21,22,23,29,30,43,47,48,49],retriev:[19,31],return_additional_data:31,reus:38,reusabl:40,reward:[0,1,2,3,4,7,9,16,17,22,23,24,26,28,34,36,37,38,47,48],reward_test_level:0,reward_typ:34,rgb:[26,29,34],rho:7,right:[2,3,27,30,37,47,48],rl_coach:[0,1,2,3,4,5,6,7,9,10,12,14,16,17,18,19,20,21,22,23,24,25,26,27,29,30,31,32,33,34,36,41,48,49],rms_prop_optimizer_decai:23,rmsprop:23,roboschool:[26,44],robot:[26,34,44,46],roboti:41,rollout:[3,25,32,33,39,41,48,49],root:[37,41],rule:[7,11],run:[0,3,4,7,9,10,11,13,14,19,20,23,26,27,29,48,49],run_pre_network_filter_for_infer:[3,48],runphas:[3,48],runtim:41,rvert:[12,22],s3_bucket_nam:41,s3_creds_fil:41,s3_data_stor:25,s3_end_point:41,s3datastoreparamet:25,s_t:[4,5,7,11,12,13,14,16,17,18,20,22],sai:47,same:[3,4,6,9,16,17,20,23,26,30,31,37,40,43,47,48],sampl:[1,2,3,5,7,9,10,11,12,13,14,16,17,20,21,22,23,27,31,34,38,41,48],sample_with_info:34,satur:7,save:[0,3,22,23,27,41,48,49],save_checkpoint:[3,48],saver:[3,23,48],savercollect:[3,23,48],scale:[4,9,23,29,37,41,46,49],scale_down_gradients_by_number_of_workers_for_sync_train:23,scale_measurements_target:4,scaler:23,schedul:[6,27,31,38,39,41,49],scheme:[5,27,38,47],schulman:10,sci:41,scienc:43,scipi:[29,41],scope:23,scratch:47,scratchpad:0,screen:[3,26,36,49],screen_siz:26,script:38,second:[0,23,37,47,49],section:[41,42,44],see:[3,26,29,41,43,44,47,48,49],seed:[0,26,43],seen:[4,19,20,26,29,38,43,47],segment:[26,34],select:[5,11,19,23,24,27,29,30,34,36,37,38,46,49],self:[3,23,35,36,48],send:[36,40],separ:[0,3,15,29,30,40,42,47],separate_actions_for_throttle_and_brak:26,seper:9,sequenti:[4,24,31],serv:[6,9,40],server:0,server_height:26,server_width:26,sess:[3,23,48],session:[3,23,48],set:[0,2,3,4,5,6,7,10,12,13,14,16,19,20,22,23,24,26,27,29,30,34,35,39,43,44,46,47,48,49],set_environment_paramet:[3,48],set_goal:26,set_incoming_direct:[3,48],set_is_train:23,set_sess:[3,48],set_variable_valu:23,set_weight:23,setup:[3,41
,48],setup_logg:[3,48],setuptool:41,sever:[0,3,6,9,10,11,23,26,27,29,35,36,37,38,40,44,47,48,49],shape:[23,29,34],share:[0,3,23,31,40,48],shared_memory_scratchpad:0,shared_optim:23,shift:[30,38],shine:37,should:[0,3,4,6,10,11,17,20,23,24,26,29,31,34,35,36,39,48,49],should_dump:0,shouldn:11,show:43,shown:43,shuffl:24,side:[3,48],sigma:27,signal:[3,38,48],signal_nam:[3,48],significantli:15,similar:[6,15,17,24,26,30,47],simpl:[9,31,35,36,40,46,47,49],simplest:47,simplif:47,simplifi:[6,37,40],simul:[26,36,44,49],simultan:6,sinc:[3,6,7,9,17,19,20,22,23,27,29,48],singl:[3,4,5,6,10,11,15,16,17,23,24,26,27,30,34,37,38,40,48],size:[23,24,27,29,30,31,34],skill:47,skip:[26,38],slave:[3,48],slice:24,slow:[23,49],slower:[0,15,23],slowli:7,small:[6,19,31],smaller:27,smooth:37,soft:[7,10,18],softmax:27,softwar:41,solut:47,solv:[29,36,44,46],some:[0,3,10,23,24,27,29,35,36,37,40,43,47,48,49],sort:21,sourc:[0,1,2,3,4,5,6,7,9,10,12,14,16,17,18,19,20,21,22,23,24,25,26,27,29,30,31,32,33,34,36,41,44,48],space:[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,26,27,28,29,30,31,38,46,48],spacesdefinit:[3,23,48],spatial:47,spawn:[39,41],special:15,specif:[0,3,11,15,19,23,24,35,38,49],specifi:[0,23,26,27,29,36,39,49],speed:[23,29,47],speedup:49,spread:[29,30],squar:29,squeeze_list:23,squeeze_output:23,src:41,stabil:[17,23,47],stabl:[40,47],stack:[3,28,29,34,48],stack_siz:[23,29],stacking_axi:29,stage:40,stai:43,standard:[6,9,10,11,27,29,37],starcraft2_environ:26,starcraft2environ:26,starcraft:[34,44],starcraftobservationtyp:26,start:[3,7,10,15,20,24,29,30,36,41,48],state:[1,2,3,4,5,6,7,8,9,10,11,13,14,15,16,17,18,19,20,21,22,23,24,26,29,31,34,35,36,38,40,42,47,48],state_key_with_the_class_index:[2,31],state_spac:26,state_valu:24,statist:[3,9,29,46,48],stdev:27,steep:27,step:[0,3,4,5,6,7,9,10,11,12,13,14,16,18,19,20,21,22,23,24,26,27,29,35,36,37,38,47,48,49],stepmethod:[7,17],stochast:38,stop:[0,26],store:[0,3,19,22,24,26,29,31,37,38,39,41,46,48,49],store_transitions_only_when_episodes_are_termin:22,str:[0,2,3,4,17,23,24,26,27,29,30,34,48],strategi:[26,44],stream:[15,39],strict:43,string:[0,23,26],structur:[0,3,24,31,35,38,48],stuff:23,style:27,sub:[30,31,32,33,34,35,38,41,49],sub_spac:34,subset:[37,43,47],subtract:20,succeed:26,success:[0,26,47],suffer:37,suffici:24,suffix:[3,23,48],suggest:35,suit:[0,44],suitabl:[39,49],sum:[4,6,9,16,23,24],sum_:[5,12,16,17,19,22],summari:[0,3,48],supervis:47,suppli:[3,48],support:[0,3,23,26,27,37,40,41,42,44,46,49],sure:[0,41,43],surrog:6,swig:41,swingup:26,symbol:23,sync:[3,23,38,39,48],synchron:[0,23,38,40],t_max:[9,17],tag:41,take:[0,9,10,15,19,20,23,26,27,28,36,37,38],taken:[1,2,4,5,6,7,10,12,15,19,20,21,22,23,24,26,27],tanh:7,tar:41,target:[0,1,2,3,4,5,6,7,10,11,12,13,14,16,17,18,19,20,21,22,23,26,29,30,34,35,38,40,48],target_act:30,target_kl_diverg:10,target_network:23,target_success_r:26,targets_horizon:17,task:[0,1,2,26,29,35,37,44],task_index:0,techniqu:[6,10,46,47],technolog:39,teh:23,temperatur:27,temperature_schedul:27,tensor:[3,23,48],tensorboard:0,tensorflow:[0,3,23,48,49],tensorflow_support:23,term:[6,10,34],termin:[3,7,24,38,48],test:[0,3,5,7,8,9,10,23,35,43,46,49],test_using_a_trace_test:0,textrm:38,than:[0,3,10,23,27,37,40,48],thei:[3,19,20,23,27,37,38,39,47,48,49],them:[4,5,9,17,23,24,26,29,34,36,37,40],therefor:[0,7,23,28,47],theta:[6,7,12,22,27],theta_:6,thi:[0,3,4,5,6,7,9,10,11,15,17,19,22,23,24,26,27,28,29,30,31,32,34,35,36,37,38,39,40,41,43,47,48,49],thing:37,those:[0,3,7,11,13,14,15,19,24,27,30,38,40,42,47,48],thousand:[10,11,12,13,14,16,20,21,2
2],thread:23,three:[3,39,40,41,42],threshold:[10,19,29],through:[0,3,4,7,8,9,10,11,19,20,23,35,36,38,40,48],tild:7,time:[0,4,20,23,27,30,31,37,40,47],time_limit:36,timestep:[4,9],timid:41,tmp:0,togeth:[3,17,24,38,48],toggl:37,too:10,tool:[37,41,47],top:[23,26,28,29,31,36,37,47],torqu:26,total:[0,3,9,10,16,19,20,24,31,35,37,47,48],total_loss:23,total_return:24,trace:0,trace_max_env_step:0,trace_test_level:0,tradeoff:27,train:[0,3,15,23,27,32,33,35,36,37,38,39,40,43,46,47,48],train_and_sync_network:23,train_on_batch:23,trainer:[25,39],transfer:[26,32,44],transit:[1,2,3,4,5,7,9,10,12,13,14,17,19,20,21,22,31,35,38,39,48],transition_idx:24,tri:47,trick:43,tricki:37,trigger:[26,41],ttf2:41,tune:27,tupl:[1,2,3,7,23,24,26,31,34,35],turn:[2,47],tutori:[35,36],tweak:[3,48],two:[7,9,17,23,26,27,28,29,30,34,36,39,40,49],txt:41,type:[0,3,9,15,23,26,29,34,35,38,40,46,47,48,49],typic:[6,10,23,47,49],ubuntu16:41,uhlenbeck:[7,8,27],uint8:29,unbound:34,uncertain:27,uncertainti:27,unchang:10,unclip:[3,35,48],uncorrel:17,undeploi:39,under:[3,23,35,49],underbrac:5,understand:49,unifi:6,uniformli:[26,27,30,34],union:[3,24,26,27,30,34,48],uniqu:23,unit:37,unlik:10,unmask:30,unnecessari:0,unshar:[3,48],unsign:29,unspecifi:23,unstabl:[37,43],until:[0,9,10,19,22,27],unus:23,unzip:41,updat:[3,6,7,9,10,11,12,13,14,15,17,18,19,20,21,22,23,24,27,35,36,37,38,40,41,47,48],update_discounted_reward:24,update_log:[3,48],update_online_network:23,update_step_in_episode_log:[3,48],update_target_network:23,update_transition_before_adding_to_replay_buff:[3,48],upgrad:41,upon:[3,5,35,48],upper:27,usag:[30,46],use:[0,1,2,3,4,5,7,8,9,11,13,14,18,23,24,25,26,27,29,30,31,34,35,36,38,40,41,46,47,48,49],use_accumulated_reward_as_measur:4,use_cpu:0,use_full_action_spac:26,use_kl_regular:[6,10],use_non_zero_discount_for_terminal_st:7,use_separate_networks_per_head:23,use_target_network_for_evalu:7,used:[0,2,3,5,6,7,9,10,11,12,16,17,18,19,20,21,23,26,27,29,30,31,32,33,35,36,38,39,40,43,48,49],useful:[0,3,4,22,23,27,29,34,43,47,48,49],user:[23,26,27,37,38,41],userguid:41,uses:[0,1,6,10,15,24,25,27,33,38,39,41,43,47,49],using:[0,3,5,6,7,9,10,13,14,16,17,18,19,20,22,23,25,26,27,29,32,35,36,37,39,44,47,48,49],usr:41,usual:[29,38],util:[3,37,48],v_max:12,v_min:12,val:[3,34,48],val_matches_space_definit:34,valid:[0,34],valu:[0,2,3,4,5,6,7,10,11,12,13,14,15,17,18,19,20,22,23,24,26,27,29,30,31,34,35,38,40,41,42,47,48],valuabl:37,value_targets_mix_fract:[6,10],valueexcept:[3,48],valueoptimizationag:35,van:4,vari:40,variabl:[23,26,41],variable_scop:23,varianc:[9,27,37],variant:[27,31,47],variou:[3,24,31,46],vector:[3,4,7,8,10,11,23,26,29,34,36,40,47,48],vectorobservationspac:29,verbos:26,veri:[0,6,7,9,15,19,37,47,49],version:[6,10,24],versu:23,vertic:23,via:[2,11],video:[0,3,26],video_dump_method:0,view:37,viewabl:[3,48],visit:43,visual:[0,3,26,44,46],visualization_paramet:26,visualizationparamet:[3,26],vizdoom:[41,44],vote:27,wai:[3,6,10,27,30,36,38,40,46,48,49],wait:[5,23,39],walk:36,want:[3,4,22,23,24,29,30,31,48],warn:[27,29,30],wasn:24,weather_id:26,websit:[26,46],weight:[4,5,6,7,10,11,12,13,14,16,17,18,19,20,21,22,23,27,38,40,47],well:[19,23,27,34,47],went:10,were:[4,12,13,14,15,19,21,22,23,24,30,43],west:41,wget:41,what:[10,47],when:[0,3,4,5,6,7,8,9,10,19,23,24,25,26,27,29,32,33,35,36,37,48,49],whenev:39,where:[2,3,4,5,6,10,11,12,15,17,19,20,22,23,24,26,27,29,30,34,37,47,48],which:[0,1,2,3,5,6,7,9,10,11,15,17,18,19,20,21,23,24,25,26,27,29,31,32,33,34,35,36,37,38,39,40,42,43,44,46,47,48,49],who:38,why:[37,38],window:[29,30],wise:29,within:[0,6
,10,18,27,34,37],without:[5,10,30,31,37,47,49],won:[4,23],wont:23,work:[3,17,23,27,29,30,37,38,47,48,49],workaround:0,workdir:41,worker:[0,3,17,23,25,29,31,32,33,37,39,40,41,47,48,49],worker_devic:23,worker_host:0,wors:47,would:[23,41,47],wrap:[26,29,38,44],wrapper:[3,23,24,26,34,40,48],write:[0,3,48],written:[3,22,25,48],www:41,xdist:41,y_t:[7,11,13,14,16,18,19,20],year:47,yet:[15,36],you:[4,29,31,35,36,41,46,49],your:[35,36,41,49],yuv:29,z_i:[12,22],z_j:[12,22],zero:[2,13,14],zip:41,zlib1g:41},titles:["Additional Parameters","Behavioral Cloning","Conditional Imitation Learning","Agents","Direct Future Prediction","Actor-Critic","Clipped Proximal Policy Optimization","Deep Deterministic Policy Gradient","Hierarchical Actor Critic","Policy Gradient","Proximal Policy Optimization","Bootstrapped DQN","Categorical DQN","Double DQN","Deep Q Networks","Dueling DQN","Mixed Monte Carlo","N-Step Q Learning","Normalized Advantage Functions","Neural Episodic Control","Persistent Advantage Learning","Quantile Regression DQN","Rainbow","Architectures","Core Types","Data Stores","Environments","Exploration Policies","Filters","Input Filters","Output Filters","Memories","Memory Backends","Orchestrators","Spaces","Adding a New Agent","Adding a New Environment","Coach Dashboard","Control Flow","Distributed Coach - Horizontal Scale-Out","Network Design","Usage - Distributed Coach","Algorithms","Benchmarks","Environments","Features","Reinforcement Learning Coach","Selecting an Algorithm","test","Usage"],titleterms:{"final":19,"function":18,"new":[35,36],"switch":49,Adding:[35,36],Using:36,across:47,action:[4,5,6,7,8,9,10,11,18,19,30,34,47],actioninfo:24,actor:[5,8],addit:[0,49],additivenois:27,advantag:[18,20],agent:[3,35,38,49],algorithm:[1,2,4,5,6,7,8,9,10,11,12,13,14,16,17,18,19,20,21,22,42,47,49],api:36,architectur:23,attentionactionspac:34,backend:32,balancedexperiencereplai:31,batch:24,behavior:1,benchmark:43,between:49,blizzard:26,boltzmann:27,bootstrap:[11,27],boxactionspac:34,build:41,can:47,carla:26,carlo:16,categor:[12,27],choos:[4,5,6,7,8,9,10,11,18,19],clip:6,clone:[1,41],coach:[36,37,39,41,46],collect:47,compar:37,compoundactionspac:34,condit:2,config:41,contain:41,continu:[6,10,47],continuousentropi:27,control:[19,26,38],copi:40,core:24,creat:41,critic:[5,8],dashboard:37,data:25,deep:[7,14,49],deepmind:26,demonstr:47,descript:[1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22],design:40,determinist:7,direct:4,discret:[5,9,47],discreteactionspac:34,distribut:[39,41],distributedtaskparamet:0,doe:47,doubl:13,dqn:[11,12,13,15,21],duel:15,dump:49,egreedi:27,environ:[26,36,44,47,49],envrespons:24,episod:[19,24,31],episodicexperiencereplai:31,episodichindsightexperiencereplai:31,episodichrlhindsightexperiencereplai:31,evalu:49,experiencereplai:31,explor:27,explorationpolici:27,featur:45,file:41,filter:[28,29,30],flag:49,flow:38,framework:49,from:47,futur:4,gener:15,gif:49,goal:34,gradient:[7,9],graph:38,greedi:27,gym:[26,36],have:47,hierarch:8,horizont:39,human:[47,49],imag:41,imageobservationspac:34,imit:[2,49],implement:41,input:29,interfac:41,keep:40,kubernet:33,learn:[2,17,20,46,49],level:38,manag:38,memori:[31,32],mix:16,mont:16,more:47,multi:49,multipl:47,multiselectactionspac:34,network:[1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,40],networkwrapp:23,neural:19,nfsdatastor:25,node:[47,49],non:31,normal:18,observ:[29,34],observationclippingfilt:29,observationcropfilt:29,observationmoveaxisfilt:29,observationnormalizationfilt:29,observationreductionbysubpartsnamefilt:29,observatio
nrescalesizebyfactorfilt:29,observationrescaletosizefilt:29,observationrgbtoyfilt:29,observationsqueezefilt:29,observationstackingfilt:29,observationtouint8filt:29,openai:[26,36],optim:[6,10],orchestr:33,ouprocess:27,out:39,output:30,pain:47,parallel:47,paramet:0,parameternois:27,persist:20,plai:49,planarmapsobservationspac:34,polici:[6,7,9,10,27],predict:4,prerequisit:41,presetvalidationparamet:0,prioritizedexperiencereplai:31,process:47,proxim:[6,10],push:41,qdnd:31,quantil:21,rainbow:22,redispubsubbackend:32,regress:21,reinforc:46,render:49,repositori:41,reward:29,rewardclippingfilt:29,rewardnormalizationfilt:29,rewardrescalefilt:29,run:[37,41],s3datastor:25,sampl:47,scale:39,select:47,signal:37,simul:47,singl:49,singleepisodebuff:31,solv:47,space:[34,47],starcraft:26,statist:37,step:17,store:[11,25],structur:[1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22],suit:26,support:39,sync:40,synchron:39,task:47,taskparamet:0,test:48,thread:49,through:49,track:37,train:[1,2,4,5,6,7,8,9,10,11,12,13,14,16,17,18,19,20,21,22,49],transit:[11,24],transitioncollect:31,truncatednorm:27,type:[24,39],ucb:27,usag:[41,49],vectorobservationspac:34,visual:[37,49],visualizationparamet:0,vizdoom:26,you:47,your:47}})
\ No newline at end of file
+Search.setIndex({docnames:["components/additional_parameters","components/agents/imitation/bc","components/agents/imitation/cil","components/agents/index","components/agents/other/dfp","components/agents/policy_optimization/ac","components/agents/policy_optimization/cppo","components/agents/policy_optimization/ddpg","components/agents/policy_optimization/hac","components/agents/policy_optimization/pg","components/agents/policy_optimization/ppo","components/agents/value_optimization/bs_dqn","components/agents/value_optimization/categorical_dqn","components/agents/value_optimization/double_dqn","components/agents/value_optimization/dqn","components/agents/value_optimization/dueling_dqn","components/agents/value_optimization/mmc","components/agents/value_optimization/n_step","components/agents/value_optimization/naf","components/agents/value_optimization/nec","components/agents/value_optimization/pal","components/agents/value_optimization/qr_dqn","components/agents/value_optimization/rainbow","components/architectures/index","components/core_types","components/data_stores/index","components/environments/index","components/exploration_policies/index","components/filters/index","components/filters/input_filters","components/filters/output_filters","components/memories/index","components/memory_backends/index","components/orchestrators/index","components/spaces","contributing/add_agent","contributing/add_env","dashboard","design/control_flow","design/horizontal_scaling","design/network","dist_usage","features/algorithms","features/benchmarks","features/environments","features/index","index","selecting_an_algorithm","test","usage"],envversion:{"sphinx.domains.c":1,"sphinx.domains.changeset":1,"sphinx.domains.cpp":1,"sphinx.domains.javascript":1,"sphinx.domains.math":2,"sphinx.domains.python":1,"sphinx.domains.rst":1,"sphinx.domains.std":1,"sphinx.ext.todo":1,"sphinx.ext.viewcode":1,sphinx:55},filenames:["components/additional_parameters.rst","components/agents/imitation/bc.rst","components/agents/imitation/cil.rst","components/agents/index.rst","components/agents/other/dfp.rst","components/agents/policy_optimization/ac.rst","components/agents/policy_optimization/cppo.rst","components/agents/policy_optimization/ddpg.rst","components/agents/policy_optimization/hac.rst","components/agents/policy_optimization/pg.rst","components/agents/policy_optimization/ppo.rst","components/agents/value_optimization/bs_dqn.rst","components/agents/value_optimization/categorical_dqn.rst","components/agents/value_optimization/double_dqn.rst","components/agents/value_optimization/dqn.rst","components/agents/value_optimization/dueling_dqn.rst","components/agents/value_optimization/mmc.rst","components/agents/value_optimization/n_step.rst","components/agents/value_optimization/naf.rst","components/agents/value_optimization/nec.rst","components/agents/value_optimization/pal.rst","components/agents/value_optimization/qr_dqn.rst","components/agents/value_optimization/rainbow.rst","components/architectures/index.rst","components/core_types.rst","components/data_stores/index.rst","components/environments/index.rst","components/exploration_policies/index.rst","components/filters/index.rst","components/filters/input_filters.rst","components/filters/output_filters.rst","components/memories/index.rst","components/memory_backends/index.rst","components/orchestrators/index.rst","components/spaces.rst","contributing/add_agent.rst","contributing/add_env.rst","dashboard.rst","design/control_flow.rst","design/horizontal_scaling.rst","d
esign/network.rst","dist_usage.rst","features/algorithms.rst","features/benchmarks.rst","features/environments.rst","features/index.rst","index.rst","selecting_an_algorithm.rst","test.rst","usage.rst"],objects:{"rl_coach.agents.actor_critic_agent":{ActorCriticAlgorithmParameters:[5,0,1,""]},"rl_coach.agents.agent":{Agent:[3,0,1,""]},"rl_coach.agents.agent.Agent":{act:[3,1,1,""],call_memory:[3,1,1,""],choose_action:[3,1,1,""],collect_savers:[3,1,1,""],create_networks:[3,1,1,""],emulate_act_on_trainer:[3,1,1,""],emulate_observe_on_trainer:[3,1,1,""],get_predictions:[3,1,1,""],get_state_embedding:[3,1,1,""],handle_episode_ended:[3,1,1,""],init_environment_dependent_modules:[3,1,1,""],learn_from_batch:[3,1,1,""],log_to_screen:[3,1,1,""],observe:[3,1,1,""],parent:[3,2,1,""],phase:[3,2,1,""],post_training_commands:[3,1,1,""],prepare_batch_for_inference:[3,1,1,""],register_signal:[3,1,1,""],reset_evaluation_state:[3,1,1,""],reset_internal_state:[3,1,1,""],restore_checkpoint:[3,1,1,""],run_pre_network_filter_for_inference:[3,1,1,""],save_checkpoint:[3,1,1,""],set_environment_parameters:[3,1,1,""],set_incoming_directive:[3,1,1,""],set_session:[3,1,1,""],setup_logger:[3,1,1,""],sync:[3,1,1,""],train:[3,1,1,""],update_log:[3,1,1,""],update_step_in_episode_log:[3,1,1,""],update_transition_before_adding_to_replay_buffer:[3,1,1,""]},"rl_coach.agents.bc_agent":{BCAlgorithmParameters:[1,0,1,""]},"rl_coach.agents.categorical_dqn_agent":{CategoricalDQNAlgorithmParameters:[12,0,1,""]},"rl_coach.agents.cil_agent":{CILAlgorithmParameters:[2,0,1,""]},"rl_coach.agents.clipped_ppo_agent":{ClippedPPOAlgorithmParameters:[6,0,1,""]},"rl_coach.agents.ddpg_agent":{DDPGAlgorithmParameters:[7,0,1,""]},"rl_coach.agents.dfp_agent":{DFPAlgorithmParameters:[4,0,1,""]},"rl_coach.agents.dqn_agent":{DQNAgent:[48,0,1,""],DQNAlgorithmParameters:[14,0,1,""]},"rl_coach.agents.dqn_agent.DQNAgent":{act:[48,1,1,""],call_memory:[48,1,1,""],choose_action:[48,1,1,""],collect_savers:[48,1,1,""],create_networks:[48,1,1,""],emulate_act_on_trainer:[48,1,1,""],emulate_observe_on_trainer:[48,1,1,""],get_predictions:[48,1,1,""],get_state_embedding:[48,1,1,""],handle_episode_ended:[48,1,1,""],init_environment_dependent_modules:[48,1,1,""],learn_from_batch:[48,1,1,""],log_to_screen:[48,1,1,""],observe:[48,1,1,""],parent:[48,2,1,""],phase:[48,2,1,""],post_training_commands:[48,1,1,""],prepare_batch_for_inference:[48,1,1,""],register_signal:[48,1,1,""],reset_evaluation_state:[48,1,1,""],reset_internal_state:[48,1,1,""],restore_checkpoint:[48,1,1,""],run_pre_network_filter_for_inference:[48,1,1,""],save_checkpoint:[48,1,1,""],set_environment_parameters:[48,1,1,""],set_incoming_directive:[48,1,1,""],set_session:[48,1,1,""],setup_logger:[48,1,1,""],sync:[48,1,1,""],train:[48,1,1,""],update_log:[48,1,1,""],update_step_in_episode_log:[48,1,1,""],update_transition_before_adding_to_replay_buffer:[48,1,1,""]},"rl_coach.agents.mmc_agent":{MixedMonteCarloAlgorithmParameters:[16,0,1,""]},"rl_coach.agents.n_step_q_agent":{NStepQAlgorithmParameters:[17,0,1,""]},"rl_coach.agents.naf_agent":{NAFAlgorithmParameters:[18,0,1,""]},"rl_coach.agents.nec_agent":{NECAlgorithmParameters:[19,0,1,""]},"rl_coach.agents.pal_agent":{PALAlgorithmParameters:[20,0,1,""]},"rl_coach.agents.policy_gradients_agent":{PolicyGradientAlgorithmParameters:[9,0,1,""]},"rl_coach.agents.ppo_agent":{PPOAlgorithmParameters:[10,0,1,""]},"rl_coach.agents.qr_dqn_agent":{QuantileRegressionDQNAlgorithmParameters:[21,0,1,""]},"rl_coach.agents.rainbow_dqn_agent":{RainbowDQNAlgorithmParameters:[22,0,1,
""]},"rl_coach.architectures.architecture":{Architecture:[23,0,1,""]},"rl_coach.architectures.architecture.Architecture":{accumulate_gradients:[23,1,1,""],apply_and_reset_gradients:[23,1,1,""],apply_gradients:[23,1,1,""],collect_savers:[23,1,1,""],construct:[23,3,1,""],get_variable_value:[23,1,1,""],get_weights:[23,1,1,""],parallel_predict:[23,3,1,""],predict:[23,1,1,""],reset_accumulated_gradients:[23,1,1,""],set_variable_value:[23,1,1,""],set_weights:[23,1,1,""],train_on_batch:[23,1,1,""]},"rl_coach.architectures.network_wrapper":{NetworkWrapper:[23,0,1,""]},"rl_coach.architectures.network_wrapper.NetworkWrapper":{apply_gradients_and_sync_networks:[23,1,1,""],apply_gradients_to_global_network:[23,1,1,""],apply_gradients_to_online_network:[23,1,1,""],collect_savers:[23,1,1,""],parallel_prediction:[23,1,1,""],set_is_training:[23,1,1,""],sync:[23,1,1,""],train_and_sync_networks:[23,1,1,""],update_online_network:[23,1,1,""],update_target_network:[23,1,1,""]},"rl_coach.base_parameters":{AgentParameters:[3,0,1,""],DistributedTaskParameters:[0,0,1,""],NetworkParameters:[23,0,1,""],PresetValidationParameters:[0,0,1,""],TaskParameters:[0,0,1,""],VisualizationParameters:[0,0,1,""]},"rl_coach.core_types":{ActionInfo:[24,0,1,""],Batch:[24,0,1,""],EnvResponse:[24,0,1,""],Episode:[24,0,1,""],Transition:[24,0,1,""]},"rl_coach.core_types.Batch":{actions:[24,1,1,""],game_overs:[24,1,1,""],goals:[24,1,1,""],info:[24,1,1,""],info_as_list:[24,1,1,""],n_step_discounted_rewards:[24,1,1,""],next_states:[24,1,1,""],rewards:[24,1,1,""],shuffle:[24,1,1,""],size:[24,2,1,""],slice:[24,1,1,""],states:[24,1,1,""]},"rl_coach.core_types.Episode":{get_first_transition:[24,1,1,""],get_last_transition:[24,1,1,""],get_transition:[24,1,1,""],get_transitions_attribute:[24,1,1,""],insert:[24,1,1,""],is_empty:[24,1,1,""],length:[24,1,1,""],update_discounted_rewards:[24,1,1,""]},"rl_coach.data_stores.nfs_data_store":{NFSDataStore:[25,0,1,""]},"rl_coach.data_stores.s3_data_store":{S3DataStore:[25,0,1,""]},"rl_coach.environments.carla_environment":{CarlaEnvironment:[26,0,1,""]},"rl_coach.environments.control_suite_environment":{ControlSuiteEnvironment:[26,0,1,""]},"rl_coach.environments.doom_environment":{DoomEnvironment:[26,0,1,""]},"rl_coach.environments.environment":{Environment:[26,0,1,""]},"rl_coach.environments.environment.Environment":{action_space:[26,2,1,""],close:[26,1,1,""],get_action_from_user:[26,1,1,""],get_available_keys:[26,1,1,""],get_goal:[26,1,1,""],get_random_action:[26,1,1,""],get_rendered_image:[26,1,1,""],goal_space:[26,2,1,""],handle_episode_ended:[26,1,1,""],last_env_response:[26,2,1,""],phase:[26,2,1,""],render:[26,1,1,""],reset_internal_state:[26,1,1,""],set_goal:[26,1,1,""],state_space:[26,2,1,""],step:[26,1,1,""]},"rl_coach.environments.gym_environment":{GymEnvironment:[26,0,1,""]},"rl_coach.environments.starcraft2_environment":{StarCraft2Environment:[26,0,1,""]},"rl_coach.exploration_policies.additive_noise":{AdditiveNoise:[27,0,1,""]},"rl_coach.exploration_policies.boltzmann":{Boltzmann:[27,0,1,""]},"rl_coach.exploration_policies.bootstrapped":{Bootstrapped:[27,0,1,""]},"rl_coach.exploration_policies.categorical":{Categorical:[27,0,1,""]},"rl_coach.exploration_policies.continuous_entropy":{ContinuousEntropy:[27,0,1,""]},"rl_coach.exploration_policies.e_greedy":{EGreedy:[27,0,1,""]},"rl_coach.exploration_policies.exploration_policy":{ExplorationPolicy:[27,0,1,""]},"rl_coach.exploration_policies.exploration_policy.ExplorationPolicy":{change_phase:[27,1,1,""],get_action:[27,1,1,""],requires_action_value
s:[27,1,1,""],reset:[27,1,1,""]},"rl_coach.exploration_policies.greedy":{Greedy:[27,0,1,""]},"rl_coach.exploration_policies.ou_process":{OUProcess:[27,0,1,""]},"rl_coach.exploration_policies.parameter_noise":{ParameterNoise:[27,0,1,""]},"rl_coach.exploration_policies.truncated_normal":{TruncatedNormal:[27,0,1,""]},"rl_coach.exploration_policies.ucb":{UCB:[27,0,1,""]},"rl_coach.filters.action":{AttentionDiscretization:[30,0,1,""],BoxDiscretization:[30,0,1,""],BoxMasking:[30,0,1,""],FullDiscreteActionSpaceMap:[30,0,1,""],LinearBoxToBoxMap:[30,0,1,""],PartialDiscreteActionSpaceMap:[30,0,1,""]},"rl_coach.filters.observation":{ObservationClippingFilter:[29,0,1,""],ObservationCropFilter:[29,0,1,""],ObservationMoveAxisFilter:[29,0,1,""],ObservationNormalizationFilter:[29,0,1,""],ObservationRGBToYFilter:[29,0,1,""],ObservationReductionBySubPartsNameFilter:[29,0,1,""],ObservationRescaleSizeByFactorFilter:[29,0,1,""],ObservationRescaleToSizeFilter:[29,0,1,""],ObservationSqueezeFilter:[29,0,1,""],ObservationStackingFilter:[29,0,1,""],ObservationToUInt8Filter:[29,0,1,""]},"rl_coach.filters.reward":{RewardClippingFilter:[29,0,1,""],RewardNormalizationFilter:[29,0,1,""],RewardRescaleFilter:[29,0,1,""]},"rl_coach.memories.backend.redis":{RedisPubSubBackend:[32,0,1,""]},"rl_coach.memories.episodic":{EpisodicExperienceReplay:[31,0,1,""],EpisodicHRLHindsightExperienceReplay:[31,0,1,""],EpisodicHindsightExperienceReplay:[31,0,1,""],SingleEpisodeBuffer:[31,0,1,""]},"rl_coach.memories.non_episodic":{BalancedExperienceReplay:[31,0,1,""],ExperienceReplay:[31,0,1,""],PrioritizedExperienceReplay:[31,0,1,""],QDND:[31,0,1,""],TransitionCollection:[31,0,1,""]},"rl_coach.orchestrators.kubernetes_orchestrator":{Kubernetes:[33,0,1,""]},"rl_coach.spaces":{ActionSpace:[34,0,1,""],AttentionActionSpace:[34,0,1,""],BoxActionSpace:[34,0,1,""],CompoundActionSpace:[34,0,1,""],DiscreteActionSpace:[34,0,1,""],GoalsSpace:[34,0,1,""],ImageObservationSpace:[34,0,1,""],MultiSelectActionSpace:[34,0,1,""],ObservationSpace:[34,0,1,""],PlanarMapsObservationSpace:[34,0,1,""],Space:[34,0,1,""],VectorObservationSpace:[34,0,1,""]},"rl_coach.spaces.ActionSpace":{clip_action_to_space:[34,1,1,""],is_point_in_space_shape:[34,1,1,""],sample:[34,1,1,""],sample_with_info:[34,1,1,""],val_matches_space_definition:[34,1,1,""]},"rl_coach.spaces.GoalsSpace":{DistanceMetric:[34,0,1,""],clip_action_to_space:[34,1,1,""],distance_from_goal:[34,1,1,""],get_reward_for_goal_and_state:[34,1,1,""],goal_from_state:[34,1,1,""],is_point_in_space_shape:[34,1,1,""],sample:[34,1,1,""],sample_with_info:[34,1,1,""],val_matches_space_definition:[34,1,1,""]},"rl_coach.spaces.ObservationSpace":{is_point_in_space_shape:[34,1,1,""],sample:[34,1,1,""],val_matches_space_definition:[34,1,1,""]},"rl_coach.spaces.Space":{is_point_in_space_shape:[34,1,1,""],sample:[34,1,1,""],val_matches_space_definition:[34,1,1,""]}},objnames:{"0":["py","class","Python class"],"1":["py","method","Python method"],"2":["py","attribute","Python attribute"],"3":["py","staticmethod","Python static 
method"]},objtypes:{"0":"py:class","1":"py:method","2":"py:attribute","3":"py:staticmethod"},terms:{"100x100":30,"160x160":29,"1_0":[12,22],"1st":27,"20x20":30,"210x160":29,"2nd":27,"50k":38,"9_amd64":41,"\u03b3cdot":14,"abstract":[35,39],"boolean":[3,24,34,48],"break":37,"case":[0,3,5,19,23,24,27,34,47,48,49],"class":[0,1,2,3,4,5,6,7,9,10,12,14,16,17,18,19,20,21,22,23,24,25,26,27,29,30,31,32,33,34,35,36,38,42,48],"default":[0,27,49],"enum":[23,26,34],"export":[0,23,41],"final":[7,13,14,16,20,38],"float":[3,4,5,6,7,9,10,12,16,19,20,21,23,24,26,27,29,30,31,34,35,48],"function":[0,1,3,6,7,10,23,26,27,34,35,36,38,40,48],"import":[15,27,31,36,47,49],"int":[0,3,4,5,6,9,12,17,19,21,22,24,26,27,29,30,31,34,48],"long":40,"new":[0,3,6,7,10,19,20,23,24,30,38,39,46,47,48],"return":[0,3,7,9,10,11,16,19,20,22,23,24,26,27,29,31,34,35,36,38,47,48],"short":[0,38],"static":23,"super":[35,36],"switch":37,"true":[0,3,4,5,6,7,10,19,20,22,23,24,26,27,30,31,34,48],"try":[4,43,47],"while":[0,5,7,8,9,10,23,26,37,40,47,49],AWS:41,Adding:[15,46],And:[36,47],But:[37,47],Doing:47,For:[0,1,2,3,4,6,9,11,12,13,14,17,19,20,23,24,26,27,28,29,30,34,35,36,38,39,40,41,43,48,49],Has:23,Its:48,NFS:[25,41],One:[21,49],That:37,The:[0,1,2,3,4,5,6,7,9,10,11,12,16,17,18,19,20,21,22,23,24,25,26,27,29,30,31,32,33,34,35,37,38,39,40,41,43,44,46,47,48,49],Then:[4,6,7,11,18,20],There:[6,10,23,27,28,35,36,40,49],These:[1,2,3,21,26,33,39,40,41],Use:[1,2,7,18,19],Used:27,Uses:47,Using:[7,11,13,14,41],Will:23,With:[27,46],__init__:[26,35,36],_index:[5,17],_render:36,_restart_environment_episod:36,_take_act:36,_update_st:36,a2c:47,a3c:[9,17,37,47],a_i:19,a_t:[4,5,7,11,12,13,14,16,17,18,20,22],a_valu:5,abl:[30,47],about:[3,24,38,48,49],abov:[7,23,38],abs:[17,31],absolut:27,acceler:18,accept:26,access:[23,35,41],accord:[0,3,4,5,7,11,17,23,24,27,34,37,38,40,48],accordingli:[19,34,38,49],account:[4,6,10,19,20,27],accumul:[3,4,5,9,17,19,22,23,29,47,48],accumulate_gradi:23,accumulated_gradi:23,accur:47,achiev:[0,4,6,26,29,31,34,43,47,49],across:[9,16,37],act:[3,4,7,11,21,34,35,38,48],action:[1,2,3,12,13,14,15,16,17,20,21,22,23,24,26,27,28,31,35,36,38,40,48],action_idx:36,action_intrinsic_reward:24,action_penalti:7,action_prob:24,action_spac:[26,27],action_space_s:23,action_valu:[24,27],actioninfo:[3,34,38,48],actionspac:[27,34],actiontyp:36,activ:[7,23],actor:[3,6,7,10,27,40,47],actor_critic_ag:5,actorcriticag:35,actorcriticalgorithmparamet:5,actual:[4,5,12,13,14,21,22,27,30,31],adam:[6,23],adam_optimizer_beta1:23,adam_optimizer_beta2:23,adapt:[6,10],add:[7,8,18,24,27,29,36,38,41],add_rendered_image_to_env_respons:0,added:[0,4,6,9,10,19,27,31,35],adding:[3,10,27,35,48],addit:[3,23,24,26,27,29,31,34,36,37,38,40,46,48],addition:[23,26,29,35,36,38,43,44,49],additional_fetch:23,additional_simulator_paramet:[26,36],additionali:37,additive_nois:27,additivenoiseparamet:27,advanc:[22,46],advantag:[3,5,6,10,15,27],affect:[0,11,23],aforement:[13,14,20],after:[0,3,7,9,10,17,18,20,22,23,24,26,29,34,48,49],again:27,agent:[0,1,2,4,5,6,7,9,10,12,14,16,17,18,19,20,21,22,23,24,26,27,28,29,30,34,36,37,40,42,43,46,47,48],agent_param:39,agent_paramet:[3,23,48],agentparamet:[3,23,35],aggreg:38,ahead:[4,47],aim:27,algorithm:[3,24,27,35,37,38,39,43,45,46,48],algorithmparamet:[3,35],all:[0,3,9,11,19,20,23,24,26,27,29,30,34,35,36,37,38,39,40,41,44,48,49],allow:[0,3,4,15,23,24,26,27,28,29,30,31,37,38,39,40,46,47,48,49],allow_brak:26,allow_duplicates_in_batch_sampl:31,allow_no_action_to_be_select:34,along:[19,26,27,44],alpha:[16,20,31],alreadi:[19,24,36,47],also:[5,6,19,20,23
,26,34,35,37,43,47,49],altern:[26,36,44],alwai:[23,27,30],amazon:41,amazonaw:41,amount:[7,9,16,20,27,38,47],analysi:37,analyz:37,ani:[3,23,24,26,30,31,35,38,39,40,41,48],anoth:[3,15,23,28,48],answer:47,api:[26,40,44,46],appear:[3,48],appli:[0,3,5,7,9,17,23,24,27,29,47,48],applic:47,apply_and_reset_gradi:23,apply_gradi:23,apply_gradients_and_sync_network:23,apply_gradients_every_x_episod:[5,9,17],apply_gradients_to_global_network:23,apply_gradients_to_online_network:23,apply_stop_condit:0,appropri:41,approx:7,approxim:[40,47],apt:41,arbitrari:29,architectur:[3,15,35,46,48],architecture_num_q_head:27,area:30,arg:[3,23,41,48],argmax_a:[13,16,20],argument:[3,12,22,23,26,34,38,48],around:[23,24,40],arrai:[3,23,24,26,29,34,36,48],art:[3,42],artifact:41,artifici:31,arxiv:[17,31],aspect:[27,29,37],assign:[0,2,5,23,27],assign_kl_coeffici:23,assign_op:23,assum:[24,27,29,31,47],async:[23,39],async_train:23,asynchron:[5,17,23],atari:[14,26,29,41,49],atari_a3c:49,atari_dqn:49,ath:15,atom:[12,21,22],attach:26,attend:30,attent:30,attentionactionspac:30,attentiondiscret:30,attribut:24,attribute_nam:24,author:[26,43,44],auto_select_all_armi:26,autoclean:41,automat:[23,49],autonom:[26,44,46],autoremov:41,auxiliari:[26,44],avail:[4,23,24,26,37,39,41,46,47,49],averag:[6,10,23,37,38],aws:41,axes:[29,37],axi:[29,37],axis_origin:29,axis_target:29,back:[6,39],backend:[23,39,41,46,49],background:49,backpropag:19,backward:23,balanc:2,band:37,base1:41,base64:41,base:[6,10,16,18,20,26,31,35,38,41,44,47],base_paramet:[0,3,23,26,27],baselin:47,basic:[9,24,39,49],batch:[1,2,3,4,5,7,9,10,11,12,13,14,15,17,20,21,22,23,31,35,38,48],batch_siz:23,bc_agent:1,bcalgorithmparamet:1,becaus:38,becom:[7,39],been:[15,24,29,43,47],befor:[3,5,10,22,23,24,29,38,39,40,41,47,48],begin:[0,4,38],behav:34,behavior:[3,29,31,35,43,47,48,49],being:[3,35,46,47,48],bellman:[12,21,22],benchmark:[37,45,46,47],best:[47,49],beta1:23,beta2:23,beta:[7,9,31],beta_entropi:[5,6,9,10],better:[15,47],between:[0,1,2,3,6,7,9,10,12,16,17,19,21,22,23,24,26,27,30,31,34,35,37,38,40,46,47],bfg:[6,10],big:[10,12,22],bilinear:29,bin:[30,41],binari:11,bind:23,binomi:11,bit:29,blizzard:44,blob:[26,29],block:46,blog:46,boilerpl:38,bolling:37,bool:[0,3,4,5,6,7,10,19,20,22,23,24,26,27,31,34,48],boost:[41,47],bootstrap:[3,5,6,7,10,16,17,19,20,22,24,47],bootstrap_total_return_from_old_polici:[19,24],both:[3,6,23,26,27,30,47,48],bound:[6,10,12,22,27,34,47],box2d:41,box:[27,30,34],boxactionspac:30,boxdiscret:30,boxmask:30,breakout:49,breakoutdeterminist:[26,49],bring:10,bucket:41,buffer:[1,2,3,11,12,13,14,17,19,20,21,22,31,38,47,48,49],build:[28,46,47],builder:41,built:[35,38],button:[37,49],c51:12,cach:41,calcul:[3,4,5,6,7,9,10,11,12,13,14,16,17,19,20,21,22,23,24,27,31,35,48],call:[0,3,9,17,23,24,26,38,48],call_memori:[3,48],callabl:34,camera:[26,36],camera_height:26,camera_width:26,cameratyp:[26,36],can:[0,2,3,5,6,7,10,20,23,24,26,27,28,29,30,34,35,36,37,38,40,44,46,48,49],cannot:[3,48],carla:[29,44],carla_environ:26,carlaenviron:26,carlaenvironmentparamet:26,carlo:[3,20],cartpol:[26,36],cartpole_a3c:49,cartpole_clippedppo:[41,49],cartpole_dqn:49,categor:[3,5,47],categori:[28,29],categorical_dqn_ag:12,categoricaldqnalgorithmparamet:12,caus:[29,37],cdot:[5,6,7,9,11,12,13,14,16,18,20,22],central:[23,37],chain:7,challeng:38,chang:[0,3,6,7,10,11,15,17,20,27,38,41,48],change_phas:27,channel:[26,29],channels_axi:34,check:[0,3,24,34,48],checkpoint:[0,3,23,25,39,41,48,49],checkpoint_dir:[3,48],checkpoint_prefix:[3,48],checkpoint_restore_dir:[0,49],checkpoint_save_dir:0,checkpoint_
save_sec:0,child:23,chmod:41,choic:[35,41],choos:[3,15,20,27,28,30,34,35,38,40,47,48,49],choose_act:[3,35,38,48],chosen:[3,20,27,30,35,48],chunk:10,cil:47,cil_ag:2,cilalgorithmparamet:2,classic_control:41,clean:[26,35,41],cli:41,clip:[3,7,10,23,29,34,47],clip_action_to_spac:34,clip_critic_target:7,clip_gradi:23,clip_high:27,clip_likelihood_ratio_using_epsilon:[6,10],clip_low:27,clip_max:29,clip_min:29,clipbyglobalnorm:23,clipped_ppo_ag:6,clippedppoalgorithmparamet:6,clipping_high:29,clipping_low:29,clone:[3,47],close:26,cmake:41,coach:[0,3,23,25,26,27,28,32,33,35,38,42,43,44,47,49],code:[36,38,47],coeffici:[6,10,23,27,31],collect:[3,6,9,10,17,23,24,31,38,43,46,48,49],collect_sav:[3,23,48],color:29,com:41,combin:[22,40,46,47],comma:0,command:[38,41,49],common:[35,37,41,49],commun:39,compar:[0,10,15,47],complet:[24,27,38],complex:[23,28,38,40,47,49],compon:[3,12,22,23,27,33,35,38,46,48,49],composit:[3,48],compositeag:[3,48],comput:[23,27],concat:23,concentr:38,condit:[0,3],confid:27,config:[26,49],configur:[3,5,9,35,41,48],confus:38,connect:23,connectionist:9,consecut:[7,19],consequ:[17,27],consid:[5,30,37],consist:[7,26,29,30,34,38,44],constantli:49,constantschedul:31,constrain:30,construct:[23,31],consumpt:29,contain:[0,1,2,3,11,23,24,26,36,38,48,49],content:41,contin:39,continu:[1,2,5,7,8,9,18,27,28,30,34,43],continuous_entropi:27,continuous_exploration_policy_paramet:27,contribut:[4,46],control:[2,3,5,6,7,10,23,27,29,37,44,46,47,48],control_suite_environ:26,controlsuiteenviron:26,conveni:[37,49],converg:9,convers:28,convert:[3,24,27,29,34,38,40,48],convolut:[23,40],coordin:30,copi:[7,11,12,13,14,16,17,18,20,21,22,23,41],core:[3,46,48],core_typ:[3,24,26,34,48],correct:[3,47],correctli:23,correl:27,correpond:24,correspond:[2,3,4,12,13,23,24,27,29,34,36,48],could:[3,23,34,41,48],count:16,countabl:30,counter:[3,48],counterpart:40,cpu:[0,23],crd:49,creat:[3,17,23,29,36,48,49],create_network:[3,48],create_target_network:23,creation:[3,48],credenti:41,critic:[3,6,7,10,27,40,47],crop:[29,30],crop_high:29,crop_low:29,cross:[1,12,22],csv:0,ctrl:37,cuda:41,cudnn7:41,curl:41,curr_stat:[3,35,48],current:[0,1,2,3,4,6,7,8,9,10,11,13,14,16,18,19,20,21,23,24,26,27,29,30,34,35,38,46,47,48],custom:[26,27,34,35,38],custom_reward_threshold:26,cycl:38,dai:49,dashboard:[0,3,41,46,48],data:[0,9,17,23,31,38,39,41,43,46,47,49],data_stor:[25,41],dataset:[6,10,47,49],date:[19,40,47,49],dcp:[41,49],ddpg:47,ddpg_agent:7,ddpgalgorithmparamet:7,ddqn:[16,20,47],deal:47,debug:[0,37,46],decai:[5,6,10,23],decid:[0,3,4,26,35,48],decis:[3,48],decod:41,dedic:23,deep:[0,3,5,11,13,15,17,18,22,48],deepmind:44,def:[35,36],default_act:34,default_input_filt:36,default_output_filt:36,defin:[0,3,5,6,9,10,17,19,20,23,24,26,27,29,30,31,34,35,36,38,39,40,43,44,48,49],definit:[3,23,26,34,36,38,48],delai:47,delta:[12,19,22],demonstr:[1,2,49],dens:27,densiti:16,depend:[0,3,23,29,31,34,36,41,43,47,48],deploi:[33,39],depth:26,descend:47,describ:[3,12,21,29,31,35,38,41,48],descript:[3,30,34,42,49],design:[38,41,46],desir:[30,35],destabil:9,detail:[3,24,42,44,46,49],determin:[2,3,19,24,31,48],determinist:[3,47],dev:41,develop:[38,43],deviat:[9,10,27,29,37],devic:23,dfp:47,dfp_agent:4,dfpalgorithmparamet:4,dict:[3,4,23,24,26,27,34,48],dict_siz:31,dictat:4,dictionari:[2,3,23,24,26,31,34,35,48],did:26,differ:[0,1,2,3,4,5,6,9,10,11,15,23,26,27,29,34,35,36,37,39,40,46,47,48],differenti:15,difficult:[37,43],difficulti:49,dimens:[24,26,29,30],dimension:[10,30],dir:[3,48,49],direct:[3,26,48],directli:[3,5,38,40,48],directori:[0,23,35,37,41,49],disabl:4
[docs/searchindex.js: regenerated Sphinx search index — machine-generated, minified term/title index omitted for readability.]
diff --git a/docs_raw/source/components/exploration_policies/index.rst b/docs_raw/source/components/exploration_policies/index.rst
index 10b6c77..3d56dcc 100644
--- a/docs_raw/source/components/exploration_policies/index.rst
+++ b/docs_raw/source/components/exploration_policies/index.rst
@@ -38,50 +38,50 @@ spaces.
ExplorationPolicy
-----------------
-.. autoclass:: rl_coach.exploration_policies.ExplorationPolicy
+.. autoclass:: rl_coach.exploration_policies.exploration_policy.ExplorationPolicy
:members:
:inherited-members:
AdditiveNoise
-------------
-.. autoclass:: rl_coach.exploration_policies.AdditiveNoise
+.. autoclass:: rl_coach.exploration_policies.additive_noise.AdditiveNoise
Boltzmann
---------
-.. autoclass:: rl_coach.exploration_policies.Boltzmann
+.. autoclass:: rl_coach.exploration_policies.boltzmann.Boltzmann
Bootstrapped
------------
-.. autoclass:: rl_coach.exploration_policies.Bootstrapped
+.. autoclass:: rl_coach.exploration_policies.bootstrapped.Bootstrapped
Categorical
-----------
-.. autoclass:: rl_coach.exploration_policies.Categorical
+.. autoclass:: rl_coach.exploration_policies.categorical.Categorical
ContinuousEntropy
-----------------
-.. autoclass:: rl_coach.exploration_policies.ContinuousEntropy
+.. autoclass:: rl_coach.exploration_policies.continuous_entropy.ContinuousEntropy
EGreedy
-------
-.. autoclass:: rl_coach.exploration_policies.EGreedy
+.. autoclass:: rl_coach.exploration_policies.e_greedy.EGreedy
Greedy
------
-.. autoclass:: rl_coach.exploration_policies.Greedy
+.. autoclass:: rl_coach.exploration_policies.greedy.Greedy
OUProcess
---------
-.. autoclass:: rl_coach.exploration_policies.OUProcess
+.. autoclass:: rl_coach.exploration_policies.ou_process.OUProcess
ParameterNoise
--------------
-.. autoclass:: rl_coach.exploration_policies.ParameterNoise
+.. autoclass:: rl_coach.exploration_policies.parameter_noise.ParameterNoise
TruncatedNormal
---------------
-.. autoclass:: rl_coach.exploration_policies.TruncatedNormal
+.. autoclass:: rl_coach.exploration_policies.truncated_normal.TruncatedNormal
UCB
---
-.. autoclass:: rl_coach.exploration_policies.UCB
\ No newline at end of file
+.. autoclass:: rl_coach.exploration_policies.ucb.UCB
\ No newline at end of file
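
Note: the autoclass directives above now name each policy's defining module rather than the package. This pairs with the last hunk in this patch, which empties rl_coach/exploration_policies/__init__.py: Sphinx autodoc imports the dotted target at build time, so once the package-level re-exports are gone, only the submodule paths still resolve. A minimal sketch of what autodoc effectively does (a simplification, not Sphinx's actual code path):

    import importlib

    # ".. autoclass:: rl_coach.exploration_policies.additive_noise.AdditiveNoise"
    # boils down to importing the module and looking up the attribute:
    mod = importlib.import_module("rl_coach.exploration_policies.additive_noise")
    cls = getattr(mod, "AdditiveNoise")
    print(cls.__doc__)  # the docstring autodoc renders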
diff --git a/docs_raw/source/index.rst b/docs_raw/source/index.rst
index ca786ee..16c7024 100644
--- a/docs_raw/source/index.rst
+++ b/docs_raw/source/index.rst
@@ -25,7 +25,7 @@ Blog posts from the Intel® AI website:
* `Release 0.10.0 `_
-* `Release 0.11.0 `_ (current release)
+* `Release 0.11.0 `_ (current release)
You can find more details in the `GitHub repository `_.
diff --git a/rl_coach/agents/agent.py b/rl_coach/agents/agent.py
index 21be6be..53dc4c3 100644
--- a/rl_coach/agents/agent.py
+++ b/rl_coach/agents/agent.py
@@ -15,13 +15,11 @@
#
import copy
-import os
import random
from collections import OrderedDict
from typing import Dict, List, Union, Tuple
import numpy as np
-from pandas import read_pickle
from six.moves import range
from rl_coach.agents.agent_interface import AgentInterface
diff --git a/rl_coach/coach.py b/rl_coach/coach.py
index 33f83ba..a3ded7e 100644
--- a/rl_coach/coach.py
+++ b/rl_coach/coach.py
@@ -35,7 +35,6 @@ from multiprocessing.managers import BaseManager
import subprocess
from rl_coach.graph_managers.graph_manager import HumanPlayScheduleParameters, GraphManager
from rl_coach.utils import list_all_presets, short_dynamic_import, get_open_port, SharedMemoryScratchPad, get_base_dir
-from rl_coach.agents.human_agent import HumanAgentParameters
from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
from rl_coach.environments.environment import SingleLevelSelection
from rl_coach.memories.backend.redis import RedisPubSubMemoryBackendParameters
@@ -229,6 +228,8 @@ class CoachLauncher(object):
# for human play we need to create a custom graph manager
if args.play:
+ from rl_coach.agents.human_agent import HumanAgentParameters
+
env_params = short_dynamic_import(args.environment_type, ignore_module_case=True)()
env_params.human_control = True
schedule_params = HumanPlayScheduleParameters()
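
Note: this hunk defers the HumanAgentParameters import into the `args.play` branch, so rl_coach.coach no longer imports the human-agent module (and whatever it drags in) at startup. A hedged sketch of the deferred-import pattern, using a hypothetical helper that is not part of rl_coach:

    # launch_human_play is a hypothetical illustration, not rl_coach API.
    def launch_human_play(play: bool):
        if play:
            # Deferred import: the module is only loaded on the human-play
            # code path, keeping ordinary training runs free of the dependency.
            from rl_coach.agents.human_agent import HumanAgentParameters
            return HumanAgentParameters()
        return None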
diff --git a/rl_coach/exploration_policies/__init__.py b/rl_coach/exploration_policies/__init__.py
index 922390f..e69de29 100644
--- a/rl_coach/exploration_policies/__init__.py
+++ b/rl_coach/exploration_policies/__init__.py
@@ -1,55 +0,0 @@
-#
-# Copyright (c) 2017 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from .additive_noise import AdditiveNoiseParameters, AdditiveNoise
-from .boltzmann import BoltzmannParameters, Boltzmann
-from .bootstrapped import BootstrappedParameters, Bootstrapped
-from .categorical import CategoricalParameters, Categorical
-from .continuous_entropy import ContinuousEntropyParameters, ContinuousEntropy
-from .e_greedy import EGreedyParameters, EGreedy
-from .exploration_policy import ExplorationParameters, ExplorationPolicy
-from .greedy import GreedyParameters, Greedy
-from .ou_process import OUProcessParameters, OUProcess
-from .parameter_noise import ParameterNoiseParameters, ParameterNoise
-from .truncated_normal import TruncatedNormalParameters, TruncatedNormal
-from .ucb import UCBParameters, UCB
-
-__all__ = [
- 'AdditiveNoiseParameters',
- 'AdditiveNoise',
- 'BoltzmannParameters',
- 'Boltzmann',
- 'BootstrappedParameters',
- 'Bootstrapped',
- 'CategoricalParameters',
- 'Categorical',
- 'ContinuousEntropyParameters',
- 'ContinuousEntropy',
- 'EGreedyParameters',
- 'EGreedy',
- 'ExplorationParameters',
- 'ExplorationPolicy',
- 'GreedyParameters',
- 'Greedy',
- 'OUProcessParameters',
- 'OUProcess',
- 'ParameterNoiseParameters',
- 'ParameterNoise',
- 'TruncatedNormalParameters',
- 'TruncatedNormal',
- 'UCBParameters',
- 'UCB'
-]
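
Note: emptying rl_coach/exploration_policies/__init__.py removes the package-level re-exports listed above, which is why the documentation hunks earlier in this patch switch to fully qualified module paths. Downstream code has to do the same; a before/after sketch, assuming no compatibility shim is reintroduced elsewhere:

    # Before this patch (worked via the package __init__ re-exports):
    #   from rl_coach.exploration_policies import EGreedy, EGreedyParameters

    # After this patch, import from the defining submodule:
    from rl_coach.exploration_policies.e_greedy import EGreedy, EGreedyParameters

    params = EGreedyParameters()

A plausible motive, though the patch does not state one, is cutting import-time cost and avoiding circular imports — the same concern the deferred import in rl_coach/coach.py addresses.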