Source code for rl_coach.data_stores.nfs_data_store
-import uuid
+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+import uuid
from rl_coach.data_stores.data_store import DataStore, DataStoreParameters
diff --git a/docs/_modules/rl_coach/data_stores/s3_data_store.html b/docs/_modules/rl_coach/data_stores/s3_data_store.html
index 40aba1d..dc9fd7e 100644
--- a/docs/_modules/rl_coach/data_stores/s3_data_store.html
+++ b/docs/_modules/rl_coach/data_stores/s3_data_store.html
@@ -178,7 +178,24 @@
Source code for rl_coach.data_stores.s3_data_store
-from rl_coach.data_stores.data_store import DataStore, DataStoreParameters
+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+from rl_coach.data_stores.data_store import DataStore, DataStoreParameters
from minio import Minio
from minio.error import ResponseError
from configparser import ConfigParser, Error
diff --git a/docs/_modules/rl_coach/exploration_policies/additive_noise.html b/docs/_modules/rl_coach/exploration_policies/additive_noise.html
index 83c73ff..1bd8dca 100644
--- a/docs/_modules/rl_coach/exploration_policies/additive_noise.html
+++ b/docs/_modules/rl_coach/exploration_policies/additive_noise.html
@@ -216,7 +216,7 @@
return 'rl_coach.exploration_policies.additive_noise:AdditiveNoise'
-[docs]class AdditiveNoise(ExplorationPolicy):
+[docs]class AdditiveNoise(ExplorationPolicy):
"""
AdditiveNoise is an exploration policy intended for continuous action spaces. It takes the action from the agent
and adds Gaussian-distributed noise to it. The amount of noise added to the action follows the noise amount that
diff --git a/docs/_modules/rl_coach/exploration_policies/boltzmann.html b/docs/_modules/rl_coach/exploration_policies/boltzmann.html
index ad34b34..a71de9d 100644
--- a/docs/_modules/rl_coach/exploration_policies/boltzmann.html
+++ b/docs/_modules/rl_coach/exploration_policies/boltzmann.html
@@ -215,7 +215,7 @@
-[docs]class Boltzmann(ExplorationPolicy):
+[docs]class Boltzmann(ExplorationPolicy):
"""
The Boltzmann exploration policy is intended for discrete action spaces. It assumes that each of the possible
actions has some value assigned to it (such as the Q value), and uses a softmax function to convert these values
diff --git a/docs/_modules/rl_coach/exploration_policies/bootstrapped.html b/docs/_modules/rl_coach/exploration_policies/bootstrapped.html
index 35058a4..ea3ac97 100644
--- a/docs/_modules/rl_coach/exploration_policies/bootstrapped.html
+++ b/docs/_modules/rl_coach/exploration_policies/bootstrapped.html
@@ -218,7 +218,7 @@
return 'rl_coach.exploration_policies.bootstrapped:Bootstrapped'
-[docs]class Bootstrapped(EGreedy):
+[docs]class Bootstrapped(EGreedy):
"""
Bootstrapped exploration policy is currently only used for discrete action spaces along with the
Bootstrapped DQN agent. It assumes that there is an ensemble of network heads, where each one predicts the
diff --git a/docs/_modules/rl_coach/exploration_policies/categorical.html b/docs/_modules/rl_coach/exploration_policies/categorical.html
index edfcf2a..18925ee 100644
--- a/docs/_modules/rl_coach/exploration_policies/categorical.html
+++ b/docs/_modules/rl_coach/exploration_policies/categorical.html
@@ -209,7 +209,7 @@
return 'rl_coach.exploration_policies.categorical:Categorical'
-[docs]class Categorical(ExplorationPolicy):
+[docs]class Categorical(ExplorationPolicy):
"""
Categorical exploration policy is intended for discrete action spaces. It expects the action values to
represent a probability distribution over the actions, from which a single action will be sampled.
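
A minimal sketch of this sampling rule (illustrative, not Coach's code); as the rendered docs later in this diff note, evaluation takes the most probable action instead of sampling:

import numpy as np

def categorical_action(action_probs, evaluation=False):
    # Training: sample from the policy's own distribution.
    # Evaluation: act greedily on the most probable action.
    if evaluation:
        return int(np.argmax(action_probs))
    return int(np.random.choice(len(action_probs), p=action_probs))

print(categorical_action(np.array([0.7, 0.2, 0.1])))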
diff --git a/docs/_modules/rl_coach/exploration_policies/continuous_entropy.html b/docs/_modules/rl_coach/exploration_policies/continuous_entropy.html
index 6fb3c16..39ac379 100644
--- a/docs/_modules/rl_coach/exploration_policies/continuous_entropy.html
+++ b/docs/_modules/rl_coach/exploration_policies/continuous_entropy.html
@@ -203,7 +203,7 @@
return 'rl_coach.exploration_policies.continuous_entropy:ContinuousEntropy'
-[docs]class ContinuousEntropy(AdditiveNoise):
+[docs]class ContinuousEntropy(AdditiveNoise):
"""
Continuous entropy is an exploration policy that is actually implemented as part of the network.
The exploration policy class is only a placeholder for choosing this policy. The exploration policy is
diff --git a/docs/_modules/rl_coach/exploration_policies/e_greedy.html b/docs/_modules/rl_coach/exploration_policies/e_greedy.html
index deecf18..2e88e22 100644
--- a/docs/_modules/rl_coach/exploration_policies/e_greedy.html
+++ b/docs/_modules/rl_coach/exploration_policies/e_greedy.html
@@ -222,7 +222,7 @@
return 'rl_coach.exploration_policies.e_greedy:EGreedy'
-[docs]class EGreedy(ExplorationPolicy):
+[docs]class EGreedy(ExplorationPolicy):
"""
e-greedy is an exploration policy that is intended for both discrete and continuous action spaces.
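
The discrete case reduces to a few lines; this is an illustrative sketch, with epsilon standing in for the current value of the epsilon_schedule in the class signature:

import numpy as np

def e_greedy_action(q_values, epsilon):
    # With probability epsilon explore uniformly, otherwise exploit.
    if np.random.rand() < epsilon:
        return int(np.random.randint(len(q_values)))
    return int(np.argmax(q_values))

print(e_greedy_action(np.array([0.1, 0.9, 0.3]), epsilon=0.1))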
diff --git a/docs/_modules/rl_coach/exploration_policies/exploration_policy.html b/docs/_modules/rl_coach/exploration_policies/exploration_policy.html
index e8b56bd..bef11d5 100644
--- a/docs/_modules/rl_coach/exploration_policies/exploration_policy.html
+++ b/docs/_modules/rl_coach/exploration_policies/exploration_policy.html
@@ -210,7 +210,7 @@
return 'rl_coach.exploration_policies.exploration_policy:ExplorationPolicy'
-[docs]class ExplorationPolicy(object):
+[docs]class ExplorationPolicy(object):
"""
An exploration policy takes the predicted actions or action values from the agent, and selects the action to
actually apply to the environment using some predefined algorithm.
@@ -222,14 +222,14 @@
self.phase = RunPhase.HEATUP
self.action_space = action_space
-[docs] def reset(self):
+[docs] def reset(self):
"""
Used for resetting the exploration policy parameters when needed
:return: None
"""
pass
-[docs] def get_action(self, action_values: List[ActionType]) -> ActionType:
+[docs] def get_action(self, action_values: List[ActionType]) -> ActionType:
"""
Given a list of values corresponding to each action,
choose one action according to the exploration policy
@@ -243,7 +243,7 @@
else:
raise ValueError("The get_action function should be overridden in the inheriting exploration class")
-[docs] def change_phase(self, phase):
+[docs] def change_phase(self, phase):
"""
Change between running phases of the algorithm
:param phase: Either Heatup or Train
@@ -251,7 +251,7 @@
"""
self.phase = phase
-[docs] def requires_action_values(self) -> bool:
+[docs] def requires_action_values(self) -> bool:
"""
Allows exploration policies to define if they require the action values for the current step.
This can save a lot of computation. For example, in e-greedy, if the random value generated is smaller
diff --git a/docs/_modules/rl_coach/exploration_policies/greedy.html b/docs/_modules/rl_coach/exploration_policies/greedy.html
index fe0d0fd..8bcfca2 100644
--- a/docs/_modules/rl_coach/exploration_policies/greedy.html
+++ b/docs/_modules/rl_coach/exploration_policies/greedy.html
@@ -209,7 +209,7 @@
return 'rl_coach.exploration_policies.greedy:Greedy'
-[docs]class Greedy(ExplorationPolicy):
+[docs]class Greedy(ExplorationPolicy):
"""
The Greedy exploration policy is intended for both discrete and continuous action spaces.
For discrete action spaces, it always selects the action with the maximum value, as given by the agent.
diff --git a/docs/_modules/rl_coach/exploration_policies/ou_process.html b/docs/_modules/rl_coach/exploration_policies/ou_process.html
index 0df44f5..15e0dcb 100644
--- a/docs/_modules/rl_coach/exploration_policies/ou_process.html
+++ b/docs/_modules/rl_coach/exploration_policies/ou_process.html
@@ -219,7 +219,7 @@
# Ornstein-Uhlenbeck process
-[docs]class OUProcess(ExplorationPolicy):
+[docs]class OUProcess(ExplorationPolicy):
"""
OUProcess exploration policy is intended for continuous action spaces, and selects the action according to
an Ornstein-Uhlenbeck process. The Ornstein-Uhlenbeck process implements the action as a Gaussian process, where
diff --git a/docs/_modules/rl_coach/exploration_policies/parameter_noise.html b/docs/_modules/rl_coach/exploration_policies/parameter_noise.html
index dcd0aea..fd5b326 100644
--- a/docs/_modules/rl_coach/exploration_policies/parameter_noise.html
+++ b/docs/_modules/rl_coach/exploration_policies/parameter_noise.html
@@ -210,7 +210,8 @@
class ParameterNoiseParameters(ExplorationParameters):
def __init__(self, agent_params: AgentParameters):
super().__init__()
- if not isinstance(agent_params, DQNAgentParameters):
+
+ if not agent_params.algorithm.supports_parameter_noise:
raise ValueError("Currently only DQN variants are supported for using an exploration type of "
"ParameterNoise.")
@@ -221,7 +222,7 @@
return 'rl_coach.exploration_policies.parameter_noise:ParameterNoise'
-[docs]class ParameterNoise(ExplorationPolicy):
+[docs]class ParameterNoise(ExplorationPolicy):
"""
The ParameterNoise exploration policy is intended for both discrete and continuous action spaces.
It applies the exploration policy by replacing all the dense network layers with noisy layers.
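
A NumPy sketch of such a noisy dense layer (learned weight means and standard deviations, fresh noise on every forward pass). This illustrates the idea only; it is not Coach's layer, and the initial sigma value is an assumption:

import numpy as np

class NoisyDenseSketch:
    def __init__(self, in_dim, out_dim):
        # Each weight gets a learned mean and a learned standard deviation.
        self.w_mu = np.random.randn(in_dim, out_dim) * 0.1
        self.w_sigma = np.full((in_dim, out_dim), 0.017)

    def forward(self, x):
        # Fresh noise per forward pass perturbs the weights themselves,
        # so exploration happens in parameter space rather than action space.
        w = self.w_mu + self.w_sigma * np.random.randn(*self.w_mu.shape)
        return x @ w

layer = NoisyDenseSketch(4, 2)
print(layer.forward(np.ones(4)))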
diff --git a/docs/_modules/rl_coach/exploration_policies/truncated_normal.html b/docs/_modules/rl_coach/exploration_policies/truncated_normal.html
index 04a6205..11b9bfc 100644
--- a/docs/_modules/rl_coach/exploration_policies/truncated_normal.html
+++ b/docs/_modules/rl_coach/exploration_policies/truncated_normal.html
@@ -218,7 +218,7 @@
return 'rl_coach.exploration_policies.truncated_normal:TruncatedNormal'
-[docs]class TruncatedNormal(ExplorationPolicy):
+[docs]class TruncatedNormal(ExplorationPolicy):
"""
The TruncatedNormal exploration policy is intended for continuous action spaces. It samples the action from a
normal distribution, where the mean action is given by the agent, and the standard deviation can be given in
two different ways:
diff --git a/docs/_modules/rl_coach/exploration_policies/ucb.html b/docs/_modules/rl_coach/exploration_policies/ucb.html
index 88b0978..562dcba 100644
--- a/docs/_modules/rl_coach/exploration_policies/ucb.html
+++ b/docs/_modules/rl_coach/exploration_policies/ucb.html
@@ -222,7 +222,7 @@
return 'rl_coach.exploration_policies.ucb:UCB'
-[docs]class UCB(EGreedy):
+[docs]class UCB(EGreedy):
"""
The UCB exploration policy follows the upper confidence bound heuristic to sample actions in discrete action spaces.
It assumes that there are multiple network heads that are predicting action values, and that the standard deviation
diff --git a/docs/_modules/rl_coach/memories/backend/redis.html b/docs/_modules/rl_coach/memories/backend/redis.html
index 842700d..b00fab0 100644
--- a/docs/_modules/rl_coach/memories/backend/redis.html
+++ b/docs/_modules/rl_coach/memories/backend/redis.html
@@ -178,7 +178,23 @@
Source code for rl_coach.memories.backend.redis
-
+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
import redis
import pickle
import uuid
diff --git a/docs/_modules/rl_coach/memories/non_episodic/transition_collection.html b/docs/_modules/rl_coach/memories/non_episodic/transition_collection.html
index cb6bf56..0fcd72b 100644
--- a/docs/_modules/rl_coach/memories/non_episodic/transition_collection.html
+++ b/docs/_modules/rl_coach/memories/non_episodic/transition_collection.html
@@ -178,7 +178,24 @@
Source code for rl_coach.memories.non_episodic.transition_collection
-from rl_coach.core_types import Transition
+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+from rl_coach.core_types import Transition
[docs]class TransitionCollection(object):
diff --git a/docs/_modules/rl_coach/orchestrators/kubernetes_orchestrator.html b/docs/_modules/rl_coach/orchestrators/kubernetes_orchestrator.html
index 83db11f..9932e81 100644
--- a/docs/_modules/rl_coach/orchestrators/kubernetes_orchestrator.html
+++ b/docs/_modules/rl_coach/orchestrators/kubernetes_orchestrator.html
@@ -178,7 +178,24 @@
Source code for rl_coach.orchestrators.kubernetes_orchestrator
-import os
+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+import os
import uuid
import json
import time
diff --git a/docs/_sources/components/exploration_policies/index.rst.txt b/docs/_sources/components/exploration_policies/index.rst.txt
index 10b6c77..3d56dcc 100644
--- a/docs/_sources/components/exploration_policies/index.rst.txt
+++ b/docs/_sources/components/exploration_policies/index.rst.txt
@@ -38,50 +38,50 @@ spaces.
ExplorationPolicy
-----------------
-.. autoclass:: rl_coach.exploration_policies.ExplorationPolicy
+.. autoclass:: rl_coach.exploration_policies.exploration_policy.ExplorationPolicy
:members:
:inherited-members:
AdditiveNoise
-------------
-.. autoclass:: rl_coach.exploration_policies.AdditiveNoise
+.. autoclass:: rl_coach.exploration_policies.additive_noise.AdditiveNoise
Boltzmann
---------
-.. autoclass:: rl_coach.exploration_policies.Boltzmann
+.. autoclass:: rl_coach.exploration_policies.boltzmann.Boltzmann
Bootstrapped
------------
-.. autoclass:: rl_coach.exploration_policies.Bootstrapped
+.. autoclass:: rl_coach.exploration_policies.bootstrapped.Bootstrapped
Categorical
-----------
-.. autoclass:: rl_coach.exploration_policies.Categorical
+.. autoclass:: rl_coach.exploration_policies.categorical.Categorical
ContinuousEntropy
-----------------
-.. autoclass:: rl_coach.exploration_policies.ContinuousEntropy
+.. autoclass:: rl_coach.exploration_policies.continuous_entropy.ContinuousEntropy
EGreedy
-------
-.. autoclass:: rl_coach.exploration_policies.EGreedy
+.. autoclass:: rl_coach.exploration_policies.e_greedy.EGreedy
Greedy
------
-.. autoclass:: rl_coach.exploration_policies.Greedy
+.. autoclass:: rl_coach.exploration_policies.greedy.Greedy
OUProcess
---------
-.. autoclass:: rl_coach.exploration_policies.OUProcess
+.. autoclass:: rl_coach.exploration_policies.ou_process.OUProcess
ParameterNoise
--------------
-.. autoclass:: rl_coach.exploration_policies.ParameterNoise
+.. autoclass:: rl_coach.exploration_policies.parameter_noise.ParameterNoise
TruncatedNormal
---------------
-.. autoclass:: rl_coach.exploration_policies.TruncatedNormal
+.. autoclass:: rl_coach.exploration_policies.truncated_normal.TruncatedNormal
UCB
---
-.. autoclass:: rl_coach.exploration_policies.UCB
\ No newline at end of file
+.. autoclass:: rl_coach.exploration_policies.ucb.UCB
\ No newline at end of file
diff --git a/docs/_sources/index.rst.txt b/docs/_sources/index.rst.txt
index ca786ee..16c7024 100644
--- a/docs/_sources/index.rst.txt
+++ b/docs/_sources/index.rst.txt
@@ -25,7 +25,7 @@ Blog posts from the Intel® AI website:
* `Release 0.10.0 `_
-* `Release 0.11.0 `_ (current release)
+* `Release 0.11.0 `_ (current release)
You can find more details in the `GitHub repository `_.
diff --git a/docs/components/exploration_policies/index.html b/docs/components/exploration_policies/index.html
index f9d658a..388bbc8 100644
--- a/docs/components/exploration_policies/index.html
+++ b/docs/components/exploration_policies/index.html
@@ -264,8 +264,8 @@ spaces.
ExplorationPolicy¶
--
-class
rl_coach.exploration_policies.ExplorationPolicy(action_space: rl_coach.spaces.ActionSpace)[source]¶
+-
+class
rl_coach.exploration_policies.exploration_policy.ExplorationPolicy(action_space: rl_coach.spaces.ActionSpace)[source]¶
An exploration policy takes the predicted actions or action values from the agent, and selects the action to
actually apply to the environment using some predefined algorithm.
@@ -277,16 +277,16 @@ actually apply to the environment using some predefined algorithm.
--
-
change_phase(phase)[source]¶
+-
+
change_phase(phase)[source]¶
Change between running phases of the algorithm
:param phase: Either Heatup or Train
:return: None
--
-
get_action(action_values: List[Union[int, float, numpy.ndarray, List]]) → Union[int, float, numpy.ndarray, List][source]¶
+-
+
get_action(action_values: List[Union[int, float, numpy.ndarray, List]]) → Union[int, float, numpy.ndarray, List][source]¶
Given a list of values corresponding to each action,
choose one action according to the exploration policy
:param action_values: A list of action values
@@ -294,8 +294,8 @@ choose one actions according to the exploration policy
--
-
requires_action_values() → bool[source]¶
+-
+
requires_action_values() → bool[source]¶
Allows exploration policies to define if they require the action values for the current step.
This can save a lot of computation. For example, in e-greedy, if the random value generated is smaller
than epsilon, the action is completely random, and the action values don’t need to be calculated
@@ -303,8 +303,8 @@ than epsilon, the action is completely random, and the action values don’t nee
--
-
reset()[source]¶
+-
+
reset()[source]¶
Used for resetting the exploration policy parameters when needed
:return: None
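
Taken together, these four methods form the entire contract of an exploration policy. The standalone sketch below mirrors that contract without inheriting from Coach's class (the class name, the epsilon default, and the demo values are illustrative); it shows how requires_action_values lets the agent skip computing action values when the step will be random anyway:

import numpy as np
from typing import List

class EpsilonGreedySketch:
    """Illustrative only: mirrors the ExplorationPolicy contract above."""

    def __init__(self, num_actions: int, epsilon: float = 0.05):
        self.num_actions = num_actions
        self.epsilon = epsilon
        self.phase = None
        self._explore = False

    def reset(self) -> None:
        pass  # no per-episode state to reset in this sketch

    def change_phase(self, phase) -> None:
        self.phase = phase

    def requires_action_values(self) -> bool:
        # Decide up front whether this step explores; if it does, the
        # caller can skip the forward pass that produces action values.
        self._explore = np.random.rand() < self.epsilon
        return not self._explore

    def get_action(self, action_values: List[float]) -> int:
        if self._explore:
            return int(np.random.randint(self.num_actions))
        return int(np.argmax(action_values))

policy = EpsilonGreedySketch(num_actions=4)
values = [0.1, 0.9, 0.3, 0.2] if policy.requires_action_values() else []
print(policy.get_action(values))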
@@ -315,8 +315,8 @@ than epsilon, the action is completely random, and the action values don’t nee
AdditiveNoise¶
--
-class
rl_coach.exploration_policies.AdditiveNoise(action_space: rl_coach.spaces.ActionSpace, noise_percentage_schedule: rl_coach.schedules.Schedule, evaluation_noise_percentage: float)[source]¶
+-
+class
rl_coach.exploration_policies.additive_noise.AdditiveNoise(action_space: rl_coach.spaces.ActionSpace, noise_percentage_schedule: rl_coach.schedules.Schedule, evaluation_noise_percentage: float)[source]¶
AdditiveNoise is an exploration policy intended for continuous action spaces. It takes the action from the agent
and adds Gaussian-distributed noise to it. The amount of noise added to the action follows the noise amount that
can be given in two different ways:
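
However the noise percentage ends up being specified, the mechanics reduce to the following sketch: Gaussian noise whose standard deviation is the given percentage of the action-space range. The function name and arguments are illustrative, not Coach's API:

import numpy as np

def additive_noise_action(agent_action, low, high, noise_percentage):
    # The noise standard deviation is a percentage of the action range,
    # so the same percentage means the same relative perturbation in
    # every dimension of the action space.
    noise = np.random.normal(loc=0.0, scale=noise_percentage * (high - low))
    return np.clip(agent_action + noise, low, high)

# Example: a 2D continuous action with 5% exploration noise.
print(additive_noise_action(np.array([0.2, -0.4]),
                            low=np.array([-1.0, -1.0]),
                            high=np.array([1.0, 1.0]),
                            noise_percentage=0.05))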
@@ -343,8 +343,8 @@ of the action space
Boltzmann¶
--
-class
rl_coach.exploration_policies.Boltzmann(action_space: rl_coach.spaces.ActionSpace, temperature_schedule: rl_coach.schedules.Schedule)[source]¶
+-
+class
rl_coach.exploration_policies.boltzmann.Boltzmann(action_space: rl_coach.spaces.ActionSpace, temperature_schedule: rl_coach.schedules.Schedule)[source]¶
The Boltzmann exploration policy is intended for discrete action spaces. It assumes that each of the possible
actions has some value assigned to it (such as the Q value), and uses a softmax function to convert these values
into a distribution over the actions. It then samples the action to play from the calculated distribution.
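
A minimal NumPy sketch of this softmax sampling (illustrative, not Coach's implementation); the temperature would come from the temperature_schedule in the signature above, and higher temperatures flatten the distribution toward uniform exploration:

import numpy as np

def boltzmann_action(q_values, temperature):
    # Softmax over temperature-scaled values, shifted by the max for
    # numerical stability, then a categorical sample.
    scaled = q_values / temperature
    scaled = scaled - scaled.max()
    probs = np.exp(scaled) / np.exp(scaled).sum()
    return int(np.random.choice(len(q_values), p=probs))

print(boltzmann_action(np.array([1.0, 2.0, 0.5]), temperature=0.5))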
@@ -367,8 +367,8 @@ An additional temperature schedule can be given by the user, and will control th
Bootstrapped¶
--
-class
rl_coach.exploration_policies.Bootstrapped(action_space: rl_coach.spaces.ActionSpace, epsilon_schedule: rl_coach.schedules.Schedule, evaluation_epsilon: float, architecture_num_q_heads: int, continuous_exploration_policy_parameters: rl_coach.exploration_policies.exploration_policy.ExplorationParameters = <rl_coach.exploration_policies.additive_noise.AdditiveNoiseParameters object>)[source]¶
+-
+class
rl_coach.exploration_policies.bootstrapped.Bootstrapped(action_space: rl_coach.spaces.ActionSpace, epsilon_schedule: rl_coach.schedules.Schedule, evaluation_epsilon: float, architecture_num_q_heads: int, continuous_exploration_policy_parameters: rl_coach.exploration_policies.exploration_policy.ExplorationParameters = <rl_coach.exploration_policies.additive_noise.AdditiveNoiseParameters object>)[source]¶
Bootstrapped exploration policy is currently only used for discrete action spaces along with the
Bootstrapped DQN agent. It assumes that there is an ensemble of network heads, where each one predicts the
values for all the possible actions. For each episode, a single head is selected to lead the agent, according
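
A hedged sketch of the head-per-episode mechanism, assuming uniform head selection (illustrative names; architecture_num_q_heads in the signature above corresponds to num_heads here):

import numpy as np

num_heads = 10

def select_head():
    # At the start of each episode, one head is chosen to lead the agent.
    return int(np.random.randint(num_heads))

def bootstrapped_action(all_head_q_values, head):
    # Greedy action according to the leading head's value predictions.
    return int(np.argmax(all_head_q_values[head]))

q = np.random.randn(num_heads, 4)   # fake predictions: 10 heads, 4 actions
print(bootstrapped_action(q, select_head()))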
@@ -401,8 +401,8 @@ if the e-greedy is used for a continuous policy
Categorical¶
--
-class
rl_coach.exploration_policies.Categorical(action_space: rl_coach.spaces.ActionSpace)[source]¶
+-
+class
rl_coach.exploration_policies.categorical.Categorical(action_space: rl_coach.spaces.ActionSpace)[source]¶
Categorical exploration policy is intended for discrete action spaces. It expects the action values to
represent a probability distribution over the actions, from which a single action will be sampled.
In evaluation, the action that has the highest probability will be selected. This is particularly useful for
actor-critic schemes, where the actor's output is a probability distribution over
ContinuousEntropy¶
--
-class
rl_coach.exploration_policies.ContinuousEntropy(action_space: rl_coach.spaces.ActionSpace, noise_percentage_schedule: rl_coach.schedules.Schedule, evaluation_noise_percentage: float)[source]¶
+-
+class
rl_coach.exploration_policies.continuous_entropy.ContinuousEntropy(action_space: rl_coach.spaces.ActionSpace, noise_percentage_schedule: rl_coach.schedules.Schedule, evaluation_noise_percentage: float)[source]¶
Continuous entropy is an exploration policy that is actually implemented as part of the network.
The exploration policy class is only a placeholder for choosing this policy. The exploration policy is
implemented by adding a regularization factor to the network loss, which regularizes the entropy of the action.
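
A hedged sketch of such an entropy regularizer for a diagonal Gaussian policy (policy_loss, sigma, and beta are placeholder names; Coach exposes a comparable beta_entropy coefficient on the relevant agents):

import numpy as np

def regularized_loss(policy_loss, sigma, beta):
    # Differential entropy of a diagonal Gaussian: sum of
    # 0.5 * ln(2 * pi * e * sigma^2) per action dimension. Subtracting
    # it from the loss rewards the network for staying stochastic.
    entropy = np.sum(0.5 * np.log(2.0 * np.pi * np.e * sigma ** 2))
    return policy_loss - beta * entropy

print(regularized_loss(policy_loss=1.3, sigma=np.array([0.2, 0.5]), beta=0.01))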
@@ -453,8 +453,8 @@ of the action space
EGreedy¶
--
-class
rl_coach.exploration_policies.EGreedy(action_space: rl_coach.spaces.ActionSpace, epsilon_schedule: rl_coach.schedules.Schedule, evaluation_epsilon: float, continuous_exploration_policy_parameters: rl_coach.exploration_policies.exploration_policy.ExplorationParameters = <rl_coach.exploration_policies.additive_noise.AdditiveNoiseParameters object>)[source]¶
+-
+class
rl_coach.exploration_policies.e_greedy.EGreedy(action_space: rl_coach.spaces.ActionSpace, epsilon_schedule: rl_coach.schedules.Schedule, evaluation_epsilon: float, continuous_exploration_policy_parameters: rl_coach.exploration_policies.exploration_policy.ExplorationParameters = <rl_coach.exploration_policies.additive_noise.AdditiveNoiseParameters object>)[source]¶
e-greedy is an exploration policy that is intended for both discrete and continuous action spaces.
For discrete action spaces, it assumes that each action is assigned a value, and it selects the action with the
highest value with probability 1 - epsilon. Otherwise, it selects an action sampled uniformly out of all the
@@ -485,8 +485,8 @@ if the e-greedy is used for a continuous policy
Greedy¶
--
-class
rl_coach.exploration_policies.Greedy(action_space: rl_coach.spaces.ActionSpace)[source]¶
+-
+class
rl_coach.exploration_policies.greedy.Greedy(action_space: rl_coach.spaces.ActionSpace)[source]¶
The Greedy exploration policy is intended for both discrete and continuous action spaces.
For discrete action spaces, it always selects the action with the maximum value, as given by the agent.
For continuous action spaces, it always returns the exact action, as it was given by the agent.
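
Both cases in one illustrative sketch (not Coach's implementation):

import numpy as np

def greedy_action(action_values, discrete):
    # Discrete: pick the highest-valued action.
    # Continuous: pass the agent's action through unchanged.
    return int(np.argmax(action_values)) if discrete else action_values

print(greedy_action(np.array([0.2, 1.5, -0.3]), discrete=True))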
@@ -504,8 +504,8 @@ For continuous action spaces, it always return the exact action, as it was given
OUProcess¶
--
-class
rl_coach.exploration_policies.OUProcess(action_space: rl_coach.spaces.ActionSpace, mu: float = 0, theta: float = 0.15, sigma: float = 0.2, dt: float = 0.01)[source]¶
+-
+class
rl_coach.exploration_policies.ou_process.OUProcess(action_space: rl_coach.spaces.ActionSpace, mu: float = 0, theta: float = 0.15, sigma: float = 0.2, dt: float = 0.01)[source]¶
OUProcess exploration policy is intended for continuous action spaces, and selects the action according to
an Ornstein-Uhlenbeck process. The Ornstein-Uhlenbeck process implements the action as a Gaussian process, where
the samples are correlated between consecutive time steps.
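
A one-step Euler discretization sketch of the process, with the defaults taken from the signature above (mu=0, theta=0.15, sigma=0.2, dt=0.01); the function name is illustrative:

import numpy as np

def ou_step(state, mu=0.0, theta=0.15, sigma=0.2, dt=0.01):
    # The noise state is pulled back toward mu at rate theta, so
    # consecutive samples are correlated rather than independent.
    return state + theta * (mu - state) * dt + sigma * np.sqrt(dt) * np.random.randn(*state.shape)

noise = np.zeros(2)
for _ in range(3):
    noise = ou_step(noise)
    print(noise)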
@@ -523,8 +523,8 @@ the samples are correlated between consequent time steps.
ParameterNoise¶
--
-class
rl_coach.exploration_policies.ParameterNoise(network_params: Dict[str, rl_coach.base_parameters.NetworkParameters], action_space: rl_coach.spaces.ActionSpace)[source]¶
+-
+class
rl_coach.exploration_policies.parameter_noise.ParameterNoise(network_params: Dict[str, rl_coach.base_parameters.NetworkParameters], action_space: rl_coach.spaces.ActionSpace)[source]¶
The ParameterNoise exploration policy is intended for both discrete and continuous action spaces.
It applies the exploration policy by replacing all the dense network layers with noisy layers.
The noisy layers have both weight means and weight standard deviations, and for each forward pass of the network
@@ -545,8 +545,8 @@ values.
TruncatedNormal¶
--
-class
rl_coach.exploration_policies.TruncatedNormal(action_space: rl_coach.spaces.ActionSpace, noise_percentage_schedule: rl_coach.schedules.Schedule, evaluation_noise_percentage: float, clip_low: float, clip_high: float)[source]¶
+-
+class
rl_coach.exploration_policies.truncated_normal.TruncatedNormal(action_space: rl_coach.spaces.ActionSpace, noise_percentage_schedule: rl_coach.schedules.Schedule, evaluation_noise_percentage: float, clip_low: float, clip_high: float)[source]¶
The TruncatedNormal exploration policy is intended for continuous action spaces. It samples the action from a
normal distribution, where the mean action is given by the agent, and the standard deviation can be given in
two different ways:
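
Whichever way the standard deviation is specified, the draw itself can be sketched with naive rejection sampling (illustrative, and less efficient than a dedicated truncated-normal sampler); clip_low and clip_high mirror the constructor arguments above:

import numpy as np

def truncated_normal_action(mean, stddev, clip_low, clip_high):
    # Resample until the action falls inside [clip_low, clip_high].
    while True:
        action = np.random.normal(mean, stddev)
        if np.all(action >= clip_low) and np.all(action <= clip_high):
            return action

print(truncated_normal_action(np.array([0.0]), np.array([0.3]), -1.0, 1.0))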
@@ -575,8 +575,8 @@ of the action space
UCB¶
--
-class
rl_coach.exploration_policies.UCB(action_space: rl_coach.spaces.ActionSpace, epsilon_schedule: rl_coach.schedules.Schedule, evaluation_epsilon: float, architecture_num_q_heads: int, lamb: int, continuous_exploration_policy_parameters: rl_coach.exploration_policies.exploration_policy.ExplorationParameters = <rl_coach.exploration_policies.additive_noise.AdditiveNoiseParameters object>)[source]¶
+-
+class
rl_coach.exploration_policies.ucb.UCB(action_space: rl_coach.spaces.ActionSpace, epsilon_schedule: rl_coach.schedules.Schedule, evaluation_epsilon: float, architecture_num_q_heads: int, lamb: int, continuous_exploration_policy_parameters: rl_coach.exploration_policies.exploration_policy.ExplorationParameters = <rl_coach.exploration_policies.additive_noise.AdditiveNoiseParameters object>)[source]¶
The UCB exploration policy follows the upper confidence bound heuristic to sample actions in discrete action spaces.
It assumes that there are multiple network heads that are predicting action values, and that the standard deviation
between the heads' predictions represents the uncertainty of the agent in each of the actions.
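
A sketch of the resulting action rule: the mean over heads plus lamb standard deviations, with lamb as in the constructor signature above (illustrative code, not Coach's implementation):

import numpy as np

def ucb_action(all_head_q_values, lamb):
    # Optimism in the face of uncertainty: uncertainty is estimated as
    # the standard deviation of the ensemble heads' predictions.
    mean = all_head_q_values.mean(axis=0)
    std = all_head_q_values.std(axis=0)
    return int(np.argmax(mean + lamb * std))

q = np.random.randn(10, 4)   # fake predictions: 10 heads, 4 actions
print(ucb_action(q, lamb=0.1))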
diff --git a/docs/genindex.html b/docs/genindex.html
index 8198172..e4ca940 100644
--- a/docs/genindex.html
+++ b/docs/genindex.html
@@ -226,7 +226,7 @@
- ActorCriticAlgorithmParameters (class in rl_coach.agents.actor_critic_agent)
- - AdditiveNoise (class in rl_coach.exploration_policies)
+
- AdditiveNoise (class in rl_coach.exploration_policies.additive_noise)
- - Bootstrapped (class in rl_coach.exploration_policies)
+
- Bootstrapped (class in rl_coach.exploration_policies.bootstrapped)
- BoxActionSpace (class in rl_coach.spaces)
@@ -288,11 +288,11 @@
- CarlaEnvironment (class in rl_coach.environments.carla_environment)
- - Categorical (class in rl_coach.exploration_policies)
+
- Categorical (class in rl_coach.exploration_policies.categorical)
- CategoricalDQNAlgorithmParameters (class in rl_coach.agents.categorical_dqn_agent)
- - change_phase() (rl_coach.exploration_policies.ExplorationPolicy method)
+
- change_phase() (rl_coach.exploration_policies.exploration_policy.ExplorationPolicy method)
- choose_action() (rl_coach.agents.agent.Agent method)
@@ -328,7 +328,7 @@
- construct() (rl_coach.architectures.architecture.Architecture static method)
- - ContinuousEntropy (class in rl_coach.exploration_policies)
+
- ContinuousEntropy (class in rl_coach.exploration_policies.continuous_entropy)
- ControlSuiteEnvironment (class in rl_coach.environments.control_suite_environment)
@@ -368,7 +368,7 @@
E
- game_overs() (rl_coach.core_types.Batch method)
- - get_action() (rl_coach.exploration_policies.ExplorationPolicy method)
+
- get_action() (rl_coach.exploration_policies.exploration_policy.ExplorationPolicy method)
- get_action_from_user() (rl_coach.environments.environment.Environment method)
@@ -466,7 +466,7 @@
- GoalsSpace.DistanceMetric (class in rl_coach.spaces)
- - Greedy (class in rl_coach.exploration_policies)
+
- Greedy (class in rl_coach.exploration_policies.greedy)
- GymEnvironment (class in rl_coach.environments.gym_environment)
@@ -626,7 +626,7 @@
- (rl_coach.agents.dqn_agent.DQNAgent method)
- - OUProcess (class in rl_coach.exploration_policies)
+
- OUProcess (class in rl_coach.exploration_policies.ou_process)
@@ -640,7 +640,7 @@
- parallel_prediction() (rl_coach.architectures.network_wrapper.NetworkWrapper method)
- - ParameterNoise (class in rl_coach.exploration_policies)
+
- ParameterNoise (class in rl_coach.exploration_policies.parameter_noise)
- parent (rl_coach.agents.agent.Agent attribute)
@@ -714,9 +714,9 @@
- render() (rl_coach.environments.environment.Environment method)
- - requires_action_values() (rl_coach.exploration_policies.ExplorationPolicy method)
+
- requires_action_values() (rl_coach.exploration_policies.exploration_policy.ExplorationPolicy method)
- - reset() (rl_coach.exploration_policies.ExplorationPolicy method)
+
- reset() (rl_coach.exploration_policies.exploration_policy.ExplorationPolicy method)
- reset_accumulated_gradients() (rl_coach.architectures.architecture.Architecture method)
@@ -870,7 +870,7 @@
- TransitionCollection (class in rl_coach.memories.non_episodic)
- - TruncatedNormal (class in rl_coach.exploration_policies)
+
- TruncatedNormal (class in rl_coach.exploration_policies.truncated_normal)
@@ -878,7 +878,7 @@
U
- - UCB (class in rl_coach.exploration_policies)
+
- UCB (class in rl_coach.exploration_policies.ucb)
- update_discounted_rewards() (rl_coach.core_types.Episode method)
diff --git a/docs/index.html b/docs/index.html
index 90e8c90..a82e4d0 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -194,7 +194,7 @@ Coach collects statistics from the training process and supports advanced visual
- Release 0.8.0 (initial release)
- Release 0.9.0
- Release 0.10.0
-- Release 0.11.0 (current release)
+- Release 0.11.0 (current release)
You can find more details in the GitHub repository.
diff --git a/docs/objects.inv b/docs/objects.inv
index b89806e..4c3b349 100644
Binary files a/docs/objects.inv and b/docs/objects.inv differ
diff --git a/docs/searchindex.js b/docs/searchindex.js
index f7e02fe..5feb194 100644
--- a/docs/searchindex.js
+++ b/docs/searchindex.js
rm:[3,4,17,24,26,28,37,38,41,44,48],inherit:[3,35,36],init_environment_dependent_modul:[3,48],initi:[3,4,10,20,23,24,35,38,46,48],initial_feed_dict:23,initial_kl_coeffici:10,innov:47,input:[1,2,3,4,7,11,13,14,16,18,19,20,23,28,34,38,40,48],input_embedders_paramet:23,input_high:29,input_low:29,input_space_high:30,input_space_low:30,inputembedderparamet:23,inputfilt:38,insert:[19,24],inspect:0,instal:[41,49],instanc:[3,32,34,40],instanti:[3,26,38],instead:[0,3,6,17,20,23,29,30,38,47,48],instruct:49,intact:[11,43],integ:[0,29,30],integr:[36,38,39,46],intel:46,intend:[9,23,27,38],interact:[24,38,39,46,49],interest:[23,37],interfac:[26,37,39,44],intermedi:19,intern:[3,9,17,23,24,28,38,48,49],interpol:29,intersect:47,interv:21,intrins:24,intro:46,introduc:47,invers:[26,44],invok:38,involv:35,is_empti:24,is_point_in_space_shap:34,item:24,iter:[3,5,7,10,15,23,48],its:[0,3,12,22,23,24,27,34,38,41,47,48,49],itself:[23,34,49],job:0,job_typ:0,joint:26,json:0,jump:[4,30],jupyt:35,just:[3,10,20,22,36,38,40,48,49],kapa:21,keep:[14,24,29,49],kei:[2,19,23,24,26,31,35,37,41,49],key_error_threshold:31,key_width:31,keyboard:[26,49],keyword:23,kl_coeffici:23,kl_coefficient_ph:23,know:[3,47,48,49],knowledg:[3,38,48],known:[24,37,43,47],kubeconfig:33,kubernet:41,kubernetes_orchestr:33,kubernetesparamet:33,kwarg:[23,26],l2_norm_added_delta:19,l2_regular:23,lack:37,lamb:27,lambda:[5,6,10,27],lane:2,larg:[27,29,44],larger:23,last:[4,10,19,24,26,29],last_env_respons:26,lastli:38,later:[0,3,23,48,49],latest:[17,19,38,41],layer:[23,27,31,38,40],lazi:[24,29],lazystack:29,lbfg:23,ld_library_path:41,lead:27,learn:[0,3,4,5,7,8,9,11,12,13,14,15,18,21,22,23,24,26,27,29,37,38,40,42,43,44,47,48],learn_from_batch:[3,35,38,48],learner:23,learning_r:[23,31],learning_rate_decay_r:23,learning_rate_decay_step:23,least:[40,47],leav:[10,11],left:[2,47],length:[4,5,6,10,17,19,23,24],less:[15,47],level:[0,3,23,26,36,48,49],levelmanag:[3,38,48],levelselect:26,libatla:41,libav:41,libavformat:41,libbla:41,libboost:41,libbz2:41,libfluidsynth:41,libgl1:41,libglew:41,libgm:41,libgstream:41,libgtk2:41,libgtk:41,libjpeg:41,liblapack:41,libnotifi:41,libopen:41,libosmesa6:41,libportmidi:41,librari:[26,41,44],libsdl1:41,libsdl2:41,libsdl:41,libsm:41,libsmpeg:41,libswscal:41,libtiff:41,libwebkitgtk:41,libwildmidi:41,like:[26,34,38,40,41,47],likelihood:[6,10],line:[3,38,48,49],linear:30,linearboxtoboxmap:30,linearli:30,list:[0,3,4,23,24,26,27,29,30,34,35,48,49],load:[0,37,39,49],load_memory_from_file_path:49,local:[3,40,41,48],locat:[21,24,29,47],log:[0,3,5,9,48],log_to_screen:[3,48],logger:[0,3,48],look:[36,41],loop:38,loss:[1,2,3,6,9,10,12,13,14,21,22,23,27,35,40,48],lot:[27,37,43,47],low:[7,10,29,30,34],low_i:34,low_x:34,lower:[0,31,38],lowest:[29,30,34],lstm:40,lumin:29,lvert:[12,22],lvl:49,mai:[0,23,42,49],main:[3,35,38,40,42,48,49],mainli:39,major:27,make:[0,3,23,26,35,37,41,43,47,48],manag:[3,23,39,41,48],mandatori:[34,36,40],mani:[3,15,42,43],manner:[10,16,17,20,29,38],manual:41,map:[3,23,26,28,29,30,34,35,48],mark:24,markdown:48,mask:[11,30],masked_target_space_high:30,masked_target_space_low:30,master:[3,38,41,48],match:[2,19,23,34],mathbb:5,mathop:5,max:[5,12,17,22,29],max_a:[11,14,19,20],max_action_valu:24,max_episodes_to_achieve_reward:0,max_fps_for_human_control:0,max_over_num_fram:26,max_simultaneous_selected_act:34,max_siz:31,max_spe:26,maxim:[4,13],maximum:[0,12,14,19,20,24,26,27,29,31],mean:[0,2,6,7,8,9,10,18,23,27,29,30,34,37,47],meant:40,measur:[3,4,23,26,29,34,36,47,48],measurements_nam:34,mechan:[28,39,43,49],memor:47,memori:[3
,22,24,29,35,38,39,41,46,47,48],memory_backend:41,memorygranular:31,memoryparamet:[3,35],merg:[23,26],mesa:41,method:[0,5,6,10,17,23,29,31],metric:[0,34,37],middlewar:[19,23,40],middleware_paramet:23,middlewareparamet:23,midpoint:21,might:[3,9,26,35,40,48],min:[6,12,20,22],min_reward_threshold:0,mind:49,minim:[2,4,12],minimap_s:26,minimum:[0,6,29],mix:[3,6,10,19,20,47],mixedmontecarloalgorithmparamet:16,mixer1:41,mixtur:[16,23],mjkei:41,mjpro150:41,mjpro150_linux:41,mkdir:41,mmc:[16,47],mmc_agent:16,mode:[20,23,25,32,33,38,39,41,49],model:[0,16,18,23,46,49],modif:47,modul:[3,35,38,39,48],modular:[35,38,40,46],monitor:39,mont:[3,20],monte_carlo_mixing_r:[16,20],more:[3,7,17,23,29,35,37,38,40,41,46,48,49],moreov:37,most:[3,9,19,23,24,27,40,43,47,48,49],mostli:[29,38],motiv:38,move:[6,10,29,37,43],mp4:0,mse:[2,13,14,21],much:[6,10,38,47],mujoco:[26,30,36,41,44],mujoco_kei:41,mujoco_pi:41,multi:[10,23,34,40],multiarrai:[3,48],multidimension:34,multipl:[4,6,10,17,23,26,27,29,30,31,34,37,38,43,46,49],multipli:[4,9,23,29],multiselect:30,multitask:[26,44],must:[23,29,43],mxnet:49,n_step:[19,22,24,31],n_step_discounted_reward:24,n_step_q_ag:17,nabla:7,nabla_:7,nabla_a:7,naf:47,naf_ag:18,nafalgorithmparamet:18,name:[3,23,24,26,29,34,35,41,48,49],namespac:33,nasm:41,nativ:[0,26,36,44],native_rend:0,navig:3,ndarrai:[3,23,24,26,27,29,30,34,36,48],nearest:19,neat:37,nec:[0,47],nec_ag:19,necalgorithmparamet:19,necessari:[3,19,23,48],necessarili:29,need:[0,3,22,23,26,27,34,35,38,43,47,48,49],neg:[4,29],neighbor:19,neon_compon:35,nervanasystem:41,network:[0,3,23,27,35,38,43,46,47,48,49],network_input_tupl:23,network_nam:[3,48],network_param:27,network_paramet:23,network_wrapp:[3,23,48],networkparamet:[3,23,27,35],networkwrapp:[3,48],neural:[3,16,23,40,43],never:23,new_value_shift_coeffici:[19,31],new_weight:23,newli:[20,36,47],next:[3,7,13,14,18,20,21,24,26,38,48,49],next_stat:24,nfs_data_stor:25,nfsdatastoreparamet:25,nice:49,no_accumul:23,node:[23,40],nois:[7,8,18,27,38],noise_percentage_schedul:27,noisi:[9,22,27],non_episod:31,none:[0,3,6,7,10,23,24,26,27,29,30,34,36,48],norm:23,norm_unclipped_grad:23,norm_unclippsed_grad:23,normal:[3,4,9,27,28,29,34],note:[19,23,27,48],notebook:35,notic:[23,47],notori:[37,43,47],now:[6,36],nstepqalgorithmparamet:17,nth:22,num_act:[19,31,34],num_bins_per_dimens:30,num_class:31,num_consecutive_playing_step:[3,7,48],num_consecutive_training_step:[3,48],num_gpu:0,num_neighbor:31,num_predicted_steps_ahead:4,num_speedup_step:26,num_steps_between_copying_online_weights_to_target:[7,17],num_steps_between_gradient_upd:[5,9,17],num_task:0,num_training_task:0,num_work:0,number:[0,2,4,5,7,9,11,12,17,19,21,22,23,24,26,27,29,30,31,37,44,49],number_of_knn:19,numpi:[3,23,24,26,27,29,30,34,36,48],nvidia:41,object:[0,3,22,23,26,27,29,31,38,48],observ:[0,3,4,10,23,24,26,28,36,38,48],observation_reduction_by_sub_parts_name_filt:29,observation_rescale_size_by_factor_filt:29,observation_rescale_to_size_filt:29,observation_space_s:23,observation_space_typ:26,observation_stat:29,observation_typ:26,observationspac:34,observationspacetyp:26,observationtyp:26,obtain:[3,48],off:[39,47],offer:[26,44],often:[37,38,40],old:[6,10,23,47],old_weight:23,onc:[0,6,9,10,11,12,13,14,16,17,20,21,22,23,34,49],one:[0,3,15,19,20,23,24,26,27,28,31,34,36,37,40,47,48],ones:[36,47],onli:[0,3,4,5,6,9,10,11,12,14,15,17,19,21,22,23,24,26,27,29,30,36,38,47,48,49],onlin:[7,11,12,13,14,16,17,18,19,20,21,22,23,38,40],online_network:23,onnx:[0,23],onto:28,open:[0,26,44],openai:[41,44],opencv:41,oper:[20,23,29],optim:[3,4,23
,42],optimization_epoch:6,optimizer_epsilon:23,optimizer_typ:23,option:[9,23,26,30,34,35,37,39,40,49],orchestr:[39,41,46],order:[0,3,5,6,7,9,10,13,14,15,17,18,19,20,21,23,24,28,29,30,37,38,40,43,47,48],org:[17,31],origin:[17,29,30,43],ornstein:[7,8,27],other:[0,2,9,15,20,23,26,28,29,31,37,38,47],otherwis:[10,11,23,26,27,34],our:6,out:[2,13,14,27,28,30,37,41,46,47,49],outcom:[27,38],output:[0,4,7,11,12,18,19,23,27,28,29,34,35,40],output_0_0:23,output_observation_spac:29,outputfilt:38,outsid:[4,27],over:[3,6,9,10,17,19,22,23,24,27,29,30,37,38,47,48],overestim:7,overfit:10,overhead:0,overlai:37,override_existing_kei:31,overriden:35,overview:38,overwhelm:38,overwritten:23,own:[23,35],p_j:[12,22],page:[3,43],pair:[0,34],pal:[20,47],pal_ag:20,pal_alpha:20,palalgorithmparamet:20,paper:[5,9,12,17,19,21,26,31,43],parallel:[23,37,40],parallel_predict:23,param:[3,23,24,25,26,27,32,33,35,36,48],paramet:[2,3,4,5,6,7,9,10,12,16,17,19,20,21,22,23,24,25,26,27,29,30,31,32,33,34,35,36,43,46,48,49],parameters_server_host:0,parent:[3,23,48],parent_path_suffix:[3,23,48],parmet:3,pars:38,part:[0,11,23,24,27,29,30,39,40,43,47],part_nam:29,partial:30,partialdiscreteactionspacemap:30,particular:4,particularli:[26,27,34,43,47],pass:[0,4,7,8,18,19,23,26,27,28,36,37,38,40,49],patamet:19,patchelf:41,patchelf_0:41,path:[0,3,23,35,36,41,48,49],pattern:38,pdf:31,penal:[6,7,10],penalti:10,pendulum_hac:36,pendulum_with_go:36,pendulumwithgo:36,per:[0,3,4,34,35,38,48],percentag:27,percentil:27,perceptron:40,perform:[0,3,23,24,29,31,36,37,38,47,48],period:[40,49],persist:3,persistent_advantage_learn:20,perspect:12,phase:[3,6,7,8,10,23,26,27,38,48],phi:[12,22],physic:[26,44],pi_:6,pick:26,pickl:49,pip3:41,pip:41,pixel:26,place:[30,37,38],placehold:[23,27],plai:[0,3,9,11,13,14,17,27,35,37,48],plain:40,planarmap:26,planarmapsobservationspac:29,platform:[26,44],pleas:[17,43],plu:23,plugin:41,point:[29,34,38,39],polici:[1,3,4,5,8,11,17,18,19,25,35,38,39,40,41,42,46,47],policy_gradient_rescal:[5,6,9,10],policy_gradients_ag:9,policygradientalgorithmparamet:9,policygradientrescal:[5,6,9,10],policyoptimizationag:35,popul:38,popular:[26,44],port:0,posit:[4,29],possibl:[2,3,4,19,27,30,34,37,40,46,47,48,49],post:[28,46],post_training_command:[3,48],power:[26,44],ppo:[6,10,47],ppo_ag:10,ppoalgorithmparamet:10,pre:[7,27,28],predefin:[11,20,27,49],predict:[1,2,3,5,6,7,10,11,12,13,14,20,21,22,23,27,40,47,48],prediction_typ:[3,48],predictiontyp:[3,48],prefect:47,prefer:23,prefix:[3,48],prep:41,prepar:[3,48],prepare_batch_for_infer:[3,48],present:[15,19,26,29,47],preset:[0,5,35,36,38,39,41,49],press:[37,49],prevent:[7,10,38],previou:29,previous:[10,23],print:[0,3,49],print_networks_summari:0,priorit:[22,31],prioriti:[22,31],privat:34,probabilit:5,probabl:[3,5,9,11,12,22,24,27,35,47,48],process:[0,3,7,8,23,27,28,29,30,35,37,38,40,43,46,48],produc:23,progress:23,project:[12,22],propag:6,propagate_updates_to_dnd:19,properti:[23,31,35,36,41],proport:31,provid:[23,39],proxi:38,proxim:3,pub:[32,33,41],publish:43,purpos:[0,3,9],pursuit:2,pybullet:[26,44],pygam:[0,41],pytest:41,python3:41,python:[26,31,35,41,44,46],qr_dqn_agent:21,qualiti:26,quantil:[3,47],quantileregressiondqnalgorithmparamet:21,queri:[19,23,38,47],question:47,quit:37,r_i:[5,17],r_t:[4,6,22],rainbow:[3,35,47],rainbow_ag:35,rainbow_dqn_ag:22,rainbowag:35,rainbowagentparamet:35,rainbowalgorithmparamet:35,rainbowdqnalgorithmparamet:22,rainbowexplorationparamet:35,rainbowmemoryparamet:35,rainbownetworkparamet:35,rais:[3,24,48],ramp:[35,38],random:[0,17,26,27,34,38,43],random_initializatio
n_step:26,randomli:[24,38],rang:[6,7,10,12,22,26,27,29,30,34,47],rare:19,rate:[0,16,19,23,26,40],rate_for_copying_weights_to_target:7,rather:[4,37],ratio:[6,10,16,29],raw:[26,44],reach:[0,10,34],read:25,readabl:38,readm:41,real:3,reason:[29,43],rebuild_on_every_upd:31,receiv:[23,24],recent:[3,22,23,47,48],recommend:36,redi:[32,33,41],redispubsub:41,redispubsubmemorybackendparamet:32,reduc:[1,2,9,10,20,23,29,38,47],reduct:29,reduction_method:29,reductionmethod:29,redund:29,refer:[2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,39,41],referenc:3,regard:[3,48],regist:[3,48],register_sign:[3,48],registri:41,regress:[2,3,47],regula:[6,10],regular:[5,6,9,10,17,19,23,27,30,31,47],regularli:23,reinforc:[3,5,7,8,9,12,13,14,15,17,20,21,22,26,27,37,38,40,42,43,44,47],rel:27,relat:[23,41],relationship:47,releas:[46,47],relev:[3,11,27,29,48],remov:29,render:[0,3,26,36],reorder:29,repeat:[26,38],replac:[27,29,31,41],replace_mse_with_huber_loss:23,replai:[1,2,3,7,11,12,13,14,17,19,20,21,22,31,38,47,48,49],replay_buff:49,replicated_devic:23,repo:36,repositori:46,repres:[0,6,10,12,22,23,24,26,27,30,34,49],represent:40,reproduc:[38,43],request:[3,23,48],requir:[3,23,25,27,29,37,40,41,47,48],requires_action_valu:27,rescal:[4,5,6,9,10,23,28,29],rescale_factor:29,rescaleinterpolationtyp:29,rescaling_interpolation_typ:29,research:[26,43,44],reset:[3,19,23,26,27,36,48],reset_accumulated_gradi:23,reset_evaluation_st:[3,48],reset_gradi:23,reset_internal_st:[3,26,48],resourc:[39,41],respect:[7,24,26],respons:[3,24,26,38,48],rest:[23,24,30,41],restart:36,restor:[0,3,48],restore_checkpoint:[3,48],result:[3,4,12,13,14,15,21,22,23,29,30,43,47,48,49],retriev:[19,31],return_additional_data:31,reus:38,reusabl:40,reward:[0,1,2,3,4,7,9,16,17,22,23,24,26,28,34,36,37,38,47,48],reward_test_level:0,reward_typ:34,rgb:[26,29,34],rho:7,right:[2,3,27,30,37,47,48],rl_coach:[0,1,2,3,4,5,6,7,9,10,12,14,16,17,18,19,20,21,22,23,24,25,26,27,29,30,31,32,33,34,36,41,48,49],rms_prop_optimizer_decai:23,rmsprop:23,roboschool:[26,44],robot:[26,34,44,46],roboti:41,rollout:[3,25,32,33,39,41,48,49],root:[37,41],rule:[7,11],run:[0,3,4,7,9,10,11,13,14,19,20,23,26,27,29,48,49],run_pre_network_filter_for_infer:[3,48],runphas:[3,48],runtim:41,rvert:[12,22],s3_bucket_nam:41,s3_creds_fil:41,s3_data_stor:25,s3_end_point:41,s3datastoreparamet:25,s_t:[4,5,7,11,12,13,14,16,17,18,20,22],sai:47,same:[3,4,6,9,16,17,20,23,26,30,31,37,40,43,47,48],sampl:[1,2,3,5,7,9,10,11,12,13,14,16,17,20,21,22,23,27,31,34,38,41,48],sample_with_info:34,satur:7,save:[0,3,22,23,27,41,48,49],save_checkpoint:[3,48],saver:[3,23,48],savercollect:[3,23,48],scale:[4,9,23,29,37,41,46,49],scale_down_gradients_by_number_of_workers_for_sync_train:23,scale_measurements_target:4,scaler:23,schedul:[6,27,31,38,39,41,49],scheme:[5,27,38,47],schulman:10,sci:41,scienc:43,scipi:[29,41],scope:23,scratch:47,scratchpad:0,screen:[3,26,36,49],screen_siz:26,script:38,second:[0,23,37,47,49],section:[41,42,44],see:[3,26,29,41,43,44,47,48,49],seed:[0,26,43],seen:[4,19,20,26,29,38,43,47],segment:[26,34],select:[5,11,19,23,24,27,29,30,34,36,37,38,46,49],self:[3,23,35,36,48],send:[36,40],separ:[0,3,15,29,30,40,42,47],separate_actions_for_throttle_and_brak:26,seper:9,sequenti:[4,24,31],serv:[6,9,40],server:0,server_height:26,server_width:26,sess:[3,23,48],session:[3,23,48],set:[0,2,3,4,5,6,7,10,12,13,14,16,19,20,22,23,24,26,27,29,30,34,35,39,43,44,46,47,48,49],set_environment_paramet:[3,48],set_goal:26,set_incoming_direct:[3,48],set_is_train:23,set_sess:[3,48],set_variable_valu:23,set_weight:23,setup:[3,41
,48],setup_logg:[3,48],setuptool:41,sever:[0,3,6,9,10,11,23,26,27,29,35,36,37,38,40,44,47,48,49],shape:[23,29,34],share:[0,3,23,31,40,48],shared_memory_scratchpad:0,shared_optim:23,shift:[30,38],shine:37,should:[0,3,4,6,10,11,17,20,23,24,26,29,31,34,35,36,39,48,49],should_dump:0,shouldn:11,show:43,shown:43,shuffl:24,side:[3,48],sigma:27,signal:[3,38,48],signal_nam:[3,48],significantli:15,similar:[6,15,17,24,26,30,47],simpl:[9,31,35,36,40,46,47,49],simplest:47,simplif:47,simplifi:[6,37,40],simul:[26,36,44,49],simultan:6,sinc:[3,6,7,9,17,19,20,22,23,27,29,48],singl:[3,4,5,6,10,11,15,16,17,23,24,26,27,30,34,37,38,40,48],size:[23,24,27,29,30,31,34],skill:47,skip:[26,38],slave:[3,48],slice:24,slow:[23,49],slower:[0,15,23],slowli:7,small:[6,19,31],smaller:27,smooth:37,soft:[7,10,18],softmax:27,softwar:41,solut:47,solv:[29,36,44,46],some:[0,3,10,23,24,27,29,35,36,37,40,43,47,48,49],sort:21,sourc:[0,1,2,3,4,5,6,7,9,10,12,14,16,17,18,19,20,21,22,23,24,25,26,27,29,30,31,32,33,34,36,41,44,48],space:[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,26,27,28,29,30,31,38,46,48],spacesdefinit:[3,23,48],spatial:47,spawn:[39,41],special:15,specif:[0,3,11,15,19,23,24,35,38,49],specifi:[0,23,26,27,29,36,39,49],speed:[23,29,47],speedup:49,spread:[29,30],squar:29,squeeze_list:23,squeeze_output:23,src:41,stabil:[17,23,47],stabl:[40,47],stack:[3,28,29,34,48],stack_siz:[23,29],stacking_axi:29,stage:40,stai:43,standard:[6,9,10,11,27,29,37],starcraft2_environ:26,starcraft2environ:26,starcraft:[34,44],starcraftobservationtyp:26,start:[3,7,10,15,20,24,29,30,36,41,48],state:[1,2,3,4,5,6,7,8,9,10,11,13,14,15,16,17,18,19,20,21,22,23,24,26,29,31,34,35,36,38,40,42,47,48],state_key_with_the_class_index:[2,31],state_spac:26,state_valu:24,statist:[3,9,29,46,48],stdev:27,steep:27,step:[0,3,4,5,6,7,9,10,11,12,13,14,16,18,19,20,21,22,23,24,26,27,29,35,36,37,38,47,48,49],stepmethod:[7,17],stochast:38,stop:[0,26],store:[0,3,19,22,24,26,29,31,37,38,39,41,46,48,49],store_transitions_only_when_episodes_are_termin:22,str:[0,2,3,4,17,23,24,26,27,29,30,34,48],strategi:[26,44],stream:[15,39],strict:43,string:[0,23,26],structur:[0,3,24,31,35,38,48],stuff:23,style:27,sub:[30,31,32,33,34,35,38,41,49],sub_spac:34,subset:[37,43,47],subtract:20,succeed:26,success:[0,26,47],suffer:37,suffici:24,suffix:[3,23,48],suggest:35,suit:[0,44],suitabl:[39,49],sum:[4,6,9,16,23,24],sum_:[5,12,16,17,19,22],summari:[0,3,48],supervis:47,suppli:[3,48],support:[0,3,23,26,27,37,40,41,42,44,46,49],sure:[0,41,43],surrog:6,swig:41,swingup:26,symbol:23,sync:[3,23,38,39,48],synchron:[0,23,38,40],t_max:[9,17],tag:41,take:[0,9,10,15,19,20,23,26,27,28,36,37,38],taken:[1,2,4,5,6,7,10,12,15,19,20,21,22,23,24,26,27],tanh:7,tar:41,target:[0,1,2,3,4,5,6,7,10,11,12,13,14,16,17,18,19,20,21,22,23,26,29,30,34,35,38,40,48],target_act:30,target_kl_diverg:10,target_network:23,target_success_r:26,targets_horizon:17,task:[0,1,2,26,29,35,37,44],task_index:0,techniqu:[6,10,46,47],technolog:39,teh:23,temperatur:27,temperature_schedul:27,tensor:[3,23,48],tensorboard:0,tensorflow:[0,3,23,48,49],tensorflow_support:23,term:[6,10,34],termin:[3,7,24,38,48],test:[0,3,5,7,8,9,10,23,35,43,46,49],test_using_a_trace_test:0,textrm:38,than:[0,3,10,23,27,37,40,48],thei:[3,19,20,23,27,37,38,39,47,48,49],them:[4,5,9,17,23,24,26,29,34,36,37,40],therefor:[0,7,23,28,47],theta:[6,7,12,22,27],theta_:6,thi:[0,3,4,5,6,7,9,10,11,15,17,19,22,23,24,26,27,28,29,30,31,32,34,35,36,37,38,39,40,41,43,47,48,49],thing:37,those:[0,3,7,11,13,14,15,19,24,27,30,38,40,42,47,48],thousand:[10,11,12,13,14,16,20,21,2
2],thread:23,three:[3,39,40,41,42],threshold:[10,19,29],through:[0,3,4,7,8,9,10,11,19,20,23,35,36,38,40,48],tild:7,time:[0,4,20,23,27,30,31,37,40,47],time_limit:36,timestep:[4,9],timid:41,tmp:0,togeth:[3,17,24,38,48],toggl:37,too:10,tool:[37,41,47],top:[23,26,28,29,31,36,37,47],torqu:26,total:[0,3,9,10,16,19,20,24,31,35,37,47,48],total_loss:23,total_return:24,trace:0,trace_max_env_step:0,trace_test_level:0,tradeoff:27,train:[0,3,15,23,27,32,33,35,36,37,38,39,40,43,46,47,48],train_and_sync_network:23,train_on_batch:23,trainer:[25,39],transfer:[26,32,44],transit:[1,2,3,4,5,7,9,10,12,13,14,17,19,20,21,22,31,35,38,39,48],transition_idx:24,tri:47,trick:43,tricki:37,trigger:[26,41],ttf2:41,tune:27,tupl:[1,2,3,7,23,24,26,31,34,35],turn:[2,47],tutori:[35,36],tweak:[3,48],two:[7,9,17,23,26,27,28,29,30,34,36,39,40,49],txt:41,type:[0,3,9,15,23,26,29,34,35,38,40,46,47,48,49],typic:[6,10,23,47,49],ubuntu16:41,uhlenbeck:[7,8,27],uint8:29,unbound:34,uncertain:27,uncertainti:27,unchang:10,unclip:[3,35,48],uncorrel:17,undeploi:39,under:[3,23,35,49],underbrac:5,understand:49,unifi:6,uniformli:[26,27,30,34],union:[3,24,26,27,30,34,48],uniqu:23,unit:37,unlik:10,unmask:30,unnecessari:0,unshar:[3,48],unsign:29,unspecifi:23,unstabl:[37,43],until:[0,9,10,19,22,27],unus:23,unzip:41,updat:[3,6,7,9,10,11,12,13,14,15,17,18,19,20,21,22,23,24,27,35,36,37,38,40,41,47,48],update_discounted_reward:24,update_log:[3,48],update_online_network:23,update_step_in_episode_log:[3,48],update_target_network:23,update_transition_before_adding_to_replay_buff:[3,48],upgrad:41,upon:[3,5,35,48],upper:27,usag:[30,46],use:[0,1,2,3,4,5,7,8,9,11,13,14,18,23,24,25,26,27,29,30,31,34,35,36,38,40,41,46,47,48,49],use_accumulated_reward_as_measur:4,use_cpu:0,use_full_action_spac:26,use_kl_regular:[6,10],use_non_zero_discount_for_terminal_st:7,use_separate_networks_per_head:23,use_target_network_for_evalu:7,used:[0,2,3,5,6,7,9,10,11,12,16,17,18,19,20,21,23,26,27,29,30,31,32,33,35,36,38,39,40,43,48,49],useful:[0,3,4,22,23,27,29,34,43,47,48,49],user:[23,26,27,37,38,41],userguid:41,uses:[0,1,6,10,15,24,25,27,33,38,39,41,43,47,49],using:[0,3,5,6,7,9,10,13,14,16,17,18,19,20,22,23,25,26,27,29,32,35,36,37,39,44,47,48,49],usr:41,usual:[29,38],util:[3,37,48],v_max:12,v_min:12,val:[3,34,48],val_matches_space_definit:34,valid:[0,34],valu:[0,2,3,4,5,6,7,10,11,12,13,14,15,17,18,19,20,22,23,24,26,27,29,30,31,34,35,38,40,41,42,47,48],valuabl:37,value_targets_mix_fract:[6,10],valueexcept:[3,48],valueoptimizationag:35,van:4,vari:40,variabl:[23,26,41],variable_scop:23,varianc:[9,27,37],variant:[27,31,47],variou:[3,24,31,46],vector:[3,4,7,8,10,11,23,26,29,34,36,40,47,48],vectorobservationspac:29,verbos:26,veri:[0,6,7,9,15,19,37,47,49],version:[6,10,24],versu:23,vertic:23,via:[2,11],video:[0,3,26],video_dump_method:0,view:37,viewabl:[3,48],visit:43,visual:[0,3,26,44,46],visualization_paramet:26,visualizationparamet:[3,26],vizdoom:[41,44],vote:27,wai:[3,6,10,27,30,36,38,40,46,48,49],wait:[5,23,39],walk:36,want:[3,4,22,23,24,29,30,31,48],warn:[27,29,30],wasn:24,weather_id:26,websit:[26,46],weight:[4,5,6,7,10,11,12,13,14,16,17,18,19,20,21,22,23,27,38,40,47],well:[19,23,27,34,47],went:10,were:[4,12,13,14,15,19,21,22,23,24,30,43],west:41,wget:41,what:[10,47],when:[0,3,4,5,6,7,8,9,10,19,23,24,25,26,27,29,32,33,35,36,37,48,49],whenev:39,where:[2,3,4,5,6,10,11,12,15,17,19,20,22,23,24,26,27,29,30,34,37,47,48],which:[0,1,2,3,5,6,7,9,10,11,15,17,18,19,20,21,23,24,25,26,27,29,31,32,33,34,35,36,37,38,39,40,42,43,44,46,47,48,49],who:38,why:[37,38],window:[29,30],wise:29,within:[0,6
,10,18,27,34,37],without:[5,10,30,31,37,47,49],won:[4,23],wont:23,work:[3,17,23,27,29,30,37,38,47,48,49],workaround:0,workdir:41,worker:[0,3,17,23,25,29,31,32,33,37,39,40,41,47,48,49],worker_devic:23,worker_host:0,wors:47,would:[23,41,47],wrap:[26,29,38,44],wrapper:[3,23,24,26,34,40,48],write:[0,3,48],written:[3,22,25,48],www:41,xdist:41,y_t:[7,11,13,14,16,18,19,20],year:47,yet:[15,36],you:[4,29,31,35,36,41,46,49],your:[35,36,41,49],yuv:29,z_i:[12,22],z_j:[12,22],zero:[2,13,14],zip:41,zlib1g:41},titles:["Additional Parameters","Behavioral Cloning","Conditional Imitation Learning","Agents","Direct Future Prediction","Actor-Critic","Clipped Proximal Policy Optimization","Deep Deterministic Policy Gradient","Hierarchical Actor Critic","Policy Gradient","Proximal Policy Optimization","Bootstrapped DQN","Categorical DQN","Double DQN","Deep Q Networks","Dueling DQN","Mixed Monte Carlo","N-Step Q Learning","Normalized Advantage Functions","Neural Episodic Control","Persistent Advantage Learning","Quantile Regression DQN","Rainbow","Architectures","Core Types","Data Stores","Environments","Exploration Policies","Filters","Input Filters","Output Filters","Memories","Memory Backends","Orchestrators","Spaces","Adding a New Agent","Adding a New Environment","Coach Dashboard","Control Flow","Distributed Coach - Horizontal Scale-Out","Network Design","Usage - Distributed Coach","Algorithms","Benchmarks","Environments","Features","Reinforcement Learning Coach","Selecting an Algorithm","test","Usage"],titleterms:{"final":19,"function":18,"new":[35,36],"switch":49,Adding:[35,36],Using:36,across:47,action:[4,5,6,7,8,9,10,11,18,19,30,34,47],actioninfo:24,actor:[5,8],addit:[0,49],additivenois:27,advantag:[18,20],agent:[3,35,38,49],algorithm:[1,2,4,5,6,7,8,9,10,11,12,13,14,16,17,18,19,20,21,22,42,47,49],api:36,architectur:23,attentionactionspac:34,backend:32,balancedexperiencereplai:31,batch:24,behavior:1,benchmark:43,between:49,blizzard:26,boltzmann:27,bootstrap:[11,27],boxactionspac:34,build:41,can:47,carla:26,carlo:16,categor:[12,27],choos:[4,5,6,7,8,9,10,11,18,19],clip:6,clone:[1,41],coach:[36,37,39,41,46],collect:47,compar:37,compoundactionspac:34,condit:2,config:41,contain:41,continu:[6,10,47],continuousentropi:27,control:[19,26,38],copi:40,core:24,creat:41,critic:[5,8],dashboard:37,data:25,deep:[7,14,49],deepmind:26,demonstr:47,descript:[1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22],design:40,determinist:7,direct:4,discret:[5,9,47],discreteactionspac:34,distribut:[39,41],distributedtaskparamet:0,doe:47,doubl:13,dqn:[11,12,13,15,21],duel:15,dump:49,egreedi:27,environ:[26,36,44,47,49],envrespons:24,episod:[19,24,31],episodicexperiencereplai:31,episodichindsightexperiencereplai:31,episodichrlhindsightexperiencereplai:31,evalu:49,experiencereplai:31,explor:27,explorationpolici:27,featur:45,file:41,filter:[28,29,30],flag:49,flow:38,framework:49,from:47,futur:4,gener:15,gif:49,goal:34,gradient:[7,9],graph:38,greedi:27,gym:[26,36],have:47,hierarch:8,horizont:39,human:[47,49],imag:41,imageobservationspac:34,imit:[2,49],implement:41,input:29,interfac:41,keep:40,kubernet:33,learn:[2,17,20,46,49],level:38,manag:38,memori:[31,32],mix:16,mont:16,more:47,multi:49,multipl:47,multiselectactionspac:34,network:[1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,40],networkwrapp:23,neural:19,nfsdatastor:25,node:[47,49],non:31,normal:18,observ:[29,34],observationclippingfilt:29,observationcropfilt:29,observationmoveaxisfilt:29,observationnormalizationfilt:29,observationreductionbysubpartsnamefilt:29,observatio
nrescalesizebyfactorfilt:29,observationrescaletosizefilt:29,observationrgbtoyfilt:29,observationsqueezefilt:29,observationstackingfilt:29,observationtouint8filt:29,openai:[26,36],optim:[6,10],orchestr:33,ouprocess:27,out:39,output:30,pain:47,parallel:47,paramet:0,parameternois:27,persist:20,plai:49,planarmapsobservationspac:34,polici:[6,7,9,10,27],predict:4,prerequisit:41,presetvalidationparamet:0,prioritizedexperiencereplai:31,process:47,proxim:[6,10],push:41,qdnd:31,quantil:21,rainbow:22,redispubsubbackend:32,regress:21,reinforc:46,render:49,repositori:41,reward:29,rewardclippingfilt:29,rewardnormalizationfilt:29,rewardrescalefilt:29,run:[37,41],s3datastor:25,sampl:47,scale:39,select:47,signal:37,simul:47,singl:49,singleepisodebuff:31,solv:47,space:[34,47],starcraft:26,statist:37,step:17,store:[11,25],structur:[1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22],suit:26,support:39,sync:40,synchron:39,task:47,taskparamet:0,test:48,thread:49,through:49,track:37,train:[1,2,4,5,6,7,8,9,10,11,12,13,14,16,17,18,19,20,21,22,49],transit:[11,24],transitioncollect:31,truncatednorm:27,type:[24,39],ucb:27,usag:[41,49],vectorobservationspac:34,visual:[37,49],visualizationparamet:0,vizdoom:26,you:47,your:47}})
\ No newline at end of file
+Search.setIndex({docnames:["components/additional_parameters","components/agents/imitation/bc","components/agents/imitation/cil","components/agents/index","components/agents/other/dfp","components/agents/policy_optimization/ac","components/agents/policy_optimization/cppo","components/agents/policy_optimization/ddpg","components/agents/policy_optimization/hac","components/agents/policy_optimization/pg","components/agents/policy_optimization/ppo","components/agents/value_optimization/bs_dqn","components/agents/value_optimization/categorical_dqn","components/agents/value_optimization/double_dqn","components/agents/value_optimization/dqn","components/agents/value_optimization/dueling_dqn","components/agents/value_optimization/mmc","components/agents/value_optimization/n_step","components/agents/value_optimization/naf","components/agents/value_optimization/nec","components/agents/value_optimization/pal","components/agents/value_optimization/qr_dqn","components/agents/value_optimization/rainbow","components/architectures/index","components/core_types","components/data_stores/index","components/environments/index","components/exploration_policies/index","components/filters/index","components/filters/input_filters","components/filters/output_filters","components/memories/index","components/memory_backends/index","components/orchestrators/index","components/spaces","contributing/add_agent","contributing/add_env","dashboard","design/control_flow","design/horizontal_scaling","design/network","dist_usage","features/algorithms","features/benchmarks","features/environments","features/index","index","selecting_an_algorithm","test","usage"],envversion:{"sphinx.domains.c":1,"sphinx.domains.changeset":1,"sphinx.domains.cpp":1,"sphinx.domains.javascript":1,"sphinx.domains.math":2,"sphinx.domains.python":1,"sphinx.domains.rst":1,"sphinx.domains.std":1,"sphinx.ext.todo":1,"sphinx.ext.viewcode":1,sphinx:55},filenames:["components/additional_parameters.rst","components/agents/imitation/bc.rst","components/agents/imitation/cil.rst","components/agents/index.rst","components/agents/other/dfp.rst","components/agents/policy_optimization/ac.rst","components/agents/policy_optimization/cppo.rst","components/agents/policy_optimization/ddpg.rst","components/agents/policy_optimization/hac.rst","components/agents/policy_optimization/pg.rst","components/agents/policy_optimization/ppo.rst","components/agents/value_optimization/bs_dqn.rst","components/agents/value_optimization/categorical_dqn.rst","components/agents/value_optimization/double_dqn.rst","components/agents/value_optimization/dqn.rst","components/agents/value_optimization/dueling_dqn.rst","components/agents/value_optimization/mmc.rst","components/agents/value_optimization/n_step.rst","components/agents/value_optimization/naf.rst","components/agents/value_optimization/nec.rst","components/agents/value_optimization/pal.rst","components/agents/value_optimization/qr_dqn.rst","components/agents/value_optimization/rainbow.rst","components/architectures/index.rst","components/core_types.rst","components/data_stores/index.rst","components/environments/index.rst","components/exploration_policies/index.rst","components/filters/index.rst","components/filters/input_filters.rst","components/filters/output_filters.rst","components/memories/index.rst","components/memory_backends/index.rst","components/orchestrators/index.rst","components/spaces.rst","contributing/add_agent.rst","contributing/add_env.rst","dashboard.rst","design/control_flow.rst","design/horizontal_scaling.rst","d
esign/network.rst","dist_usage.rst","features/algorithms.rst","features/benchmarks.rst","features/environments.rst","features/index.rst","index.rst","selecting_an_algorithm.rst","test.rst","usage.rst"],objects:{"rl_coach.agents.actor_critic_agent":{ActorCriticAlgorithmParameters:[5,0,1,""]},"rl_coach.agents.agent":{Agent:[3,0,1,""]},"rl_coach.agents.agent.Agent":{act:[3,1,1,""],call_memory:[3,1,1,""],choose_action:[3,1,1,""],collect_savers:[3,1,1,""],create_networks:[3,1,1,""],emulate_act_on_trainer:[3,1,1,""],emulate_observe_on_trainer:[3,1,1,""],get_predictions:[3,1,1,""],get_state_embedding:[3,1,1,""],handle_episode_ended:[3,1,1,""],init_environment_dependent_modules:[3,1,1,""],learn_from_batch:[3,1,1,""],log_to_screen:[3,1,1,""],observe:[3,1,1,""],parent:[3,2,1,""],phase:[3,2,1,""],post_training_commands:[3,1,1,""],prepare_batch_for_inference:[3,1,1,""],register_signal:[3,1,1,""],reset_evaluation_state:[3,1,1,""],reset_internal_state:[3,1,1,""],restore_checkpoint:[3,1,1,""],run_pre_network_filter_for_inference:[3,1,1,""],save_checkpoint:[3,1,1,""],set_environment_parameters:[3,1,1,""],set_incoming_directive:[3,1,1,""],set_session:[3,1,1,""],setup_logger:[3,1,1,""],sync:[3,1,1,""],train:[3,1,1,""],update_log:[3,1,1,""],update_step_in_episode_log:[3,1,1,""],update_transition_before_adding_to_replay_buffer:[3,1,1,""]},"rl_coach.agents.bc_agent":{BCAlgorithmParameters:[1,0,1,""]},"rl_coach.agents.categorical_dqn_agent":{CategoricalDQNAlgorithmParameters:[12,0,1,""]},"rl_coach.agents.cil_agent":{CILAlgorithmParameters:[2,0,1,""]},"rl_coach.agents.clipped_ppo_agent":{ClippedPPOAlgorithmParameters:[6,0,1,""]},"rl_coach.agents.ddpg_agent":{DDPGAlgorithmParameters:[7,0,1,""]},"rl_coach.agents.dfp_agent":{DFPAlgorithmParameters:[4,0,1,""]},"rl_coach.agents.dqn_agent":{DQNAgent:[48,0,1,""],DQNAlgorithmParameters:[14,0,1,""]},"rl_coach.agents.dqn_agent.DQNAgent":{act:[48,1,1,""],call_memory:[48,1,1,""],choose_action:[48,1,1,""],collect_savers:[48,1,1,""],create_networks:[48,1,1,""],emulate_act_on_trainer:[48,1,1,""],emulate_observe_on_trainer:[48,1,1,""],get_predictions:[48,1,1,""],get_state_embedding:[48,1,1,""],handle_episode_ended:[48,1,1,""],init_environment_dependent_modules:[48,1,1,""],learn_from_batch:[48,1,1,""],log_to_screen:[48,1,1,""],observe:[48,1,1,""],parent:[48,2,1,""],phase:[48,2,1,""],post_training_commands:[48,1,1,""],prepare_batch_for_inference:[48,1,1,""],register_signal:[48,1,1,""],reset_evaluation_state:[48,1,1,""],reset_internal_state:[48,1,1,""],restore_checkpoint:[48,1,1,""],run_pre_network_filter_for_inference:[48,1,1,""],save_checkpoint:[48,1,1,""],set_environment_parameters:[48,1,1,""],set_incoming_directive:[48,1,1,""],set_session:[48,1,1,""],setup_logger:[48,1,1,""],sync:[48,1,1,""],train:[48,1,1,""],update_log:[48,1,1,""],update_step_in_episode_log:[48,1,1,""],update_transition_before_adding_to_replay_buffer:[48,1,1,""]},"rl_coach.agents.mmc_agent":{MixedMonteCarloAlgorithmParameters:[16,0,1,""]},"rl_coach.agents.n_step_q_agent":{NStepQAlgorithmParameters:[17,0,1,""]},"rl_coach.agents.naf_agent":{NAFAlgorithmParameters:[18,0,1,""]},"rl_coach.agents.nec_agent":{NECAlgorithmParameters:[19,0,1,""]},"rl_coach.agents.pal_agent":{PALAlgorithmParameters:[20,0,1,""]},"rl_coach.agents.policy_gradients_agent":{PolicyGradientAlgorithmParameters:[9,0,1,""]},"rl_coach.agents.ppo_agent":{PPOAlgorithmParameters:[10,0,1,""]},"rl_coach.agents.qr_dqn_agent":{QuantileRegressionDQNAlgorithmParameters:[21,0,1,""]},"rl_coach.agents.rainbow_dqn_agent":{RainbowDQNAlgorithmParameters:[22,0,1,
""]},"rl_coach.architectures.architecture":{Architecture:[23,0,1,""]},"rl_coach.architectures.architecture.Architecture":{accumulate_gradients:[23,1,1,""],apply_and_reset_gradients:[23,1,1,""],apply_gradients:[23,1,1,""],collect_savers:[23,1,1,""],construct:[23,3,1,""],get_variable_value:[23,1,1,""],get_weights:[23,1,1,""],parallel_predict:[23,3,1,""],predict:[23,1,1,""],reset_accumulated_gradients:[23,1,1,""],set_variable_value:[23,1,1,""],set_weights:[23,1,1,""],train_on_batch:[23,1,1,""]},"rl_coach.architectures.network_wrapper":{NetworkWrapper:[23,0,1,""]},"rl_coach.architectures.network_wrapper.NetworkWrapper":{apply_gradients_and_sync_networks:[23,1,1,""],apply_gradients_to_global_network:[23,1,1,""],apply_gradients_to_online_network:[23,1,1,""],collect_savers:[23,1,1,""],parallel_prediction:[23,1,1,""],set_is_training:[23,1,1,""],sync:[23,1,1,""],train_and_sync_networks:[23,1,1,""],update_online_network:[23,1,1,""],update_target_network:[23,1,1,""]},"rl_coach.base_parameters":{AgentParameters:[3,0,1,""],DistributedTaskParameters:[0,0,1,""],NetworkParameters:[23,0,1,""],PresetValidationParameters:[0,0,1,""],TaskParameters:[0,0,1,""],VisualizationParameters:[0,0,1,""]},"rl_coach.core_types":{ActionInfo:[24,0,1,""],Batch:[24,0,1,""],EnvResponse:[24,0,1,""],Episode:[24,0,1,""],Transition:[24,0,1,""]},"rl_coach.core_types.Batch":{actions:[24,1,1,""],game_overs:[24,1,1,""],goals:[24,1,1,""],info:[24,1,1,""],info_as_list:[24,1,1,""],n_step_discounted_rewards:[24,1,1,""],next_states:[24,1,1,""],rewards:[24,1,1,""],shuffle:[24,1,1,""],size:[24,2,1,""],slice:[24,1,1,""],states:[24,1,1,""]},"rl_coach.core_types.Episode":{get_first_transition:[24,1,1,""],get_last_transition:[24,1,1,""],get_transition:[24,1,1,""],get_transitions_attribute:[24,1,1,""],insert:[24,1,1,""],is_empty:[24,1,1,""],length:[24,1,1,""],update_discounted_rewards:[24,1,1,""]},"rl_coach.data_stores.nfs_data_store":{NFSDataStore:[25,0,1,""]},"rl_coach.data_stores.s3_data_store":{S3DataStore:[25,0,1,""]},"rl_coach.environments.carla_environment":{CarlaEnvironment:[26,0,1,""]},"rl_coach.environments.control_suite_environment":{ControlSuiteEnvironment:[26,0,1,""]},"rl_coach.environments.doom_environment":{DoomEnvironment:[26,0,1,""]},"rl_coach.environments.environment":{Environment:[26,0,1,""]},"rl_coach.environments.environment.Environment":{action_space:[26,2,1,""],close:[26,1,1,""],get_action_from_user:[26,1,1,""],get_available_keys:[26,1,1,""],get_goal:[26,1,1,""],get_random_action:[26,1,1,""],get_rendered_image:[26,1,1,""],goal_space:[26,2,1,""],handle_episode_ended:[26,1,1,""],last_env_response:[26,2,1,""],phase:[26,2,1,""],render:[26,1,1,""],reset_internal_state:[26,1,1,""],set_goal:[26,1,1,""],state_space:[26,2,1,""],step:[26,1,1,""]},"rl_coach.environments.gym_environment":{GymEnvironment:[26,0,1,""]},"rl_coach.environments.starcraft2_environment":{StarCraft2Environment:[26,0,1,""]},"rl_coach.exploration_policies.additive_noise":{AdditiveNoise:[27,0,1,""]},"rl_coach.exploration_policies.boltzmann":{Boltzmann:[27,0,1,""]},"rl_coach.exploration_policies.bootstrapped":{Bootstrapped:[27,0,1,""]},"rl_coach.exploration_policies.categorical":{Categorical:[27,0,1,""]},"rl_coach.exploration_policies.continuous_entropy":{ContinuousEntropy:[27,0,1,""]},"rl_coach.exploration_policies.e_greedy":{EGreedy:[27,0,1,""]},"rl_coach.exploration_policies.exploration_policy":{ExplorationPolicy:[27,0,1,""]},"rl_coach.exploration_policies.exploration_policy.ExplorationPolicy":{change_phase:[27,1,1,""],get_action:[27,1,1,""],requires_action_value
s:[27,1,1,""],reset:[27,1,1,""]},"rl_coach.exploration_policies.greedy":{Greedy:[27,0,1,""]},"rl_coach.exploration_policies.ou_process":{OUProcess:[27,0,1,""]},"rl_coach.exploration_policies.parameter_noise":{ParameterNoise:[27,0,1,""]},"rl_coach.exploration_policies.truncated_normal":{TruncatedNormal:[27,0,1,""]},"rl_coach.exploration_policies.ucb":{UCB:[27,0,1,""]},"rl_coach.filters.action":{AttentionDiscretization:[30,0,1,""],BoxDiscretization:[30,0,1,""],BoxMasking:[30,0,1,""],FullDiscreteActionSpaceMap:[30,0,1,""],LinearBoxToBoxMap:[30,0,1,""],PartialDiscreteActionSpaceMap:[30,0,1,""]},"rl_coach.filters.observation":{ObservationClippingFilter:[29,0,1,""],ObservationCropFilter:[29,0,1,""],ObservationMoveAxisFilter:[29,0,1,""],ObservationNormalizationFilter:[29,0,1,""],ObservationRGBToYFilter:[29,0,1,""],ObservationReductionBySubPartsNameFilter:[29,0,1,""],ObservationRescaleSizeByFactorFilter:[29,0,1,""],ObservationRescaleToSizeFilter:[29,0,1,""],ObservationSqueezeFilter:[29,0,1,""],ObservationStackingFilter:[29,0,1,""],ObservationToUInt8Filter:[29,0,1,""]},"rl_coach.filters.reward":{RewardClippingFilter:[29,0,1,""],RewardNormalizationFilter:[29,0,1,""],RewardRescaleFilter:[29,0,1,""]},"rl_coach.memories.backend.redis":{RedisPubSubBackend:[32,0,1,""]},"rl_coach.memories.episodic":{EpisodicExperienceReplay:[31,0,1,""],EpisodicHRLHindsightExperienceReplay:[31,0,1,""],EpisodicHindsightExperienceReplay:[31,0,1,""],SingleEpisodeBuffer:[31,0,1,""]},"rl_coach.memories.non_episodic":{BalancedExperienceReplay:[31,0,1,""],ExperienceReplay:[31,0,1,""],PrioritizedExperienceReplay:[31,0,1,""],QDND:[31,0,1,""],TransitionCollection:[31,0,1,""]},"rl_coach.orchestrators.kubernetes_orchestrator":{Kubernetes:[33,0,1,""]},"rl_coach.spaces":{ActionSpace:[34,0,1,""],AttentionActionSpace:[34,0,1,""],BoxActionSpace:[34,0,1,""],CompoundActionSpace:[34,0,1,""],DiscreteActionSpace:[34,0,1,""],GoalsSpace:[34,0,1,""],ImageObservationSpace:[34,0,1,""],MultiSelectActionSpace:[34,0,1,""],ObservationSpace:[34,0,1,""],PlanarMapsObservationSpace:[34,0,1,""],Space:[34,0,1,""],VectorObservationSpace:[34,0,1,""]},"rl_coach.spaces.ActionSpace":{clip_action_to_space:[34,1,1,""],is_point_in_space_shape:[34,1,1,""],sample:[34,1,1,""],sample_with_info:[34,1,1,""],val_matches_space_definition:[34,1,1,""]},"rl_coach.spaces.GoalsSpace":{DistanceMetric:[34,0,1,""],clip_action_to_space:[34,1,1,""],distance_from_goal:[34,1,1,""],get_reward_for_goal_and_state:[34,1,1,""],goal_from_state:[34,1,1,""],is_point_in_space_shape:[34,1,1,""],sample:[34,1,1,""],sample_with_info:[34,1,1,""],val_matches_space_definition:[34,1,1,""]},"rl_coach.spaces.ObservationSpace":{is_point_in_space_shape:[34,1,1,""],sample:[34,1,1,""],val_matches_space_definition:[34,1,1,""]},"rl_coach.spaces.Space":{is_point_in_space_shape:[34,1,1,""],sample:[34,1,1,""],val_matches_space_definition:[34,1,1,""]}},objnames:{"0":["py","class","Python class"],"1":["py","method","Python method"],"2":["py","attribute","Python attribute"],"3":["py","staticmethod","Python static 
method"]},objtypes:{"0":"py:class","1":"py:method","2":"py:attribute","3":"py:staticmethod"},terms:{"100x100":30,"160x160":29,"1_0":[12,22],"1st":27,"20x20":30,"210x160":29,"2nd":27,"50k":38,"9_amd64":41,"\u03b3cdot":14,"abstract":[35,39],"boolean":[3,24,34,48],"break":37,"case":[0,3,5,19,23,24,27,34,47,48,49],"class":[0,1,2,3,4,5,6,7,9,10,12,14,16,17,18,19,20,21,22,23,24,25,26,27,29,30,31,32,33,34,35,36,38,42,48],"default":[0,27,49],"enum":[23,26,34],"export":[0,23,41],"final":[7,13,14,16,20,38],"float":[3,4,5,6,7,9,10,12,16,19,20,21,23,24,26,27,29,30,31,34,35,48],"function":[0,1,3,6,7,10,23,26,27,34,35,36,38,40,48],"import":[15,27,31,36,47,49],"int":[0,3,4,5,6,9,12,17,19,21,22,24,26,27,29,30,31,34,48],"long":40,"new":[0,3,6,7,10,19,20,23,24,30,38,39,46,47,48],"return":[0,3,7,9,10,11,16,19,20,22,23,24,26,27,29,31,34,35,36,38,47,48],"short":[0,38],"static":23,"super":[35,36],"switch":37,"true":[0,3,4,5,6,7,10,19,20,22,23,24,26,27,30,31,34,48],"try":[4,43,47],"while":[0,5,7,8,9,10,23,26,37,40,47,49],AWS:41,Adding:[15,46],And:[36,47],But:[37,47],Doing:47,For:[0,1,2,3,4,6,9,11,12,13,14,17,19,20,23,24,26,27,28,29,30,34,35,36,38,39,40,41,43,48,49],Has:23,Its:48,NFS:[25,41],One:[21,49],That:37,The:[0,1,2,3,4,5,6,7,9,10,11,12,16,17,18,19,20,21,22,23,24,25,26,27,29,30,31,32,33,34,35,37,38,39,40,41,43,44,46,47,48,49],Then:[4,6,7,11,18,20],There:[6,10,23,27,28,35,36,40,49],These:[1,2,3,21,26,33,39,40,41],Use:[1,2,7,18,19],Used:27,Uses:47,Using:[7,11,13,14,41],Will:23,With:[27,46],__init__:[26,35,36],_index:[5,17],_render:36,_restart_environment_episod:36,_take_act:36,_update_st:36,a2c:47,a3c:[9,17,37,47],a_i:19,a_t:[4,5,7,11,12,13,14,16,17,18,20,22],a_valu:5,abl:[30,47],about:[3,24,38,48,49],abov:[7,23,38],abs:[17,31],absolut:27,acceler:18,accept:26,access:[23,35,41],accord:[0,3,4,5,7,11,17,23,24,27,34,37,38,40,48],accordingli:[19,34,38,49],account:[4,6,10,19,20,27],accumul:[3,4,5,9,17,19,22,23,29,47,48],accumulate_gradi:23,accumulated_gradi:23,accur:47,achiev:[0,4,6,26,29,31,34,43,47,49],across:[9,16,37],act:[3,4,7,11,21,34,35,38,48],action:[1,2,3,12,13,14,15,16,17,20,21,22,23,24,26,27,28,31,35,36,38,40,48],action_idx:36,action_intrinsic_reward:24,action_penalti:7,action_prob:24,action_spac:[26,27],action_space_s:23,action_valu:[24,27],actioninfo:[3,34,38,48],actionspac:[27,34],actiontyp:36,activ:[7,23],actor:[3,6,7,10,27,40,47],actor_critic_ag:5,actorcriticag:35,actorcriticalgorithmparamet:5,actual:[4,5,12,13,14,21,22,27,30,31],adam:[6,23],adam_optimizer_beta1:23,adam_optimizer_beta2:23,adapt:[6,10],add:[7,8,18,24,27,29,36,38,41],add_rendered_image_to_env_respons:0,added:[0,4,6,9,10,19,27,31,35],adding:[3,10,27,35,48],addit:[3,23,24,26,27,29,31,34,36,37,38,40,46,48],addition:[23,26,29,35,36,38,43,44,49],additional_fetch:23,additional_simulator_paramet:[26,36],additionali:37,additive_nois:27,additivenoiseparamet:27,advanc:[22,46],advantag:[3,5,6,10,15,27],affect:[0,11,23],aforement:[13,14,20],after:[0,3,7,9,10,17,18,20,22,23,24,26,29,34,48,49],again:27,agent:[0,1,2,4,5,6,7,9,10,12,14,16,17,18,19,20,21,22,23,24,26,27,28,29,30,34,36,37,40,42,43,46,47,48],agent_param:39,agent_paramet:[3,23,48],agentparamet:[3,23,35],aggreg:38,ahead:[4,47],aim:27,algorithm:[3,24,27,35,37,38,39,43,45,46,48],algorithmparamet:[3,35],all:[0,3,9,11,19,20,23,24,26,27,29,30,34,35,36,37,38,39,40,41,44,48,49],allow:[0,3,4,15,23,24,26,27,28,29,30,31,37,38,39,40,46,47,48,49],allow_brak:26,allow_duplicates_in_batch_sampl:31,allow_no_action_to_be_select:34,along:[19,26,27,44],alpha:[16,20,31],alreadi:[19,24,36,47],also:[5,6,19,20,23
,26,34,35,37,43,47,49],altern:[26,36,44],alwai:[23,27,30],amazon:41,amazonaw:41,amount:[7,9,16,20,27,38,47],analysi:37,analyz:37,ani:[3,23,24,26,30,31,35,38,39,40,41,48],anoth:[3,15,23,28,48],answer:47,api:[26,40,44,46],appear:[3,48],appli:[0,3,5,7,9,17,23,24,27,29,47,48],applic:47,apply_and_reset_gradi:23,apply_gradi:23,apply_gradients_and_sync_network:23,apply_gradients_every_x_episod:[5,9,17],apply_gradients_to_global_network:23,apply_gradients_to_online_network:23,apply_stop_condit:0,appropri:41,approx:7,approxim:[40,47],apt:41,arbitrari:29,architectur:[3,15,35,46,48],architecture_num_q_head:27,area:30,arg:[3,23,41,48],argmax_a:[13,16,20],argument:[3,12,22,23,26,34,38,48],around:[23,24,40],arrai:[3,23,24,26,29,34,36,48],art:[3,42],artifact:41,artifici:31,arxiv:[17,31],aspect:[27,29,37],assign:[0,2,5,23,27],assign_kl_coeffici:23,assign_op:23,assum:[24,27,29,31,47],async:[23,39],async_train:23,asynchron:[5,17,23],atari:[14,26,29,41,49],atari_a3c:49,atari_dqn:49,ath:15,atom:[12,21,22],attach:26,attend:30,attent:30,attentionactionspac:30,attentiondiscret:30,attribut:24,attribute_nam:24,author:[26,43,44],auto_select_all_armi:26,autoclean:41,automat:[23,49],autonom:[26,44,46],autoremov:41,auxiliari:[26,44],avail:[4,23,24,26,37,39,41,46,47,49],averag:[6,10,23,37,38],aws:41,axes:[29,37],axi:[29,37],axis_origin:29,axis_target:29,back:[6,39],backend:[23,39,41,46,49],background:49,backpropag:19,backward:23,balanc:2,band:37,base1:41,base64:41,base:[6,10,16,18,20,26,31,35,38,41,44,47],base_paramet:[0,3,23,26,27],baselin:47,basic:[9,24,39,49],batch:[1,2,3,4,5,7,9,10,11,12,13,14,15,17,20,21,22,23,31,35,38,48],batch_siz:23,bc_agent:1,bcalgorithmparamet:1,becaus:38,becom:[7,39],been:[15,24,29,43,47],befor:[3,5,10,22,23,24,29,38,39,40,41,47,48],begin:[0,4,38],behav:34,behavior:[3,29,31,35,43,47,48,49],being:[3,35,46,47,48],bellman:[12,21,22],benchmark:[37,45,46,47],best:[47,49],beta1:23,beta2:23,beta:[7,9,31],beta_entropi:[5,6,9,10],better:[15,47],between:[0,1,2,3,6,7,9,10,12,16,17,19,21,22,23,24,26,27,30,31,34,35,37,38,40,46,47],bfg:[6,10],big:[10,12,22],bilinear:29,bin:[30,41],binari:11,bind:23,binomi:11,bit:29,blizzard:44,blob:[26,29],block:46,blog:46,boilerpl:38,bolling:37,bool:[0,3,4,5,6,7,10,19,20,22,23,24,26,27,31,34,48],boost:[41,47],bootstrap:[3,5,6,7,10,16,17,19,20,22,24,47],bootstrap_total_return_from_old_polici:[19,24],both:[3,6,23,26,27,30,47,48],bound:[6,10,12,22,27,34,47],box2d:41,box:[27,30,34],boxactionspac:30,boxdiscret:30,boxmask:30,breakout:49,breakoutdeterminist:[26,49],bring:10,bucket:41,buffer:[1,2,3,11,12,13,14,17,19,20,21,22,31,38,47,48,49],build:[28,46,47],builder:41,built:[35,38],button:[37,49],c51:12,cach:41,calcul:[3,4,5,6,7,9,10,11,12,13,14,16,17,19,20,21,22,23,24,27,31,35,48],call:[0,3,9,17,23,24,26,38,48],call_memori:[3,48],callabl:34,camera:[26,36],camera_height:26,camera_width:26,cameratyp:[26,36],can:[0,2,3,5,6,7,10,20,23,24,26,27,28,29,30,34,35,36,37,38,40,44,46,48,49],cannot:[3,48],carla:[29,44],carla_environ:26,carlaenviron:26,carlaenvironmentparamet:26,carlo:[3,20],cartpol:[26,36],cartpole_a3c:49,cartpole_clippedppo:[41,49],cartpole_dqn:49,categor:[3,5,47],categori:[28,29],categorical_dqn_ag:12,categoricaldqnalgorithmparamet:12,caus:[29,37],cdot:[5,6,7,9,11,12,13,14,16,18,20,22],central:[23,37],chain:7,challeng:38,chang:[0,3,6,7,10,11,15,17,20,27,38,41,48],change_phas:27,channel:[26,29],channels_axi:34,check:[0,3,24,34,48],checkpoint:[0,3,23,25,39,41,48,49],checkpoint_dir:[3,48],checkpoint_prefix:[3,48],checkpoint_restore_dir:[0,49],checkpoint_save_dir:0,checkpoint_
save_sec:0,child:23,chmod:41,choic:[35,41],choos:[3,15,20,27,28,30,34,35,38,40,47,48,49],choose_act:[3,35,38,48],chosen:[3,20,27,30,35,48],chunk:10,cil:47,cil_ag:2,cilalgorithmparamet:2,classic_control:41,clean:[26,35,41],cli:41,clip:[3,7,10,23,29,34,47],clip_action_to_spac:34,clip_critic_target:7,clip_gradi:23,clip_high:27,clip_likelihood_ratio_using_epsilon:[6,10],clip_low:27,clip_max:29,clip_min:29,clipbyglobalnorm:23,clipped_ppo_ag:6,clippedppoalgorithmparamet:6,clipping_high:29,clipping_low:29,clone:[3,47],close:26,cmake:41,coach:[0,3,23,25,26,27,28,32,33,35,38,42,43,44,47,49],code:[36,38,47],coeffici:[6,10,23,27,31],collect:[3,6,9,10,17,23,24,31,38,43,46,48,49],collect_sav:[3,23,48],color:29,com:41,combin:[22,40,46,47],comma:0,command:[38,41,49],common:[35,37,41,49],commun:39,compar:[0,10,15,47],complet:[24,27,38],complex:[23,28,38,40,47,49],compon:[3,12,22,23,27,33,35,38,46,48,49],composit:[3,48],compositeag:[3,48],comput:[23,27],concat:23,concentr:38,condit:[0,3],confid:27,config:[26,49],configur:[3,5,9,35,41,48],confus:38,connect:23,connectionist:9,consecut:[7,19],consequ:[17,27],consid:[5,30,37],consist:[7,26,29,30,34,38,44],constantli:49,constantschedul:31,constrain:30,construct:[23,31],consumpt:29,contain:[0,1,2,3,11,23,24,26,36,38,48,49],content:41,contin:39,continu:[1,2,5,7,8,9,18,27,28,30,34,43],continuous_entropi:27,continuous_exploration_policy_paramet:27,contribut:[4,46],control:[2,3,5,6,7,10,23,27,29,37,44,46,47,48],control_suite_environ:26,controlsuiteenviron:26,conveni:[37,49],converg:9,convers:28,convert:[3,24,27,29,34,38,40,48],convolut:[23,40],coordin:30,copi:[7,11,12,13,14,16,17,18,20,21,22,23,41],core:[3,46,48],core_typ:[3,24,26,34,48],correct:[3,47],correctli:23,correl:27,correpond:24,correspond:[2,3,4,12,13,23,24,27,29,34,36,48],could:[3,23,34,41,48],count:16,countabl:30,counter:[3,48],counterpart:40,cpu:[0,23],crd:49,creat:[3,17,23,29,36,48,49],create_network:[3,48],create_target_network:23,creation:[3,48],credenti:41,critic:[3,6,7,10,27,40,47],crop:[29,30],crop_high:29,crop_low:29,cross:[1,12,22],csv:0,ctrl:37,cuda:41,cudnn7:41,curl:41,curr_stat:[3,35,48],current:[0,1,2,3,4,6,7,8,9,10,11,13,14,16,18,19,20,21,23,24,26,27,29,30,34,35,38,46,47,48],custom:[26,27,34,35,38],custom_reward_threshold:26,cycl:38,dai:49,dashboard:[0,3,41,46,48],data:[0,9,17,23,31,38,39,41,43,46,47,49],data_stor:[25,41],dataset:[6,10,47,49],date:[19,40,47,49],dcp:[41,49],ddpg:47,ddpg_agent:7,ddpgalgorithmparamet:7,ddqn:[16,20,47],deal:47,debug:[0,37,46],decai:[5,6,10,23],decid:[0,3,4,26,35,48],decis:[3,48],decod:41,dedic:23,deep:[0,3,5,11,13,15,17,18,22,48],deepmind:44,def:[35,36],default_act:34,default_input_filt:36,default_output_filt:36,defin:[0,3,5,6,9,10,17,19,20,23,24,26,27,29,30,31,34,35,36,38,39,40,43,44,48,49],definit:[3,23,26,34,36,38,48],delai:47,delta:[12,19,22],demonstr:[1,2,49],dens:27,densiti:16,depend:[0,3,23,29,31,34,36,41,43,47,48],deploi:[33,39],depth:26,descend:47,describ:[3,12,21,29,31,35,38,41,48],descript:[3,30,34,42,49],design:[38,41,46],desir:[30,35],destabil:9,detail:[3,24,42,44,46,49],determin:[2,3,19,24,31,48],determinist:[3,47],dev:41,develop:[38,43],deviat:[9,10,27,29,37],devic:23,dfp:47,dfp_agent:4,dfpalgorithmparamet:4,dict:[3,4,23,24,26,27,34,48],dict_siz:31,dictat:4,dictionari:[2,3,23,24,26,31,34,35,48],did:26,differ:[0,1,2,3,4,5,6,9,10,11,15,23,26,27,29,34,35,36,37,39,40,46,47,48],differenti:15,difficult:[37,43],difficulti:49,dimens:[24,26,29,30],dimension:[10,30],dir:[3,48,49],direct:[3,26,48],directli:[3,5,38,40,48],directori:[0,23,35,37,41,49],disabl:4
[docs/searchindex.js: regenerated Sphinx search index — machine-generated, minified term/title index omitted for readability.]
diff --git a/docs_raw/source/components/exploration_policies/index.rst b/docs_raw/source/components/exploration_policies/index.rst
index 10b6c77..3d56dcc 100644
--- a/docs_raw/source/components/exploration_policies/index.rst
+++ b/docs_raw/source/components/exploration_policies/index.rst
@@ -38,50 +38,50 @@ spaces.
ExplorationPolicy
-----------------
-.. autoclass:: rl_coach.exploration_policies.ExplorationPolicy
+.. autoclass:: rl_coach.exploration_policies.exploration_policy.ExplorationPolicy
:members:
:inherited-members:
AdditiveNoise
-------------
-.. autoclass:: rl_coach.exploration_policies.AdditiveNoise
+.. autoclass:: rl_coach.exploration_policies.additive_noise.AdditiveNoise
Boltzmann
---------
-.. autoclass:: rl_coach.exploration_policies.Boltzmann
+.. autoclass:: rl_coach.exploration_policies.boltzmann.Boltzmann
Bootstrapped
------------
-.. autoclass:: rl_coach.exploration_policies.Bootstrapped
+.. autoclass:: rl_coach.exploration_policies.bootstrapped.Bootstrapped
Categorical
-----------
-.. autoclass:: rl_coach.exploration_policies.Categorical
+.. autoclass:: rl_coach.exploration_policies.categorical.Categorical
ContinuousEntropy
-----------------
-.. autoclass:: rl_coach.exploration_policies.ContinuousEntropy
+.. autoclass:: rl_coach.exploration_policies.continuous_entropy.ContinuousEntropy
EGreedy
-------
-.. autoclass:: rl_coach.exploration_policies.EGreedy
+.. autoclass:: rl_coach.exploration_policies.e_greedy.EGreedy
Greedy
------
-.. autoclass:: rl_coach.exploration_policies.Greedy
+.. autoclass:: rl_coach.exploration_policies.greedy.Greedy
OUProcess
---------
-.. autoclass:: rl_coach.exploration_policies.OUProcess
+.. autoclass:: rl_coach.exploration_policies.ou_process.OUProcess
ParameterNoise
--------------
-.. autoclass:: rl_coach.exploration_policies.ParameterNoise
+.. autoclass:: rl_coach.exploration_policies.parameter_noise.ParameterNoise
TruncatedNormal
---------------
-.. autoclass:: rl_coach.exploration_policies.TruncatedNormal
+.. autoclass:: rl_coach.exploration_policies.truncated_normal.TruncatedNormal
UCB
---
-.. autoclass:: rl_coach.exploration_policies.UCB
\ No newline at end of file
+.. autoclass:: rl_coach.exploration_policies.ucb.UCB
\ No newline at end of file
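
Note: the autoclass directives above now name each policy's defining module rather than the package. This pairs with the last hunk in this patch, which empties rl_coach/exploration_policies/__init__.py: Sphinx autodoc imports the dotted target at build time, so once the package-level re-exports are gone, only the submodule paths still resolve. A minimal sketch of what autodoc effectively does (a simplification, not Sphinx's actual code path):

    import importlib

    # ".. autoclass:: rl_coach.exploration_policies.additive_noise.AdditiveNoise"
    # boils down to importing the module and looking up the attribute:
    mod = importlib.import_module("rl_coach.exploration_policies.additive_noise")
    cls = getattr(mod, "AdditiveNoise")
    print(cls.__doc__)  # the docstring autodoc renders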
diff --git a/docs_raw/source/index.rst b/docs_raw/source/index.rst
index ca786ee..16c7024 100644
--- a/docs_raw/source/index.rst
+++ b/docs_raw/source/index.rst
@@ -25,7 +25,7 @@ Blog posts from the Intel® AI website:
* `Release 0.10.0 `_
-* `Release 0.11.0 `_ (current release)
+* `Release 0.11.0 `_ (current release)
You can find more details in the `GitHub repository `_.
diff --git a/rl_coach/agents/agent.py b/rl_coach/agents/agent.py
index 21be6be..53dc4c3 100644
--- a/rl_coach/agents/agent.py
+++ b/rl_coach/agents/agent.py
@@ -15,13 +15,11 @@
#
import copy
-import os
import random
from collections import OrderedDict
from typing import Dict, List, Union, Tuple
import numpy as np
-from pandas import read_pickle
from six.moves import range
from rl_coach.agents.agent_interface import AgentInterface
diff --git a/rl_coach/coach.py b/rl_coach/coach.py
index 33f83ba..a3ded7e 100644
--- a/rl_coach/coach.py
+++ b/rl_coach/coach.py
@@ -35,7 +35,6 @@ from multiprocessing.managers import BaseManager
import subprocess
from rl_coach.graph_managers.graph_manager import HumanPlayScheduleParameters, GraphManager
from rl_coach.utils import list_all_presets, short_dynamic_import, get_open_port, SharedMemoryScratchPad, get_base_dir
-from rl_coach.agents.human_agent import HumanAgentParameters
from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
from rl_coach.environments.environment import SingleLevelSelection
from rl_coach.memories.backend.redis import RedisPubSubMemoryBackendParameters
@@ -229,6 +228,8 @@ class CoachLauncher(object):
# for human play we need to create a custom graph manager
if args.play:
+ from rl_coach.agents.human_agent import HumanAgentParameters
+
env_params = short_dynamic_import(args.environment_type, ignore_module_case=True)()
env_params.human_control = True
schedule_params = HumanPlayScheduleParameters()
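
Note: this hunk defers the HumanAgentParameters import into the `args.play` branch, so rl_coach.coach no longer imports the human-agent module (and whatever it drags in) at startup. A hedged sketch of the deferred-import pattern, using a hypothetical helper that is not part of rl_coach:

    # launch_human_play is a hypothetical illustration, not rl_coach API.
    def launch_human_play(play: bool):
        if play:
            # Deferred import: the module is only loaded on the human-play
            # code path, keeping ordinary training runs free of the dependency.
            from rl_coach.agents.human_agent import HumanAgentParameters
            return HumanAgentParameters()
        return None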
diff --git a/rl_coach/exploration_policies/__init__.py b/rl_coach/exploration_policies/__init__.py
index 922390f..e69de29 100644
--- a/rl_coach/exploration_policies/__init__.py
+++ b/rl_coach/exploration_policies/__init__.py
@@ -1,55 +0,0 @@
-#
-# Copyright (c) 2017 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from .additive_noise import AdditiveNoiseParameters, AdditiveNoise
-from .boltzmann import BoltzmannParameters, Boltzmann
-from .bootstrapped import BootstrappedParameters, Bootstrapped
-from .categorical import CategoricalParameters, Categorical
-from .continuous_entropy import ContinuousEntropyParameters, ContinuousEntropy
-from .e_greedy import EGreedyParameters, EGreedy
-from .exploration_policy import ExplorationParameters, ExplorationPolicy
-from .greedy import GreedyParameters, Greedy
-from .ou_process import OUProcessParameters, OUProcess
-from .parameter_noise import ParameterNoiseParameters, ParameterNoise
-from .truncated_normal import TruncatedNormalParameters, TruncatedNormal
-from .ucb import UCBParameters, UCB
-
-__all__ = [
- 'AdditiveNoiseParameters',
- 'AdditiveNoise',
- 'BoltzmannParameters',
- 'Boltzmann',
- 'BootstrappedParameters',
- 'Bootstrapped',
- 'CategoricalParameters',
- 'Categorical',
- 'ContinuousEntropyParameters',
- 'ContinuousEntropy',
- 'EGreedyParameters',
- 'EGreedy',
- 'ExplorationParameters',
- 'ExplorationPolicy',
- 'GreedyParameters',
- 'Greedy',
- 'OUProcessParameters',
- 'OUProcess',
- 'ParameterNoiseParameters',
- 'ParameterNoise',
- 'TruncatedNormalParameters',
- 'TruncatedNormal',
- 'UCBParameters',
- 'UCB'
-]
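
Note: emptying rl_coach/exploration_policies/__init__.py removes the package-level re-exports listed above, which is why the documentation hunks earlier in this patch switch to fully qualified module paths. Downstream code has to do the same; a before/after sketch, assuming no compatibility shim is reintroduced elsewhere:

    # Before this patch (worked via the package __init__ re-exports):
    #   from rl_coach.exploration_policies import EGreedy, EGreedyParameters

    # After this patch, import from the defining submodule:
    from rl_coach.exploration_policies.e_greedy import EGreedy, EGreedyParameters

    params = EGreedyParameters()

A plausible motive, though the patch does not state one, is cutting import-time cost and avoiding circular imports — the same concern the deferred import in rl_coach/coach.py addresses.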