- collect_savers() (rl_coach.agents.agent.Agent method)
- construct() (rl_coach.architectures.architecture.Architecture static method)
+ - contains() (rl_coach.spaces.ActionSpace method)
- ContinuousEntropy (class in rl_coach.exploration_policies.continuous_entropy)
- ControlSuiteEnvironment (class in rl_coach.environments.control_suite_environment)
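The genindex hunk above is the human-readable trace of the API change this diff documents: a new contains() membership test on rl_coach.spaces.ActionSpace (and, per the regenerated search index below, on GoalsSpace, ObservationSpace, and Space as well). A minimal sketch of the call, assuming the BoxActionSpace constructor from rl_coach.spaces; the values and expected results are illustrative, not taken from this diff:

    import numpy as np
    from rl_coach.spaces import BoxActionSpace

    # A 2-D continuous action space bounded to [-1, 1] in each dimension.
    action_space = BoxActionSpace(shape=2, low=-1.0, high=1.0)

    # contains() reports membership, e.g. for validating an action
    # before it is handed to the environment.
    print(action_space.contains(np.array([0.5, -0.25])))  # expected: True
    print(action_space.contains(np.array([2.0, 0.0])))    # expected: False
    print(action_space.contains(action_space.sample()))   # a sample is always a member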
diff --git a/docs/index.html b/docs/index.html
index a82e4d0..a855c3f 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -289,7 +289,8 @@ Coach collects statistics from the training process and supports advanced visual
diff --git a/docs/objects.inv b/docs/objects.inv
index 4c3b349..a52335b 100644
Binary files a/docs/objects.inv and b/docs/objects.inv differ
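objects.inv is Sphinx's cross-reference inventory, stored zlib-compressed, which is why git can only report it as a binary change. Intersphinx consumers resolve py:class / py:method links against it. Sphinx ships a small inspector for these files, sketched below on the assumption of a local Sphinx installation and the docs/objects.inv path from this diff:

    # Dumps every cross-reference target recorded in the inventory,
    # grouped by domain:role (py:class, py:method, ...).
    # Illustrative only; not part of this diff.
    from sphinx.ext.intersphinx import inspect_main

    inspect_main(['docs/objects.inv'])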
diff --git a/docs/search.html b/docs/search.html
index cb2b468..82c00ef 100644
--- a/docs/search.html
+++ b/docs/search.html
@@ -226,7 +226,8 @@
diff --git a/docs/searchindex.js b/docs/searchindex.js
index a4082cf..d8e582e 100644
--- a/docs/searchindex.js
+++ b/docs/searchindex.js
@@ -1 +1 @@
-Search.setIndex({docnames:[...], envversion:{...}, filenames:[...], objects:{...}, objnames:{...}, objtypes:{...}, terms:{...}, titles:[...], titleterms:{...}})   [single minified line, elided]
\ No newline at end of file
+Search.setIndex({...})   [regenerated single minified line, elided; its docnames list gains "components/agents/policy_optimization/acer" (a new ACER agent page), and, consistent with the genindex hunk above, its rl_coach.spaces object entries (ActionSpace, GoalsSpace, ObservationSpace, Space) now index the contains() method]
ting/add_env.rst","dashboard.rst","design/control_flow.rst","design/horizontal_scaling.rst","design/network.rst","dist_usage.rst","features/algorithms.rst","features/benchmarks.rst","features/environments.rst","features/index.rst","index.rst","selecting_an_algorithm.rst","test.rst","usage.rst"],objects:{"rl_coach.agents.acer_agent":{ACERAlgorithmParameters:[6,0,1,""]},"rl_coach.agents.actor_critic_agent":{ActorCriticAlgorithmParameters:[5,0,1,""]},"rl_coach.agents.agent":{Agent:[3,0,1,""]},"rl_coach.agents.agent.Agent":{act:[3,1,1,""],call_memory:[3,1,1,""],choose_action:[3,1,1,""],collect_savers:[3,1,1,""],create_networks:[3,1,1,""],emulate_act_on_trainer:[3,1,1,""],emulate_observe_on_trainer:[3,1,1,""],get_predictions:[3,1,1,""],get_state_embedding:[3,1,1,""],handle_episode_ended:[3,1,1,""],init_environment_dependent_modules:[3,1,1,""],learn_from_batch:[3,1,1,""],log_to_screen:[3,1,1,""],observe:[3,1,1,""],parent:[3,2,1,""],phase:[3,2,1,""],post_training_commands:[3,1,1,""],prepare_batch_for_inference:[3,1,1,""],register_signal:[3,1,1,""],reset_evaluation_state:[3,1,1,""],reset_internal_state:[3,1,1,""],restore_checkpoint:[3,1,1,""],run_pre_network_filter_for_inference:[3,1,1,""],save_checkpoint:[3,1,1,""],set_environment_parameters:[3,1,1,""],set_incoming_directive:[3,1,1,""],set_session:[3,1,1,""],setup_logger:[3,1,1,""],sync:[3,1,1,""],train:[3,1,1,""],update_log:[3,1,1,""],update_step_in_episode_log:[3,1,1,""],update_transition_before_adding_to_replay_buffer:[3,1,1,""]},"rl_coach.agents.bc_agent":{BCAlgorithmParameters:[1,0,1,""]},"rl_coach.agents.categorical_dqn_agent":{CategoricalDQNAlgorithmParameters:[13,0,1,""]},"rl_coach.agents.cil_agent":{CILAlgorithmParameters:[2,0,1,""]},"rl_coach.agents.clipped_ppo_agent":{ClippedPPOAlgorithmParameters:[7,0,1,""]},"rl_coach.agents.ddpg_agent":{DDPGAlgorithmParameters:[8,0,1,""]},"rl_coach.agents.dfp_agent":{DFPAlgorithmParameters:[4,0,1,""]},"rl_coach.agents.dqn_agent":{DQNAgent:[49,0,1,""],DQNAlgorithmParameters:[15,0,1,""]},"rl_coach.agents.dqn_agent.DQNAgent":{act:[49,1,1,""],call_memory:[49,1,1,""],choose_action:[49,1,1,""],collect_savers:[49,1,1,""],create_networks:[49,1,1,""],emulate_act_on_trainer:[49,1,1,""],emulate_observe_on_trainer:[49,1,1,""],get_predictions:[49,1,1,""],get_state_embedding:[49,1,1,""],handle_episode_ended:[49,1,1,""],init_environment_dependent_modules:[49,1,1,""],learn_from_batch:[49,1,1,""],log_to_screen:[49,1,1,""],observe:[49,1,1,""],parent:[49,2,1,""],phase:[49,2,1,""],post_training_commands:[49,1,1,""],prepare_batch_for_inference:[49,1,1,""],register_signal:[49,1,1,""],reset_evaluation_state:[49,1,1,""],reset_internal_state:[49,1,1,""],restore_checkpoint:[49,1,1,""],run_pre_network_filter_for_inference:[49,1,1,""],save_checkpoint:[49,1,1,""],set_environment_parameters:[49,1,1,""],set_incoming_directive:[49,1,1,""],set_session:[49,1,1,""],setup_logger:[49,1,1,""],sync:[49,1,1,""],train:[49,1,1,""],update_log:[49,1,1,""],update_step_in_episode_log:[49,1,1,""],update_transition_before_adding_to_replay_buffer:[49,1,1,""]},"rl_coach.agents.mmc_agent":{MixedMonteCarloAlgorithmParameters:[17,0,1,""]},"rl_coach.agents.n_step_q_agent":{NStepQAlgorithmParameters:[18,0,1,""]},"rl_coach.agents.naf_agent":{NAFAlgorithmParameters:[19,0,1,""]},"rl_coach.agents.nec_agent":{NECAlgorithmParameters:[20,0,1,""]},"rl_coach.agents.pal_agent":{PALAlgorithmParameters:[21,0,1,""]},"rl_coach.agents.policy_gradients_agent":{PolicyGradientAlgorithmParameters:[10,0,1,""]},"rl_coach.agents.ppo_agent":{PPOAlgorithmParameters:[11,0,1,""]},
"rl_coach.agents.qr_dqn_agent":{QuantileRegressionDQNAlgorithmParameters:[22,0,1,""]},"rl_coach.agents.rainbow_dqn_agent":{RainbowDQNAlgorithmParameters:[23,0,1,""]},"rl_coach.architectures.architecture":{Architecture:[24,0,1,""]},"rl_coach.architectures.architecture.Architecture":{accumulate_gradients:[24,1,1,""],apply_and_reset_gradients:[24,1,1,""],apply_gradients:[24,1,1,""],collect_savers:[24,1,1,""],construct:[24,3,1,""],get_variable_value:[24,1,1,""],get_weights:[24,1,1,""],parallel_predict:[24,3,1,""],predict:[24,1,1,""],reset_accumulated_gradients:[24,1,1,""],set_variable_value:[24,1,1,""],set_weights:[24,1,1,""],train_on_batch:[24,1,1,""]},"rl_coach.architectures.network_wrapper":{NetworkWrapper:[24,0,1,""]},"rl_coach.architectures.network_wrapper.NetworkWrapper":{apply_gradients_and_sync_networks:[24,1,1,""],apply_gradients_to_global_network:[24,1,1,""],apply_gradients_to_online_network:[24,1,1,""],collect_savers:[24,1,1,""],parallel_prediction:[24,1,1,""],set_is_training:[24,1,1,""],sync:[24,1,1,""],train_and_sync_networks:[24,1,1,""],update_online_network:[24,1,1,""],update_target_network:[24,1,1,""]},"rl_coach.base_parameters":{AgentParameters:[3,0,1,""],DistributedTaskParameters:[0,0,1,""],NetworkParameters:[24,0,1,""],PresetValidationParameters:[0,0,1,""],TaskParameters:[0,0,1,""],VisualizationParameters:[0,0,1,""]},"rl_coach.core_types":{ActionInfo:[25,0,1,""],Batch:[25,0,1,""],EnvResponse:[25,0,1,""],Episode:[25,0,1,""],Transition:[25,0,1,""]},"rl_coach.core_types.Batch":{actions:[25,1,1,""],game_overs:[25,1,1,""],goals:[25,1,1,""],info:[25,1,1,""],info_as_list:[25,1,1,""],n_step_discounted_rewards:[25,1,1,""],next_states:[25,1,1,""],rewards:[25,1,1,""],shuffle:[25,1,1,""],size:[25,2,1,""],slice:[25,1,1,""],states:[25,1,1,""]},"rl_coach.core_types.Episode":{get_first_transition:[25,1,1,""],get_last_transition:[25,1,1,""],get_transition:[25,1,1,""],get_transitions_attribute:[25,1,1,""],insert:[25,1,1,""],is_empty:[25,1,1,""],length:[25,1,1,""],update_discounted_rewards:[25,1,1,""]},"rl_coach.data_stores.nfs_data_store":{NFSDataStore:[26,0,1,""]},"rl_coach.data_stores.s3_data_store":{S3DataStore:[26,0,1,""]},"rl_coach.environments.carla_environment":{CarlaEnvironment:[27,0,1,""]},"rl_coach.environments.control_suite_environment":{ControlSuiteEnvironment:[27,0,1,""]},"rl_coach.environments.doom_environment":{DoomEnvironment:[27,0,1,""]},"rl_coach.environments.environment":{Environment:[27,0,1,""]},"rl_coach.environments.environment.Environment":{action_space:[27,2,1,""],close:[27,1,1,""],get_action_from_user:[27,1,1,""],get_available_keys:[27,1,1,""],get_goal:[27,1,1,""],get_random_action:[27,1,1,""],get_rendered_image:[27,1,1,""],goal_space:[27,2,1,""],handle_episode_ended:[27,1,1,""],last_env_response:[27,2,1,""],phase:[27,2,1,""],render:[27,1,1,""],reset_internal_state:[27,1,1,""],set_goal:[27,1,1,""],state_space:[27,2,1,""],step:[27,1,1,""]},"rl_coach.environments.gym_environment":{GymEnvironment:[27,0,1,""]},"rl_coach.environments.starcraft2_environment":{StarCraft2Environment:[27,0,1,""]},"rl_coach.exploration_policies.additive_noise":{AdditiveNoise:[28,0,1,""]},"rl_coach.exploration_policies.boltzmann":{Boltzmann:[28,0,1,""]},"rl_coach.exploration_policies.bootstrapped":{Bootstrapped:[28,0,1,""]},"rl_coach.exploration_policies.categorical":{Categorical:[28,0,1,""]},"rl_coach.exploration_policies.continuous_entropy":{ContinuousEntropy:[28,0,1,""]},"rl_coach.exploration_policies.e_greedy":{EGreedy:[28,0,1,""]},"rl_coach.exploration_policies.exploration_policy":{Explorati
onPolicy:[28,0,1,""]},"rl_coach.exploration_policies.exploration_policy.ExplorationPolicy":{change_phase:[28,1,1,""],get_action:[28,1,1,""],requires_action_values:[28,1,1,""],reset:[28,1,1,""]},"rl_coach.exploration_policies.greedy":{Greedy:[28,0,1,""]},"rl_coach.exploration_policies.ou_process":{OUProcess:[28,0,1,""]},"rl_coach.exploration_policies.parameter_noise":{ParameterNoise:[28,0,1,""]},"rl_coach.exploration_policies.truncated_normal":{TruncatedNormal:[28,0,1,""]},"rl_coach.exploration_policies.ucb":{UCB:[28,0,1,""]},"rl_coach.filters.action":{AttentionDiscretization:[31,0,1,""],BoxDiscretization:[31,0,1,""],BoxMasking:[31,0,1,""],FullDiscreteActionSpaceMap:[31,0,1,""],LinearBoxToBoxMap:[31,0,1,""],PartialDiscreteActionSpaceMap:[31,0,1,""]},"rl_coach.filters.observation":{ObservationClippingFilter:[30,0,1,""],ObservationCropFilter:[30,0,1,""],ObservationMoveAxisFilter:[30,0,1,""],ObservationNormalizationFilter:[30,0,1,""],ObservationRGBToYFilter:[30,0,1,""],ObservationReductionBySubPartsNameFilter:[30,0,1,""],ObservationRescaleSizeByFactorFilter:[30,0,1,""],ObservationRescaleToSizeFilter:[30,0,1,""],ObservationSqueezeFilter:[30,0,1,""],ObservationStackingFilter:[30,0,1,""],ObservationToUInt8Filter:[30,0,1,""]},"rl_coach.filters.reward":{RewardClippingFilter:[30,0,1,""],RewardNormalizationFilter:[30,0,1,""],RewardRescaleFilter:[30,0,1,""]},"rl_coach.memories.backend.redis":{RedisPubSubBackend:[33,0,1,""]},"rl_coach.memories.episodic":{EpisodicExperienceReplay:[32,0,1,""],EpisodicHRLHindsightExperienceReplay:[32,0,1,""],EpisodicHindsightExperienceReplay:[32,0,1,""],SingleEpisodeBuffer:[32,0,1,""]},"rl_coach.memories.non_episodic":{BalancedExperienceReplay:[32,0,1,""],ExperienceReplay:[32,0,1,""],PrioritizedExperienceReplay:[32,0,1,""],QDND:[32,0,1,""],TransitionCollection:[32,0,1,""]},"rl_coach.orchestrators.kubernetes_orchestrator":{Kubernetes:[34,0,1,""]},"rl_coach.spaces":{ActionSpace:[35,0,1,""],AttentionActionSpace:[35,0,1,""],BoxActionSpace:[35,0,1,""],CompoundActionSpace:[35,0,1,""],DiscreteActionSpace:[35,0,1,""],GoalsSpace:[35,0,1,""],ImageObservationSpace:[35,0,1,""],MultiSelectActionSpace:[35,0,1,""],ObservationSpace:[35,0,1,""],PlanarMapsObservationSpace:[35,0,1,""],Space:[35,0,1,""],VectorObservationSpace:[35,0,1,""]},"rl_coach.spaces.ActionSpace":{clip_action_to_space:[35,1,1,""],contains:[35,1,1,""],is_valid_index:[35,1,1,""],sample:[35,1,1,""],sample_with_info:[35,1,1,""]},"rl_coach.spaces.GoalsSpace":{DistanceMetric:[35,0,1,""],clip_action_to_space:[35,1,1,""],contains:[35,1,1,""],distance_from_goal:[35,1,1,""],get_reward_for_goal_and_state:[35,1,1,""],goal_from_state:[35,1,1,""],is_valid_index:[35,1,1,""],sample:[35,1,1,""],sample_with_info:[35,1,1,""]},"rl_coach.spaces.ObservationSpace":{contains:[35,1,1,""],is_valid_index:[35,1,1,""],sample:[35,1,1,""]},"rl_coach.spaces.Space":{contains:[35,1,1,""],is_valid_index:[35,1,1,""],sample:[35,1,1,""]}},objnames:{"0":["py","class","Python class"],"1":["py","method","Python method"],"2":["py","attribute","Python attribute"],"3":["py","staticmethod","Python static 
method"]},objtypes:{"0":"py:class","1":"py:method","2":"py:attribute","3":"py:staticmethod"},terms:{"100x100":31,"160x160":30,"1_0":[13,23],"1st":28,"20x20":31,"210x160":30,"2nd":28,"50k":39,"9_amd64":42,"abstract":[36,40],"boolean":[3,25,35,49],"break":38,"case":[0,3,5,20,24,25,28,35,48,49,50],"class":[0,1,2,3,4,5,6,7,8,10,11,13,15,17,18,19,20,21,22,23,24,25,26,27,28,30,31,32,33,34,35,36,37,39,43,49],"default":[0,28,50],"enum":[24,27,35],"export":[0,24,42],"final":[8,14,15,17,21,39],"float":[3,4,5,6,7,8,10,11,13,17,20,21,22,24,25,27,28,30,31,32,35,36,49],"function":[0,1,3,6,7,8,11,24,27,28,35,36,37,39,41,49],"import":[6,16,28,32,37,48,50],"int":[0,3,4,5,6,7,10,13,18,20,22,23,25,27,28,30,31,32,35,49],"long":41,"new":[0,3,7,8,11,20,21,24,25,31,39,40,47,48,49],"return":[0,3,8,10,11,12,17,20,21,23,24,25,27,28,30,32,35,36,37,39,48,49],"short":[0,39],"static":24,"super":[36,37],"switch":38,"true":[0,3,4,5,6,7,8,11,20,21,23,24,25,27,28,31,32,35,49],"try":[4,44,48],"while":[0,5,6,8,9,10,11,24,27,38,41,48,50],AWS:42,Adding:[16,47],And:[37,48],But:[38,48],Doing:48,For:[0,1,2,3,4,7,10,12,13,14,15,18,20,21,24,25,27,28,29,30,31,35,36,37,39,40,41,42,44,49,50],Has:24,Its:49,NFS:[26,42],One:[22,50],That:38,The:[0,1,2,3,4,5,6,7,8,10,11,12,13,17,18,19,20,21,22,23,24,25,26,27,28,30,31,32,33,34,35,36,38,39,40,41,42,44,45,47,48,49,50],Then:[4,7,8,12,19,21],There:[7,11,24,28,29,36,37,41,50],These:[1,2,3,22,27,34,40,41,42],Use:[1,2,8,19,20],Used:28,Uses:48,Using:[8,12,14,15,42],Will:24,With:[28,47],__init__:[27,36,37],_index:[5,18],_render:37,_restart_environment_episod:37,_take_act:37,_update_st:37,a2c:48,a3c:[10,18,38,48],a_i:20,a_t:[4,5,6,8,12,13,14,15,17,18,19,21,23],a_valu:5,abl:[31,48],about:[3,25,39,49,50],abov:[8,24,39],abs:[18,32],absolut:28,acceler:19,accept:27,access:[24,36,42],accord:[0,3,4,5,6,8,12,18,24,25,28,35,38,39,41,49],accordingli:[20,35,39,50],account:[4,7,11,20,21,28],accumul:[3,4,5,6,10,18,20,23,24,30,48,49],accumulate_gradi:24,accumulated_gradi:24,accur:48,acer:[3,48],acer_ag:6,aceralgorithmparamet:6,achiev:[0,4,7,27,30,32,35,44,48,50],across:[10,17,38],act:[3,4,8,12,22,35,36,39,49],action:[1,2,3,13,14,15,16,17,18,21,22,23,24,25,27,28,29,32,36,37,39,41,49],action_idx:37,action_intrinsic_reward:25,action_penalti:8,action_spac:[27,28],action_space_s:24,action_valu:[25,28],actioninfo:[3,35,39,49],actionspac:[28,35],actiontyp:37,activ:[8,24],actor:[3,6,7,8,11,28,41,48],actor_critic_ag:5,actorcriticag:36,actorcriticalgorithmparamet:5,actual:[4,5,13,14,15,22,23,28,31,32],adam:[7,24],adam_optimizer_beta1:24,adam_optimizer_beta2:24,adapt:[7,11],add:[8,9,19,25,28,30,37,39,42],add_rendered_image_to_env_respons:0,added:[0,4,6,7,10,11,20,28,32,36],adding:[3,11,28,36,49],addit:[3,24,25,27,28,30,32,35,37,38,39,41,47,48,49],addition:[24,27,30,36,37,39,44,45,50],additional_fetch:24,additional_simulator_paramet:[27,37],additionali:38,additive_nois:28,additivenoiseparamet:28,advanc:[23,47],advantag:[3,5,7,11,16,28],affect:[0,12,24],aforement:[14,15,21],after:[0,3,8,10,11,18,19,21,23,24,25,27,30,35,49,50],again:28,agent:[0,1,2,4,5,6,7,8,10,11,13,15,17,18,19,20,21,22,23,24,25,27,28,29,30,31,35,37,38,41,43,44,47,48,49],agent_param:40,agent_paramet:[3,24,49],agentparamet:[3,24,36],aggreg:39,ahead:[4,48],aim:28,algorithm:[3,25,28,36,38,39,40,44,46,47,49],algorithmparamet:[3,36],all:[0,3,10,12,20,21,24,25,27,28,30,31,35,36,37,38,39,40,41,42,45,49,50],all_action_prob:25,allow:[0,3,4,16,24,25,27,28,29,30,31,32,38,39,40,41,47,48,49,50],allow_brak:27,allow_duplicates_in_batch_sampl:32,allow_no_action_to_be_select:35
,along:[20,27,28,45],alpha:[6,17,21,32],alreadi:[20,25,37,48],also:[5,6,7,20,21,24,27,35,36,38,44,48,50],altern:[27,37,45],alwai:[24,28,31],amazon:42,amazonaw:42,amount:[8,10,17,21,28,39,48],analysi:38,analyz:38,ani:[3,24,25,27,31,32,36,39,40,41,42,49],anoth:[3,16,24,29,49],answer:48,api:[27,41,45,47],appear:[3,49],appli:[0,3,5,8,10,18,24,25,28,30,48,49],applic:48,apply_and_reset_gradi:24,apply_gradi:24,apply_gradients_and_sync_network:24,apply_gradients_every_x_episod:[5,10,18],apply_gradients_to_global_network:24,apply_gradients_to_online_network:24,apply_stop_condit:0,appropri:42,approx:8,approxim:[41,48],apt:42,arbitrari:30,architectur:[3,16,36,47,49],architecture_num_q_head:28,area:31,arg:[3,24,42,49],argmax_a:[14,17,21],argument:[3,13,23,24,27,35,39,49],around:[24,25,41],arrai:[3,24,25,27,30,35,37,49],art:[3,43],artifact:42,artifici:32,arxiv:[18,32],aspect:[28,30,38],assign:[0,2,5,6,24,28],assign_kl_coeffici:24,assign_op:24,assum:[25,28,30,32,48],async:[24,40],async_train:24,asynchron:[5,18,24],atari:[15,27,30,42,50],atari_a3c:50,atari_dqn:50,ath:16,atom:[13,22,23],attach:27,attend:31,attent:31,attentionactionspac:31,attentiondiscret:31,attribut:25,attribute_nam:25,author:[27,44,45],auto_select_all_armi:27,autoclean:42,automat:[24,50],autonom:[27,45,47],autoremov:42,auxiliari:[27,45],avail:[4,24,25,27,38,40,42,47,48,50],averag:[6,7,11,24,38,39],avg:6,aws:42,axes:[30,38],axi:[30,38],axis_origin:30,axis_target:30,back:[7,40],backend:[24,40,42,47,50],background:50,backpropag:20,backward:24,balanc:2,band:38,bar:6,base1:42,base64:42,base:[7,11,17,19,21,27,32,36,39,42,45,48],base_paramet:[0,3,24,27,28],baselin:48,basic:[10,25,40,50],batch:[1,2,3,4,5,6,8,10,11,12,13,14,15,16,18,21,22,23,24,32,36,39,49],batch_siz:24,bc_agent:1,bcalgorithmparamet:1,becaus:39,becom:[8,40],been:[16,25,30,44,48],befor:[3,5,11,23,24,25,30,39,40,41,42,48,49],begin:[0,4,39],behav:35,behavior:[3,30,32,36,44,48,49,50],being:[3,36,47,48,49],bellman:[13,22,23],benchmark:[38,46,47,48],best:[48,50],beta1:24,beta2:24,beta:[6,8,10,32],beta_entropi:[5,6,7,10,11],better:[16,48],between:[0,1,2,3,6,7,8,10,11,13,17,18,20,22,23,24,25,27,28,31,32,35,36,38,39,41,47,48],bfg:[7,11],bia:[6,48],big:[11,13,23],bilinear:30,bin:[31,42],binari:12,bind:24,binomi:12,bit:30,blizzard:45,blob:[27,30],block:47,blog:47,boilerpl:39,bolling:38,bool:[0,3,4,5,6,7,8,11,20,21,23,24,25,27,28,32,35,49],boost:[42,48],bootstrap:[3,5,6,7,8,11,17,18,20,21,23,25,48],bootstrap_total_return_from_old_polici:[20,25],both:[3,7,24,27,28,31,48,49],bound:[6,7,11,13,23,28,35,48],box2d:42,box:[28,31,35],boxactionspac:31,boxdiscret:31,boxmask:31,breakout:50,breakoutdeterminist:[27,50],bring:11,bucket:42,buffer:[1,2,3,6,12,13,14,15,18,20,21,22,23,32,39,48,49,50],build:[29,47,48],builder:42,built:[36,39],bullet:6,button:[38,50],c51:13,cach:42,calcul:[3,4,5,6,7,8,10,11,12,13,14,15,17,18,20,21,22,23,24,25,28,32,36,49],call:[0,3,10,18,24,25,27,39,49],call_memori:[3,49],callabl:35,camera:[27,37],camera_height:27,camera_width:27,cameratyp:[27,37],can:[0,2,3,5,6,7,8,11,21,24,25,27,28,29,30,31,35,36,37,38,39,41,45,47,49,50],cannot:[3,49],carla:[30,45],carla_environ:27,carlaenviron:27,carlaenvironmentparamet:27,carlo:[3,21],cartpol:[27,37],cartpole_a3c:50,cartpole_clippedppo:[42,50],cartpole_dqn:50,categor:[3,5,6,48],categori:[29,30],categorical_dqn_ag:13,categoricaldqnalgorithmparamet:13,caus:[30,38],cdot:[5,7,8,10,12,13,14,15,17,19,21,23],central:[24,38],chain:8,challeng:39,chang:[0,3,6,7,8,11,12,16,18,21,28,39,42,49],change_phas:28,channel:[27,30],channels_axi:35,check:[0,3,25
,35,49],checkpoint:[0,3,24,26,40,42,49,50],checkpoint_dir:[3,49],checkpoint_prefix:[3,49],checkpoint_restore_dir:[0,50],checkpoint_save_dir:0,checkpoint_save_sec:0,child:24,chmod:42,choic:[36,42],choos:[3,16,21,28,29,31,35,36,39,41,48,49,50],choose_act:[3,36,39,49],chosen:[3,21,28,31,36,49],chunk:11,cil:48,cil_ag:2,cilalgorithmparamet:2,classic_control:42,clean:[27,36,42],cli:42,clip:[3,6,8,11,24,30,35,48],clip_action_to_spac:35,clip_critic_target:8,clip_gradi:24,clip_high:28,clip_likelihood_ratio_using_epsilon:[7,11],clip_low:28,clip_max:30,clip_min:30,clipbyglobalnorm:24,clipped_ppo_ag:7,clippedppoalgorithmparamet:7,clipping_high:30,clipping_low:30,clone:[3,48],close:27,cmake:42,coach:[0,3,24,26,27,28,29,33,34,36,39,43,44,45,48,50],code:[37,39,48],coeffici:[7,11,24,28,32],collect:[3,7,10,11,18,24,25,32,39,44,47,49,50],collect_sav:[3,24,49],color:30,com:42,combin:[23,41,47,48],comma:0,command:[39,42,50],common:[36,38,42,50],commun:40,compar:[0,11,16,48],complet:[25,28,39],complex:[24,29,39,41,48,50],compon:[3,13,23,24,28,34,36,39,47,49,50],composit:[3,49],compositeag:[3,49],comput:[24,28],concat:24,concentr:39,condit:[0,3],confid:28,config:[27,50],configur:[3,5,10,36,42,49],confus:39,connect:24,connectionist:10,consecut:[8,20],consequ:[18,28],consid:[5,6,31,38],consist:[8,27,30,31,35,39,45],constant:6,constantli:50,constantschedul:32,constrain:31,construct:[24,32],consumpt:30,contain:[0,1,2,3,12,24,25,27,35,37,39,49,50],content:42,contin:40,continu:[1,2,5,8,9,10,19,28,29,31,35,44],continuous_entropi:28,continuous_exploration_policy_paramet:28,contribut:[4,47],control:[2,3,5,6,7,8,11,24,28,30,38,45,47,48,49],control_suite_environ:27,controlsuiteenviron:27,conveni:[38,50],converg:10,convers:29,convert:[3,25,28,30,35,39,41,49],convolut:[24,41],coordin:31,copi:[8,12,13,14,15,17,18,19,21,22,23,24,42],core:[3,47,49],core_typ:[3,25,27,35,49],correct:[3,6,48],correctli:24,correl:28,correpond:25,correspond:[2,3,4,13,14,24,25,28,30,35,37,49],could:[3,24,35,42,49],count:17,countabl:31,counter:[3,49],counterpart:41,cpu:[0,24],crd:50,creat:[3,18,24,30,37,49,50],create_network:[3,49],create_target_network:24,creation:[3,49],credenti:42,critic:[3,6,7,8,11,28,41,48],crop:[30,31],crop_high:30,crop_low:30,cross:[1,13,23],csv:0,ctrl:38,cuda:42,cudnn7:42,curl:42,curr_stat:[3,36,49],current:[0,1,2,3,4,6,7,8,9,10,11,12,14,15,17,19,20,21,22,24,25,27,28,30,31,35,36,39,47,48,49],custom:[27,28,35,36,39],custom_reward_threshold:27,cycl:39,dai:50,dashboard:[0,3,42,47,49],data:[0,10,18,24,32,39,40,42,44,47,48,50],data_stor:[26,42],dataset:[7,11,48,50],date:[20,41,48,50],dcp:[42,50],ddpg:48,ddpg_agent:8,ddpgalgorithmparamet:8,ddqn:[17,21,48],deal:48,debug:[0,38,47],decai:[5,7,11,24],decid:[0,3,4,27,36,49],decis:[3,49],decod:42,dedic:24,deep:[0,3,5,12,14,16,18,19,23,49],deepmind:45,def:[36,37],default_act:35,default_input_filt:37,default_output_filt:37,defin:[0,3,5,6,7,10,11,18,20,21,24,25,27,28,30,31,32,35,36,37,39,40,41,44,45,49,50],definit:[3,24,27,35,37,39,49],delai:48,delta:[6,13,20,23],demonstr:[1,2,50],dens:28,densiti:17,depend:[0,3,6,24,30,32,35,37,42,44,48,49],deploi:[34,40],depth:27,descend:48,describ:[3,13,22,30,32,36,39,42,49],descript:[3,31,35,43,50],design:[39,42,47],desir:[31,36],destabil:10,detail:[3,25,43,45,47,50],determin:[2,3,20,25,32,49],determinist:[3,48],dev:42,develop:[39,44],deviat:[10,11,28,30,38],devic:24,dfp:48,dfp_agent:4,dfpalgorithmparamet:4,dict:[3,4,24,25,27,28,35,49],dict_siz:32,dictat:4,dictionari:[2,3,24,25,27,32,35,36,49],did:27,differ:[0,1,2,3,4,5,6,7,10,11,12,16,24,27,28,30,35,36,3
7,38,40,41,47,48,49],differenti:16,difficult:[38,44],difficulti:50,dimens:[25,27,30,31],dimension:[11,31],dir:[3,49,50],direct:[3,27,49],directli:[3,5,39,41,49],directori:[0,24,36,38,42,50],disabl:50,disable_fog:27,disappear:27,disassembl:48,discard:[25,30],discount:[8,10,11,17,20,21,23,24,25,48],discret:[1,2,4,7,11,12,13,14,15,16,17,18,20,21,22,23,28,29,30,31,35,39],disentangl:39,disk:0,displai:[0,38],distanc:35,distance_from_go:35,distance_metr:35,distancemetr:35,distil:[3,49],distribut:[3,5,6,10,11,13,22,23,24,26,28,33,34,35,41,47,48,49,50],distributed_coach:40,distributed_coach_synchronization_typ:40,distributedcoachsynchronizationtyp:40,divereg:[7,11],diverg:[6,7,11,23],dnd:[0,20,48],dnd_key_error_threshold:20,dnd_size:20,do_action_hindsight:32,doc:42,docker:42,dockerfil:42,document:45,doe:[12,24,30],doesn:40,doing:[7,11,29],domain:41,don:[4,28,38,48],done:[0,3,7,10,11,27,30,37,49,50],doom:[27,37,42,45],doom_basic_bc:50,doom_basic_dqn:50,doom_environ:[27,37,50],doomenviron:[27,37],doomenvironmentparamet:[37,50],doominputfilt:37,doomlevel:27,doomoutputfilt:37,doubl:[3,17,23],down:[24,27],download:42,dpkg:42,dqn:[3,17,18,23,27,28,30,31,39,41,48],dqn_agent:[15,49],dqnagent:49,dqnalgorithmparamet:15,drive:[2,27,45,47],driving_benchmark:27,due:30,duel:[3,23],dump:[0,3,49],dump_csv:0,dump_gif:0,dump_in_episode_sign:0,dump_mp4:0,dump_one_value_per_episod:[3,49],dump_one_value_per_step:[3,49],dump_parameters_document:0,dump_signals_to_csv_every_x_episod:0,dure:[3,6,7,10,11,12,20,28,38,39,49,50],dynam:[38,44,48],e_greedi:28,each:[0,1,2,3,4,5,6,7,10,11,12,14,15,16,18,20,21,22,24,25,27,28,29,30,31,32,35,36,38,39,40,41,42,44,48,49],eas:38,easi:[37,38,47],easier:41,easili:[28,50],echo:42,effect:[0,3,6,7,18,30,39,49],effici:[6,39,48],either:[0,3,5,18,24,28,35,38,41,50],element:[3,12,24,30,35],elf:42,embbed:24,embed:[3,20,24,49],embedd:[24,41],embedding_merger_typ:24,embeddingmergertyp:24,emploi:48,empti:25,emul:[3,6,49],emulate_act_on_train:[3,49],emulate_observe_on_train:[3,49],enabl:[24,41,50],encod:[30,35],encourag:[19,21,39],end:[2,3,10,23,25,27,30,49,50],enforc:31,engin:[27,45],enough:[4,6,20],ensembl:[28,48],ensur:[6,24],enter:[3,49,50],entir:[11,17,20,23,28,31,39],entri:[20,39],entropi:[1,5,6,7,10,11,13,23,28],enumer:35,env:[25,42],env_param:37,env_respons:[3,49],enviorn:27,environ:[0,3,4,6,16,24,25,28,29,30,31,35,36,39,42,44,46,47,49],environmentparamet:[27,37],envrespons:[0,3,27,49],episod:[0,3,4,5,10,11,12,17,18,23,27,28,36,37,38,39,40,49,50],episode_max_tim:27,episodic_hindsight_experience_replai:32,epoch:7,epsilon:[7,28,32],epsilon_schedul:28,equal:2,equat:[8,14,15,18,22],error:[24,48],escap:50,especi:16,essenti:[18,24,31,37,39,42],estim:[5,7,11,12,17,21,28],estimate_state_value_using_ga:[5,7,11],eta:[7,11],etc:[0,3,24,27,29,35,36,45,49],evalu:[0,3,24,25,28,39,49],evaluate_onli:0,evaluation_epsilon:28,evaluation_noise_percentag:28,even:[16,24,27,37,38,39,48],everi:[0,5,6,8,10,12,13,14,15,17,18,19,21,22,23,50],exact:[20,28,44],exactli:24,exampl:[2,3,4,24,25,27,28,29,30,31,35,36,37,39,41,49,50],except:[18,25],execut:[25,38,39],exhibit:[3,36,49],exist:[20,24],exit:[3,49],expand_dim:25,expect:[0,3,28,44,49],experi:[0,6,8,11,23,27,32,33,38,39,40,42,47,48,50],experiment_path:[0,27],experiment_suit:27,experimentsuit:27,expert:[1,2,25,48],exploit:[28,39],explor:[3,4,5,6,7,8,9,11,12,17,19,20,36,39,47,48],exploration_polici:28,explorationparamet:[3,28,36],exponenti:[6,7,11,23,24],expor:3,export_onnx_graph:0,expos:[38,41,47],extend:[27,28,45],extens:[27,45],extent:50,extern:0,extra:[24,25,41],extra
ct:[3,19,20,25,30,35,38,39,49],factor:[8,10,11,21,23,24,25,28,30],faithfulli:38,fake:35,fals:[0,3,8,24,25,27,28,31,32,35,37,49],far:[11,30,39,44],faster:[16,48],featur:[8,27,41,47,48],feature_minimap_maps_to_us:27,feature_screen_maps_to_us:27,fetch:[24,25],fetched_tensor:24,few:[10,12,13,14,15,17,21,22,23,28,37],field:[44,47],file:[0,3,36,39,49,50],fill:[25,37],filter:[0,3,47,49],find:[14,38,45,47],finish:[20,50],finit:31,first:[0,8,11,12,20,22,23,24,25,30,39,41],fit:35,flag:[0,3,24,25,27,49],flexibl:40,flicker:27,flow:[29,47],follow:[2,3,5,6,8,10,13,14,15,18,19,20,22,23,24,25,27,28,32,36,37,42,44,48,49],footprint:30,forc:[24,27,31,37],force_cpu:24,force_environment_reset:[27,37],force_int_bin:31,forced_attention_s:35,form:[4,18,35,48],format:36,formul:[5,6],forward:[24,28],found:[3,43,50],frac:[6,7,13,23],fraction:[7,11],frame:[0,27],frame_skip:27,framework:[0,3,24,36,47,49],framework_typ:0,free:[27,45],freeglut3:42,from:[0,1,2,3,4,5,6,7,8,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,27,28,29,30,31,33,35,36,37,38,39,40,41,42,44,45,47,49,50],full:[3,10,17,31,49],fulldiscreteactionspacemap:31,fulli:24,func:[3,49],futur:[0,3,10,25,48],future_measurements_weight:4,gae:[5,7,11],gae_lambda:[5,7,11],game:[3,25,27,45,47,49,50],game_ov:25,gamma:[5,6,8,12,13,14,15,17,18,19,20,21,23],gap:[21,48],gather:40,gaussian:[11,28],gener:[0,5,7,11,12,24,27,28,32,35,36,42,50],general_network:36,get:[3,4,7,8,9,10,11,12,14,15,17,19,21,24,25,27,28,35,39,41,42,44,49],get_act:28,get_action_from_us:27,get_available_kei:27,get_first_transit:25,get_goal:27,get_last_env_respons:27,get_last_transit:25,get_output_head:36,get_predict:[3,49],get_random_act:27,get_rendered_imag:[27,37],get_reward_for_goal_and_st:35,get_state_embed:[3,49],get_transit:25,get_transitions_attribut:25,get_variable_valu:24,get_weight:24,gfortran:42,gif:0,git:42,github:[37,42,44,47],given:[0,1,2,3,4,5,8,10,11,24,25,27,28,30,31,32,35,36,39,49],given_weight:24,global:[3,24,41,49],global_network:24,glx:42,goal:[1,2,3,4,6,24,25,27,32,39,41,48,49],goal_from_st:35,goal_nam:35,goal_spac:27,goal_vector:4,goals_spac:32,goalsspac:[32,35],goaltorewardconvers:35,going:29,good:[37,38],gpu:[0,24],gracefulli:50,gradient:[3,5,6,7,11,18,20,24,36,48,49],gradientclippingmethod:24,gradients_clipping_method:24,granular:32,graph:0,graphmanag:39,grayscal:[30,35],greedili:39,group:38,grow:23,guidelin:48,gym:[42,45],gym_environ:[27,50],gymenviron:27,gymenvironmentparamet:37,hac:48,had:44,hand:[16,30,39,48],handl:4,handle_episode_end:[3,27,49],handling_targets_after_episode_end:4,handlingtargetsafterepisodeend:4,hard:[38,48],harder:38,has:[0,3,16,20,21,25,28,30,39,41,44,48,49],has_glob:24,has_target:24,hat:[6,7,13,23],have:[0,3,4,6,24,27,28,30,31,32,39,41,44,49],head:[1,2,3,5,6,10,12,16,19,20,24,28,36,41,49],headparamet:24,heads_paramet:24,health_gath:27,heat:6,heatup:[28,39],help:[21,25,38,39,48],here:[37,39],heurist:[11,28],hide:41,hierarch:[35,39],hierarchi:[3,39,48,49],high:[8,11,30,31,35,38],high_i:35,high_kl_penalty_coeffici:11,high_x:35,higher:11,highest:[5,6,10,21,28,30,31,35],highli:[0,37,48],hindsight:[9,32,48],hindsight_goal_selection_method:32,hindsight_transitions_per_regular_transit:32,hindsightgoalselectionmethod:32,hold:[12,24,25,32,38,39,41],horizont:[42,47,50],host:42,hostnam:0,hot:35,how:[4,7,11,28,40,42,48,50],hrl:32,html:42,http:[18,32,42],hub:42,huber:22,huber_loss_interv:22,human:[0,27],human_control:27,hyper:[36,44],hyperparamet:36,ident:24,identifi:[24,35],ignor:27,imag:[0,24,27,30,31,35,37,41,50],image1:42,imit:[3,25,43,48],impact:24,implement:[
3,7,11,24,26,27,28,32,36,37,40,44,48,50],impli:50,implment:34,importance_weight:24,importance_weight_trunc:6,importantli:39,improv:[5,16,23,27,39,48],includ:[0,3,4,27,29,30,34,41,45,49,50],increas:[11,21,30,48],increment:[3,49],index:[0,2,25,27,30,31,32,35],indic:35,inf:[30,35],infer:[3,24,27,49],infinit:48,info:[3,12,25,35,37,49],info_as_list:25,inform:[3,4,18,25,27,29,38,39,42,45,49],inherit:[3,36,37],init_environment_dependent_modul:[3,49],initi:[3,4,11,21,24,25,36,39,47,49],initial_feed_dict:24,initial_kl_coeffici:11,innov:48,input:[1,2,3,4,8,12,14,15,17,19,20,21,24,29,35,39,41,49],input_embedders_paramet:24,input_high:30,input_low:30,input_space_high:31,input_space_low:31,inputembedderparamet:24,inputfilt:39,insert:[20,25],inspect:0,instal:[42,50],instanc:[3,33,35,41],instanti:[3,27,39],instead:[0,3,7,18,21,24,30,31,39,48,49],instruct:50,intact:[12,44],integ:[0,30,31],integr:[37,39,40,47],intel:47,intend:[10,24,28,39],interact:[25,39,40,47,50],interest:[24,38],interfac:[27,38,40,45],intermedi:20,intern:[3,10,18,24,25,29,39,49,50],interpol:30,intersect:48,interv:22,intrins:25,intro:47,introduc:48,invers:[27,45],invok:39,involv:36,is_empti:25,is_valid_index:35,item:25,iter:[3,5,6,8,11,16,24,49],its:[0,3,13,23,24,25,28,35,39,42,48,49,50],itself:[24,35,50],job:0,job_typ:0,joint:27,json:0,jump:[4,31],jupyt:36,just:[3,11,21,23,37,39,41,49,50],kapa:22,keep:[15,25,30,50],kei:[2,20,24,25,27,32,36,38,42,50],key_error_threshold:32,key_width:32,keyboard:[27,50],keyword:24,kl_coeffici:24,kl_coefficient_ph:24,know:[3,48,49,50],knowledg:[3,39,49],known:[25,38,44,48],kubeconfig:34,kubernet:42,kubernetes_orchestr:34,kubernetesparamet:34,kwarg:[24,27],l2_norm_added_delta:20,l2_regular:24,lack:38,lamb:28,lambda:[5,7,11,28],lane:2,larg:[28,30,45],larger:24,last:[4,6,11,20,25,27,30],last_env_respons:27,lastli:39,later:[0,3,24,49,50],latest:[18,20,39,42],layer:[24,28,32,39,41],lazi:[25,30],lazystack:30,lbfg:24,ld_library_path:42,lead:28,learn:[0,3,4,5,6,8,9,10,12,13,14,15,16,19,22,23,24,25,27,28,30,38,39,41,43,44,45,48,49],learn_from_batch:[3,36,39,49],learner:24,learning_r:[24,32],learning_rate_decay_r:24,learning_rate_decay_step:24,least:[41,48],leav:[11,12],left:[2,6,48],length:[4,5,7,11,18,20,24,25],less:[16,48],level:[0,3,24,27,37,49,50],levelmanag:[3,39,49],levelselect:27,libatla:42,libav:42,libavformat:42,libbla:42,libboost:42,libbz2:42,libfluidsynth:42,libgl1:42,libglew:42,libgm:42,libgstream:42,libgtk2:42,libgtk:42,libjpeg:42,liblapack:42,libnotifi:42,libopen:42,libosmesa6:42,libportmidi:42,librari:[27,42,45],libsdl1:42,libsdl2:42,libsdl:42,libsm:42,libsmpeg:42,libswscal:42,libtiff:42,libwebkitgtk:42,libwildmidi:42,like:[27,35,39,41,42,48],likelihood:[7,11],line:[3,39,49,50],linear:31,linearboxtoboxmap:31,linearli:31,list:[0,3,4,24,25,27,28,30,31,35,36,49,50],load:[0,38,40,50],load_memory_from_file_path:50,local:[3,41,42,49],locat:[22,25,30,48],log:[0,3,5,6,10,49],log_to_screen:[3,49],logger:[0,3,49],look:[37,42],loop:39,loss:[1,2,3,6,7,10,11,13,14,15,22,23,24,28,36,41,49],lot:[28,38,44,48],low:[8,11,30,31,35],low_i:35,low_x:35,lower:[0,32,39],lowest:[30,31,35],lstm:41,lumin:30,lvert:[6,13,23],lvl:50,mai:[0,24,43,50],main:[3,36,39,41,43,49,50],mainli:40,major:28,make:[0,3,24,27,36,38,42,44,48,49],manag:[3,24,40,42,49],mandatori:[35,37,41],mani:[3,16,43,44],manner:[11,17,18,21,30,39],manual:42,map:[3,24,27,29,30,31,35,36,49],mark:25,markdown:49,mask:[12,31],masked_target_space_high:31,masked_target_space_low:31,master:[3,39,42,49],match:[2,20,24,35],mathbb:[5,6],mathop:5,max:[5,6,13,18,23,30],max_a:
[12,15,20,21],max_action_valu:25,max_episodes_to_achieve_reward:0,max_fps_for_human_control:0,max_kl_diverg:6,max_over_num_fram:27,max_simultaneous_selected_act:35,max_siz:32,max_spe:27,maxim:[4,14],maximum:[0,13,15,20,21,25,27,28,30,32],mean:[0,2,7,8,9,10,11,19,24,28,30,31,35,38,48],meant:41,measur:[3,4,24,27,30,35,37,48,49],measurements_nam:35,mechan:[29,40,44,50],memor:48,memori:[3,23,25,30,36,39,40,42,47,48,49],memory_backend:42,memorygranular:32,memoryparamet:[3,36],merg:[24,27],mesa:42,method:[0,5,7,11,18,24,30,32],metric:[0,35,38],mid:6,middlewar:[20,24,41],middleware_paramet:24,middlewareparamet:24,midpoint:22,might:[3,10,27,36,41,49],min:[6,7,13,21,23],min_reward_threshold:0,mind:50,minim:[2,4,13],minimap_s:27,minimum:[0,7,30],mix:[3,7,11,20,21,48],mixedmontecarloalgorithmparamet:17,mixer1:42,mixtur:[17,24],mjkei:42,mjpro150:42,mjpro150_linux:42,mkdir:42,mmc:[17,48],mmc_agent:17,mode:[21,24,26,33,34,39,40,42,50],model:[0,17,19,24,47,50],modif:48,modifi:6,modul:[3,36,39,40,49],modular:[36,39,41,47],monitor:40,mont:[3,21],monte_carlo_mixing_r:[17,21],more:[3,8,18,24,30,36,38,39,41,42,47,49,50],moreov:38,most:[3,10,20,24,25,28,41,44,48,49,50],mostli:[30,39],motiv:39,move:[6,7,11,30,38,44],mp4:0,mse:[2,6,14,15,22],much:[7,11,39,48],mujoco:[27,31,37,42,45],mujoco_kei:42,mujoco_pi:42,multi:[11,24,35,41],multiarrai:[3,49],multidimension:35,multipl:[4,7,11,18,24,27,28,30,31,32,35,38,39,44,47,50],multipli:[4,10,24,30],multiselect:31,multitask:[27,45],must:[24,30,35,44],mxnet:50,n_step:[20,23,25,32],n_step_discounted_reward:25,n_step_q_ag:18,nabla:[6,8],nabla_:8,nabla_a:8,naf:48,naf_ag:19,nafalgorithmparamet:19,name:[3,24,25,27,30,35,36,42,49,50],namespac:34,nasm:42,nativ:[0,27,37,45],native_rend:0,navig:3,ndarrai:[3,24,25,27,28,30,31,35,37,49],nearest:20,neat:38,nec:[0,48],nec_ag:20,necalgorithmparamet:20,necessari:[3,20,24,49],necessarili:30,need:[0,3,6,23,24,27,28,35,36,39,44,48,49,50],neg:[4,30],neighbor:20,neon_compon:36,nervanasystem:42,network:[0,3,24,28,36,39,44,47,48,49,50],network_input_tupl:24,network_nam:[3,49],network_param:28,network_paramet:24,network_wrapp:[3,24,49],networkparamet:[3,24,28,36],networkwrapp:[3,49],neural:[3,17,24,41,44],never:24,new_value_shift_coeffici:[20,32],new_weight:24,newli:[21,37,48],next:[3,8,14,15,19,21,22,25,27,39,49,50],next_stat:25,nfs_data_stor:26,nfsdatastoreparamet:26,nice:50,no_accumul:24,node:[24,41],nois:[8,9,19,28,39],noise_percentage_schedul:28,noisi:[10,23,28],non_episod:32,none:[0,3,7,8,11,24,25,27,28,30,31,35,37,49],norm:24,norm_unclipped_grad:24,norm_unclippsed_grad:24,normal:[3,4,10,28,29,30,35],note:[20,24,28,49],notebook:36,notic:[24,48],notori:[38,44,48],now:[7,37],nstepqalgorithmparamet:18,nth:23,num_act:[20,32,35],num_bins_per_dimens:31,num_class:32,num_consecutive_playing_step:[3,8,49],num_consecutive_training_step:[3,49],num_gpu:0,num_neighbor:32,num_predicted_steps_ahead:4,num_speedup_step:27,num_steps_between_copying_online_weights_to_target:[8,18],num_steps_between_gradient_upd:[5,6,10,18],num_task:0,num_training_task:0,num_transitions_to_start_replai:6,num_work:0,number:[0,2,4,5,6,8,10,12,13,18,20,22,23,24,25,27,28,30,31,32,38,45,50],number_of_knn:20,numpi:[3,24,25,27,28,30,31,35,37,49],nvidia:42,object:[0,3,23,24,27,28,30,32,39,49],observ:[0,3,4,11,24,25,27,29,37,39,49],observation_reduction_by_sub_parts_name_filt:30,observation_rescale_size_by_factor_filt:30,observation_rescale_to_size_filt:30,observation_space_s:24,observation_space_typ:27,observation_stat:30,observation_typ:27,observationspac:35,observationspacetyp:27,o
bservationtyp:27,obtain:[3,49],off:[6,40,48],offer:[27,45],often:[38,39,41],old:[7,11,24,48],old_weight:24,onc:[0,7,10,11,12,13,14,15,17,18,21,22,23,24,35,50],one:[0,3,6,16,20,21,24,25,27,28,29,32,35,37,38,41,48,49],ones:[37,48],onli:[0,3,4,5,6,7,10,11,12,13,15,16,18,20,22,23,24,25,27,28,30,31,37,39,48,49,50],onlin:[8,12,13,14,15,17,18,19,20,21,22,23,24,39,41],online_network:24,onnx:[0,24],onto:29,open:[0,27,45],openai:[42,45],opencv:42,oper:[21,24,30],optim:[3,4,6,24,43,48],optimization_epoch:7,optimizer_epsilon:24,optimizer_typ:24,option:[6,10,24,27,31,35,36,38,40,41,50],orchestr:[40,42,47],order:[0,3,5,6,7,8,10,11,14,15,16,18,19,20,21,22,24,25,29,30,31,38,39,41,44,48,49],org:[18,32],origin:[18,30,31,44],ornstein:[8,9,28],other:[0,2,10,16,21,24,27,29,30,32,38,39,48],otherwis:[11,12,24,27,28,35],ou_process:28,our:7,out:[2,14,15,28,29,31,38,42,47,48,50],outcom:[28,39],output:[0,4,6,8,12,13,19,20,24,28,29,30,35,36,41],output_0_0:24,output_observation_spac:30,outputfilt:39,outsid:[4,28],over:[3,7,10,11,18,20,23,24,25,28,30,31,38,39,48,49],overestim:8,overfit:11,overhead:0,overlai:38,override_existing_kei:32,overriden:36,overview:39,overwhelm:39,overwritten:24,own:[24,36],p_j:[13,23],page:[3,44],pair:[0,35],pal:[21,48],pal_ag:21,pal_alpha:21,palalgorithmparamet:21,paper:[5,10,13,18,20,22,27,32,44],parallel:[6,24,38,41],parallel_predict:24,param:[3,24,25,26,27,28,33,34,36,37,49],paramet:[2,3,4,5,6,7,8,10,11,13,17,18,20,21,22,23,24,25,26,27,28,30,31,32,33,34,35,36,37,44,47,49,50],parameter_nois:28,parameters_server_host:0,parent:[3,24,49],parent_path_suffix:[3,24,49],parmet:3,pars:39,part:[0,12,24,25,28,30,31,40,41,44,48],part_nam:30,partial:31,partialdiscreteactionspacemap:31,particular:4,particularli:[27,28,35,44,48],pass:[0,4,8,9,19,20,24,27,28,29,37,38,39,41,50],patamet:20,patchelf:42,patchelf_0:42,path:[0,3,24,36,37,42,49,50],pattern:39,pdf:32,penal:[7,8,11],penalti:11,pendulum_hac:37,pendulum_with_go:37,pendulumwithgo:37,per:[0,3,4,35,36,39,49],percentag:28,percentil:28,perceptron:41,perform:[0,3,6,24,25,30,32,37,38,39,48,49],period:[41,50],persist:3,persistent_advantage_learn:21,perspect:13,phase:[3,6,7,8,9,11,24,27,28,39,49],phi:[13,23],physic:[27,45],pi_:[6,7],pick:27,pickl:50,pip3:42,pip:42,pixel:27,place:[31,38,39],placehold:[24,28],plai:[0,3,10,12,14,15,18,28,36,38,49],plain:41,planarmap:27,planarmapsobservationspac:30,platform:[27,45],pleas:[18,44],plu:24,plugin:42,point:[30,35,39,40],polici:[1,3,4,5,6,9,12,18,19,20,26,36,39,40,41,42,43,47,48],policy_gradient_rescal:[5,7,10,11],policy_gradients_ag:10,policygradientalgorithmparamet:10,policygradientrescal:[5,7,10,11],policyoptimizationag:36,popul:39,popular:[27,45],port:0,posit:[4,30],possibl:[2,3,4,20,28,31,35,38,41,47,48,49,50],post:[29,47],post_training_command:[3,49],power:[27,45],ppo:[7,11,48],ppo_ag:11,ppoalgorithmparamet:11,pre:[8,28,29],predefin:[12,21,28,50],predict:[1,2,3,5,6,7,8,11,12,13,14,15,21,22,23,24,28,41,48,49],prediction_typ:[3,49],predictiontyp:[3,49],prefect:48,prefer:24,prefix:[3,49],prep:42,prepar:[3,49],prepare_batch_for_infer:[3,49],present:[16,20,27,30,48],preset:[0,5,36,37,39,40,42,50],press:[38,50],prevent:[8,11,39],previou:30,previous:[11,24],print:[0,3,50],print_networks_summari:0,priorit:[23,32],prioriti:[23,32],privat:35,probabilit:[5,6],probabl:[3,5,6,10,12,13,23,25,28,36,48,49],procedur:6,process:[0,3,8,9,24,28,29,30,31,36,38,39,41,44,47,49],produc:24,progress:24,project:[13,23],propag:7,propagate_updates_to_dnd:20,properti:[24,32,36,37,42],proport:32,provid:[24,40],proxi:39,proxim:3,pub:[33,34,42],p
ublish:44,purpos:[0,3,10],pursuit:2,pybullet:[27,45],pygam:[0,42],pytest:42,python3:42,python:[27,32,36,42,45,47],qr_dqn_agent:22,quad:6,qualiti:27,quantil:[3,48],quantileregressiondqnalgorithmparamet:22,queri:[20,24,39,48],question:48,quit:38,r_i:[5,18],r_t:[4,6,7,23],rainbow:[3,36,48],rainbow_ag:36,rainbow_dqn_ag:23,rainbowag:36,rainbowagentparamet:36,rainbowalgorithmparamet:36,rainbowdqnalgorithmparamet:23,rainbowexplorationparamet:36,rainbowmemoryparamet:36,rainbownetworkparamet:36,rais:[3,25,49],ramp:[36,39],random:[0,18,27,28,35,39,44],random_initialization_step:27,randomli:[25,39],rang:[4,7,8,11,13,23,27,28,30,31,35,48],rare:20,rate:[0,6,17,20,24,27,41],rate_for_copying_weights_to_target:[6,8],rather:[4,38],ratio:[6,7,11,17,30],ratio_of_replai:6,raw:[27,45],reach:[0,11,35],read:26,readabl:39,readm:42,real:3,reason:[30,44],rebuild_on_every_upd:32,receiv:[24,25],recent:[3,23,24,48,49],recommend:37,redi:[33,34,42],redispubsub:42,redispubsubmemorybackendparamet:33,reduc:[1,2,10,11,21,24,30,39,48],reduct:30,reduction_method:30,reductionmethod:30,redund:30,refer:[2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,40,42],referenc:3,regard:[3,49],region:[6,48],regist:[3,49],register_sign:[3,49],registri:42,regress:[2,3,48],regula:[6,7,11],regular:[5,7,10,11,18,20,24,28,31,32,48],regularli:24,reinforc:[3,5,8,9,10,13,14,15,16,18,21,22,23,27,28,38,39,41,43,44,45,48],rel:28,relat:[24,42],relationship:48,releas:[47,48],relev:[3,12,28,30,49],remov:30,render:[0,3,27,37],reorder:30,repeat:[27,39],replac:[28,30,32,42],replace_mse_with_huber_loss:24,replai:[1,2,3,6,8,12,13,14,15,18,20,21,22,23,32,39,48,49,50],replay_buff:50,replicated_devic:24,repo:37,repositori:47,repres:[0,7,11,13,23,24,25,27,28,31,35,50],represent:41,reproduc:[39,44],request:[3,24,49],requir:[3,24,26,28,30,38,41,42,48,49],requires_action_valu:28,rescal:[4,5,7,10,11,24,29,30],rescale_factor:30,rescaleinterpolationtyp:30,rescaling_interpolation_typ:30,research:[27,44,45],reset:[3,20,24,27,28,37,49],reset_accumulated_gradi:24,reset_evaluation_st:[3,49],reset_gradi:24,reset_internal_st:[3,27,49],resourc:[40,42],respect:[8,25,27],respons:[3,25,27,39,49],rest:[24,25,31,42],restart:37,restor:[0,3,49],restore_checkpoint:[3,49],result:[3,4,13,14,15,16,22,23,24,30,31,44,48,49,50],ret:6,retrac:6,retriev:[20,32],return_additional_data:32,reus:39,reusabl:41,reward:[0,1,2,3,4,8,10,17,18,23,24,25,27,29,35,37,38,39,48,49],reward_test_level:0,reward_typ:35,rgb:[27,30,35],rho:[6,8],rho_t:6,right:[2,3,6,28,31,38,48,49],rl_coach:[0,1,2,3,4,5,6,7,8,10,11,13,15,17,18,19,20,21,22,23,24,25,26,27,28,30,31,32,33,34,35,37,42,49,50],rms_prop_optimizer_decai:24,rmsprop:24,roboschool:[27,45],robot:[27,35,45,47],roboti:42,rollout:[3,26,33,34,40,42,49,50],root:[38,42],rule:[8,12],run:[0,3,4,8,10,11,12,14,15,20,21,24,27,28,30,49,50],run_pre_network_filter_for_infer:[3,49],runphas:[3,49],runtim:42,rvert:[13,23],rvert_2:6,s3_bucket_nam:42,s3_creds_fil:42,s3_data_stor:26,s3_end_point:42,s3datastoreparamet:26,s_t:[4,5,6,8,12,13,14,15,17,18,19,21,23],sai:48,same:[3,4,7,10,17,18,21,24,27,31,32,38,41,44,48,49],sampl:[1,2,3,5,6,8,10,11,12,13,14,15,17,18,21,22,23,24,28,32,35,39,42,49],sample_with_info:35,satur:8,save:[0,3,23,24,28,42,49,50],save_checkpoint:[3,49],saver:[3,24,49],savercollect:[3,24,49],scale:[4,10,24,30,38,42,47,50],scale_down_gradients_by_number_of_workers_for_sync_train:24,scale_measurements_target:4,scaler:24,schedul:[7,28,32,39,40,42,50],scheme:[5,28,39,48],schulman:11,sci:42,scienc:44,scipi:[30,42],scope:24,scratch:48,scratchpad:0,screen:[3,27,37,
50],screen_siz:27,script:39,second:[0,24,38,48,50],section:[42,43,45],see:[3,27,30,42,44,45,48,49,50],seed:[0,27,44],seen:[4,20,21,27,30,39,44,48],segment:[27,35],select:[5,12,20,24,25,28,30,31,35,37,38,39,47,50],self:[3,24,36,37,49],send:[37,41],separ:[0,3,16,30,31,41,43,48],separate_actions_for_throttle_and_brak:27,seper:10,sequenti:[4,25,32],serv:[7,10,41],server:0,server_height:27,server_width:27,sess:[3,24,49],session:[3,24,49],set:[0,2,3,4,5,6,7,8,11,13,14,15,17,20,21,23,24,25,27,28,30,31,35,36,40,44,45,47,48,49,50],set_environment_paramet:[3,49],set_goal:27,set_incoming_direct:[3,49],set_is_train:24,set_sess:[3,49],set_variable_valu:24,set_weight:24,setup:[3,42,49],setup_logg:[3,49],setuptool:42,sever:[0,3,7,10,11,12,24,27,28,30,36,37,38,39,41,45,48,49,50],shape:[24,30,35],share:[0,3,24,32,41,49],shared_memory_scratchpad:0,shared_optim:24,shift:[31,39],shine:38,should:[0,3,4,7,11,12,18,21,24,25,27,30,32,35,36,37,40,49,50],should_dump:0,shouldn:12,show:44,shown:44,shuffl:25,side:[3,49],sigma:28,signal:[3,39,49],signal_nam:[3,49],significantli:16,sim:6,similar:[7,16,18,25,27,31,48],simpl:[10,32,36,37,41,47,48,50],simplest:48,simplif:48,simplifi:[7,38,41],simul:[27,37,45,50],simultan:7,sinc:[3,7,8,10,18,20,21,23,24,28,30,49],singl:[3,4,5,6,7,11,12,16,17,18,24,25,27,28,31,35,38,39,41,49],size:[24,25,28,30,31,32,35],skill:48,skip:[27,39],slave:[3,49],slice:25,slow:[24,50],slower:[0,16,24],slowli:8,small:[7,20,32],smaller:28,smooth:38,soft:[8,11,19],softmax:28,softwar:42,solut:48,solv:[30,37,45,47],some:[0,3,11,24,25,28,30,36,37,38,41,44,48,49,50],sort:22,sourc:[0,1,2,3,4,5,6,7,8,10,11,13,15,17,18,19,20,21,22,23,24,25,26,27,28,30,31,32,33,34,35,37,42,45,49],space:[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,27,28,29,30,31,32,39,47,49],spacesdefinit:[3,24,49],spatial:48,spawn:[40,42],special:16,specif:[0,3,12,16,20,24,25,36,39,50],specifi:[0,24,27,28,30,37,40,50],speed:[24,30,48],speedup:50,spread:[30,31],squar:30,squeeze_list:24,squeeze_output:24,src:42,stabil:[6,18,24,48],stabl:[41,48],stack:[3,29,30,35,49],stack_siz:[24,30],stacking_axi:30,stage:41,stai:44,standard:[7,10,11,12,28,30,38],starcraft2_environ:27,starcraft2environ:27,starcraft:[35,45],starcraftobservationtyp:27,start:[3,6,8,11,16,21,25,30,31,37,42,49],state:[1,2,3,4,5,6,7,8,9,10,11,12,14,15,16,17,18,19,20,21,22,23,24,25,27,30,32,35,36,37,39,41,43,48,49],state_key_with_the_class_index:[2,32],state_spac:27,state_valu:25,statist:[3,10,30,47,49],stdev:28,steep:28,step:[0,3,4,5,6,7,8,10,11,12,13,14,15,17,19,20,21,22,23,24,25,27,28,30,36,37,38,39,48,49,50],stepmethod:[8,18],stochast:39,stop:[0,27],store:[0,3,20,23,25,27,30,32,38,39,40,42,47,49,50],store_transitions_only_when_episodes_are_termin:23,str:[0,2,3,4,18,24,25,27,28,30,31,35,49],strategi:[27,45],stream:[16,40],strict:44,string:[0,24,27],structur:[0,3,25,32,36,39,49],stuff:24,style:28,sub:[31,32,33,34,35,36,39,42,50],sub_spac:35,subset:[38,44,48],subtract:21,succeed:27,success:[0,27,48],suffer:38,suffici:25,suffix:[3,24,49],suggest:36,suit:[0,45],suitabl:[40,50],sum:[4,7,10,17,24,25],sum_:[5,13,17,18,20,23],summari:[0,3,49],supervis:48,suppli:[3,49],support:[0,3,24,27,28,38,41,42,43,45,47,50],sure:[0,42,44],surrog:7,swig:42,swingup:27,symbol:24,sync:[3,24,39,40,49],synchron:[0,24,39,41],t_max:[10,18],tag:42,take:[0,10,11,16,20,21,24,27,28,29,37,38,39],taken:[1,2,4,5,6,7,8,11,13,16,20,21,22,23,24,25,27,28],tanh:8,tar:42,target:[0,1,2,3,4,5,6,7,8,11,12,13,14,15,17,18,19,20,21,22,23,24,27,30,31,35,36,39,41,49],target_act:31,target_kl_diverg:11,target_network
:24,target_success_r:27,targets_horizon:18,task:[0,1,2,27,30,36,38,45],task_index:0,techniqu:[7,11,47,48],technolog:40,teh:24,temperatur:28,temperature_schedul:28,tensor:[3,24,49],tensorboard:0,tensorflow:[0,3,24,49,50],tensorflow_support:24,term:[6,7,11],termin:[3,8,25,39,49],test:[0,3,5,6,8,9,10,11,24,36,44,47,50],test_using_a_trace_test:0,text:6,textrm:39,than:[0,3,11,24,28,38,41,49],thei:[3,20,21,24,28,38,39,40,48,49,50],them:[4,5,10,18,24,25,27,30,35,37,38,41],therefor:[0,8,24,29,48],theta:[6,7,8,13,23,28],theta_:[6,7],thi:[0,3,4,5,6,7,8,10,11,12,16,18,20,23,24,25,27,28,29,30,31,32,33,35,36,37,38,39,40,41,42,44,48,49,50],thing:38,those:[0,3,8,12,14,15,16,20,25,28,31,39,41,43,48,49],thousand:[11,12,13,14,15,17,21,22,23],thread:24,three:[3,40,41,42,43],threshold:[11,20,30],through:[0,3,4,8,9,10,11,12,20,21,24,36,37,39,41,49],tild:8,time:[0,4,21,24,28,31,32,38,41,48],time_limit:37,timestep:[4,10],timid:42,tmp:0,togeth:[3,18,25,39,49],toggl:38,too:11,tool:[38,42,48],top:[24,27,29,30,32,37,38,48],torqu:27,total:[0,3,10,11,17,20,21,25,32,36,38,48,49],total_loss:24,total_return:25,trace:0,trace_max_env_step:0,trace_test_level:0,tradeoff:28,train:[0,3,16,24,28,33,34,36,37,38,39,40,41,44,47,48,49],train_and_sync_network:24,train_on_batch:24,trainer:[26,40],transfer:[27,33,45],transit:[1,2,3,4,5,6,8,10,11,13,14,15,18,20,21,22,23,32,36,39,40,49],transition_idx:25,tri:48,trick:44,tricki:38,trigger:[27,42],truncat:6,truncated_norm:28,trust:[6,48],ttf2:42,tune:28,tupl:[1,2,3,8,24,25,27,32,35,36],turn:[2,48],tutori:[36,37],tweak:[3,49],two:[8,10,18,24,27,28,29,30,31,35,37,40,41,50],txt:42,type:[0,3,10,16,24,27,30,35,36,39,41,47,48,49,50],typic:[7,11,24,48,50],ubuntu16:42,uhlenbeck:[8,9,28],uint8:30,unbound:35,uncertain:28,uncertainti:28,unchang:11,unclip:[3,36,49],uncorrel:18,undeploi:40,under:[3,24,36,50],underbrac:5,understand:50,unifi:7,uniformli:[27,28,31,35],union:[3,25,27,28,31,35,49],uniqu:24,unit:38,unlik:11,unmask:31,unnecessari:0,unshar:[3,49],unsign:30,unspecifi:24,unstabl:[38,44],until:[0,6,10,11,20,23,28],unus:24,unzip:42,updat:[3,6,7,8,10,11,12,13,14,15,16,18,19,20,21,22,23,24,25,28,36,37,38,39,41,42,48,49],update_discounted_reward:25,update_filter_internal_st:[3,49],update_log:[3,49],update_online_network:24,update_step_in_episode_log:[3,49],update_target_network:24,update_transition_before_adding_to_replay_buff:[3,49],upgrad:42,upon:[3,5,36,49],upper:[6,28],usag:[31,47],use:[0,1,2,3,4,5,6,8,9,10,12,14,15,19,24,25,26,27,28,30,31,32,35,36,37,39,41,42,47,48,49,50],use_accumulated_reward_as_measur:4,use_cpu:0,use_full_action_spac:27,use_kl_regular:[7,11],use_non_zero_discount_for_terminal_st:8,use_separate_networks_per_head:24,use_target_network_for_evalu:8,use_trust_region_optim:6,used:[0,2,3,5,6,7,8,10,11,12,13,17,18,19,20,21,22,24,27,28,30,31,32,33,34,36,37,39,40,41,44,49,50],useful:[0,3,4,23,24,28,30,35,44,48,49,50],user:[24,27,28,38,39,42],userguid:42,uses:[0,1,7,11,16,25,26,28,34,39,40,42,44,48,50],using:[0,3,5,6,7,8,10,11,14,15,17,18,19,20,21,23,24,26,27,28,30,33,36,37,38,40,45,48,49,50],usr:42,usual:[30,39],util:[3,38,49],v_max:13,v_min:13,val:[3,35,49],valid:[0,35],valu:[0,2,3,4,5,6,7,8,11,12,13,14,15,16,18,19,20,21,23,24,25,27,28,30,31,32,35,36,39,41,42,43,48,49],valuabl:38,value_targets_mix_fract:[7,11],valueexcept:[3,49],valueoptimizationag:36,van:4,vari:41,variabl:[24,27,42],variable_scop:24,varianc:[10,28,38,48],variant:[28,32,48],variou:[3,25,32,47],vector:[3,4,8,9,11,12,24,27,30,35,37,41,48,49],vectorobservationspac:30,verbos:27,veri:[0,7,8,10,16,20,38,48,50],version:[7,11
,25],versu:24,vertic:24,via:[2,12],video:[0,3,27],video_dump_method:0,view:38,viewabl:[3,49],visit:44,visual:[0,3,27,45,47],visualization_paramet:27,visualizationparamet:[3,27],vizdoom:[42,45],vote:28,wai:[3,7,11,28,31,37,39,41,47,49,50],wait:[5,24,40],walk:37,want:[3,4,23,24,25,30,31,32,49],warn:[28,30,31],wasn:25,weather_id:27,websit:[27,47],weight:[4,5,6,7,8,11,12,13,14,15,17,18,19,20,21,22,23,24,28,39,41,48],well:[20,24,28,35,48],went:11,were:[4,13,14,15,16,20,22,23,24,25,31,44],west:42,wget:42,what:[11,48],when:[0,3,4,5,6,7,8,9,10,11,20,24,25,26,27,28,30,33,34,36,37,38,49,50],whenev:40,where:[2,3,4,5,6,7,11,12,13,16,18,20,21,23,24,25,27,28,30,31,35,38,48,49],which:[0,1,2,3,5,6,7,8,10,11,12,16,18,19,20,21,22,24,25,26,27,28,30,32,33,34,35,36,37,38,39,40,41,43,44,45,47,48,49,50],who:39,why:[38,39],window:[30,31],wise:30,within:[0,7,11,19,28,35,38],without:[5,11,31,32,38,48,50],won:[4,24],wont:24,work:[3,18,24,28,30,31,38,39,48,49,50],workaround:0,workdir:42,worker:[0,3,18,24,26,30,32,33,34,38,40,41,42,48,49,50],worker_devic:24,worker_host:0,wors:48,would:[24,42,48],wrap:[27,30,39,45],wrapper:[3,24,25,27,35,41,49],write:[0,3,49],written:[3,23,26,49],www:42,xdist:42,y_t:[8,12,14,15,17,19,20,21],year:48,yet:[16,37],you:[4,30,32,36,37,42,47,50],your:[36,37,42,50],yuv:30,z_i:[13,23],z_j:[13,23],zero:[2,14,15],zip:42,zlib1g:42},titles:["Additional Parameters","Behavioral Cloning","Conditional Imitation Learning","Agents","Direct Future Prediction","Actor-Critic","ACER","Clipped Proximal Policy Optimization","Deep Deterministic Policy Gradient","Hierarchical Actor Critic","Policy Gradient","Proximal Policy Optimization","Bootstrapped DQN","Categorical DQN","Double DQN","Deep Q Networks","Dueling DQN","Mixed Monte Carlo","N-Step Q Learning","Normalized Advantage Functions","Neural Episodic Control","Persistent Advantage Learning","Quantile Regression DQN","Rainbow","Architectures","Core Types","Data Stores","Environments","Exploration Policies","Filters","Input Filters","Output Filters","Memories","Memory Backends","Orchestrators","Spaces","Adding a New Agent","Adding a New Environment","Coach Dashboard","Control Flow","Distributed Coach - Horizontal Scale-Out","Network Design","Usage - Distributed Coach","Algorithms","Benchmarks","Environments","Features","Reinforcement Learning Coach","Selecting an 
Algorithm","test","Usage"],titleterms:{"final":20,"function":19,"new":[36,37],"switch":50,Adding:[36,37],Using:37,acer:6,across:48,action:[4,5,6,7,8,9,10,11,12,19,20,31,35,48],actioninfo:25,actor:[5,9],addit:[0,50],additivenois:28,advantag:[19,21],agent:[3,36,39,50],algorithm:[1,2,4,5,6,7,8,9,10,11,12,13,14,15,17,18,19,20,21,22,23,43,48,50],api:37,architectur:24,attentionactionspac:35,backend:33,balancedexperiencereplai:32,batch:25,behavior:1,benchmark:44,between:50,blizzard:27,boltzmann:28,bootstrap:[12,28],boxactionspac:35,build:42,can:48,carla:27,carlo:17,categor:[13,28],choos:[4,5,6,7,8,9,10,11,12,19,20],clip:7,clone:[1,42],coach:[37,38,40,42,47],collect:48,compar:38,compoundactionspac:35,condit:2,config:42,contain:42,continu:[7,11,48],continuousentropi:28,control:[20,27,39],copi:41,core:25,creat:42,critic:[5,9],dashboard:38,data:26,deep:[8,15,50],deepmind:27,demonstr:48,descript:[1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23],design:41,determinist:8,direct:4,discret:[5,6,10,48],discreteactionspac:35,distribut:[40,42],distributedtaskparamet:0,doe:48,doubl:14,dqn:[12,13,14,16,22],duel:16,dump:50,egreedi:28,environ:[27,37,45,48,50],envrespons:25,episod:[20,25,32],episodicexperiencereplai:32,episodichindsightexperiencereplai:32,episodichrlhindsightexperiencereplai:32,evalu:50,experiencereplai:32,explor:28,explorationpolici:28,featur:46,file:42,filter:[29,30,31],flag:50,flow:39,framework:50,from:48,futur:4,gener:16,gif:50,goal:35,gradient:[8,10],graph:39,greedi:28,gym:[27,37],have:48,hierarch:9,horizont:40,human:[48,50],imag:42,imageobservationspac:35,imit:[2,50],implement:42,input:30,interfac:42,keep:41,kubernet:34,learn:[2,18,21,47,50],level:39,manag:39,memori:[32,33],mix:17,mont:17,more:48,multi:50,multipl:48,multiselectactionspac:35,network:[1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,41],networkwrapp:24,neural:20,nfsdatastor:26,node:[48,50],non:32,normal:19,observ:[30,35],observationclippingfilt:30,observationcropfilt:30,observationmoveaxisfilt:30,observationnormalizationfilt:30,observationreductionbysubpartsnamefilt:30,observationrescalesizebyfactorfilt:30,observationrescaletosizefilt:30,observationrgbtoyfilt:30,observationsqueezefilt:30,observationstackingfilt:30,observationtouint8filt:30,openai:[27,37],optim:[7,11],orchestr:34,ouprocess:28,out:40,output:31,pain:48,parallel:48,paramet:0,parameternois:28,persist:21,plai:50,planarmapsobservationspac:35,polici:[7,8,10,11,28],predict:4,prerequisit:42,presetvalidationparamet:0,prioritizedexperiencereplai:32,process:48,proxim:[7,11],push:42,qdnd:32,quantil:22,rainbow:23,redispubsubbackend:33,regress:22,reinforc:47,render:50,repositori:42,reward:30,rewardclippingfilt:30,rewardnormalizationfilt:30,rewardrescalefilt:30,run:[38,42],s3datastor:26,sampl:48,scale:40,select:48,signal:38,simul:48,singl:50,singleepisodebuff:32,solv:48,space:[35,48],starcraft:27,statist:38,step:18,store:[12,26],structur:[1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23],suit:27,support:40,sync:41,synchron:40,task:48,taskparamet:0,test:49,thread:50,through:50,track:38,train:[1,2,4,5,6,7,8,9,10,11,12,13,14,15,17,18,19,20,21,22,23,50],transit:[12,25],transitioncollect:32,truncatednorm:28,type:[25,40],ucb:28,usag:[42,50],vectorobservationspac:35,visual:[38,50],visualizationparamet:0,vizdoom:27,you:48,your:48}})
\ No newline at end of file
diff --git a/docs/selecting_an_algorithm.html b/docs/selecting_an_algorithm.html
index 3bca185..b6f86d7 100644
--- a/docs/selecting_an_algorithm.html
+++ b/docs/selecting_an_algorithm.html
@@ -364,6 +364,14 @@ $(document).ready(function() {
learning stability and speed, both for discrete and continuous action spaces.
+
+
+ ACER
+
+ Similar to A3C, with the addition of experience replay and off-policy training. To reduce variance and
+ improve stability, it also employs bias correction and trust region optimization techniques.
+
+
DDPG
@@ -480,7 +488,8 @@ algorithms for imitation learning in Coach.
-
+
+
diff --git a/docs/test.html b/docs/test.html
index c32c1ef..e0cb48a 100644
--- a/docs/test.html
+++ b/docs/test.html
@@ -246,17 +246,9 @@ training or testing.
-
collect_savers(parent_path_suffix: str) → rl_coach.saver.SaverCollection
Collect all of agent’s network savers
-:param parent_path_suffix: path suffix of the parent of the agent
-
-(could be name of level manager or composite agent)
-
-
-
-
-| Returns: | collection of all agent savers |
-
-
-
+:param parent_path_suffix: path suffix of the parent of the agent
+(could be name of level manager or composite agent)
+:return: collection of all agent savers
@@ -552,15 +544,20 @@ by val, and by the current phase set in self.phase.
-
-
run_pre_network_filter_for_inference(state: Dict[str, numpy.ndarray]) → Dict[str, numpy.ndarray]
+run_pre_network_filter_for_inference(state: Dict[str, numpy.ndarray], update_filter_internal_state: bool = True) → Dict[str, numpy.ndarray]
Run filters which were defined to be applied right before using the state for inference.
-| Parameters: | state – The state to run the filters on |
+
|---|
| Parameters: |
+- state – The state to run the filters on
+- update_filter_internal_state – Whether to update the filter’s internal state; it should not be updated when evaluating
+
+ |
-| Returns: | The filtered state |
+
|---|
| Returns: | The filtered state
+ |
@@ -763,7 +760,8 @@ Can be useful for agents that want to tweak the reward, termination signal, etc.
-
+
+
diff --git a/docs/usage.html b/docs/usage.html
index 8aa80e9..7665c96 100644
--- a/docs/usage.html
+++ b/docs/usage.html
@@ -369,7 +369,8 @@ The most up to date description can be found by using the
-
+
+
diff --git a/docs_raw/source/_static/img/algorithms.png b/docs_raw/source/_static/img/algorithms.png
index ed6b475..983df67 100644
Binary files a/docs_raw/source/_static/img/algorithms.png and b/docs_raw/source/_static/img/algorithms.png differ
diff --git a/docs_raw/source/_static/img/design_imgs/acer.png b/docs_raw/source/_static/img/design_imgs/acer.png
new file mode 100644
index 0000000..7bffc3d
Binary files /dev/null and b/docs_raw/source/_static/img/design_imgs/acer.png differ
diff --git a/docs_raw/source/components/agents/index.rst b/docs_raw/source/components/agents/index.rst
index 1a5cd42..62aaf0e 100644
--- a/docs_raw/source/components/agents/index.rst
+++ b/docs_raw/source/components/agents/index.rst
@@ -14,6 +14,7 @@ A detailed description of those algorithms can be found by navigating to each of
:caption: Agents
policy_optimization/ac
+ policy_optimization/acer
imitation/bc
value_optimization/bs_dqn
value_optimization/categorical_dqn
diff --git a/docs_raw/source/components/agents/other/dfp.rst b/docs_raw/source/components/agents/other/dfp.rst
index 6640f56..86ea7aa 100644
--- a/docs_raw/source/components/agents/other/dfp.rst
+++ b/docs_raw/source/components/agents/other/dfp.rst
@@ -32,8 +32,8 @@ Training the network
Given a batch of transitions, run them through the network to get the current predictions of the future measurements
per action, and set them as the initial targets for training the network. For each transition
:math:`(s_t,a_t,r_t,s_{t+1} )` in the batch, the target of the network for the action that was taken, is the actual
- measurements that were seen in time-steps :math:`t+1,t+2,t+4,t+8,t+16` and :math:`t+32`.
- For the actions that were not taken, the targets are the current values.
+measurements that were seen in time-steps :math:`t+1,t+2,t+4,t+8,t+16` and :math:`t+32`.
+For the actions that were not taken, the targets are the current values.
.. autoclass:: rl_coach.agents.dfp_agent.DFPAlgorithmParameters
diff --git a/docs_raw/source/components/agents/policy_optimization/acer.rst b/docs_raw/source/components/agents/policy_optimization/acer.rst
new file mode 100644
index 0000000..7808443
--- /dev/null
+++ b/docs_raw/source/components/agents/policy_optimization/acer.rst
@@ -0,0 +1,60 @@
+ACER
+============
+
+**Actions space:** Discrete
+
+**References:** `Sample Efficient Actor-Critic with Experience Replay <https://arxiv.org/abs/1611.01224>`_
+
+Network Structure
+-----------------
+
+.. image:: /_static/img/design_imgs/acer.png
+ :width: 500px
+ :align: center
+
+Algorithm Description
+---------------------
+
+Choosing an action - Discrete actions
++++++++++++++++++++++++++++++++++++++
+
+The policy network is used to predict action probabilities. During training, an action is sampled from a categorical
+distribution parameterized by these probabilities. During testing, the action with the highest probability is used.
+
+Training the network
+++++++++++++++++++++
+Each iteration performs one on-policy update with a batch of the last :math:`T_{max}` transitions,
+and :math:`n` (the replay ratio) off-policy updates on batches of :math:`T_{max}` transitions sampled from the replay buffer.
+
+Each update performs the following procedure:
+
+1. **Calculate state values:**
+
+ .. math:: V(s_t) = \mathbb{E}_{a \sim \pi} [Q(s_t,a)]
+
+2. **Calculate Q retrace:**
+
+ .. math:: Q^{ret}(s_t,a_t) = r_t +\gamma \bar{\rho}_{t+1}[Q^{ret}(s_{t+1},a_{t+1}) - Q(s_{t+1},a_{t+1})] + \gamma V(s_{t+1})
+ .. math:: \text{where} \quad \bar{\rho}_{t} = \min{\left\{c,\rho_t\right\}},\quad \rho_t=\frac{\pi (a_t \mid s_t)}{\mu (a_t \mid s_t)}
+
+3. **Accumulate gradients:**
+ :math:`\bullet` **Policy gradients (with bias correction):**
+
+ .. math:: \hat{g}_t^{policy} & = & \bar{\rho}_{t} \nabla \log \pi (a_t \mid s_t) [Q^{ret}(s_t,a_t) - V(s_t)] \\
+ & & + \mathbb{E}_{a \sim \pi} \left(\left[\frac{\rho_t(a)-c}{\rho_t(a)}\right] \nabla \log \pi (a \mid s_t) [Q(s_t,a) - V(s_t)] \right)
+
+ :math:`\bullet` **Q-Head gradients (MSE):**
+
+ .. math:: \hat{g}_t^{Q} = (Q^{ret}(s_t,a_t) - Q(s_t,a_t)) \nabla Q(s_t,a_t)\\
+
+4. **(Optional) Trust region update:** change the policy loss gradient w.r.t network output:
+
+ .. math:: \hat{g}_t^{trust-region} = \hat{g}_t^{policy} - \max \left\{0, \frac{k^T \hat{g}_t^{policy} - \delta}{\lVert k \rVert_2^2}\right\} k
+ .. math:: \text{where} \quad k = \nabla D_{KL}[\pi_{avg} \parallel \pi]
+
+ The average policy network is an exponential moving average of the parameters of the network (:math:`\theta_{avg}=\alpha\theta_{avg}+(1-\alpha)\theta`).
+   The goal of the trust region update is to bound the difference between the updated policy and the average policy, ensuring stability.
+
+
+
+.. autoclass:: rl_coach.agents.acer_agent.ACERAlgorithmParameters
\ No newline at end of file
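For reference, the Q-retrace recursion above reduces to a short backward loop; a minimal NumPy sketch follows (illustrative names, a single completed trajectory, and a truncation constant of 1 for the retrace weights, as in the agent implementation)::

    import numpy as np

    def q_retrace(rewards, q_taken, values, rho_taken, discount, bootstrap_value):
        # Backward recursion: Qret_t = r_t + gamma * Qret_{t+1}, followed by
        # Qret <- rho_bar * (Qret - Q(s,a)) + V(s) before stepping back in time.
        rho_bar = np.minimum(1.0, rho_taken)   # truncated importance weights
        q_ret = bootstrap_value                # V(s_T) estimate, or 0 if terminal
        targets = np.zeros_like(rewards, dtype=float)
        for t in reversed(range(len(rewards))):
            q_ret = rewards[t] + discount * q_ret
            targets[t] = q_ret
            q_ret = rho_bar[t] * (q_ret - q_taken[t]) + values[t]
        return targets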
diff --git a/docs_raw/source/components/agents/value_optimization/double_dqn.rst b/docs_raw/source/components/agents/value_optimization/double_dqn.rst
index cb29797..a041c4f 100644
--- a/docs_raw/source/components/agents/value_optimization/double_dqn.rst
+++ b/docs_raw/source/components/agents/value_optimization/double_dqn.rst
@@ -19,7 +19,7 @@ Training the network
1. Sample a batch of transitions from the replay buffer.
-2. Using the next states from the sampled batch, run the online network in order to find the $Q$ maximizing
+2. Using the next states from the sampled batch, run the online network in order to find the :math:`Q` maximizing
action :math:`argmax_a Q(s_{t+1},a)`. For these actions, use the corresponding next states and run the target
network to calculate :math:`Q(s_{t+1},argmax_a Q(s_{t+1},a))`.
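A minimal NumPy sketch of the Double DQN target described above (illustrative names; terminal masking added for completeness)::

    import numpy as np

    def double_dqn_targets(rewards, next_q_online, next_q_target, discount, terminals):
        # The online network selects the greedy action; the target network evaluates it.
        best_actions = np.argmax(next_q_online, axis=1)
        next_values = next_q_target[np.arange(len(rewards)), best_actions]
        return rewards + discount * (1.0 - terminals) * next_values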
diff --git a/docs_raw/source/components/agents/value_optimization/dqn.rst b/docs_raw/source/components/agents/value_optimization/dqn.rst
index 4882e38..7c267c6 100644
--- a/docs_raw/source/components/agents/value_optimization/dqn.rst
+++ b/docs_raw/source/components/agents/value_optimization/dqn.rst
@@ -26,7 +26,7 @@ Training the network
use the current states from the sampled batch, and run the online network to get the current Q values predictions.
Set those values as the targets for the actions that were not actually played.
-4. For each action that was played, use the following equation for calculating the targets of the network: $$ y_t=r(s_t,a_t)+γ\cdot max_a {Q(s_{t+1},a)} $$
+4. For each action that was played, use the following equation for calculating the targets of the network:
   :math:`y_t=r(s_t,a_t)+\gamma \cdot max_a Q(s_{t+1},a)`
5. Finally, train the online network using the current states as inputs, and with the aforementioned targets.
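The corresponding target computation, sketched in NumPy (illustrative names; terminal masking added for completeness)::

    import numpy as np

    def dqn_targets(rewards, next_q_target, discount, terminals):
        # y_t = r(s_t, a_t) + gamma * max_a Q_target(s_{t+1}, a)
        return rewards + discount * (1.0 - terminals) * np.max(next_q_target, axis=1)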
diff --git a/docs_raw/source/diagrams.xml b/docs_raw/source/diagrams.xml
index 9de9600..7aa1e24 100644
--- a/docs_raw/source/diagrams.xml
+++ b/docs_raw/source/diagrams.xml
@@ -1 +1 @@
-7V1dd6M40v41uUwOQnxeJulO957T3ZuZPrvvu5fEJg47GLwYJ5399SthhEElY9mWMKSVmZN2ZMCYp6pU9aiqdIXvl7++FNHq5Xs+j9Mr25r/usKfrmwb4dAm/9CR9+2IZ+PtwKJI5vVBu4GfyX/jetCqRzfJPF53DizzPC2TVXdwlmdZPCs7Y1FR5G/dw57ztPupq2gRg4GfsyiFo/+XzMuX7WjgWrvxr3GyeGGfjKz6nado9teiyDdZ/XlXNn6ufrZvLyN2rfr49Us0z99aQ/jzFb4v8rzcvlr+uo9T+mzZY9ue97Dn3ea+izgrpU7ANTCvUbqJ2T1Xd1a+s6dRfZ+YnoGu8N3bS1LGP1fRjL77RvAnYy/lMq3fXqTRmj59i7ye5ctkVr9el0X+V3yfp3lRXRV7syB+em7eYc8Zk5HnJE1bR86jOHie0fE8K2thsa3679ZxVvVDxqM0WWRkLI2fS/pnMavP8shf8BHVT+01Lsr4V2uofmRf4nwZl8U7OYS96wQ1fu9MbsNgO/C2ExeHCcVLS1SwVz/fqBbRRXP1HUzkRY2UGDUv0AjaPFq/NOf1IvjZo//JIFjrAPmgIpon8Q61LM9iFcD6aoC1mZ2qcbUtz4K4OgJcPWYbzsE1tAyupSIk/a6GUhsNkMQiJJGjAkkEkIzIPDWMZQ39T5bvK8YP2T34qQAsdG5cDjIvvAnw7scFADYa0wZQCX72YU2Ms/kt9TXIXzMKDQWkjVczt9MnlEZPcXrXeAeth3hX/SdGS/Lxk6devP8//STyCOs//7V33rX8h9u7PtDKqFjEZVeQ43nHY4IwtlBxBaCwsSJOozJ57fpZIqTqT3jME3J3jZQ0tpfJSOMysmus800xi+vT2p4PuJLLyxu2uGttHwS4FkE9em8dtqIHrI+5aWz13psb+meegNz+E8AtcSeQF9tvudOWBmspBfIlFMhMZZJTGeeUYNmpzOYF+hRTiHQ6m8OD12tIO4KlPGywuzrXhJMtGD1PFDOEKmY0g6IWFFEoiaLDz1QnKaP328OoKNTD/Awo0EaMRUZVhX8JZ8fNOiYDWVy+5cVfFd9Fv2ERzxMaN1gkekjyjLyYJwSA5Gmz/XOs8cSAgQP2ukCKZkdtcQJkz/5R4Vi+0N/xrxX5ylGN3CpPk9k7g3YdLVcpPei5yJfNGTXgBEKD9GGk/QGRdgDSsyKOSgpalLUVNMmeKb7507/jreZmFMyteidlA3+ZFztBaU7eHhxlVXgxnyd0OEp3V42e8k0JTjOyAWTDwQPKBpyUG+2OaMxLUK10vIF5kyXPebFM33n9b45YV4DZ93QszbNF9U3IFyHvkPuj1iF+jjZpZ2qoZaS6jbWRiha4lzAYMGzaGQyKN5GKdcLkYZ1UEG+FgDz7kpmIefWC2Zl5c0wWv9EJJnul8hWvV3lGDcxYIR8BvWh1yZ7QGVAUmEdygA0mX6XkKMTOs2dPrfWQ6yFGOszIA4rJ+B19MMksSm/rN5bJfE4/RigEOzGxZIkWWTakOq7+hoLFyXMV2hb4emIiRAGIhhLuzHYjYYSx2xUJ5HFYSxPCFnchPl5XxAbzN3yI20UBoKk5sT2PqUUSGQC/hVzbH1OuUcjJD88mXkiuXUuzXMOIrdhUcfhLVAVj1Rj1w5Ks8r484rMTWcme1qtGZCY/O/cqk+rZGfOL7pYPpmck5NRUzM+uFN4vxAffrAzaClxqJIG2rQttH6A9e8nzdQ8tcj6k9ZreR3WueQ7FFoTLjVekHFC4QJU/rePiFYa109TNS8ZJgQDJQBOQ7BoT8Sd/JWXLnSR//au+B8We5tZdu2pHUG3nk+XhjsT75IMOx3dP8z55r9DjRUyR98nfsIv6vU/+vrjjz/Y+G5U1WrBPC9AE1ICLnRyeDTxVDXysRw34oOriagBJM2654kPM7cNGWW7IZS56AhqUGWzl07uhixgMozJVvCYHpybGYq62JTw7K7YjWkfaD0jiHFjzMgZEwoBwomJD8yFaClNiPiBL002F2C5ssiXPJGvemG2K6jvv3nyNiiR6Ss265xHrnsiyINjaFj5tyNJE83kDKTEQRZLR6YEq9FtUVG/lzfvtNXIDsSzruqtGbUMsWhZVArFEeviH9Qfa4QyzbJ0lJX/UPgKyTo9nnBuv9eM7XZ9hoLVTZAUH6lzC/hPODnDwB4nzFQn/aAXdP1nQuSsNtXx6ULT55ZfjTwDNAQ5+d1ux8sBYkkyz1PkG+ZB8Vm03Yy7flKsqL5adPlZv4YKenztkxhuGcdvvaBZHRme6iJu1uwKiykZ6Z5ObZzAGGAaYnE3hCQTOliTZNEzJ5QMP3sIEwZAWRpAA0GII4tVVzR7Eq2Sdz6sEEJb0sdhW5rIwybBHx7FHPKPgegB2pIt+xjDc7PJHnbz6Fn20jJd5/VFGnSV5BEHzG208ApYoUP4dPIZgVB6Dw8uEfWoa6mCBFAj5Lx7mMPVoifa80u4HM/8omH9Eldu+QCVUTD/Oxdrljab+XlMbBVHBpriPgpJqXhifclHCzo9YgxBhVXRr9U2ocPQylIsv1tvLgRFiVQTwQYzxJfNMkeMcglVX2qkj0RllRP6jvrwU1x6VBwm8K8cSisi5aSrICXuvOyQH5Qhqv/OMatWe6YV8ZtXd4Yp2EC5ntMyfMd1mTpGfU8QSMMScwpRO1AIiTapy/vwZwN5p/WAIiqMBd/zLAQ7px4GgG00goKgRF5+5jzwB7STsxKWiLZ4Laac0jopsp521QVbuG37wYrKGXW9gFawOIFsTTexBymWzmm/TBmtnqRXAGZMrbXL9y/Vk9iaVanNuiVj9wMbixTthVw5OTy/nrxTysjGkp852Y5iGTF2mWLGGZ9QxJjFKnhcgx3MtHPAORYCURJx8g5jd5hC6U8ECp38Fw/XOPcE50C8cRvGK+4V7kJRL85wu4j/nlJTNNsurdrOOaomfxlBlshQ0u5smdTfoOgqQAU/AwOsqA/FE4ZOXUszmyWsHS+8/G7ob0h2F9LpG55aG1dR/bt4lr+qsju1V1ivalXM7luYEhXqc3Fj7rdZw9bls1IjTseLEN49AgagxtiZx8qG7b7Zb6YXLDn2w/cXlXHsfuvbiJRmDYB+Cjn05BHUujP/m+324golZ39ZVPlwaN7b0WE10g8tpIvSkY/q6Ir8+hF8zLI8pAPdg7oKuRW4fLnLX8yTrHG/aCp8GMhfaY0FJvLbOhb4PMKPECvuCNRhdg9t+ai1kdwyR36aIuJQDH3JER3NYUVEyVqy+wWrsIaFfs/5AAW9GButD6O3+Oy7L9xrwaFPmZCgvypd8QXc9+Ebj/7bA7qHMeiXlTPKLTcVt8suXLeqRZrWkW8rDMAdKznjYSn05LsxHGQn/6DicxwYqoOR3JQz42cbX1DEN3vQBDtDj+c+jT2AVZvu/Pe/5dk84m2UMptW86BJ0vy9St3EtRjkuJybNdtxHqxt/JU9T4wIg2axKQVp3jj4BH9BOcEtYMaUfQGc1Wa6K3HR0vTrFReW3vrAO0nW6opDgg5Rq6bSiIr/RHpcV5QXKO7
X+2+c7/4MraXNZDhlF8B2PPcE+1GCGvyVbcYOZUOSy6Fopa5rjX3St7KNnxgl2uh+SbgglenNMKHX1GHZ+l7wa9FnyI5JXLb6OzcKwGt4R7YLgsB4pZ2EJyV7yxFa0gGAZZdFCUJU2Pl9rDEq5a4XW29ZANB8r0cmL7evszYL46VlGJ+dRHDzPVCgfUrTwQjDiQEOCTshYlHWAeIbmJNRgnU8av8apVu1TOzuOQ/dA5wnBiqc+3ZMIZ/ToXmA/YU9qPpy7cTB3Ttc9NShxeZVIkNEfCBL6WWuP87bQFGV/6/KDiepSJ/8jucHjUHS+NwdiXMIASWJkijeKLoMSgigNqejQpTWKPjVFB3VDwyr6xbzpaSk6Blve+hAlfYo+ZA64UXQ9io4RUPRgQEWXaD5vwubDYbMtKrnWFjYTNxDAZuJmFXGzLaqx1hU3N1yLmWYPwcTZSCygN7RNs8gEzh9A0/mQDOMB/WlkG0U/KXAeVtGHXCk2ij5Q4DysohuGTAolEDhje8DAGRmGbPqKDgJnjAcMnNHFGLLQ83EkpegxIqruX3hGd/l8fOQIlpgRyyNWr+tDkmRx9poUebY0Gq9D45GPoSzBaQOxqUS90hu27BTcXH6ytx2ImpgtU9GhECHDlinRP5dP1bM9gfZpY8tsw5ZJwsQ7RgIjqc23tg1b9gE0nc/lxPaAvjXTa6Po/SjxtRYDK7phy6av6B6/F9Cwim7YMimU+PowougQJX2Kbtiy6Su67wFF9wdU9IsFztNiyzwLMBxYmJGgiy2zYahs2LJparyHAPOKBTmk+tgyDON1QLtYr0n8ZvCUsw2cE+AI+sOi0NeFJozrQaGkQVOey8YIcNnD4gl9b0OKqiBFkcA310eKYpFzbkKow6Qo8gYMobBotdiEUBPTdJ5uQ/6AIRS+2H5p01J0QIoOq+iiQNco+rQUHZCiwyq6qaaXQgmQosiDKGlTdEe0GG0UfVqKDkhRxPqDD6HojmilfBBFb56ofBsp2WaBu/QhT0vYzG9a7VswaMbCaIvfa+w0zGyA2ec2izkIfMNvYKsZVaCHPmvL1aFCBLZcTTmlA5eybxciPE3rrz2p2IBq9gVJvr5g1UKNJYUkSJKtNrTLJIGiFFBZk3WgGvlRYk1DDrVQlJot2mJYxT4ryIEhbb4pDW6H9S0E5PHAyEms5w7awLgXg31tiqH9Pnofhcb0jKQnMYE86AgG3GNVtikxAnu6WJK93U9o0oscGPXmT+u4eCVPI89YkPNUCCMrCvz1upIEGlghf/VrGxlxsVURv0XFXNHFjnYOiHA9VD89ftrAbgLye9Smq3A2UmS8bMxLqMCFF23OpsRVcEVx+3gt197W62e2V2/m3rHYLcd1VNmtw5dSaLdcGMRHsxNNljExikxM0xn6MibGhRZF7W5kB/demPBmZBLycrrVc22RCydrCpVvR9bcTydwJVpJnhe99JaDsNJ8QZ72MKHQeYWGveBpCmG73sRuWaBNCLL+Kx3qSI2yYw3K3rPzoNF1aQ9HsINMM3gJXZcodhiR57kTORsEzfsl9WzQXAFozEqOxVfFoQX2Hzl1v0LRxSzeMqn0VyWYGyJlK/oyWUb0gTcO4jcqcI/5Oqn8W/zpKS/LfNkriY3XyTuZJbUQd9F6Fc8oOs/JL2oe76qPvGWjFhshr1/KcrWunOUH8v8iKV82TzfEkpI/flCaIIt+vq/LeLkmA7M8mr2Qf4vojT6QaF1RiQ/JckF+R+kiL8jpy/XNKluomYYCro/gbhPpzlq0gI5Tw8e5oeZZ6EN5nIibhfpWvs41aKxCvmPQAqR6FqpOPXY3MND+0vY9r9daBdyCAX/C1bmbdSEWqhtJnoYkWxOV5BD3n6BAkpGR5ClJsqyTOTpJDrTbZEGgYiR5vJKsPMYdSJKRxZLc9YmybrrGiLJSUZZdXVdP13iOkZQpSYozVaPX7AKgz+jpWJAyoqxNlL3JirKjff72jChPSZSVL60OJsp+oFuUfSPKUxLlcKqibB9wMCTOCOxDZ7DCmD1nKFCXwKjLlNQlmKy6ONr5NJjr/M/qpW39fVUmy+S/ddIzJ+9jLH7olSz1uzhZPJoIFmwiUWafkjox3yxOjdQG+Y7ABnkjmbIdfjr1DhKhvNXyVNsg36xOTUmU/cvlqPpm9WdKkhKOZEX+eKPn8kvy6o2eWf2Zkij7k52/+Q5CGkQZZhOLWxaYoAE7PHz2oEGDWX0Zq9HxRfPnSHIzgdE5SFw0O2hpIy58s/oyKVFWnj0xlCg7LDzVJ8qw98hjniazd0PCHT2fDkzCmYWAsRohPf66IFIMw5vQJVpjbX/zWQeBexO4PnLs7W+/+wF7qqmONmvAyfdr1m5/WADo6+4Z55s1lo/fMmuf/vgxkBE7s3Z4SCMWWjx2AbRhohpCJSaM1f+0UPrjz2sDlKiyjndqAwyB8rUBZQOg7vK8JI8tWq0ILK0eH9uOHRWG/KBBFagfdg+jiiwRrI4KWGG/x09G+4QLrsjhq5OHtZSQ5vr+/d4ABYECVUDECYI1yPqAgi04f3w2QAmAcrzDpk8fTHAjkh/XP8t4BWetP66/xVGRJdnCzGgSuCLLPoxrKMLVVYGrgM64/WaAkwEOhHEBtJuOoHUD7glc5YGD/Th+3D4Y4CSAa7q19QGnz5LCJL77qIxpb5EZ7d5lIoMT1THk2JxB7Wgo4E8eHg1wEsC5gFIR0MLa1DGElErD53+pNz8wMMpY1ZAvfGExWKezuGgDYiU42gATw+6Pgt0PRa3ZwpHUKoIFw4MVKzjEvWecz8WHJttsUqJ8uRTb0PQamJSkjKS26Xij5zpB7xkKjB7kAW9nJXn6tnVfJCXtY2ycsINOmIM5MknkhLFECPU+GCQJP316/GKQk0AOe7yG4WBIHj6EROB9mqyqxUrr8fHvBkQZ9bMdGRDZtK0eREgKGugkWQibd+sH1T+brVkbV25krpxtCVw527qY029bpgJzSpLCcpgv7vSjroOBrboUaD/TYXu9Z5zt9Nts6dOI8iRE2RoLaXe0KNsI6RZlQ9pNSpTHQsUcLcrugTMUiLJhFSclyqOpdj5WlD0um1eDKENW8QuJZ1++R1m0mMg20UMXafEoYsEyrq4ircbFaCfOR1RZbevPbwYumPXiuVyeNUYWBEyY96IEL0gbfjVAifSKJSj0wcQ2LFMPEyQGv2/SMrmOTO8HMZPr8/ULQsBY+qx6wGCW4M84fb5epdG7gQuyt9yW88MaQQST/z5n63j5RL6gwQokigX4MFaBKG9aCVYw3+/r7b1ZIpGZwmzoa7BWiUMskbDNeFvQPWxM8Z3I1+A1bFCYYJEkbYyRZ9cskcMABtI2OMBsWzB96fI1kKBW8tvPrwYnqFiudxgntuuZepwgmfH9dptgY5CS8N8FRlCfTkEe4x/3d3Qgia5o/RGsmmQu49o4I4cBDh2bry8PQoivyI1UsXGyjRRuwvE7MuH7FwbOzbxlrTe6i/BDr1z2tOh2u6EqCUu5j5HdAj7kN1UOWIm4+v3fbQQZpE+bOK2qvK1tEwwvW
lLLkz2tVxVavBV7S4iY2Nbj5z+NhZOYwizQ60Rg4EJtBi5UZ+BkFgGdsGP8rq0biz6dD2wB6Vd9jIuEYBMXewX2SOOHRcZvJGvdFt//L6j7We23lcyY7zmju0Ao2g7BvUGh5fhe/bt7Nd91b1rv2ieb4b4PCZy+D1FooRsTw2ffW4bgkuVNQn5qxsDiCnkTDtWTLK4tai5V4VdPnF/NxCkHo+3zsYEISE9EMSsBUmHjbhMbqIwNWIFSZ3pkR156eoSFxeGB6RGWm4RHTY844Cdk/8RQBDkWviFBOUK+5/iWy9nRwPFvdm8GzEvRMQkqbPJrdE+/7uGp6h5omn+k7lUKw2tfcMN1qZTXP8yZghA119egZhj6mnfxS/Sa5AXtYWTdp7lpACebuwC2XxA0ytTWOKVpf2Qs5mA5vR0TyBRXe/Nz2HacpwulrY1n9fdRD0faR923wt4zzm9jwFrZtwzj35ZJabaE2GP7QPfZIbeEaHSvXfT+N9P9UgY5LwDtviBybHNv9cDBtOM/oyR7InPB7wEeQEqA537wEN+rLbSQqE+Bpm6JxEap8zlMhdEx3gjnZUisKIQin8VX4aKctHrqhzfe7od3PFzvBoF3j3ZvkOsLLtNaZe35kJODLPJnkedl+/CqUimfx/SI/wE=
\ No newline at end of file
+7V1dd6M40v41uUwOQnxe5qPTPXu6ezPT7767e0lsYrODwYtx0plfvxJGGFQyYFvCkFZ2T48tA8Y8VaWqR1WlK3y/+vk5C9bLb+k8jK9MY/7zCj9cmSZCnkP+Q0fedyOu6e0GFlk0Lw/aD/yI/grLQaMc3UbzcNM4ME/TOI/WzcFZmiThLG+MBVmWvjUPe0nj5reug0UIBn7MghiO/jOa58vdqGcb+/EvYbRYsm9GRvnJczD7c5Gl26T8visTvxR/u49XAbtWefxmGczTt9oQ/nSF77M0zXevVj/vw5g+W/bYduc9Hvi0uu8sTPJeJ2C8O+U1iLchu+fizvJ39jSK3xPSM9AVvntbRnn4Yx3M6KdvBH8ytsxXcfnxIg429Okb5PUsXUWz8vUmz9I/w/s0TrPiqtiZeeHzS/UJe86YjLxEcVw7ch6E3suMjqdJXgqLaZTva8cZxR8ZD+JokZCxOHzJ6dtsVp7lkHfwEZVP7TXM8vBnbah8ZJ/DdBXm2Ts5hH1qeSV+70xu/VLA3/biYjGhWNZEBTvl8w1KEV1UV9/DRF6USIlRczyFoM2DzbI6rxXBTw79Xx8ESx0gX5QF8yjco5akSSgDWFcOsASfBq6m4RgQV0uAq8Nswzm4+obGNZeEpNvUUGqjAZJYhCSyZCCJAJIBmaeGsay++2C4rmT8kNmCnwzAfOvG5iBz/BsP7/9sAGClMXUApeBndmtimMxvqa9B3s0oNBSQOl7V3E6fUBw8h/Fd5R3UHuJd8T8xWj0fP3nq2fu/6DeRR1i+/ffBeddwH2/v2kDLg2wR5k1BDucNjwnCWEPFFoDCxrIwDvLotelniZAqv+EpjcjdVVJS2V4mI5XLyK6xSbfZLCxPq3s+4Eo2L2/Y4K61exDgWgT14L122JoesDnmprHRem+27555ArLbTwC3xJ1AXux+5V5bKqx7KZDbQ4H0VNZzKuOcEtx3KjN5gT7FFCKVzubw4LUa0oZgSQ8bzKbOVeFkDUbHEcUMvowZTaOoBEXk90TR4meqk5TR+eVhlBTqYX4GFGgjxiKjKsO/hLPjdhOSgSTM39Lsz4Lvor8wC+cRjRsMEj1EaUJezCMCQPS83b0dazwxYOCAnSaQotlRWZwA2bN/FDjmS/pv+HNNfnJQIrdO42j2zqDdBKt1TA96ydJVdUYJOIFQI92NtDsg0hZAepaFQU5BC5K6gkbJC8U3ff5PuNPchIK5U+8or+DP02wvKNXJu4ODpAgv5vOIDgfx/qrBc7rNwWlaNoBsWHhA2YCTcqXdAY15CaqFjlcwb5PoJc1W8Tuv/9URmwIw856OxWmyKH4J+SHkE3J/1DqEL8E2bkwNpYwUt7HRUlED9xIGA4ZNe4NB8SZSsYmYPGyiAuKdEJBnnzMTMS9eMDszr45Jwjc6wSSvVL7CzTpNqIEZK+QjoBeNJtnjWwOKAvNIOthg8lNyjkJsPHv21GoPuRxipMOMPKCQjN/RBxPNgvi2/GAVzef0a4RCsBcToy/R0pcNKY4rf6FgcfJchTYFvp6YCJEAoqaEG7PdSBhhbDdFAjkc1r0JYYO7EB+vS2KD+Rvu4naRB2hqTmzPY2pRjwyAX0KuzY8p18jn5IdnEy8k17ahWK5hxJZtizh8GRTBWDFG/bAoKbwvh/jsRFaS5826EpnJz86tyiR7dsb8orvhgukZCTk1GfOz3QvvJfHBt2uNtgSXGvVA21SFtgvQni3TdNNCi5wPabmm91Gda55DMQXhcuUVSQcULlClz5swe4Vh7TR185JxkidA0lMEJLvGRPzJn1FecyfJu3+X9yDZ09y5a1f1CKrufLI83JF4n3zQYbn2ad4n7xU6vIhJ8j75G7ZRu/fJ3xd3/NneZ6WyWgsOaQGagBpwsZPFs4GnqoGL1agBH1RdXA0gacYtV3yIuX3YKMv2ucxFR0CDMoMtfXrXdBGDYVSmitdk79TEWMzVtvhnZ8U2ROtI+wFJnI41L21AehgQTlRMaD5ES2FSzAdkaZqpELuFTbbkGSXVB7NtVvzm/YevQRYFz7Fe9zxi3RMZBgRb2cKnCVmaYD6vICUGIosSOj1QhX4LsuKjtPq8vkauIe7Luu6rUesQi5ZFpUDcIz38w/oD9XCGWbbGkpI7ah8BGafHM9aNU/tzrabPMNDaKTK8jjoXv/2EswMc/EHifEnCP1pBd08WdO5KQy2fdoo2v/xy/AmgOUDnbzclKw+MJck0S51vkA/JZ9U2M+bSbb4u8mLZ6WP1Fi7o+dlDZrxhGLf9imZxZHSmjbhZuykgsmykcza5eQZjgGGAydkUnkDgbEmUTMOUXD7w4C2M5w1pYQQJADWGIFxflexBuI426bxIAGFJH4tdZS4LkzR7dBx7xDMKtgNgR6roZwzDzSZ/1Mirr9FHq3CVll+l1bknjyBofqOMR8A9CpR/BY/BG5XHYPEyYZ6ahjpYIAVC/ouHOUw9aqI9L7T7Uc8/EuYfUeW2K1AJGdOPdbF2eaOpv1fURkFUsCnuoyClmhfGp1yUsPcjNiBEWGfNWn0dKhy9DGXji/X2smCEWBQBfBBjfMk8U2RZXbCqSju1enRGGZH/qC4vxTZH5UEC78oyhCJybpoKsvzW6w7JQVmC2u80oVp1YHoh31l0d7iiHYTzGS3zZ0y3nlP6zyliCRhiTmFKJ2oBEUdFOX/6AmBvtH7QBMXRgFvu5QCH9ONA0I0mEJDUiIvP3EeOgHYSduKS0RbPhrRTHAZZstfO0iBL9w0/eDFZxa5XsApWB5CpiCZ2IOWyXc93aYOls1QL4LTJ7W1y3cv1ZHYmlWpzbolY+cDG4sVbflMOTk8v56/k87IxpKfOdmOYhkxdplixhGfUMSYxSo7jIcuxDezxDoWHpEScfIOY/eYQqlPBPKt9BcN2zj3B6ugXDqN4yf3CHUjKxWlKF/FfUkrKJtvVVb1ZR7HET2OoPFoJmt1Nk7obdB0FyIAjYOBVlYE4ovDJiSlm8+i1gaXz3y3dDemOQnpdonNLw2rqP1efkldlVsfuKps17cq5G4tTgkI5Tm6s/lFtuPheNqrF6Vhx4ptHIE/UGFuROLnQ3dfbrbTCZfou2P7icq69C1178ZKMRrANQcu8HIIqF8Z/8f0+bMHErG7rKhcujWtbeqwm2t7lNBF60iF9XZBfH8KvGZbHFIDbmbugapHbhYvc5TzJOsfrtsKngcyF9lhQEq+sc6HrAswoscJ+YAlG0+DWn1oN2T1D5NYpIi7lwIUc0dEcVpDljBUrb7AYe4zozyy/UMCbkcHyEHq7/wnz/L0EPNjmKRlKs3yZLuiuB19p/F8X2AOUWauknEl+sam4Tn65fYt6erNavVvKwzAHSs542Ep1OS7MRxkJ/2hZnMcGKqD670ro8bONq6hjGrzpDg7Q4fnPo09gFWaHfz3v+TZPOJtl9KbVvOgSdL8rUrdxLUZZNicm1XbcR6sbfyVHUeMCINmsSqG37hx9Au7QTnBLWDKl70FnNVqts1R3dL06xUXlt74wOuk6VVGI90FKtVRaUZHfaI7LivIC5Zxa/+
3ynf/BlZS5LF1GEfzGY08wuxrM8LdkSm4w44tcFlUrZVVz/IuulX30zDjBTvdD0g1+j94cE0pdPYad3yevem2W/IjkVYOvYzMwrIa3RLsgWKxHyllYQrKXPLE1LSBYBUmwEFSljc/XGoNS7luhtbY1EM3HUnTyYvs6OzMvfH7po5PzIPReZjKUD0laeCEYcaAhQSdkLMo6QDxDcxJqsM4nDl/DWKn2yZ0dx6F7oPOEYMVTne71CGfU6J5nPmOn13w4t0Nvbp2ue3JQ4vIqkSCj3xMk9LPWHudtoSnK/lblBxPVpU7+R3KDx6HofG8OxLiEAZLEyBSvFb0PSgiiNKSiQ5dWK/rUFB3UDQ2r6Bfzpqel6BhseetClNQp+pA54FrR1Sg6RkDRvQEVvUfzeR02d4fNpqjkWlnYTNxAAJuOm2XEzaaoxlpV3FxxLXqa7YKJs5FYQG8om2aRDpw/gKbzIRnGA/rTyNSKflLgPKyiD7lSrBV9oMB5WEXXDFkvlEDgjM0BA2ekGbLpKzoInDEeMHBGF2PIfMfFQS9FDxFRdffCM7rN5+MjS7DEjFgesXxdH5IkC5PXKEuTldZ4FRqPXAxlCU4biE0l8pVes2Wn4Gbzk71pQdTEbJmMDoUIabZMiv7ZfKqe6Qi0TxlbZmq2rCdMvGMkMJLKfGtTs2UfQNP5XE5sDuhbM73Wit6OEl9rMbCia7Zs+oru8HsBDavomi3rhRJfH0YUHaKkTtE1WzZ9RXcdoOjugIp+scB5WmyZYwCGAwszElSxZSYMlTVbNk2NdxBgXrEgh1QdW4ZhvA5oF+M1Ct80nv1sA+cEWIL+sMh3VaEJ43pQKKnR7M9lYwS47GHxhL63JkVlkKJI4JurI0WxyDnXIVQ3KYqcAUMoLFot1iHUxDSdp9uQO2AIhS+2X9q0FB2QosMquijQ1Yo+LUUHpOiwiq6r6XuhBEhR5ECUlCm6JVqM1oo+LUUHpChi/cGHUHRLtFI+iKJXT7R/G6m+zQL36UOOkrCZ37TaNWDQjIXRFr/X2GmYmQCzT3UWcxD4ht/AVjGqQA9d1parQYUIbLmcckoLLmXfLkR46tZfB1KxAdXsCpJ8XcGqhRxLCkmQKFlvaZdJAkUuoLIm60BV8iPFmvocar4oNVu0xbCMfVaQBUPadJtr3Lr1zQfk8cDI9VjPHbSBcSsGh9oUQ/t99D4KlekZSU9iArnXEAy4x2rfpsQI7Oli9OztfkKTXmTBqDd93oTZK3kaacKCnOdMGFlR4K83hSTQwAq565+7yIiLrbLwLcjmki52tHNAhOux+Gvx0wZ2E5DbojZNhTORJONlYl5CBS68aHM2Ka6CLYrbx2u5DrZeP7O9ejX3jsVuWbYly251X0qi3bJhEB/MTjRZ2sRIMjFVZ+jLmBgbWhS5u5F17r0w4c3IesjL6VbPNkUuXF9TKH07sup+GoEr0UryvOildxyEEacL8rSHCYXOKzRsBU9RCNv0JvbLAnVCkPVfaVBHcpQdK1D2lp0Hta739nAEO8hUg5fQ9R7FDiPyPPciZ4Kg+bCkng2aLQCNWcmx+KrYN8D+I6fuVyi6mMFbJpn+ag/mhkjZmr6MVgF94JWD+JUK3FO6iQr/Fj88p3merlolsfI6eSczpxbiLtiswxlF5yX6Sc3jXfGVt2zUYCPk9TLP15vCWX4k/19E+XL7fEMsKXnzndIESfDjfZOHqw0ZmKXBbEn+mwVv9IEEm4JKfIxWC/JvEC/SjJy+2tysk4Wcacjj+gjuN5FurEUL6Dg5fJztK56FPpTHibhZqG3l61yDxirkGwbNQ7JnoeLUY3cDA+0vTddxWq2Vxy0Y8CdcnbtZF2KhupbkaUiyMVFJ9nH7CRIkGWlJnpIk93UyRyfJnnKbLAhUtCSPV5Klx7gDSTIyWJK7OlFWTddoUZYqyn1X1+XTNY6lJWVKkmJN1ehVuwCoM3oqFqS0KCsTZWeyomwpn78dLcpTEmXpS6uDibLrqRZlV4vylETZn6oomx0ORo8zPLPrDFYYc+AMCeriaXWZkrp4k1UXSzmfBnOd/794aRp/X+fRKvqrTHrm5H2MxQ+tkiV/FyeDRxPBgk0kyuyTUifm6sWpkdog1xLYIGckU7bFT6dOJxHKWy1Htg1y9erUlETZvVyOqqtXf6YkKf5IVuSPN3o2y2JSZ/T06s+URNmd7PxtO8ee4bKdA9UJP8w/Fjc50GEGtnj4zEHDDL1eM1Yz5Ypm3JFkcwIz1Ul1YN5MSac6XL1eMylRlp5vMZQoWyygVSfKsFvJUxpHs3dN2x09nw5M2+mlg7EaITUePjQPju3d+DbRGmP3L5+n4Nk3nk18cHP3r9v8ggP1V0ebNeDkuyXPd9CsOXxMzJ1xvlljGfw1s/bw+/eBjNiZ1cZDGjHf4JeFPGjDRFWHUkwYqxiqofT7H9caKFEtHu/UehgC5SoDygRA3aVpTh5bsF4TWGpdQXY9PgoM+UGNKlA/bHejigwRrJYMWGGHyAetfcIlWmTx9czDWkpIc337dq+BgkCBuiHiBMGqZXVAwaad3z9poARAWU636VMHE9y65Pv1jzxcw1nr9+uvYZAlUbLQM1oPXJFhduPqi3C1ZeAqoDNuv2rg+gBnQ8sJgLMEzR5wS+DaHzjYweP77aMGrgdwVX+3NuDUWVKY9ncf5CHtRjKj/b50ZHCiOvocmzOoHfUF/MnjkwauB3A236ZRRAsrU0cfUioVn/+53C5Bw9jHqvp8qQyLwRq9yEVbFkvB0QSYaHZ/FOy+L2rm5o+kuhEsGHbWuGCWCamsxsXXNeeTEuWR1LgcL8q2yVMK0kUZsju3s5w8fdO4z6Kc9rPVU2vn1GphzuSIpla2vC1/ZoXUz8PD02eNXA/kMF8K72FvSHbVh/TOfRytiyUo4+np7xrEPuoHGToRiGzalg8ipHo0dD1jS4OfEwfVP5OtRGpXbmSunGkIXDnTuFh9lckEVUvKJCSFZaZe3OlHTdYFG2WBx+H4lXP6+TPOdvpNNl1qUZ6EKBtjoWKOFmUTIdWirEsFJyXKY6FijhZlu+MMCaKsWcVJifJoql6PFWWHy9FUIMqQVfxM4tnltyAJFhPZLnhoHopHEQsW51SV3lQuRj0dOqDKahp/fNVwwVwGx+ayZzEyIGDCbAYpeEHa8IsGSqRXbNm5DSa2cZV8mCAx+G0b59F1oCv6xUyuy2elCwFjSZHyAYO5Xz/C+OV6HQfvGi7I3nJbjw9rBBFM6fqUbMLVM/mBGiuQ/sNqY1oVS5QNKwUrmMX15fZeL5H0mcJM6GuwlnlDLJGwTVlr0D1udUmVyNfgNWxQmGDpG213kCbXLJFDAwbSNjjATFMwfanyNViRed05/Prji8YJKpbtdOPEdr+SjxMkM77d7hJsNFI9/HeBEVSnU5DH+Mf9HR2IgitaVQJr4ZjLuNHOSDfAvmXyVcOeD/EVuZEyNtA1kcTNGH5FJvzwwsC5mbesoUJzEX7olcuWVs12M1QlYSn3NX23Avf5zXU9Vvgrfx9wE0EG6WEbxkXtrrFrbeAEK
2p5kufNukCLt2JvERET03j69Ie2cD2mMAN0sBAYOF+ZgfPlGbg+i4CW3zB+18aNQZ/OB7aA9Kc+hVlEsAmzgwJ7pPHDIuM3krVug+/q5pVdig7bSmbMD5zRXCAUtcW3b5BvWK5T/tu8mmvbN7VPzZPNcNuXeFbbl0i00JWJ4bPvDU1w9Qzv+D28fJbV08WbcKieZHFNUcugAr9y4vyiJ85+MBoOHxuIgHREFLMUICW2Y9axgczYgBUoNaZHduSlp0dYLup3TI+WgVvP6JoeMd8s0nNPDEUQ9swbEpQj5DqWa4DrWu7N/kOPeSkqJkGJrVu17qnXvZFsPX+87mHMy/iRukcVhtc+74brPdhb/yzEmQIfYXZ9BWqGoa95Fy6D1yjNaGca4z5OdVuvnu6KgzhJErU/VNYOo2pqoy3mYDm9DRPI0iuVt7SGzaR5urC3tXGc9u7Y/ki7Y7sO33lGdnds1qC8Zhh/W0W5bvR/wPZZPTo6qcs2ZrpXL3r/Tfc07IOcy/crEM1abJNn+cDBtOM/gih5JnOBBq8bPMR34PINJOpToKgHHrFR8nwOXWEks28R6znZdFGkF7ufvnrq+jfO/o+3QbZzg8CnR7s3yHYFl6mtsrZ8ibwg6+6vh+/p42//ffm/5dPd12jzKf3X365NFnZqd122ghS//+FndbP03Xv5Tij/Am0anLiw+I2fO1t5WRZPXLS18urzjehk0gLcCpsYhtImC2bu397/MssswGPoJdFVzYUN2swPl2V8AE7dGmV4s0kezd5q0jfvV3UTep5B7dFWsU2zL2CP+eX/ztaKXvsZnfYYnH+yPbbxIY0+2x6Tt1ma5vXDizrsdB7SI/4H
\ No newline at end of file
diff --git a/docs_raw/source/selecting_an_algorithm.rst b/docs_raw/source/selecting_an_algorithm.rst
index c867191..e56f802 100644
--- a/docs_raw/source/selecting_an_algorithm.rst
+++ b/docs_raw/source/selecting_an_algorithm.rst
@@ -190,6 +190,14 @@ The algorithms are ordered by their release date in descending order.
learning stability and speed, both for discrete and continuous action spaces.
+
+
+ ACER
+
+ Similar to A3C, with the addition of experience replay and off-policy training. To reduce variance and
+ improve stability, it also employs bias correction and trust region optimization techniques.
+
+
DDPG
diff --git a/img/algorithms.png b/img/algorithms.png
index ed6b475..983df67 100644
Binary files a/img/algorithms.png and b/img/algorithms.png differ
diff --git a/rl_coach/agents/acer_agent.py b/rl_coach/agents/acer_agent.py
new file mode 100644
index 0000000..a539e8c
--- /dev/null
+++ b/rl_coach/agents/acer_agent.py
@@ -0,0 +1,198 @@
+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from typing import Union
+import numpy as np
+
+from rl_coach.agents.policy_optimization_agent import PolicyOptimizationAgent
+from rl_coach.architectures.embedder_parameters import InputEmbedderParameters
+from rl_coach.architectures.head_parameters import ACERPolicyHeadParameters, QHeadParameters
+from rl_coach.architectures.middleware_parameters import FCMiddlewareParameters
+from rl_coach.base_parameters import AlgorithmParameters, NetworkParameters, AgentParameters
+from rl_coach.core_types import Batch
+from rl_coach.exploration_policies.categorical import CategoricalParameters
+from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters
+from rl_coach.spaces import DiscreteActionSpace
+from rl_coach.utils import eps, last_sample
+
+
+class ACERAlgorithmParameters(AlgorithmParameters):
+ """
+ :param num_steps_between_gradient_updates: (int)
+        Every num_steps_between_gradient_updates transitions will be considered as a single batch and used for
+ accumulating gradients. This is also the number of steps used for bootstrapping according to the n-step formulation.
+
+ :param ratio_of_replay: (int)
+        The average number of off-policy training iterations in each ACER iteration; the actual number is drawn from a Poisson distribution with this mean.
+
+ :param num_transitions_to_start_replay: (int)
+ Number of environment steps until ACER starts to train off-policy from the experience replay.
+        This emulates a heat-up phase where the agent learns only on-policy until there are enough transitions in
+ the experience replay to start the off-policy training.
+
+ :param rate_for_copying_weights_to_target: (float)
+ The rate of the exponential moving average for the average policy which is used for the trust region optimization.
+ The target network in this algorithm is used as the average policy.
+
+ :param importance_weight_truncation: (float)
+ The clipping constant for the importance weight truncation (not used in the Q-retrace calculation).
+
+ :param use_trust_region_optimization: (bool)
+        If set to True, the gradients of the network will be modified with a term dependent on the KL divergence between
+ the average policy and the current one, to bound the change of the policy during the network update.
+
+ :param max_KL_divergence: (float)
+        The upper bound parameter for the trust region optimization; use_trust_region_optimization needs to be set to True
+ for this parameter to have an effect.
+
+ :param beta_entropy: (float)
+        An entropy regularization term can be added to the loss function in order to control exploration. This term
+ is weighted using the beta value defined by beta_entropy.
+ """
+ def __init__(self):
+ super().__init__()
+ self.apply_gradients_every_x_episodes = 5
+ self.num_steps_between_gradient_updates = 5000
+ self.ratio_of_replay = 4
+ self.num_transitions_to_start_replay = 10000
+ self.rate_for_copying_weights_to_target = 0.99
+ self.importance_weight_truncation = 10.0
+ self.use_trust_region_optimization = True
+ self.max_KL_divergence = 1.0
+ self.beta_entropy = 0
+
+
+class ACERNetworkParameters(NetworkParameters):
+ def __init__(self):
+ super().__init__()
+ self.input_embedders_parameters = {'observation': InputEmbedderParameters()}
+ self.middleware_parameters = FCMiddlewareParameters()
+ self.heads_parameters = [QHeadParameters(loss_weight=0.5), ACERPolicyHeadParameters(loss_weight=1.0)]
+ self.optimizer_type = 'Adam'
+ self.async_training = True
+ self.clip_gradients = 40.0
+ self.create_target_network = True
+
+
+class ACERAgentParameters(AgentParameters):
+ def __init__(self):
+ super().__init__(algorithm=ACERAlgorithmParameters(),
+ exploration={DiscreteActionSpace: CategoricalParameters()},
+ memory=EpisodicExperienceReplayParameters(),
+ networks={"main": ACERNetworkParameters()})
+ @property
+ def path(self):
+ return 'rl_coach.agents.acer_agent:ACERAgent'
+
+
+# Actor-Critic with Experience Replay - https://arxiv.org/abs/1611.01224
+class ACERAgent(PolicyOptimizationAgent):
+ def __init__(self, agent_parameters, parent: Union['LevelManager', 'CompositeAgent']=None):
+ super().__init__(agent_parameters, parent)
+ # signals definition
+ self.q_loss = self.register_signal('Q Loss')
+ self.policy_loss = self.register_signal('Policy Loss')
+ self.probability_loss = self.register_signal('Probability Loss')
+ self.bias_correction_loss = self.register_signal('Bias Correction Loss')
+ self.unclipped_grads = self.register_signal('Grads (unclipped)')
+ self.V_Values = self.register_signal('Values')
+ self.kl_divergence = self.register_signal('KL Divergence')
+
+ def _learn_from_batch(self, batch):
+
+ fetches = [self.networks['main'].online_network.output_heads[1].probability_loss,
+ self.networks['main'].online_network.output_heads[1].bias_correction_loss,
+ self.networks['main'].online_network.output_heads[1].kl_divergence]
+
+ # batch contains a list of transitions to learn from
+ network_keys = self.ap.network_wrappers['main'].input_embedders_parameters.keys()
+
+ # get the values for the current states
+ Q_values, policy_prob = self.networks['main'].online_network.predict(batch.states(network_keys))
+ avg_policy_prob = self.networks['main'].target_network.predict(batch.states(network_keys))[1]
+ current_state_values = np.sum(policy_prob * Q_values, axis=1)
+
+ actions = batch.actions()
+ num_transitions = batch.size
+        Q_head_targets = np.copy(Q_values)  # copy so that writing targets below does not mutate the Q predictions
+
+ Q_i = Q_values[np.arange(num_transitions), actions]
+
+ mu = batch.info('all_action_probabilities')
+ rho = policy_prob / (mu + eps)
+ rho_i = rho[np.arange(batch.size), actions]
+
+ rho_bar = np.minimum(1.0, rho_i)
+
+ if batch.game_overs()[-1]:
+ Qret = 0
+ else:
+ result = self.networks['main'].online_network.predict(last_sample(batch.next_states(network_keys)))
+ Qret = np.sum(result[0] * result[1], axis=1)[0]
+
+ for i in reversed(range(num_transitions)):
+ Qret = batch.rewards()[i] + self.ap.algorithm.discount * Qret
+ Q_head_targets[i, actions[i]] = Qret
+ Qret = rho_bar[i] * (Qret - Q_i[i]) + current_state_values[i]
+
+ Q_retrace = Q_head_targets[np.arange(num_transitions), actions]
+
+ # train
+ result = self.networks['main'].train_and_sync_networks({**batch.states(network_keys),
+ 'output_1_0': actions,
+ 'output_1_1': rho,
+ 'output_1_2': rho_i,
+ 'output_1_3': Q_values,
+ 'output_1_4': Q_retrace,
+ 'output_1_5': avg_policy_prob},
+ [Q_head_targets, current_state_values],
+ additional_fetches=fetches)
+
+ for network in self.networks.values():
+ network.update_target_network(self.ap.algorithm.rate_for_copying_weights_to_target)
+
+ # logging
+ total_loss, losses, unclipped_grads, fetch_result = result[:4]
+ self.q_loss.add_sample(losses[0])
+ self.policy_loss.add_sample(losses[1])
+ self.probability_loss.add_sample(fetch_result[0])
+ self.bias_correction_loss.add_sample(fetch_result[1])
+ self.unclipped_grads.add_sample(unclipped_grads)
+ self.V_Values.add_sample(current_state_values)
+ self.kl_divergence.add_sample(fetch_result[2])
+
+ return total_loss, losses, unclipped_grads
+
+ def learn_from_batch(self, batch):
+ # perform on-policy training iteration
+ total_loss, losses, unclipped_grads = self._learn_from_batch(batch)
+
+ if self.ap.algorithm.ratio_of_replay > 0 \
+ and self.memory.num_transitions() > self.ap.algorithm.num_transitions_to_start_replay:
+ n = np.random.poisson(self.ap.algorithm.ratio_of_replay)
+ # perform n off-policy training iterations
+ for _ in range(n):
+ new_batch = Batch(self.call_memory('sample', (self.ap.algorithm.num_steps_between_gradient_updates, True)))
+ result = self._learn_from_batch(new_batch)
+ total_loss += result[0]
+ losses += result[1]
+ unclipped_grads += result[2]
+
+ return total_loss, losses, unclipped_grads
+
+ def get_prediction(self, states):
+ tf_input_state = self.prepare_batch_for_inference(states, "main")
+        return self.networks['main'].online_network.predict(tf_input_state)[1:]  # index 0 holds the Q head values
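Note that learn_from_batch draws the number of off-policy updates from a Poisson distribution, so ratio_of_replay is the average replay ratio per iteration rather than a fixed count. A toy check of that design choice (illustrative)::

    import numpy as np

    draws = np.random.poisson(lam=4, size=100000)  # ratio_of_replay = 4
    print(draws.mean())  # close to 4.0 on average; individual iterations vary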
diff --git a/rl_coach/agents/agent.py b/rl_coach/agents/agent.py
index 4a1fbf1..dd0f3da 100644
--- a/rl_coach/agents/agent.py
+++ b/rl_coach/agents/agent.py
@@ -1026,7 +1026,7 @@ class Agent(AgentInterface):
"""
Collect all of agent's network savers
:param parent_path_suffix: path suffix of the parent of the agent
- (could be name of level manager or composite agent)
+ (could be name of level manager or composite agent)
:return: collection of all agent savers
"""
parent_path_suffix = "{}.{}".format(parent_path_suffix, self.name)
diff --git a/rl_coach/agents/policy_optimization_agent.py b/rl_coach/agents/policy_optimization_agent.py
index 390edcd..43dc3db 100644
--- a/rl_coach/agents/policy_optimization_agent.py
+++ b/rl_coach/agents/policy_optimization_agent.py
@@ -149,7 +149,7 @@ class PolicyOptimizationAgent(Agent):
action_probabilities = np.array(action_values).squeeze()
action = self.exploration_policy.get_action(action_probabilities)
action_info = ActionInfo(action=action,
- action_probability=action_probabilities[action])
+ all_action_probabilities=action_probabilities)
self.entropy.add_sample(-np.sum(action_probabilities * np.log(action_probabilities + eps)))
elif isinstance(self.spaces.action, BoxActionSpace):
diff --git a/rl_coach/architectures/head_parameters.py b/rl_coach/architectures/head_parameters.py
index 0aab2b6..ccbde7f 100644
--- a/rl_coach/architectures/head_parameters.py
+++ b/rl_coach/architectures/head_parameters.py
@@ -176,3 +176,13 @@ class RainbowQHeadParameters(HeadParameters):
dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
loss_weight=loss_weight)
+
+
+class ACERPolicyHeadParameters(HeadParameters):
+ def __init__(self, activation_function: str ='relu', name: str='acer_policy_head_params',
+ num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
+ loss_weight: float = 1.0, dense_layer=None):
+ super().__init__(parameterized_class_name="ACERPolicyHead", activation_function=activation_function, name=name,
+ dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
+ rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
+ loss_weight=loss_weight)
diff --git a/rl_coach/architectures/tensorflow_components/architecture.py b/rl_coach/architectures/tensorflow_components/architecture.py
index 3a3802f..4af1622 100644
--- a/rl_coach/architectures/tensorflow_components/architecture.py
+++ b/rl_coach/architectures/tensorflow_components/architecture.py
@@ -350,7 +350,7 @@ class TensorFlowArchitecture(Architecture):
importance_weight = np.ones(target_ph.shape[0])
else:
importance_weight = importance_weights[placeholder_idx]
- importance_weight = np.reshape(importance_weight, (-1,) + (1,)*(len(target_ph.shape)-1))
+ importance_weight = np.reshape(importance_weight, (-1,) + (1,) * (len(target_ph.shape) - 1))
feed_dict[self.importance_weights[placeholder_idx]] = importance_weight
diff --git a/rl_coach/architectures/tensorflow_components/heads/__init__.py b/rl_coach/architectures/tensorflow_components/heads/__init__.py
index 7e64234..5631fec 100644
--- a/rl_coach/architectures/tensorflow_components/heads/__init__.py
+++ b/rl_coach/architectures/tensorflow_components/heads/__init__.py
@@ -11,6 +11,7 @@ from .q_head import QHead
from .quantile_regression_q_head import QuantileRegressionQHead
from .rainbow_q_head import RainbowQHead
from .v_head import VHead
+from .acer_policy_head import ACERPolicyHead
__all__ = [
'CategoricalQHead',
@@ -25,5 +26,6 @@ __all__ = [
'QHead',
'QuantileRegressionQHead',
'RainbowQHead',
- 'VHead'
+ 'VHead',
+ 'ACERPolicyHead'
]
diff --git a/rl_coach/architectures/tensorflow_components/heads/acer_policy_head.py b/rl_coach/architectures/tensorflow_components/heads/acer_policy_head.py
new file mode 100644
index 0000000..567cfb5
--- /dev/null
+++ b/rl_coach/architectures/tensorflow_components/heads/acer_policy_head.py
@@ -0,0 +1,126 @@
+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import tensorflow as tf
+
+from rl_coach.architectures.tensorflow_components.layers import Dense
+from rl_coach.architectures.tensorflow_components.heads.head import Head
+from rl_coach.base_parameters import AgentParameters
+from rl_coach.core_types import ActionProbabilities
+from rl_coach.spaces import DiscreteActionSpace
+from rl_coach.spaces import SpacesDefinition
+from rl_coach.utils import eps
+
+
+class ACERPolicyHead(Head):
+ def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, network_name: str,
+ head_idx: int = 0, loss_weight: float = 1., is_local: bool = True, activation_function: str='relu',
+ dense_layer=Dense):
+ super().__init__(agent_parameters, spaces, network_name, head_idx, loss_weight, is_local, activation_function,
+ dense_layer=dense_layer)
+ self.name = 'acer_policy_head'
+ self.return_type = ActionProbabilities
+ self.beta = None
+ self.action_penalty = None
+
+ # a scalar weight that penalizes low entropy values to encourage exploration
+ if hasattr(agent_parameters.algorithm, 'beta_entropy'):
+ # we set the beta value as a tf variable so it can be updated later if needed
+ self.beta = tf.Variable(float(agent_parameters.algorithm.beta_entropy),
+ trainable=False, collections=[tf.GraphKeys.LOCAL_VARIABLES])
+ self.beta_placeholder = tf.placeholder('float')
+ self.set_beta = tf.assign(self.beta, self.beta_placeholder)
+
+ def _build_module(self, input_layer):
+ if isinstance(self.spaces.action, DiscreteActionSpace):
+ # create a discrete action network (softmax probabilities output)
+ self._build_discrete_net(input_layer, self.spaces.action)
+ else:
+ raise ValueError("only discrete action spaces are supported for ACER")
+
+ if self.is_local:
+ # add entropy regularization
+            if self.beta is not None:
+ self.entropy = tf.reduce_mean(self.policy_distribution.entropy())
+ self.regularizations += [-tf.multiply(self.beta, self.entropy, name='entropy_regularization')]
+ tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, self.regularizations)
+
+ # Truncated importance sampling with bias corrections
+ importance_sampling_weight = tf.placeholder(tf.float32, [None, self.num_actions],
+ name='{}_importance_sampling_weight'.format(self.get_name()))
+ self.input.append(importance_sampling_weight)
+ importance_sampling_weight_i = tf.placeholder(tf.float32, [None],
+ name='{}_importance_sampling_weight_i'.format(self.get_name()))
+ self.input.append(importance_sampling_weight_i)
+
+ V_values = tf.placeholder(tf.float32, [None], name='{}_V_values'.format(self.get_name()))
+ self.target.append(V_values)
+ Q_values = tf.placeholder(tf.float32, [None, self.num_actions], name='{}_Q_values'.format(self.get_name()))
+ self.input.append(Q_values)
+ Q_retrace = tf.placeholder(tf.float32, [None], name='{}_Q_retrace'.format(self.get_name()))
+ self.input.append(Q_retrace)
+
+ action_log_probs_wrt_policy = self.policy_distribution.log_prob(self.actions)
+ self.probability_loss = -tf.reduce_mean(action_log_probs_wrt_policy
+ * (Q_retrace - V_values)
+ * tf.minimum(self.ap.algorithm.importance_weight_truncation,
+ importance_sampling_weight_i))
+
+ log_probs_wrt_policy = tf.log(self.policy_probs + eps)
+ bias_correction_gain = tf.reduce_sum(log_probs_wrt_policy
+ * (Q_values - tf.expand_dims(V_values, 1))
+ * tf.nn.relu(1.0 - (self.ap.algorithm.importance_weight_truncation
+ / (importance_sampling_weight + eps)))
+ * tf.stop_gradient(self.policy_probs),
+ axis=1)
+ self.bias_correction_loss = -tf.reduce_mean(bias_correction_gain)
+
+ self.loss = self.probability_loss + self.bias_correction_loss
+ tf.losses.add_loss(self.loss)
+
+ # Trust region
+ batch_size = tf.to_float(tf.shape(input_layer)[0])
+ average_policy = tf.placeholder(tf.float32, [None, self.num_actions],
+ name='{}_average_policy'.format(self.get_name()))
+ self.input.append(average_policy)
+ average_policy_distribution = tf.contrib.distributions.Categorical(probs=(average_policy + eps))
+ self.kl_divergence = tf.reduce_mean(tf.distributions.kl_divergence(average_policy_distribution,
+ self.policy_distribution))
+ if self.ap.algorithm.use_trust_region_optimization:
+ @tf.custom_gradient
+ def trust_region_layer(x):
+ def grad(g):
+ g = - g * batch_size
+ k = - average_policy / (self.policy_probs + eps)
+ adj = tf.nn.relu(
+ (tf.reduce_sum(k * g, axis=1) - self.ap.algorithm.max_KL_divergence)
+ / (tf.reduce_sum(tf.square(k), axis=1) + eps))
+ g = g - tf.expand_dims(adj, 1) * k
+ return - g / batch_size
+ return tf.identity(x), grad
+ self.output = trust_region_layer(self.output)
+
+ def _build_discrete_net(self, input_layer, action_space):
+ self.num_actions = len(action_space.actions)
+ self.actions = tf.placeholder(tf.int32, [None], name='{}_actions'.format(self.get_name()))
+ self.input.append(self.actions)
+
+ policy_values = self.dense_layer(self.num_actions)(input_layer, name='fc')
+ self.policy_probs = tf.nn.softmax(policy_values, name='{}_policy'.format(self.get_name()))
+
+ # (the + eps is to prevent probability 0 which will cause the log later on to be -inf)
+ self.policy_distribution = tf.contrib.distributions.Categorical(probs=(self.policy_probs + eps))
+ self.output = self.policy_probs
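The custom gradient above implements the trust region projection from step 4 of the algorithm description; a plain NumPy rendering of the per-sample adjustment (illustrative names) may make the TensorFlow version easier to follow::

    import numpy as np

    def trust_region_adjust(g, avg_policy, policy_probs, max_kl, eps=1e-8):
        # k is the gradient of KL(avg_policy || policy) w.r.t. the policy output.
        k = -avg_policy / (policy_probs + eps)
        # Scale g back along k only where the constraint k.g <= max_kl is violated.
        adj = np.maximum(0.0, (np.sum(k * g, axis=1) - max_kl)
                         / (np.sum(np.square(k), axis=1) + eps))
        return g - adj[:, np.newaxis] * k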
diff --git a/rl_coach/core_types.py b/rl_coach/core_types.py
index 726f536..90374a8 100644
--- a/rl_coach/core_types.py
+++ b/rl_coach/core_types.py
@@ -329,12 +329,12 @@ class ActionInfo(object):
Action info is a class that holds an action and various additional information details about it
"""
- def __init__(self, action: ActionType, action_probability: float=0,
+ def __init__(self, action: ActionType, all_action_probabilities: float=0,
action_value: float=0., state_value: float=0., max_action_value: float=None,
action_intrinsic_reward: float=0):
"""
:param action: the action
- :param action_probability: the probability that the action was given when selecting it
+        :param all_action_probabilities: the probabilities of all the actions at the time this action was selected
:param action_value: the state-action value (Q value) of the action
:param state_value: the state value (V value) of the state where the action was taken
:param max_action_value: in case this is an action that was selected randomly, this is the value of the action
@@ -344,7 +344,7 @@ class ActionInfo(object):
selection
"""
self.action = action
- self.action_probability = action_probability
+ self.all_action_probabilities = all_action_probabilities
self.action_value = action_value
self.state_value = state_value
if not max_action_value:
diff --git a/rl_coach/memories/episodic/episodic_experience_replay.py b/rl_coach/memories/episodic/episodic_experience_replay.py
index 2f4b393..fab05f7 100644
--- a/rl_coach/memories/episodic/episodic_experience_replay.py
+++ b/rl_coach/memories/episodic/episodic_experience_replay.py
@@ -75,18 +75,27 @@ class EpisodicExperienceReplay(Memory):
def num_transitions_in_complete_episodes(self):
return self._num_transitions_in_complete_episodes
- def sample(self, size: int) -> List[Transition]:
+ def sample(self, size: int, is_consecutive_transitions=False) -> List[Transition]:
"""
- Sample a batch of transitions form the replay buffer. If the requested size is larger than the number
+ Sample a batch of transitions from the replay buffer. If the requested size is larger than the number
of samples available in the replay buffer then the batch will return empty.
:param size: the size of the batch to sample
+        :param is_consecutive_transitions: if set to True, sample a batch of consecutive transitions from a single episode.
:return: a batch (list) of selected transitions from the replay buffer
"""
self.reader_writer_lock.lock_writing()
if self.num_complete_episodes() >= 1:
- transitions_idx = np.random.randint(self.num_transitions_in_complete_episodes(), size=size)
- batch = [self.transitions[i] for i in transitions_idx]
+ if is_consecutive_transitions:
+ episode_idx = np.random.randint(0, self.num_complete_episodes())
+ if self._buffer[episode_idx].length() <= size:
+ batch = self._buffer[episode_idx].transitions
+ else:
+                transition_idx = np.random.randint(size, self._buffer[episode_idx].length() + 1)
+                batch = self._buffer[episode_idx].transitions[transition_idx - size:transition_idx]
+ else:
+ transitions_idx = np.random.randint(self.num_transitions_in_complete_episodes(), size=size)
+ batch = [self.transitions[i] for i in transitions_idx]
else:
raise ValueError("The episodic replay buffer cannot be sampled since there are no complete episodes yet. "
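A quick illustration of the new flag, assuming an already populated EpisodicExperienceReplay instance named memory (hypothetical)::

    batch = memory.sample(32)  # i.i.d. transitions, as before
    # A contiguous window of 32 transitions from one randomly chosen complete
    # episode, as required for ACER's n-step / Q-retrace updates:
    trajectory = memory.sample(32, is_consecutive_transitions=True)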
diff --git a/rl_coach/presets/Atari_ACER.py b/rl_coach/presets/Atari_ACER.py
new file mode 100644
index 0000000..4d96d77
--- /dev/null
+++ b/rl_coach/presets/Atari_ACER.py
@@ -0,0 +1,45 @@
+from rl_coach.agents.acer_agent import ACERAgentParameters
+from rl_coach.base_parameters import VisualizationParameters, PresetValidationParameters
+from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps
+from rl_coach.environments.environment import SingleLevelSelection
+from rl_coach.environments.gym_environment import Atari, atari_deterministic_v4
+from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
+from rl_coach.graph_managers.graph_manager import ScheduleParameters
+from rl_coach.memories.memory import MemoryGranularity
+
+####################
+# Graph Scheduling #
+####################
+schedule_params = ScheduleParameters()
+schedule_params.improve_steps = TrainingSteps(10000000000)
+schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(100)
+schedule_params.evaluation_steps = EnvironmentEpisodes(3)
+schedule_params.heatup_steps = EnvironmentSteps(0)
+
+#########
+# Agent #
+#########
+agent_params = ACERAgentParameters()
+
+agent_params.algorithm.apply_gradients_every_x_episodes = 1
+agent_params.algorithm.num_steps_between_gradient_updates = 20
+agent_params.algorithm.ratio_of_replay = 4
+agent_params.algorithm.num_transitions_to_start_replay = 10000
+agent_params.memory.max_size = (MemoryGranularity.Transitions, 50000)
+agent_params.network_wrappers['main'].learning_rate = 0.0001
+agent_params.algorithm.beta_entropy = 0.05
+
+###############
+# Environment #
+###############
+env_params = Atari(level=SingleLevelSelection(atari_deterministic_v4))
+
+########
+# Test #
+########
+preset_validation_params = PresetValidationParameters()
+preset_validation_params.trace_test_levels = ['breakout', 'pong', 'space_invaders']
+
+graph_manager = BasicRLGraphManager(agent_params=agent_params, env_params=env_params,
+ schedule_params=schedule_params, vis_params=VisualizationParameters(),
+ preset_validation_params=preset_validation_params)
diff --git a/rl_coach/presets/CartPole_ACER.py b/rl_coach/presets/CartPole_ACER.py
new file mode 100644
index 0000000..e90444d
--- /dev/null
+++ b/rl_coach/presets/CartPole_ACER.py
@@ -0,0 +1,49 @@
+from rl_coach.agents.acer_agent import ACERAgentParameters
+from rl_coach.base_parameters import VisualizationParameters, PresetValidationParameters
+from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps
+from rl_coach.environments.gym_environment import GymVectorEnvironment
+from rl_coach.filters.filter import InputFilter
+from rl_coach.filters.reward.reward_rescale_filter import RewardRescaleFilter
+from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
+from rl_coach.graph_managers.graph_manager import ScheduleParameters
+from rl_coach.memories.memory import MemoryGranularity
+
+####################
+# Graph Scheduling #
+####################
+schedule_params = ScheduleParameters()
+schedule_params.improve_steps = TrainingSteps(10000000000)
+schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(10)
+schedule_params.evaluation_steps = EnvironmentEpisodes(1)
+schedule_params.heatup_steps = EnvironmentSteps(0)
+
+#########
+# Agent #
+#########
+agent_params = ACERAgentParameters()
+
+agent_params.algorithm.num_steps_between_gradient_updates = 5
+agent_params.algorithm.ratio_of_replay = 4
+agent_params.algorithm.num_transitions_to_start_replay = 1000
+agent_params.memory.max_size = (MemoryGranularity.Transitions, 50000)
+agent_params.input_filter = InputFilter()
+agent_params.input_filter.add_reward_filter('rescale', RewardRescaleFilter(1/200.))
+agent_params.algorithm.beta_entropy = 0.0
+
+###############
+# Environment #
+###############
+env_params = GymVectorEnvironment(level='CartPole-v0')
+
+########
+# Test #
+########
+preset_validation_params = PresetValidationParameters()
+preset_validation_params.test = True
+preset_validation_params.min_reward_threshold = 150
+preset_validation_params.max_episodes_to_achieve_reward = 300
+preset_validation_params.num_workers = 1
+
+graph_manager = BasicRLGraphManager(agent_params=agent_params, env_params=env_params,
+ schedule_params=schedule_params, vis_params=VisualizationParameters(),
+ preset_validation_params=preset_validation_params)
diff --git a/rl_coach/presets/Doom_Basic_ACER.py b/rl_coach/presets/Doom_Basic_ACER.py
new file mode 100644
index 0000000..41f774c
--- /dev/null
+++ b/rl_coach/presets/Doom_Basic_ACER.py
@@ -0,0 +1,55 @@
+from rl_coach.agents.acer_agent import ACERAgentParameters
+from rl_coach.base_parameters import VisualizationParameters, PresetValidationParameters
+from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps
+from rl_coach.environments.doom_environment import DoomEnvironmentParameters
+from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
+from rl_coach.graph_managers.graph_manager import ScheduleParameters
+from rl_coach.memories.memory import MemoryGranularity
+from rl_coach.filters.filter import InputFilter
+from rl_coach.filters.reward.reward_rescale_filter import RewardRescaleFilter
+
+####################
+# Graph Scheduling #
+####################
+
+schedule_params = ScheduleParameters()
+schedule_params.improve_steps = TrainingSteps(10000000000)
+schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(10)
+schedule_params.evaluation_steps = EnvironmentEpisodes(1)
+schedule_params.heatup_steps = EnvironmentSteps(0)
+
+
+#########
+# Agent #
+#########
+agent_params = ACERAgentParameters()
+
+agent_params.algorithm.num_steps_between_gradient_updates = 30
+agent_params.algorithm.apply_gradients_every_x_episodes = 1
+agent_params.network_wrappers['main'].learning_rate = 0.0001
+agent_params.algorithm.ratio_of_replay = 4
+agent_params.algorithm.num_transitions_to_start_replay = 2000
+agent_params.memory.max_size = (MemoryGranularity.Transitions, 100000)
+agent_params.input_filter = InputFilter()
+agent_params.input_filter.add_reward_filter('rescale', RewardRescaleFilter(1/100.))
+agent_params.algorithm.beta_entropy = 0.01
+agent_params.network_wrappers['main'].clip_gradients = 40.
+
+###############
+# Environment #
+###############
+env_params = DoomEnvironmentParameters(level='basic')
+
+########
+# Test #
+########
+preset_validation_params = PresetValidationParameters()
+preset_validation_params.test = True
+preset_validation_params.min_reward_threshold = 20
+preset_validation_params.max_episodes_to_achieve_reward = 400
+preset_validation_params.num_workers = 8
+
+
+graph_manager = BasicRLGraphManager(agent_params=agent_params, env_params=env_params,
+ schedule_params=schedule_params, vis_params=VisualizationParameters(),
+ preset_validation_params=preset_validation_params)
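Assuming the standard Coach command line, the new presets should run like any other preset, e.g. ``coach -p CartPole_ACER`` or ``coach -p Atari_ACER -lvl breakout`` (the ``-lvl`` flag being needed since the Atari preset uses SingleLevelSelection).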
diff --git a/rl_coach/spaces.py b/rl_coach/spaces.py
index a6bd356..f4ef11e 100644
--- a/rl_coach/spaces.py
+++ b/rl_coach/spaces.py
@@ -403,7 +403,8 @@ class DiscreteActionSpace(ActionSpace):
return np.random.choice(self.actions)
def sample_with_info(self) -> ActionInfo:
- return ActionInfo(self.sample(), action_probability=1. / (self.high[0] - self.low[0] + 1))
+ return ActionInfo(self.sample(),
+ all_action_probabilities=np.full(len(self.actions), 1. / (self.high[0] - self.low[0] + 1)))
def get_description(self, action: int) -> str:
if type(self.descriptions) == list and 0 <= action < len(self.descriptions):
@@ -450,7 +451,7 @@ class MultiSelectActionSpace(ActionSpace):
return random.choice(self.actions)
def sample_with_info(self) -> ActionInfo:
- return ActionInfo(self.sample(), action_probability=1. / len(self.actions))
+ return ActionInfo(self.sample(), all_action_probabilities=np.full(len(self.actions), 1. / len(self.actions)))
def get_description(self, action: np.ndarray) -> str:
if np.sum(len(np.where(action == 0)[0])) + np.sum(len(np.where(action == 1)[0])) != self.shape or \
diff --git a/rl_coach/tests/test_spaces.py b/rl_coach/tests/test_spaces.py
index 051ddbd..518cbf1 100644
--- a/rl_coach/tests/test_spaces.py
+++ b/rl_coach/tests/test_spaces.py
@@ -14,7 +14,7 @@ def test_discrete():
for i in range(100):
assert 3 > action_space.sample() >= 0
action_info = action_space.sample_with_info()
- assert action_info.action_probability == 1. / 3
+ assert action_info.all_action_probabilities[0] == 1. / 3
assert action_space.high == 2
assert action_space.low == 0
|