Search.setIndex({docnames:["components/additional_parameters","components/agents/imitation/bc","components/agents/imitation/cil","components/agents/index","components/agents/other/dfp","components/agents/policy_optimization/ac","components/agents/policy_optimization/acer","components/agents/policy_optimization/cppo","components/agents/policy_optimization/ddpg","components/agents/policy_optimization/hac","components/agents/policy_optimization/pg","components/agents/policy_optimization/ppo","components/agents/policy_optimization/sac","components/agents/policy_optimization/td3","components/agents/policy_optimization/wolpertinger","components/agents/value_optimization/bs_dqn","components/agents/value_optimization/categorical_dqn","components/agents/value_optimization/double_dqn","components/agents/value_optimization/dqn","components/agents/value_optimization/dueling_dqn","components/agents/value_optimization/mmc","components/agents/value_optimization/n_step","components/agents/value_optimization/naf","components/agents/value_optimization/nec","components/agents/value_optimization/pal","components/agents/value_optimization/qr_dqn","components/agents/value_optimization/rainbow","components/architectures/index","components/core_types","components/data_stores/index","components/environments/index","components/exploration_policies/index","components/filters/index","components/filters/input_filters","components/filters/output_filters","components/memories/index","components/memory_backends/index","components/orchestrators/index","components/spaces","contributing/add_agent","contributing/add_env","dashboard","design/control_flow","design/horizontal_scaling","design/network","dist_usage","features/algorithms","features/batch_rl","features/benchmarks","features/environments","features/index","index","selecting_an_algorithm","test","usage"],envversion:{"sphinx.domains.c":1,"sphinx.domains.changeset":1,"sphinx.domains.citation":1,"sphinx.domains.cpp":1,"sphinx.domains.javascript":1,"sphinx.domains.math":2,"sphinx.domains.python":1,"sphinx.domains.rst":1,"sphinx.domains.std":1,"sphinx.ext.todo":1,"sphinx.ext.viewcode":1,sphinx:56},filenames:["components/additional_parameters.rst","components/agents/imitation/bc.rst","components/agents/imitation/cil.rst","components/agents/index.rst","components/agents/other/dfp.rst","components/agents/policy_optimization/ac.rst","components/agents/policy_optimization/acer.rst","components/agents/policy_optimization/cppo.rst","components/agents/policy_optimization/ddpg.rst","components/agents/policy_optimization/hac.rst","components/agents/policy_optimization/pg.rst","components/agents/policy_optimization/ppo.rst","components/agents/policy_optimization/sac.rst","components/agents/policy_optimization/td3.rst","components/agents/policy_optimization/wolpertinger.rst","components/agents/value_optimization/bs_dqn.rst","components/agents/value_optimization/categorical_dqn.rst","components/agents/value_optimization/double_dqn.rst","components/agents/value_optimization/dqn.rst","components/agents/value_optimization/dueling_dqn.rst","components/agents/value_optimization/mmc.rst","components/agents/value_optimization/n_step.rst","components/agents/value_optimization/naf.rst","components/agents/value_optimization/nec.rst","components/agents/value_optimization/pal.rst","components/agents/value_optimization/qr_dqn.rst","components/agents/value_optimization/rainbow.rst","components/architectures/index.rst","components/core_types.rst","components/data_stores/index.rst","components/environments/index.rst","components/exploration_policies/index.rst","components/filters/index.rst","components/filters/input_filters.rst","components/filters/output_filters.rst","components/memories/index.rst","components/memory_backends/index.rst","components/orchestrators/index.rst","components/spaces.rst","contributing/add_agent.rst","contributing/add_env.rst","dashboard.rst","design/control_flow.rst","design/horizontal_scaling.rst","design/network.rst","dist_usage.rst","features/algorithms.rst","features/batch_rl.rst","features/benchmarks.rst","features/environments.rst","features/index.rst","index.rst","selecting_an_algorithm.rst","test.rst","usage.rst"],objects:{"rl_coach.agents.acer_agent":{ACERAlgorithmParameters:[6,0,1,""]},"rl_coach.agents.actor_critic_agent":{ActorCriticAlgorithmParameters:[5,0,1,""]},"rl_coach.agents.agent":{Agent:[3,0,1,""]},"rl_coach.agents.agent.Agent":{act:[3,1,1,""],call_memory:[3,1,1,""],choose_action:[3,1,1,""],collect_savers:[3,1,1,""],create_networks:[3,1,1,""],freeze_memory:[3,1,1,""],get_predictions:[3,1,1,""],get_state_embedding:[3,1,1,""],handle_episode_ended:[3,1,1,""],init_environment_dependent_modules:[3,1,1,""],initialize_session_dependent_components:[3,1,1,""],learn_from_batch:[3,1,1,""],load_memory_from_file:[3,1,1,""],log_to_screen:[3,1,1,""],observe:[3,1,1,""],parent:[3,1,1,""],phase:[3,1,1,""],post_training_commands:[3,1,1,""],prepare_batch_for_inference:[3,1,1,""],register_signal:[3,1,1,""],reset_evaluation_state:[3,1,1,""],reset_internal_state:[3,1,1,""],restore_checkpoint:[3,1,1,""],run_off_policy_evaluation:[3,1,1,""],run_pre_network_filter_for_inference:[3,1,1,""],save_checkpoint:[3,1,1,""],set_environment_parameters:[3,1,1,""],set_incoming_directive:[3,1,1,""],set_session:[3,1,1,""],setup_logger:[3,1,1,""],sync:[3,1,1,""],train:[3,1,1,""],update_log:[3,1,1,""],update_step_in_episode_log:[3,1,1,""],update_transition_before_adding_to_replay_buffer:[3,1,1,""]},"rl_coach.agents.bc_agent":{BCAlgorithmParameters:[1,0,1,""]},"rl_coach.agents.categorical_dqn_agent":{CategoricalDQNAlgorithmParameters:[16,0,1,""]},"rl_coach.agents.cil_agent":{CILAlgorithmParameters:[2,0,1,""]},"rl_coach.agents.clipped_ppo_agent":{ClippedPPOAlgorithmParameters:[7,0,1,""]},"rl_coach.agents.ddpg_agent":{DDPGAlgorithmParameters:[8,0,1,""]},"rl_coach.agents.dfp_agent":{DFPAlgorithmParameters:[4,0,1,""]},"rl_coach.agents.dqn_agent":{DQNAgent:[53,0,1,""],DQNAlgorithmParameters:[18,0,1,""]},"rl_coach.agents.dqn_agent.DQNAgent":{act:[53,1,1,""],call_memory:[53,1,1,""],choose_action:[53,1,1,""],collect_savers:[53,1,1,""],create_networks:[53,1,1,""],freeze_memory:[53,1,1,""],get_predictions:[53,1,1,""],get_state_embedding:[53,1,1,""],handle_episode_ended:[53,1,1,""],improve_reward_model:[53,1,1,""],init_environment_dependent_modules:[53,1,1,""],initialize_session_dependent_components:[53,1,1,""],learn_from_batch:[53,1,1,""],load_memory_from_file:[53,1,1,""],log_to_screen:[53,1,1,""],observe:[53,1,1,""],parent:[53,1,1,""],phase:[53,1,1,""],post_training_commands:[53,1,1,""],prepare_batch_for_inference:[53,1,1,""],register_signal:[53,1,1,""],reset_evaluation_state:[53,1,1,""],reset_internal_state:[53,1,1,""],restore_checkpoint:[53,1,1,""],run_off_policy_evaluation:[53,1,1,""],run_pre_network_filter_for_inference:[53,1,1,""],save_checkpoint:[53,1,1,""],set_environment_parameters:[53,1,1,""],set_incoming_directive:[53,1,1,""],set_session:[53,1,1,""],setup_logger:[53,1,1,""],sync:[53,1,1,""],train:[53,1,1,""],update_log:[53,1,1,""],update_step_in_episode_log:[53,1,1,""],update_transition_before_adding_to_replay_buffer:[53,1,1,""]},"rl_coach.agents.mmc_agent":{MixedMonteCarloAlgorithmParameters:[20,0,1,""]},"rl_coach.agents.n_step_q_agent":{NStepQAlgorithmParameters:[21,0,1,""]},"rl_coach.agents.naf_agent":{NAFAlgorithmParameters:[22,0,1,""]},"rl_coach.agents.nec_agent":{NECAlgorithmParameters:[23,0,1,""]},"rl_coach.agents.pal_agent":{PALAlgorithmParameters:[24,0,1,""]},"rl_coach.agents.policy_gradients_agent":{PolicyGradientAlgorithmParameters:[10,0,1,""]},"rl_coach.agents.ppo_agent":{PPOAlgorithmParameters:[11,0,1,""]},"rl_coach.agents.qr_dqn_agent":{QuantileRegressionDQNAlgorithmParameters:[25,0,1,""]},"rl_coach.agents.rainbow_dqn_agent":{RainbowDQNAlgorithmParameters:[26,0,1,""]},"rl_coach.agents.soft_actor_critic_agent":{SoftActorCriticAlgorithmParameters:[12,0,1,""]},"rl_coach.agents.td3_agent":{TD3AlgorithmParameters:[13,0,1,""]},"rl_coach.agents.wolpertinger_agent":{WolpertingerAlgorithmParameters:[14,0,1,""]},"rl_coach.architectures.architecture":{Architecture:[27,0,1,""]},"rl_coach.architectures.architecture.Architecture":{accumulate_gradients:[27,1,1,""],apply_and_reset_gradients:[27,1,1,""],apply_gradients:[27,1,1,""],collect_savers:[27,1,1,""],construct:[27,1,1,""],get_variable_value:[27,1,1,""],get_weights:[27,1,1,""],parallel_predict:[27,1,1,""],predict:[27,1,1,""],reset_accumulated_gradients:[27,1,1,""],set_variable_value:[27,1,1,""],set_weights:[27,1,1,""],train_on_batch:[27,1,1,""]},"rl_coach.architectures.network_wrapper":{NetworkWrapper:[27,0,1,""]},"rl_coach.architectures.network_wrapper.NetworkWrapper":{apply_gradients_and_sync_networks:[27,1,1,""],apply_gradients_to_global_network:[27,1,1,""],apply_gradients_to_online_network:[27,1,1,""],collect_savers:[27,1,1,""],parallel_prediction:[27,1,1,""],set_is_training:[27,1,1,""],sync:[27,1,1,""],train_and_sync_networks:[27,1,1,""],update_online_network:[27,1,1,""],update_target_network:[27,1,1,""]},"rl_coach.base_parameters":{AgentParameters:[3,0,1,""],DistributedTaskParameters:[0,0,1,""],NetworkParameters:[27,0,1,""],PresetValidationParameters:[0,0,1,""],TaskParameters:[0,0,1,""],VisualizationParameters:[0,0,1,""]},"rl_coach.core_types":{ActionInfo:[28,0,1,""],Batch:[28,0,1,""],EnvResponse:[28,0,1,""],Episode:[28,0,1,""],Transition:[28,0,1,""]},"rl_coach.core_types.Batch":{actions:[28,1,1,""],game_overs:[28,1,1,""],goals:[28,1,1,""],info:[28,1,1,""],info_as_list:[28,1,1,""],n_step_discounted_rewards:[28,1,1,""],next_states:[28,1,1,""],rewards:[28,1,1,""],shuffle:[28,1,1,""],size:[28,1,1,""],slice:[28,1,1,""],states:[28,1,1,""]},"rl_coach.core_types.Episode":{get_first_transition:[28,1,1,""],get_last_transition:[28,1,1,""],get_transition:[28,1,1,""],get_transitions_attribute:[28,1,1,""],insert:[28,1,1,""],is_empty:[28,1,1,""],length:[28,1,1,""],update_discounted_rewards:[28,1,1,""]},"rl_coach.data_stores.nfs_data_store":{NFSDataStore:[29,0,1,""]},"rl_coach.data_stores.s3_data_store":{S3DataStore:[29,0,1,""]},"rl_coach.environments.carla_environment":{CarlaEnvironment:[30,0,1,""]},"rl_coach.environments.control_suite_environment":{ControlSuiteEnvironment:[30,0,1,""]},"rl_coach.environments.doom_environment":{DoomEnvironment:[30,0,1,""]},"rl_coach.environments.environment":{Environment:[30,0,1,""]},"rl_coach.environments.environment.Environment":{action_space:[30,1,1,""],close:[30,1,1,""],get_action_from_user:[30,1,1,""],get_available_keys:[30,1,1,""],get_goal:[30,1,1,""],get_random_action:[30,1,1,""],get_rendered_image:[30,1,1,""],goal_space:[30,1,1,""],handle_episode_ended:[30,1,1,""],last_env_response:[30,1,1,""],phase:[30,1,1,""],render:[30,1,1,""],reset_internal_state:[30,1,1,""],set_goal:[30,1,1,""],state_space:[30,1,1,""],step:[30,1,1,""]},"rl_coach.environments.gym_environment":{GymEnvironment:[30,0,1,""]},"rl_coach.environments.starcraft2_environment":{StarCraft2Environment:[30,0,1,""]},"rl_coach.exploration_policies.additive_noise":{AdditiveNoise:[31,0,1,""]},"rl_coach.exploration_policies.boltzmann":{Boltzmann:[31,0,1,""]},"rl_coach.exploration_policies.bootstrapped":{Bootstrapped:[31,0,1,""]},"rl_coach.exploration_policies.categorical":{Categorical:[31,0,1,""]},"rl_coach.exploration_policies.continuous_entropy":{ContinuousEntropy:[31,0,1,""]},"rl_coach.exploration_policies.e_greedy":{EGreedy:[31,0,1,""]},"rl_coach.exploration_policies.exploration_policy":{ExplorationPolicy:[31,0,1,""]},"rl_coach.exploration_policies.exploration_policy.ExplorationPolicy":{change_phase:[31,1,1,""],get_action:[31,1,1,""],requires_action_values:[31,1,1,""],reset:[31,1,1,""]},"rl_coach.exploration_policies.greedy":{Greedy:[31,0,1,""]},"rl_coach.exploration_policies.ou_process":{OUProcess:[31,0,1,""]},"rl_coach.exploration_policies.parameter_noise":{ParameterNoise:[31,0,1,""]},"rl_coach.exploration_policies.truncated_normal":{TruncatedNormal:[31,0,1,""]},"rl_coach.exploration_policies.ucb":{UCB:[31,0,1,""]},"rl_coach.filters.action":{AttentionDiscretization:[34,0,1,""],BoxDiscretization:[34,0,1,""],BoxMasking:[34,0,1,""],FullDiscreteActionSpaceMap:[34,0,1,""],LinearBoxToBoxMap:[34,0,1,""],PartialDiscreteActionSpaceMap:[34,0,1,""]},"rl_coach.filters.observation":{ObservationClippingFilter:[33,0,1,""],ObservationCropFilter:[33,0,1,""],ObservationMoveAxisFilter:[33,0,1,""],ObservationNormalizationFilter:[33,0,1,""],ObservationRGBToYFilter:[33,0,1,""],ObservationReductionBySubPartsNameFilter:[33,0,1,""],ObservationRescaleSizeByFactorFilter:[33,0,1,""],ObservationRescaleToSizeFilter:[33,0,1,""],ObservationSqueezeFilter:[33,0,1,""],ObservationStackingFilter:[33,0,1,""],ObservationToUInt8Filter:[33,0,1,""]},"rl_coach.filters.reward":{RewardClippingFilter:[33,0,1,""],RewardNormalizationFilter:[33,0,1,""],RewardRescaleFilter:[33,0,1,""]},"rl_coach.memories.backend.redis":{RedisPubSubBackend:[36,0,1,""]},"rl_coach.memories.episodic":{EpisodicExperienceReplay:[35,0,1,""],EpisodicHRLHindsightExperienceReplay:[35,0,1,""],EpisodicHindsightExperienceReplay:[35,0,1,""],SingleEpisodeBuffer:[35,0,1,""]},"rl_coach.memories.non_episodic":{BalancedExperienceReplay:[35,0,1,""],ExperienceReplay:[35,0,1,""],PrioritizedExperienceReplay:[35,0,1,""],QDND:[35,0,1,""],TransitionCollection:[35,0,1,""]},"rl_coach.orchestrators.kubernetes_orchestrator":{Kubernetes:[37,0,1,""]},"rl_coach.spaces":{ActionSpace:[38,0,1,""],AttentionActionSpace:[38,0,1,""],BoxActionSpace:[38,0,1,""],CompoundActionSpace:[38,0,1,""],DiscreteActionSpace:[38,0,1,""],GoalsSpace:[38,0,1,""],ImageObservationSpace:[38,0,1,""],MultiSelectActionSpace:[38,0,1,""],ObservationSpace:[38,0,1,""],PlanarMapsObservationSpace:[38,0,1,""],Space:[38,0,1,""],VectorObservationSpace:[38,0,1,""]},"rl_coach.spaces.ActionSpace":{clip_action_to_space:[38,1,1,""],contains:[38,1,1,""],is_valid_index:[38,1,1,""],sample:[38,1,1,""],sample_with_info:[38,1,1,""]},"rl_coach.spaces.GoalsSpace":{DistanceMetric:[38,0,1,""],clip_action_to_space:[38,1,1,""],contains:[38,1,1,""],distance_from_goal:[38,1,1,""],get_reward_for_goal_and_state:[38,1,1,""],goal_from_state:[38,1,1,""],is_valid_index:[38,1,1,""],sample:[38,1,1,""],sample_with_info:[38,1,1,""]},"rl_coach.spaces.ObservationSpace":{contains:[38,1,1,""],is_valid_index:[38,1,1,""],sample:[38,1,1,""]},"rl_coach.spaces.Space":{contains:[38,1,1,""],is_valid_index:[38,1,1,""],sample:[38,1,1,""]}},objnames:{"0":["py","class","Python class"],"1":["py","method","Python method"]},objtypes:{"0":"py:class","1":"py:method"},terms:{"100x100":34,"160x160":33,"1_0":[16,26],"1st":31,"20x20":34,"210x160":33,"2nd":31,"50k":42,"9_amd64":45,"abstract":[39,43],"boolean":[3,28,38,53],"break":41,"case":[0,3,5,23,27,28,31,38,47,52,53,54],"class":[0,1,2,3,4,5,6,7,8,10,11,12,13,14,16,18,20,21,22,23,24,25,26,27,28,29,30,31,33,34,35,36,37,38,39,40,42,46,53],"default":[0,31,54],"enum":[27,30,38],"export":[0,27,45],"final":[8,13,14,17,18,20,24,42],"float":[3,4,5,6,7,8,10,11,12,13,16,20,23,24,25,27,28,30,31,33,34,35,38,39,53],"function":[0,1,3,6,7,8,11,13,27,30,31,38,39,40,42,44,53],"import":[6,19,31,35,40,52,54],"int":[0,3,4,5,6,7,10,16,21,23,25,26,28,30,31,33,34,35,38,53],"long":44,"new":[0,3,7,8,11,12,13,23,24,27,28,34,42,43,47,51,52,53],"return":[0,3,8,10,11,13,15,20,23,24,26,27,28,30,31,33,35,38,39,40,42,52,53],"short":[0,42],"static":27,"super":[39,40],"switch":[0,41],"true":[0,3,4,5,6,7,8,11,12,13,23,24,26,27,28,30,31,34,35,38,53],"try":[4,48,52],"while":[0,5,6,8,9,10,11,12,13,14,27,30,41,44,52,54],AWS:45,Adding:[19,51],And:[40,52],But:[41,52],Doing:52,For:[0,1,2,3,4,7,10,15,16,17,18,21,23,24,27,28,30,31,32,33,34,38,39,40,42,43,44,45,48,53,54],Has:27,Its:53,NFS:[29,45],Not:[],One:[25,52,54],That:41,The:[0,1,2,3,4,5,6,7,8,10,11,12,13,15,16,20,21,22,23,24,25,26,27,28,29,30,31,33,34,35,36,37,38,39,41,42,43,44,45,48,49,51,52,53,54],Then:[4,7,8,13,14,15,22,24],There:[7,11,27,31,32,39,40,44,47,54],These:[1,2,3,25,30,37,43,44,45],Use:[1,2,8,13,14,22,23],Used:31,Uses:52,Using:[8,13,14,15,17,18,45],Will:27,With:[31,51],__init__:[30,39,40],_index:[5,21],_nois:13,_render:40,_restart_environment_episod:40,_take_act:40,_update_st:40,a2c:52,a3c:[10,21,41,52],a_i:23,a_t:[4,5,6,8,12,13,14,15,16,17,18,20,21,22,24,26],a_valu:5,abl:[34,52],about:[3,28,42,53,54],abov:[8,12,13,14,27,42],abs:[21,35],absolut:31,acceler:22,accept:30,access:[27,39,45],accord:[0,3,4,5,6,8,12,13,14,15,21,27,28,31,38,41,42,44,53],accordingli:[23,38,42,54],account:[4,7,11,23,24,31],accumul:[3,4,5,6,10,21,23,26,27,33,52,53],accumulate_gradi:27,accumulated_gradi:27,accur:52,acer:[3,52],acer_ag:6,aceralgorithmparamet:6,achiev:[0,4,7,30,33,35,38,48,52,54],acquir:12,across:[10,20,41],act:[3,4,8,13,15,25,38,39,42,53],action:[1,2,3,16,17,18,19,20,21,24,25,26,27,28,30,31,32,35,39,40,42,44,53],action_idx:40,action_penalti:[8,13],action_spac:[30,31],action_space_s:27,action_valu:[28,31],actioninfo:[3,38,42,53],actionspac:[31,38],actiontyp:40,activ:[8,13,27],actor:[3,6,7,8,11,13,14,31,44,52],actor_critic_ag:5,actorcriticag:39,actorcriticalgorithmparamet:5,actual:[4,5,14,16,17,18,25,26,31,34,35],adam:[7,27],adam_optimizer_beta1:27,adam_optimizer_beta2:27,adapt:[7,11],add:[8,9,13,14,22,27,28,31,33,40,42,45,52],add_rendered_image_to_env_respons:0,added:[0,4,6,7,10,11,14,23,31,35,39],adding:[3,11,31,39,53],addit:[3,27,28,30,31,33,35,38,40,41,42,44,51,52,53],addition:[27,30,33,39,40,42,48,49,54],additional_fetch:27,additional_input:27,additional_simulator_paramet:[30,40],additionali:41,additive_nois:31,additivenoiseparamet:31,address:13,advanc:[26,51],advantag:[3,5,7,11,19,31],affect:[0,15,27],aforement:[17,18,24],after:[0,3,8,10,11,12,14,21,22,24,26,27,28,30,33,38,53,54],again:31,against:3,agent:[0,1,2,4,5,6,7,8,10,11,12,13,14,16,18,20,21,22,23,24,25,26,27,28,30,31,32,33,34,38,40,41,44,46,48,51,52,53],agent_param:43,agent_paramet:[3,27,53],agentparamet:[3,27,39],aggreg:42,ahead:[4,52],aim:31,algorithm:[3,28,31,39,41,42,43,47,48,50,51,53],algorithmparamet:[3,39],all:[0,3,10,15,23,24,27,28,30,31,33,34,38,39,40,41,42,43,44,45,47,49,53,54],all_action_prob:28,allow:[0,3,4,13,19,27,28,30,31,32,33,34,35,41,42,43,44,51,52,53,54],allow_brak:30,allow_duplicates_in_batch_sampl:35,allow_no_action_to_be_select:38,almost:47,along:[23,30,31,49],alpha:[6,20,24,35],alreadi:[23,28,40,52],also:[5,6,7,23,24,27,30,38,39,41,47,48,52,54],altern:[30,40,49],alwai:[27,31,34],amazon:45,amazonaw:45,amount:[8,10,13,20,24,31,42,52],analysi:41,analyz:41,ani:[3,27,28,30,34,35,39,42,43,44,45,47,53],anoth:[3,19,27,32,53],answer:52,anymor:[3,53],api:[30,44,49,51],appear:[3,53],appli:[0,3,5,8,10,13,14,21,27,28,31,33,52,53],applic:52,apply_and_reset_gradi:27,apply_gradi:27,apply_gradients_and_sync_network:27,apply_gradients_every_x_episod:[5,10,21],apply_gradients_to_global_network:27,apply_gradients_to_online_network:27,apply_stop_condit:0,appropri:45,approx:[8,12,13,14],approxim:[12,13,44,52],apt:45,arbitrari:33,architectur:[3,19,39,51,53],architecture_num_q_head:31,area:34,arg:[3,27,45,53],argmax_a:[17,20,24],argument:[3,16,26,27,30,38,42,53],around:[27,28,44],arrai:[3,27,28,30,33,38,40,53],art:[3,46],artifact:45,artifici:35,arxiv:[21,35],aspect:[31,33,41],assign:[0,2,5,6,27,31],assign_kl_coeffici:27,assign_op:27,assum:[28,31,33,35,52],async:[27,43],async_train:27,asynchron:[5,21,27],atari:[18,30,33,45,54],atari_a3c:54,atari_dqn:54,ath:19,atom:[16,25,26],attach:30,attempt:0,attend:34,attent:34,attentionactionspac:34,attentiondiscret:34,attribut:28,attribute_nam:28,author:[30,48,49],auto_select_all_armi:30,autoclean:45,automat:[27,54],autonom:[30,49,51],autoremov:45,auxiliari:[30,49],avail:[4,27,28,30,31,41,43,45,51,52,54],averag:[6,7,11,27,41,42],avg:6,aws:45,axes:[33,41],axi:[33,41],axis_origin:33,axis_target:33,back:[7,43],backend:[27,43,45,51,54],background:54,backpropag:23,backward:27,balanc:2,band:41,bar:6,base1:45,base64:45,base:[7,11,12,20,22,24,30,35,39,42,45,47,49,52,53],base_paramet:[0,3,27,30,31],baselin:52,basic:[10,28,43,54],batch:[1,2,3,4,5,6,8,10,11,12,13,14,15,16,17,18,19,21,24,25,26,27,35,39,42,50,51,53],batch_siz:27,batchnorm:27,bc_agent:1,bcalgorithmparamet:1,becaus:42,becom:[8,13,43],been:[19,28,33,48,52],befor:[0,3,5,11,13,26,27,28,33,42,43,44,45,52,53],begin:[0,4,42],behav:38,behavior:[3,33,35,39,48,52,53,54],being:[3,14,39,51,52,53],bellman:[16,25,26],benchmark:[41,50,51,52],benefici:47,best:[52,54],beta1:27,beta2:27,beta:[6,8,10,13,14,35],beta_entropi:[5,6,7,10,11],better:[13,19,47,52],between:[0,1,2,3,6,7,8,10,11,12,13,16,20,21,23,25,26,27,28,30,31,34,35,38,39,41,42,44,51,52],bfg:[7,11],bia:[6,52],big:[11,16,26],bin:[34,45],binari:15,bind:27,binomi:15,bit:33,blizzard:49,blob:[30,33],block:51,blog:51,boilerpl:42,bolling:41,bool:[0,3,4,5,6,7,8,11,12,13,23,24,26,27,28,30,31,35,38,53],boost:[45,52],bootstrap:[3,5,6,7,8,11,13,20,21,23,24,26,28,52],bootstrap_total_return_from_old_polici:[23,28],both:[3,7,27,30,31,34,52,53],bound:[6,7,11,16,26,31,38,52],box2d:45,box:[31,34,38],boxactionspac:34,boxdiscret:34,boxmask:34,breakout:54,breakoutdeterminist:[30,54],bring:11,bucket:45,buffer:[1,2,3,6,12,15,16,17,18,21,23,24,25,26,35,42,52,53,54],build:[32,51,52],builder:45,built:[39,42],bullet:6,button:[41,54],c51:16,cach:45,cadenc:13,calcul:[3,4,5,6,7,8,10,11,13,14,15,16,17,18,20,21,23,24,25,26,27,28,31,35,39,53],call:[0,3,10,21,27,28,30,42,53],call_memori:[3,53],callabl:38,camera:[30,40],camera_height:30,camera_width:30,cameratyp:[30,40],can:[0,2,3,5,6,7,8,11,12,13,24,27,28,30,31,32,33,34,38,39,40,41,42,44,47,49,51,53,54],candid:14,cannot:[3,47,53],carla:[33,49],carla_environ:30,carlaenviron:30,carlaenvironmentparamet:30,carlo:[3,24],cartpol:[30,40],cartpole_a3c:54,cartpole_clippedppo:[45,54],cartpole_dqn:54,categor:[3,5,6,52],categori:[32,33],categorical_dqn_ag:16,categoricaldqnalgorithmparamet:16,caus:[33,41],cdot:[5,7,8,10,12,13,14,15,16,17,18,20,22,24,26],central:[27,41],certainti:31,chain:[8,13,14],challeng:42,chang:[0,3,6,7,8,11,13,15,19,21,24,31,42,45,53],change_phas:31,channel:[30,33],channels_axi:38,check:[0,3,28,38,53],checkpoint:[0,3,27,29,43,45,53,54],checkpoint_dir:[3,53],checkpoint_prefix:[3,53],checkpoint_restore_dir:[0,54],checkpoint_restore_path:0,checkpoint_save_dir:0,checkpoint_save_sec:0,child:27,chmod:45,choic:[39,45],choos:[3,19,24,31,32,34,38,39,42,44,52,53,54],choose_act:[3,39,42,53],chosen:[3,12,14,24,31,34,39,53],chunk:11,cil:52,cil_ag:2,cilalgorithmparamet:2,classic_control:45,clean:[30,39,45],cli:45,clip:[3,6,8,11,13,27,33,38,52],clip_action_to_spac:38,clip_critic_target:[8,13],clip_gradi:27,clip_high:31,clip_likelihood_ratio_using_epsilon:[7,11],clip_low:31,clip_max:33,clip_min:33,clipbyglobalnorm:27,clipped_ppo_ag:7,clippedppoalgorithmparamet:7,clipping_high:33,clipping_low:33,clone:[3,52],close:30,cmake:45,coach:[0,3,27,29,30,31,32,36,37,39,42,46,47,48,49,52,54],code:[40,42,52],coeffici:[7,11,27,31,35],collect:[3,7,10,11,21,27,28,35,42,47,48,51,53,54],collect_sav:[3,27,53],color:33,com:45,combin:[26,44,51,52],comma:0,command:[42,45,54],common:[39,41,45,54],commun:43,compar:[0,11,19,52],complet:[28,31,42],complex:[27,32,42,44,52,54],compon:[3,16,26,27,31,37,39,42,51,53,54],composit:[3,53],compositeag:[3,53],comput:[27,31],concat:27,concentr:42,condit:[0,3],confid:31,config:[30,54],configur:[3,5,10,39,45,53],confus:42,connect:[12,27],connectionist:10,consecut:[8,13,23],consequ:[21,31],consid:[5,6,31,34,41],consist:[8,13,30,33,34,38,42,49],constant:6,constantli:54,constantschedul:35,constrain:34,construct:[12,27,35],consumpt:33,contain:[0,1,2,3,15,27,28,30,38,40,42,53,54],content:45,contin:43,continu:[1,2,5,8,9,10,13,14,22,31,32,34,38,48],continuous_entropi:31,continuous_exploration_policy_paramet:31,contribut:[4,51],control:[2,3,5,6,7,8,11,27,31,33,41,49,51,52,53],control_suite_environ:30,controlsuiteenviron:30,conveni:[41,54],converg:10,convers:32,convert:[3,28,31,33,38,42,44,53],convolut:[27,44],coordin:34,copi:[8,12,13,15,16,17,18,20,21,22,24,25,26,27,45],core:[3,51,53],core_typ:[3,28,30,38,53,54],correct:[3,6,52],correctli:27,correl:31,correpond:28,correspond:[2,3,4,16,17,27,28,31,33,38,40,53],could:[3,27,38,45,47,53],count:20,countabl:34,counter:[3,53],counterpart:44,cpu:[0,27],crd:54,creat:[3,21,27,33,40,53,54],create_network:[3,53],create_target_network:27,creation:[3,53],credenti:45,critic:[3,6,7,8,11,13,14,31,44,52],crop:[33,34],crop_high:33,crop_low:33,cross:[1,16,26],csv:0,ctrl:41,cuda:45,cudnn7:45,curl:45,curr_stat:[3,39,53],current:[0,1,2,3,4,6,7,8,9,10,11,12,13,14,15,17,18,20,22,23,24,25,27,28,30,31,33,34,38,39,42,51,52,53],custom:[30,31,38,39,42],custom_reward_threshold:30,cycl:42,dai:54,dashboard:[0,3,45,51,53],data:[0,3,10,21,27,35,42,43,45,47,48,51,52,53,54],data_stor:[29,45],dataset:[3,7,11,47,52,53,54],date:[23,44,52,54],dcp:[45,54],ddpg:[14,52],ddpg_agent:8,ddpgalgorithmparamet:8,ddqn:[20,24,52],deal:52,debug:[0,41,51],decai:[5,7,11,27],decid:[0,3,4,30,31,39,53],decis:[3,53],declar:0,decod:45,dedic:27,deep:[0,3,5,12,14,15,17,19,21,22,26,53],deepmind:49,def:[39,40],default_act:38,default_input_filt:40,default_output_filt:40,defin:[0,3,5,6,7,10,11,12,21,23,24,27,28,30,31,33,34,35,38,39,40,42,43,44,48,49,53,54],definit:[3,27,30,38,40,42,53],delai:[3,52],delta:[6,16,23,26],demonstr:[1,2,54],dens:31,densiti:20,depecr:0,depend:[0,3,6,27,33,35,38,40,45,48,52,53],deploi:[37,43,47],depth:30,descend:52,describ:[3,16,25,33,35,39,42,45,53],descript:[3,34,38,46,54],design:[42,45,51],desir:[34,39],destabil:10,detail:[3,28,46,47,49,51,54],determin:[2,3,23,28,35,53],determinist:[3,12,52],dev:45,develop:[42,48],deviat:[10,11,31,33,41],devic:27,dfp:52,dfp_agent:4,dfpalgorithmparamet:4,dict:[3,4,27,28,30,31,38,53],dict_siz:35,dictat:4,dictionari:[2,3,27,28,30,35,38,39,53],did:30,differ:[0,1,2,3,4,5,6,7,10,11,15,19,27,30,31,33,38,39,40,41,43,44,51,52,53],differenti:19,difficult:[41,48],difficulti:54,dimens:[28,30,33,34],dimension:[11,34],dir:[0,3,53,54],direct:[3,30,53],directli:[3,5,14,42,44,53],directori:[0,27,39,41,45,54],disabl:54,disable_fog:30,disappear:30,disassembl:52,discard:[28,33],discount:[8,10,11,13,20,23,24,26,27,28,52],discret:[1,2,4,7,11,14,15,16,17,18,19,20,21,23,24,25,26,31,32,33,34,38,42],disentangl:42,disk:0,displai:[0,41],distanc:38,distance_from_go:38,distance_metr:38,distancemetr:38,distil:[3,53],distribut:[5,6,10,11,12,16,25,26,27,29,31,36,37,38,44,51,52,54],distributed_coach:43,distributed_coach_synchronization_typ:43,distributedcoachsynchronizationtyp:43,divereg:[7,11],diverg:[6,7,11,26],dnd:[0,23,52],dnd_key_error_threshold:23,dnd_size:23,do_action_hindsight:35,doc:45,docker:45,dockerfil:45,document:49,doe:[15,27,33],doesn:43,doing:[7,11,32],domain:44,don:[4,31,41,52],done:[0,3,7,10,11,13,30,33,40,53,54],doom:[30,40,45,49],doom_basic_bc:54,doom_basic_dqn:54,doom_environ:[30,40,54],doomenviron:[30,40],doomenvironmentparamet:[40,54],doominputfilt:40,doomlevel:30,doomoutputfilt:40,doubl:[3,20,26],doubli:53,down:[27,30,52],download:45,dpkg:45,dqn:[3,20,21,26,30,31,33,34,42,44,52],dqn_agent:[18,53],dqnagent:53,dqnalgorithmparamet:18,drive:[2,30,49,51],driving_benchmark:30,due:33,duel:[3,26],dump:[0,3,53],dump_csv:0,dump_gif:0,dump_in_episode_sign:0,dump_mp4:0,dump_one_value_per_episod:[3,53],dump_one_value_per_step:[3,53],dump_parameters_document:0,dump_signals_to_csv_every_x_episod:0,dure:[3,6,7,10,11,12,15,23,31,41,42,53,54],dynam:[41,48,52],e_greedi:31,each:[0,1,2,3,4,5,6,7,10,11,12,15,17,18,19,21,23,24,25,27,28,30,31,32,33,34,35,38,39,41,42,43,44,45,48,52,53],eas:41,easi:[40,41,51],easier:44,easili:[31,47,54],echo:45,effect:[0,3,6,7,21,33,42,53],effici:[6,42,52],either:[0,3,5,21,27,31,38,41,44,54],element:[3,15,27,33,38],elf:45,embbed:27,embed:[3,23,27,53],embedd:[27,44],embedding_merger_typ:27,embeddingmergertyp:27,emploi:52,empti:28,emul:6,enabl:[27,44,54],encod:[33,38],encourag:[22,24,42],end:[2,3,10,26,28,30,33,53,54],enforc:34,engin:[30,49],enough:[4,6,23],ensembl:[31,52],ensur:[6,27],enter:[3,53,54],entir:[11,20,23,26,31,34,42],entri:[23,42],entropi:[1,5,6,7,10,11,12,16,26,31,52],enumer:38,env:[28,45],env_param:40,env_respons:[3,53],enviorn:30,environ:[0,3,4,6,19,27,28,31,32,33,34,38,39,42,45,48,50,51,53],environmentparamet:[30,40],envrespons:[0,3,30,53],episod:[0,3,4,5,10,11,15,20,21,26,30,31,39,40,41,42,43,53,54],episode_max_tim:30,episodic_hindsight_experience_replai:35,epoch:[7,53],epsilon:[7,31,35],epsilon_schedul:31,equal:2,equat:[8,12,13,14,17,18,21,25],error:[13,27,52],escap:54,especi:19,essenti:[21,27,34,40,42,45],estim:[3,5,7,11,15,20,24,31,53],estimate_state_value_using_ga:[5,7,11],eta:[7,11],etc:[0,3,27,30,32,38,39,49,53],evalu:[0,3,12,14,27,28,31,42,47,53],evaluate_onli:0,evaluation_epsilon:31,evaluation_nois:31,even:[19,27,30,40,41,42,47,52],eventu:14,everi:[0,5,6,8,10,12,13,14,15,16,17,18,20,21,22,24,25,26,54],exact:[23,31,48],exactli:[14,27],exampl:[2,3,4,27,28,30,31,32,33,34,38,39,40,42,44,47,53,54],except:[21,28],execut:[28,41,42],exercis:13,exhibit:[3,39,53],exist:[23,27],exit:[3,53],expand_dim:28,expect:[0,3,31,48,53],experi:[0,6,8,11,12,13,14,26,30,35,36,41,42,43,45,47,51,52,54],experiment_path:[0,30],experiment_suit:30,experimentsuit:30,expert:[1,2,28,52],exploit:[31,42],explor:[3,4,5,6,7,8,9,11,13,14,15,20,22,23,39,42,47,51,52],exploration_polici:31,explorationparamet:[3,31,39],exponenti:[6,7,11,26,27],expor:3,export_onnx_graph:0,expos:[41,44,51],extend:[30,31,49],extens:[30,49],extent:54,extern:0,extra:[3,27,28,44,53],extract:[3,22,23,28,33,38,41,42,53],factor:[8,10,11,13,24,26,27,28,31,33],failur:0,faithfulli:41,fake:38,fals:[0,3,8,13,27,28,30,31,34,35,38,40,53],far:[11,33,42,48],faster:[19,52],featur:[8,13,30,44,51,52],feature_minimap_maps_to_us:30,feature_screen_maps_to_us:30,fetch:[27,28],fetched_tensor:27,few:[10,15,16,17,18,20,24,25,26,31,40],field:[48,51],file:[0,3,39,42,53,54],fill:[28,40],filter:[0,3,51,53],filtered_action_spac:38,find:[14,17,41,49,51],finish:[23,54],finit:34,first:[0,8,11,13,14,15,23,25,26,27,28,33,42,44],fit:[13,38],fix:47,flag:[0,3,27,28,30,53],flexibl:43,flicker:30,flow:[14,32,51],follow:[2,3,5,6,8,10,12,13,14,16,17,18,21,22,23,25,26,27,28,30,31,35,39,40,45,48,52,53],footprint:33,forc:[27,30,34,40],force_cpu:27,force_environment_reset:[30,40],force_int_bin:34,forced_attention_s:38,form:[4,21,38,52],format:39,formul:[5,6],forward:[27,31],found:[3,46,47,54],frac:[6,7,12,16,26],fraction:[7,11],frame:[0,30],frame_skip:30,framework:[0,3,27,39,51,53],framework_typ:0,free:[30,49],freeglut3:45,freez:[3,53],freeze_memori:[3,53],frequenc:13,from:[0,1,2,3,4,5,6,7,8,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,30,31,32,33,34,36,38,39,40,41,42,43,44,45,47,48,49,51,53,54],full:[3,10,20,34,53],fulldiscreteactionspacemap:34,fulli:27,func:[3,53],further:47,futur:[0,3,10,28,52],future_measurements_weight:4,gae:[5,7,11],gae_lambda:[5,7,11],game:[3,28,30,49,51,53,54],game_ov:28,gamma:[5,6,8,12,13,14,15,16,17,18,20,21,22,23,24,26],gap:[24,52],gather:43,gaussian:[11,12,13,14,31],gener:[0,5,7,11,15,27,30,31,35,38,39,45,54],general_network:39,get:[3,4,7,8,9,10,11,13,14,15,17,18,20,22,24,27,28,30,31,38,42,44,45,48,53],get_act:31,get_action_from_us:30,get_available_kei:30,get_first_transit:28,get_goal:30,get_last_env_respons:30,get_last_transit:28,get_output_head:39,get_predict:[3,53],get_random_act:30,get_rendered_imag:[30,40],get_reward_for_goal_and_st:38,get_state_embed:[3,53],get_transit:28,get_transitions_attribut:28,get_variable_valu:27,get_weight:27,gfortran:45,gif:0,git:45,github:[40,45,48,51],given:[0,1,2,3,4,5,8,10,11,13,14,27,28,30,31,33,34,35,38,39,42,47,53],given_weight:27,global:[3,27,44,53],global_network:27,glx:45,goal:[1,2,3,4,6,27,28,30,35,42,44,52,53],goal_from_st:38,goal_nam:38,goal_spac:30,goal_vector:4,goals_spac:35,goalsspac:[35,38],goaltorewardconvers:38,going:32,good:[14,40,41],gpu:[0,27],gracefulli:54,gradient:[3,5,6,7,11,14,21,23,27,39,52,53],gradientclippingmethod:27,gradients_clipping_method:27,granular:35,graph:0,graphmanag:42,grayscal:[33,38],greedili:42,group:41,grow:26,guidelin:52,gym:[45,49],gym_environ:[30,54],gymenviron:30,gymenvironmentparamet:40,hac:52,had:48,hand:[19,33,42,52],handl:4,handle_episode_end:[3,30,53],handling_targets_after_episode_end:4,handlingtargetsafterepisodeend:4,hard:[41,52],harder:[41,47],has:[0,3,19,23,24,28,31,33,42,44,48,52,53],has_glob:27,has_target:27,hat:[6,7,16,26],have:[0,3,4,6,27,30,31,33,34,35,42,44,47,48,53],head:[1,2,3,5,6,10,15,19,22,23,27,31,39,44,53],headparamet:27,heads_paramet:27,health_gath:30,heat:6,heatup:[31,42],help:[24,28,41,42,52],here:[40,42],heurist:[11,31],hide:44,hierarch:[38,42],hierarchi:[3,42,52,53],high:[8,11,13,33,34,38,41],high_i:38,high_kl_penalty_coeffici:11,high_x:38,higher:11,highest:[5,6,10,14,24,31,33,34,38],highli:[0,40,52],hindsight:[9,35,52],hindsight_goal_selection_method:35,hindsight_transitions_per_regular_transit:35,hindsightgoalselectionmethod:35,hold:[15,27,28,35,41,42,44],horizont:[45,51,54],host:45,hostnam:0,hot:38,how:[4,7,11,31,43,45,52,54],hrl:35,html:45,http:[21,35,45],hub:45,huber:25,huber_loss_interv:25,human:[0,30],human_control:30,hyper:[39,48],hyperparamet:39,ident:27,identifi:[27,38],ies:53,ignor:30,imag:[0,27,30,33,34,38,40,44,54],image1:45,imit:[3,28,46,52],impact:27,implement:[3,7,11,27,29,30,31,35,39,40,43,48,52,54],impli:54,implment:37,importance_weight:27,importance_weight_trunc:6,importantli:42,improv:[5,19,26,30,42,52],improve_reward_model:53,includ:[0,3,4,30,32,33,37,44,49,53,54],incorpor:27,increas:[11,24,33,52],increment:[3,53],index:[0,2,14,28,30,33,34,35,38],indic:38,inf:[33,38],infer:[3,27,30,47,53],infinit:[0,52],info:[3,15,28,38,40,53],info_as_list:28,inform:[3,4,21,28,30,32,41,42,45,49,53],inherit:[3,39,40],init_environment_dependent_modul:[3,53],initi:[3,4,11,24,27,28,39,42,51,53],initial_feed_dict:27,initial_kl_coeffici:11,initialize_session_dependent_compon:[3,53],innov:52,input:[1,2,3,4,8,13,14,15,17,18,20,22,23,24,27,32,38,42,44,53],input_embedders_paramet:27,input_high:33,input_low:33,input_space_high:34,input_space_low:34,inputembedderparamet:27,inputfilt:42,insert:[23,28],inspect:0,instal:[45,54],instanc:[3,36,38,44],instanti:[3,30,42],instead:[0,3,7,21,24,27,33,34,42,52,53],instruct:54,intact:[15,48],integ:[0,33,34],integr:[40,42,43,47,51],intel:51,intend:[10,27,31,42],interact:[28,42,43,47,51,54],interest:[27,41],interfac:[30,41,43,49],intermedi:23,intern:[3,10,21,27,28,32,42,53,54],intersect:52,interv:25,intro:51,introduc:52,invers:[30,49],invok:42,involv:39,is_empti:28,is_valid_index:38,item:28,iter:[3,5,6,8,11,13,19,27,53],its:[0,3,16,26,27,28,31,38,42,45,52,53,54],itself:[27,38,54],job:0,job_typ:0,joint:30,json:0,jump:[4,34],jupyt:39,just:[3,11,24,26,40,42,44,47,53,54],kapa:25,keep:[18,28,33,54],kei:[2,23,27,28,30,35,39,41,45,52,54],key_error_threshold:35,key_width:35,keyboard:[30,54],keyword:27,kl_coeffici:27,kl_coefficient_ph:27,know:[3,52,53,54],knowledg:[3,42,53],known:[28,41,48,52],kubeconfig:37,kubernet:45,kubernetes_orchestr:37,kubernetesparamet:37,kwarg:[27,30],l2_norm_added_delta:23,l2_regular:27,lack:41,lamb:31,lambda:[5,7,11,31],lane:2,larg:[14,31,33,49],larger:27,last:[4,6,11,23,28,30,33],last_env_respons:30,lastli:42,later:[0,3,27,53,54],latest:[21,23,42,45],layer:[27,31,35,42,44],lazi:[28,33],lazystack:33,lbfg:27,ld_library_path:45,lead:31,learn:[0,3,4,5,6,8,9,10,12,14,15,16,17,18,19,22,25,26,27,28,30,31,33,41,42,44,46,48,49,50,52,53],learn_from_batch:[3,39,42,53],learner:27,learning_r:[27,35],learning_rate_decay_r:27,learning_rate_decay_step:27,least:[44,52],leav:[11,15],left:[2,6,12,52],length:[4,5,7,11,21,23,27,28],less:[19,52],level:[0,3,27,30,40,53,54],levelmanag:[3,42,53],levelselect:30,libatla:45,libav:45,libavformat:45,libbla:45,libboost:45,libbz2:45,libfluidsynth:45,libgl1:45,libglew:45,libgm:45,libgstream:45,libgtk2:45,libgtk:45,libjpeg:45,liblapack:45,libnotifi:45,libopen:45,libosmesa6:45,libportmidi:45,librari:[30,45,49],libsdl1:45,libsdl2:45,libsdl:45,libsm:45,libsmpeg:45,libswscal:45,libtiff:45,libwebkitgtk:45,libwildmidi:45,like:[12,30,38,42,44,45,47,52],likelihood:[7,11],line:[3,42,53,54],linear:34,linearboxtoboxmap:34,linearli:34,list:[0,3,4,27,28,30,31,33,34,38,39,53,54],load:[0,3,41,43,53,54],load_memory_from_fil:[3,53],load_memory_from_file_path:54,local:[3,44,45,53],locat:[25,28,33,52],log:[0,3,5,6,10,12,53],log_to_screen:[3,53],logger:[0,3,53],look:[40,45],loop:42,loss:[1,2,3,6,7,10,11,16,17,18,25,26,27,31,39,44,53],lot:[31,41,47,48,52],low:[8,11,13,33,34,38],low_i:38,low_x:38,lower:[0,35,42],lowest:[33,34,38],lstm:44,lumin:33,lvert:[6,16,26],lvl:54,mai:[0,27,46,54],main:[3,39,42,44,46,53,54],mainli:43,major:31,make:[0,3,27,30,39,41,45,47,48,52,53],manag:[3,27,43,45,53],mandatori:[38,40,44],mani:[3,19,46,48],manner:[11,20,21,24,33,42],manual:45,map:[3,27,30,32,33,34,38,39,53],mark:28,markdown:53,mask:[15,34],masked_target_space_high:34,masked_target_space_low:34,master:[3,42,45,53],match:[2,23,27,38],mathbb:[5,6],mathcal:13,mathop:5,max:[5,6,13,16,21,26,33],max_a:[15,18,23,24],max_action_valu:28,max_episodes_to_achieve_reward:0,max_fps_for_human_control:0,max_kl_diverg:6,max_over_num_fram:30,max_simultaneous_selected_act:38,max_siz:35,max_spe:30,maxim:[4,17],maximum:[0,12,16,18,23,24,28,30,31,33,35,52],mdp:47,mean:[0,2,7,8,9,10,11,12,13,14,22,27,31,33,34,38,41,47,52],meant:44,measur:[3,4,27,30,33,38,40,52,53],measurements_nam:38,mechan:[32,43,48,54],memor:52,memori:[3,26,28,33,39,42,43,45,51,52,53],memory_backend:45,memorygranular:35,memoryparamet:[3,39],merg:[27,30],mesa:45,method:[0,5,7,11,13,21,27,33,35],metric:[0,38,41],mid:6,middlewar:[23,27,44],middleware_paramet:27,middlewareparamet:27,midpoint:25,might:[3,10,30,39,44,53],min:[6,7,13,16,24,26],min_:[12,13],min_reward_threshold:0,mind:54,minim:[2,4,16],minimap_s:30,minimum:[0,7,13,33],mitig:52,mix:[3,7,11,23,24,52],mixedmontecarloalgorithmparamet:20,mixer1:45,mixtur:[20,27],mjkei:45,mjpro150:45,mjpro150_linux:45,mkdir:45,mmc:[20,52],mmc_agent:20,mode:[24,27,29,36,37,42,43,45,54],model:[0,20,22,27,51,53,54],modif:52,modifi:6,modul:[3,39,42,43,53],modular:[39,42,44,51],monitor:43,mont:[3,24],monte_carlo_mixing_r:[20,24],more:[3,8,13,21,27,33,39,41,42,44,45,47,51,53,54],moreov:41,most:[3,10,23,27,28,31,44,48,52,53,54],mostli:[33,42],motiv:42,move:[6,7,11,33,41,48],mp4:0,mse:[2,6,17,18,25],much:[7,11,42,52],mujoco:[30,34,40,45,49],mujoco_kei:45,mujoco_pi:45,multi:[11,27,38,44],multiarrai:[3,53],multidimension:38,multipl:[4,7,11,21,27,30,31,33,34,35,38,41,42,48,51,54],multipli:[4,10,27,33],multiselect:34,multitask:[30,49],must:[27,33,38,48],mxnet:54,n_step:[23,26,28,35],n_step_discounted_reward:28,n_step_q_ag:21,nabla:[6,8,13,14],nabla_:[8,12,13,14],nabla_a:[8,13,14],naf:52,naf_ag:22,nafalgorithmparamet:22,name:[3,27,28,30,33,38,39,45,53,54],namespac:37,nasm:45,nativ:[0,30,40,49],native_rend:0,navig:3,ndarrai:[3,27,28,30,31,33,34,38,40,53],nearest:23,neat:41,nec:[0,52],nec_ag:23,necalgorithmparamet:23,necessari:[3,23,27,53],necessarili:33,need:[0,3,6,26,27,30,31,38,39,42,48,52,53,54],neg:[4,33],neighbor:23,neighborhood:14,neon_compon:39,nervanasystem:45,network:[0,3,27,31,39,42,48,51,52,53,54],network_input_tupl:27,network_nam:[3,53],network_param:31,network_paramet:27,network_wrapp:[3,27,53],networkparamet:[3,27,31,39],networkwrapp:[3,53],neural:[3,20,27,44,48],never:27,new_value_shift_coeffici:[23,35],new_weight:27,newli:[24,40,47,52],next:[0,3,8,13,14,17,18,22,24,25,28,30,42,53,54],next_stat:28,nfs_data_stor:29,nfsdatastoreparamet:29,nice:54,no_accumul:27,node:[27,44],nois:[8,9,13,14,22,31,42,52],noise_as_percentage_from_action_spac:31,noise_schedul:31,noisi:[10,26,31],non_episod:35,none:[0,3,7,8,11,13,27,28,30,31,33,34,38,40,53],nor:[],norm:27,norm_unclipped_grad:27,norm_unclippsed_grad:27,normal:[3,4,10,31,32,33,38],note:[23,27,31,53],notebook:39,notic:[27,52],notori:[41,48,52],now:[7,40],nstepqalgorithmparamet:21,nth:26,num_act:[23,35,38],num_bins_per_dimens:34,num_class:35,num_consecutive_playing_step:[3,8,13,53],num_consecutive_training_step:[3,53],num_gpu:0,num_neighbor:35,num_predicted_steps_ahead:4,num_speedup_step:30,num_steps_between_copying_online_weights_to_target:[8,12,13,21],num_steps_between_gradient_upd:[5,6,10,21],num_task:0,num_training_task:0,num_transitions_to_start_replai:6,num_work:0,number:[0,2,4,5,6,8,10,12,13,15,16,21,23,25,26,27,28,30,31,33,34,35,41,49,53,54],number_of_knn:23,numpi:[3,27,28,30,31,33,34,38,40,53],nvidia:45,object:[0,3,26,27,30,31,33,35,42,53],observ:[0,3,4,11,27,28,30,32,40,42,53],observation_reduction_by_sub_parts_name_filt:33,observation_space_s:27,observation_space_typ:30,observation_stat:33,observation_typ:30,observationspac:38,observationspacetyp:30,observationtyp:30,off:[3,6,12,43,47,52,53],offer:[30,49],often:[41,42,44,47],old:[7,11,27,52],old_weight:27,onc:[0,7,10,11,15,16,17,18,20,21,24,25,26,27,38,54],one:[0,3,6,19,23,24,27,28,30,31,32,35,38,40,41,44,47,52,53],ones:[40,52],onli:[0,3,4,5,6,7,10,11,15,16,18,19,21,23,25,26,27,28,30,31,33,34,40,42,52,53,54],onlin:[8,12,13,14,15,16,17,18,20,21,22,23,24,25,26,27,42,44,47],online_network:27,onnx:[0,27],onto:32,open:[0,30,49],openai:[45,49],opencv:45,oper:[24,27,33],ops:27,optim:[3,4,6,27,46,52],optimization_epoch:7,optimizer_epsilon:27,optimizer_typ:27,option:[6,10,27,30,34,38,39,41,43,44,54],orchestr:[43,45,51],order:[0,3,5,6,7,8,10,11,12,13,14,17,18,19,21,22,23,24,25,27,28,32,33,34,41,42,44,47,48,52,53],org:[21,35],origin:[21,33,34,48],ornstein:[8,9,31],other:[0,2,10,19,24,27,30,32,33,35,41,42,52],otherwis:[11,15,27,30,31,38],ou_process:31,our:7,out:[2,17,18,31,32,34,41,45,51,52,54],outcom:[31,42],output:[0,4,6,8,13,14,15,16,22,23,27,31,32,33,38,39,44],output_0_0:27,output_observation_spac:33,outputfilt:42,outsid:[4,31],over:[3,7,10,11,21,23,26,27,28,31,33,34,41,42,52,53],overestim:[8,13,52],overfit:11,overhead:0,overlai:41,overrid:[3,53],override_existing_kei:35,overriden:39,overview:42,overwhelm:42,overwritten:27,own:[27,39],p_j:[16,26],page:[3,48],pair:[0,38],pal:[24,52],pal_ag:24,pal_alpha:24,palalgorithmparamet:24,paper:[5,10,12,16,21,23,25,30,35,48],parallel:[6,27,41,44],parallel_predict:27,param:[3,27,28,29,30,31,36,37,39,40,53],paramet:[2,3,4,5,6,7,8,10,11,12,13,16,20,21,23,24,25,26,27,28,29,30,31,33,34,35,36,37,38,39,40,48,51,53,54],parameter_nois:31,parameters_server_host:0,parent:[3,27,53],parent_path_suffix:[3,27,53],parmet:3,pars:42,part:[0,3,15,27,28,31,33,34,43,44,48,52,53],part_nam:33,partial:34,partialdiscreteactionspacemap:34,particular:4,particularli:[30,31,38,48,52],pass:[0,4,8,9,13,14,22,23,27,30,31,32,40,41,42,44,54],patamet:23,patchelf:45,patchelf_0:45,path:[0,3,27,39,40,45,53,54],pattern:42,pdf:35,penal:[7,8,11,13],penalti:11,pendulum_hac:40,pendulum_with_go:40,pendulumwithgo:40,per:[0,3,4,38,39,42,53],percentag:31,percentil:31,perceptron:44,perform:[0,3,6,27,28,33,35,40,41,42,52,53],period:[44,54],persist:3,persistent_advantage_learn:24,perspect:16,phase:[3,6,7,8,9,11,12,13,14,27,30,31,42,53],phi:[16,26],physic:[30,49],pi_:[6,7,12],pick:[12,30],pickl:54,pickledreplaybuff:54,pip3:45,pip:45,pixel:30,place:[34,41,42],placehold:[27,31],plai:[0,3,10,15,17,18,21,31,39,41,53],plain:44,planarmap:30,planarmapsobservationspac:33,platform:[30,49],pleas:[21,48],plu:27,plugin:45,point:[33,38,42,43],polici:[1,3,4,5,6,9,12,14,15,21,22,23,29,39,42,43,44,45,46,47,51,52,53],policy_gradient_rescal:[5,7,10,11],policy_gradients_ag:10,policygradientalgorithmparamet:10,policygradientrescal:[5,7,10,11],policyoptimizationag:39,popul:42,popular:[30,49],port:0,posit:[4,33],possibl:[2,3,4,23,31,34,38,41,44,51,52,53,54],post:[32,51],post_training_command:[3,53],power:[30,49],ppo:[7,11,52],ppo_ag:11,ppoalgorithmparamet:11,pre:[8,13,31,32],predefin:[15,24,31,54],predict:[1,2,3,5,6,7,8,11,12,13,15,16,17,18,24,25,26,27,31,44,52,53],prediction_typ:[3,53],predictiontyp:[3,53],prefect:52,prefer:27,prefix:[3,53],prep:45,prepar:[3,53],prepare_batch_for_infer:[3,53],present:[19,23,27,30,33,52],preset:[0,5,39,40,42,43,45,54],press:[41,54],prevent:[8,11,13,42],previou:33,previous:[11,27],print:[0,3,54],print_networks_summari:0,priorit:[26,35],prioriti:[26,35],privat:38,probabilit:[5,6],probabl:[3,5,6,10,15,16,26,28,31,39,52,53],problem:52,procedur:6,process:[0,3,8,9,27,31,32,33,34,39,41,42,44,47,48,51,53],produc:27,progress:27,project:[16,26],propag:7,propagate_updates_to_dnd:23,properti:[3,27,28,30,35,39,40,45,53],proport:35,proto:14,provid:[27,43],proxi:42,proxim:3,pub:[36,37,45],publish:48,purpos:[0,3,10],pursuit:2,push:[3,53],pybullet:[30,49],pygam:[0,45],pytest:45,python3:45,python:[30,35,39,45,49,51],q_i:12,qr_dqn_agent:25,quad:6,qualiti:30,quantil:[3,52],quantileregressiondqnalgorithmparamet:25,queri:[23,27,42,52],question:52,quit:[41,47],r_i:[5,21],r_t:[4,6,7,26],rainbow:[3,39,52],rainbow_ag:39,rainbow_dqn_ag:26,rainbowag:39,rainbowagentparamet:39,rainbowalgorithmparamet:39,rainbowdqnalgorithmparamet:26,rainbowexplorationparamet:39,rainbowmemoryparamet:39,rainbownetworkparamet:39,rais:[3,28,53],ramp:[39,42],random:[0,21,30,31,38,42,48],random_initialization_step:30,randomli:[28,42],rang:[4,7,8,11,13,16,26,30,33,34,38,52],rare:23,rate:[0,6,20,23,27,30,44],rate_for_copying_weights_to_target:[6,8,12,13],rather:[4,12,41],ratio:[6,7,11,20,33],ratio_of_replai:6,raw:[30,49],reach:[0,11,38],read:[0,29],read_csv_tri:0,readabl:42,readm:45,real:3,reason:[33,48],rebuild_on_every_upd:35,receiv:[27,28],recent:[3,26,27,52,53],recommend:40,redi:[36,37,45],redispubsub:45,redispubsubmemorybackendparamet:36,reduc:[1,2,10,11,24,27,33,42,52],reduct:33,reduction_method:33,reductionmethod:33,redund:33,refer:[2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,43,45],referenc:3,regard:[3,53],region:[6,52],regist:[3,53],register_sign:[3,53],registri:45,regress:[2,3,52],regula:[6,7,11],regular:[5,7,10,11,21,23,27,31,34,35,52],regularli:27,reinforc:[3,5,8,9,10,12,14,16,17,18,19,21,24,25,26,30,31,41,42,44,46,48,49,50,52],relat:[27,45],relationship:52,releas:[0,51,52],relev:[3,15,31,33,53],remov:[0,33],render:[0,3,30,40],reorder:33,repeat:[30,42],replac:[31,33,35,45],replace_mse_with_huber_loss:27,replai:[1,2,3,6,8,12,13,14,15,16,17,18,21,23,24,25,26,35,42,52,53,54],replay_buff:54,replicated_devic:27,repo:40,repositori:51,repres:[0,7,11,16,26,27,28,30,31,34,38,54],represent:44,reproduc:[42,48],request:[3,27,53],requir:[3,27,29,31,33,41,44,45,52,53],requires_action_valu:31,rescal:[4,5,7,10,11,27,32,33],rescale_factor:33,research:[30,48,49],reset:[3,23,27,30,31,40,53],reset_accumulated_gradi:27,reset_evaluation_st:[3,53],reset_gradi:27,reset_internal_st:[3,30,53],resourc:[43,45],respect:[8,13,14,28,30],respons:[3,28,30,42,53],rest:[27,28,34,45],restart:40,restor:[0,3,53],restore_checkpoint:[3,53],result:[3,4,13,16,17,18,19,25,26,27,33,34,48,52,53,54],ret:6,retrac:6,retri:0,retriev:[23,35],return_additional_data:35,reus:42,reusabl:44,reward:[0,1,2,3,4,8,10,13,20,21,26,27,28,30,32,38,40,41,42,52,53],reward_test_level:0,reward_typ:38,rgb:[30,33,38],rho:[6,8,13,14],rho_t:6,right:[2,3,6,12,31,34,41,52,53],rl_coach:[0,1,2,3,4,5,6,7,8,10,11,12,13,14,16,18,20,21,22,23,24,25,26,27,28,29,30,31,33,34,35,36,37,38,40,45,53,54],rms_prop_optimizer_decai:27,rmsprop:27,roboschool:[30,49],robot:[30,38,49,51],roboti:45,robust:53,rollout:[29,36,37,43,45,54],root:[41,45],rule:[8,13,14,15],run:[0,3,4,8,10,11,12,13,14,15,17,18,23,24,27,30,31,33,53,54],run_off_policy_evalu:[3,53],run_pre_network_filter_for_infer:[3,53],runphas:[3,53],runtim:45,rvert:[16,26],rvert_2:6,s3_bucket_nam:45,s3_creds_fil:45,s3_data_stor:29,s3_end_point:45,s3datastoreparamet:29,s_t:[4,5,6,8,12,13,14,15,16,17,18,20,21,22,24,26],sac:52,sai:52,same:[3,4,7,10,13,14,20,21,24,27,30,34,35,41,44,48,52,53],sampl:[1,2,3,5,6,8,10,11,12,13,14,15,16,17,18,20,21,24,25,26,27,31,35,38,42,45,53],sample_with_info:38,satur:[8,13],save:[0,3,26,27,31,45,53,54],save_checkpoint:[3,53],saver:[3,27,53],savercollect:[3,27,53],scale:[4,10,27,33,41,45,51,54],scale_down_gradients_by_number_of_workers_for_sync_train:27,scale_measurements_target:4,scaler:27,schedul:[7,31,35,42,43,45,54],scheme:[5,31,42,52],schulman:11,sci:45,scienc:48,scipi:[33,45],scope:27,scratch:52,scratchpad:0,screen:[3,30,40,54],screen_siz:30,script:42,second:[0,27,41,52,54],section:[45,46,49],see:[3,30,33,45,48,49,52,53,54],seed:[0,30,48],seen:[4,23,24,30,33,42,48,52],segment:[30,38],select:[5,15,23,27,28,31,33,34,38,40,41,42,51,54],self:[3,27,39,40,53],send:[40,44],separ:[0,3,19,33,34,44,46,47,52],separate_actions_for_throttle_and_brak:30,seper:10,sequenti:[4,28,35],serv:[7,10,44],server:0,server_height:30,server_width:30,sess:[3,27,53],session:[3,27,53],set:[0,2,3,4,5,6,7,8,11,13,16,17,18,20,23,24,26,27,28,30,31,33,34,38,39,43,48,49,51,52,53,54],set_environment_paramet:[3,53],set_goal:30,set_incoming_direct:[3,53],set_is_train:27,set_sess:[3,53],set_variable_valu:27,set_weight:27,setup:[3,45,47,53],setup_logg:[3,53],setuptool:45,sever:[0,3,7,10,11,15,27,30,31,33,39,40,41,42,44,49,52,53,54],shape:[27,33,38],share:[0,3,27,35,44,53],shared_memory_scratchpad:0,shared_optim:27,shift:[34,42],shine:41,should:[0,3,4,7,11,15,21,24,27,28,30,33,35,38,39,40,43,53,54],should_dump:0,shouldn:15,show:48,shown:48,shuffl:[3,28,53],side:[3,53],sigma:[13,31],signal:[3,42,53],signal_nam:[3,53],significantli:19,sim:[6,12],similar:[7,19,21,28,30,34,52],simpl:[10,35,39,40,44,51,52,54],simplest:52,simplif:52,simplifi:[7,41,44],simul:[30,40,47,49,54],simultan:7,sinc:[3,7,8,10,13,21,23,24,26,27,31,33,47,53],singl:[3,4,5,6,7,11,15,19,20,21,27,28,30,31,34,38,41,42,44,53],size:[27,28,31,33,34,35,38],skill:52,skip:[30,42],slave:[3,53],slice:28,slow:[27,52,54],slower:[0,13,19,27],slowli:[8,13],small:[7,13,23,35],smaller:31,smooth:[41,52],soft:[3,8,11,13,14,22,52],soft_actor_critic_ag:12,softactorcriticalgorithmparamet:12,softmax:[27,31],softmax_temperatur:27,softwar:45,sole:47,solut:52,solv:[33,40,49,51],some:[0,3,11,27,28,31,33,39,40,41,44,47,48,52,53,54],sort:25,sourc:[0,1,2,3,4,5,6,7,8,10,11,12,13,14,16,18,20,21,22,23,24,25,26,27,28,29,30,31,33,34,35,36,37,38,40,45,49,53],space:[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,30,31,32,33,34,35,42,51,53],spacesdefinit:[3,27,53],spatial:52,spawn:[43,45],special:19,specif:[0,3,15,19,23,27,28,39,42,54],specifi:[0,27,30,31,33,40,43,54],speed:[27,33,52],speedup:54,spread:[33,34],squar:33,squeeze_list:27,squeeze_output:27,src:45,stabil:[6,21,27,52],stabl:[44,52],stack:[3,32,33,38,53],stack_siz:[27,33],stacking_axi:33,stage:44,stai:48,standard:[7,10,11,15,31,33,41,47],starcraft2_environ:30,starcraft2environ:30,starcraft:[38,49],starcraftobservationtyp:30,start:[3,6,8,11,12,13,14,19,24,28,33,34,40,45,53],state:[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,17,18,19,20,21,22,23,24,25,26,27,28,30,33,35,38,39,40,42,44,46,52,53],state_key_with_the_class_index:[2,35],state_spac:30,state_valu:28,statist:[3,10,33,51,53],std:12,stdev:31,steep:31,step:[0,3,4,5,6,7,8,10,11,12,13,14,15,16,17,18,20,22,23,24,25,26,27,28,30,31,33,39,40,41,42,52,53,54],stepmethod:[8,12,13,21],stochast:[12,42,52],stop:[0,30],store:[0,3,23,26,28,30,33,35,41,42,43,45,51,53,54],store_transitions_only_when_episodes_are_termin:26,str:[0,2,3,4,21,27,28,30,31,33,34,38,53],strategi:[30,49],stream:[19,43],strict:48,string:[0,27,30],structur:[0,3,28,35,39,42,53],stuff:27,style:31,sub:[34,35,36,37,38,39,42,45,54],sub_spac:38,subset:[41,48,52],subtract:24,succeed:30,success:[0,30,52],suffer:41,suffici:28,suffix:[3,27,53],suggest:39,suit:[0,49],suitabl:[43,54],sum:[4,7,10,20,27,28],sum_:[5,12,16,20,21,23,26],summari:[0,3,53],supervis:52,suppli:[3,53],support:[0,3,27,30,31,41,44,45,46,47,49,51,54],sure:[0,3,45,48,53],surrog:7,surround:14,swig:45,swingup:30,symbol:27,sync:[3,27,42,43,53],synchron:[0,27,42,44],system:47,t_max:[10,21],tag:45,take:[0,3,10,11,19,23,24,27,30,31,32,40,41,42,53],taken:[1,2,4,5,6,7,8,11,12,13,16,19,23,24,25,26,27,28,30,31],tanh:[8,13],tar:45,target:[0,1,2,3,4,5,6,7,8,11,12,13,14,15,16,17,18,20,21,22,23,24,25,26,27,30,33,34,38,39,42,44,53],target_act:34,target_kl_diverg:11,target_network:27,target_success_r:30,targets_horizon:21,task:[0,1,2,30,33,39,41,49],task_index:0,tau:12,td3:52,td3_agent:13,td3algorithmparamet:13,techniqu:[7,11,51,52],technolog:43,teh:27,temperatur:[27,31],temperature_schedul:31,tensor:[3,27,53],tensorboard:0,tensorflow:[0,3,27,53,54],tensorflow_support:27,term:[6,7,11],termin:[3,8,13,28,42,53],test:[0,3,5,6,8,9,10,11,12,13,14,27,39,48,51,54],test_using_a_trace_test:0,text:6,textrm:42,than:[0,3,11,13,27,31,41,44,47,53],thei:[3,23,24,27,31,41,42,43,52,53,54],them:[4,5,10,21,27,28,30,33,38,40,41,44],therefor:[0,8,13,27,32,52],theta:[6,7,8,12,13,14,16,26,31],theta_:[6,7],thi:[0,3,4,5,6,7,8,10,11,13,15,19,21,23,26,27,28,30,31,32,33,34,35,36,38,39,40,41,42,43,44,45,47,48,52,53,54],thing:[41,47],those:[0,3,8,13,14,15,17,18,19,23,28,31,34,42,44,46,52,53],thousand:[11,15,16,17,18,20,24,25,26],thread:27,three:[3,43,44,45,46],threshold:[11,23,33],through:[0,3,4,8,9,10,11,13,14,15,23,24,27,39,40,42,44,53],tild:[8,12,13,14],time:[0,4,24,27,31,34,35,41,44,52],time_limit:40,timestep:[4,10],timid:45,tmp:0,togeth:[3,21,28,42,53],toggl:41,too:11,tool:[41,45,52],top:[27,30,32,33,35,40,41,52],torqu:30,total:[0,3,10,11,20,23,24,28,35,39,41,52,53],total_loss:27,total_return:28,trace:0,trace_max_env_step:0,trace_test_level:0,tradeoff:31,train:[0,3,19,27,31,36,37,39,40,41,42,43,44,47,48,51,52,53],train_and_sync_network:27,train_on_batch:27,train_to_eval_ratio:35,trainer:[29,43],transfer:[30,36,49],transit:[1,2,3,4,5,6,8,10,11,12,13,14,16,17,18,21,23,24,25,26,35,39,42,43,53],transition_idx:28,tree:14,tri:52,trick:48,tricki:41,trigger:[30,45],truncat:6,truncated_norm:31,trust:[6,52],ttf2:45,tune:31,tupl:[1,2,3,8,13,27,28,30,35,38,39],turn:[2,52],tutori:[39,40,47],tweak:[3,53],twin:3,two:[8,10,13,21,27,30,31,32,33,34,38,40,43,44,52,54],txt:45,type:[0,3,10,19,27,30,33,38,39,42,44,51,52,53,54],typic:[7,11,27,52,54],ubuntu16:45,uhlenbeck:[8,9,31],uint8:33,unbound:38,uncertain:31,uncertainti:31,unchang:11,unclip:[3,39,53],uncorrel:21,undeploi:43,under:[3,27,39,54],underbrac:5,understand:54,unifi:7,uniformli:[30,31,34,38],union:[3,28,30,31,34,38,53],uniqu:27,unit:41,unlik:[11,14],unmask:34,unnecessari:0,unshar:[3,53],unsign:33,unspecifi:27,unstabl:[41,48],until:[0,6,10,11,23,26,31],unus:27,unzip:45,updat:[3,6,7,8,10,11,12,13,14,15,16,17,18,19,21,22,23,24,25,26,27,28,31,39,40,41,42,44,45,52,53],update_discounted_reward:28,update_filter_internal_st:[3,53],update_log:[3,53],update_online_network:27,update_step_in_episode_log:[3,53],update_target_network:27,update_transition_before_adding_to_replay_buff:[3,53],upgrad:45,upon:[3,5,39,53],upper:[6,31],usag:[34,47,51],use:[0,1,2,3,4,5,6,8,9,10,12,13,14,15,17,18,22,27,28,29,30,31,33,34,35,38,39,40,42,44,45,47,51,52,53,54],use_accumulated_reward_as_measur:4,use_cpu:0,use_deterministic_for_evalu:12,use_full_action_spac:30,use_inputs_for_apply_gradi:27,use_kl_regular:[7,11],use_non_zero_discount_for_terminal_st:[8,13],use_separate_networks_per_head:27,use_target_network_for_evalu:[8,13],use_trust_region_optim:6,used:[0,2,3,5,6,7,8,10,11,12,13,14,15,16,20,21,22,23,24,25,27,30,31,33,34,35,36,37,39,40,42,43,44,47,48,53,54],useful:[0,3,4,26,27,31,33,38,48,52,53,54],user:[27,30,31,41,42,45],userguid:45,uses:[0,1,7,11,19,28,29,31,37,42,43,45,48,52,54],using:[0,3,5,6,7,8,10,11,12,13,14,17,18,20,21,22,23,24,26,27,29,30,31,33,36,39,40,41,43,47,49,52,53,54],usr:45,usual:[33,42],util:[3,41,53],v_max:16,v_min:16,val:[3,38,53],valid:[0,14,38],valu:[0,2,3,4,5,6,7,8,11,12,13,14,15,16,17,18,19,21,22,23,24,26,27,28,30,31,33,34,35,38,39,42,44,45,46,52,53],valuabl:41,value_targets_mix_fract:[7,11],valueexcept:[3,53],valueoptimizationag:39,van:4,vari:44,variabl:[27,30,45],variable_scop:27,varianc:[10,31,41,52],variant:[31,35,52],variou:[3,28,35,51],vector:[3,4,8,9,11,13,15,27,30,33,38,40,44,52,53],vectorobservationspac:33,verbos:30,veri:[0,7,8,10,13,19,23,41,52,54],version:[7,11,28],versu:27,vert:12,vertic:27,via:[2,15],video:[0,3,30],video_dump_method:0,view:41,viewabl:[3,53],visit:48,visual:[0,3,30,49,51],visualization_paramet:30,visualizationparamet:[3,30],vizdoom:[45,49],vote:31,wai:[3,7,11,31,34,40,42,44,51,52,53,54],wait:[5,27,43],walk:40,want:[3,4,26,27,33,34,35,47,53],warn:[31,33,34],wasn:28,weather_id:30,websit:[30,51],weight:[4,5,6,7,8,11,12,13,14,15,16,17,18,20,21,22,23,24,25,26,27,31,42,44,52],well:[23,27,31,38,52],went:11,were:[4,16,17,18,19,23,25,26,27,28,34,48],west:45,wget:45,what:[11,47,52],whatev:[3,53],when:[0,3,4,5,6,7,8,9,10,11,12,13,14,23,27,28,29,30,31,33,36,37,39,40,41,53,54],whenev:43,where:[2,3,4,5,6,7,11,15,16,19,21,23,24,26,27,28,30,31,33,34,38,41,47,52,53],whether:31,which:[0,1,2,3,5,6,7,8,10,11,12,13,14,15,19,21,22,23,24,25,27,28,29,30,31,33,35,36,37,38,39,40,41,42,43,44,46,47,48,49,51,52,53,54],who:42,why:[41,42],window:[33,34],wise:33,within:[0,7,11,22,31,38,41],without:[5,11,34,35,41,52,54],wolperting:3,wolpertinger_ag:14,wolpertingeralgorithmparamet:14,won:[4,27],wont:27,work:[3,21,27,31,33,34,41,42,52,53,54],workaround:0,workdir:45,worker:[0,21,27,29,33,35,36,37,41,43,44,45,52,54],worker_devic:27,worker_host:0,wors:52,would:[27,45,47,52],wrap:[30,33,42,49],wrapper:[3,27,28,30,38,44,53],write:[0,3,53],written:[3,26,29,53],www:45,xdist:45,y_t:[8,12,13,14,15,17,18,20,22,23,24],year:52,yet:[19,40],you:[4,33,35,39,40,45,51,54],your:[39,40,45,54],yuv:33,z_i:[16,26],z_j:[16,26],zero:[2,13,17,18],zip:45,zlib1g:45},titles:["Additional Parameters","Behavioral Cloning","Conditional Imitation Learning","Agents","Direct Future Prediction","Actor-Critic","ACER","Clipped Proximal Policy Optimization","Deep Deterministic Policy Gradient","Hierarchical Actor Critic","Policy Gradient","Proximal Policy Optimization","Soft Actor-Critic","Twin Delayed Deep Deterministic Policy Gradient","Wolpertinger","Bootstrapped DQN","Categorical DQN","Double DQN","Deep Q Networks","Dueling DQN","Mixed Monte Carlo","N-Step Q Learning","Normalized Advantage Functions","Neural Episodic Control","Persistent Advantage Learning","Quantile Regression DQN","Rainbow","Architectures","Core Types","Data Stores","Environments","Exploration Policies","Filters","Input Filters","Output Filters","Memories","Memory Backends","Orchestrators","Spaces","Adding a New Agent","Adding a New Environment","Coach Dashboard","Control Flow","Distributed Coach - Horizontal Scale-Out","Network Design","Usage - Distributed Coach","Algorithms","Batch Reinforcement Learning","Benchmarks","Environments","Features","Reinforcement Learning Coach","Selecting an Algorithm","test","Usage"],titleterms:{"final":23,"function":22,"new":[39,40],"switch":54,Adding:[39,40],Using:40,acer:6,across:52,action:[4,5,6,7,8,9,10,11,12,13,14,15,22,23,34,38,52],actioninfo:28,actor:[5,9,12],addit:[0,54],additivenois:31,advantag:[22,24],agent:[3,39,42,54],algorithm:[1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,20,21,22,23,24,25,26,46,52,54],api:40,architectur:27,attentionactionspac:38,backend:36,balancedexperiencereplai:35,batch:[28,47],behavior:1,benchmark:48,between:54,blizzard:30,boltzmann:31,bootstrap:[15,31],boxactionspac:38,build:45,can:52,carla:30,carlo:20,categor:[16,31],choos:[4,5,6,7,8,9,10,11,12,13,14,15,22,23],clip:7,clone:[1,45],coach:[40,41,43,45,51],collect:52,compar:41,compoundactionspac:38,condit:2,config:45,contain:45,continu:[7,11,12,52],continuousentropi:31,control:[23,30,42],copi:44,core:28,creat:45,critic:[5,9,12],dashboard:41,data:29,deep:[8,13,18,54],deepmind:30,delai:13,demonstr:52,descript:[1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26],design:44,determinist:[8,13],direct:4,discret:[5,6,10,52],discreteactionspac:38,distribut:[43,45],distributedtaskparamet:0,doe:52,doubl:17,dqn:[15,16,17,19,25],duel:19,dump:54,egreedi:31,environ:[30,40,49,52,54],envrespons:28,episod:[23,28,35],episodicexperiencereplai:35,episodichindsightexperiencereplai:35,episodichrlhindsightexperiencereplai:35,evalu:54,experiencereplai:35,explor:31,explorationpolici:31,featur:50,file:45,filter:[32,33,34],flag:54,flow:42,framework:54,from:52,futur:4,gener:19,gif:54,goal:38,gradient:[8,10,13],graph:42,greedi:31,gym:[30,40],have:52,hierarch:9,horizont:43,human:[52,54],imag:45,imageobservationspac:38,imit:[2,54],implement:45,input:33,interfac:45,keep:44,kubernet:37,learn:[2,21,24,47,51,54],level:42,manag:42,memori:[35,36],mix:20,mont:20,more:52,multi:54,multipl:52,multiselectactionspac:38,network:[1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,44],networkwrapp:27,neural:23,nfsdatastor:29,node:[52,54],non:35,normal:22,observ:[33,38],observationclippingfilt:33,observationcropfilt:33,observationmoveaxisfilt:33,observationnormalizationfilt:33,observationreductionbysubpartsnamefilt:33,observationrescalesizebyfactorfilt:33,observationrescaletosizefilt:33,observationrgbtoyfilt:33,observationsqueezefilt:33,observationstackingfilt:33,observationtouint8filt:33,openai:[30,40],optim:[7,11],orchestr:37,ouprocess:31,out:43,output:34,pain:52,parallel:52,paramet:0,parameternois:31,persist:24,plai:54,planarmapsobservationspac:38,polici:[7,8,10,11,13,31],predict:4,prerequisit:45,presetvalidationparamet:0,prioritizedexperiencereplai:35,process:52,proxim:[7,11],push:45,qdnd:35,quantil:25,rainbow:26,redispubsubbackend:36,regress:25,reinforc:[47,51],render:54,repositori:45,reward:33,rewardclippingfilt:33,rewardnormalizationfilt:33,rewardrescalefilt:33,run:[41,45],s3datastor:29,sampl:52,scale:43,select:52,signal:41,simul:52,singl:54,singleepisodebuff:35,soft:12,solv:52,space:[38,52],starcraft:30,statist:41,step:21,store:[15,29],structur:[1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26],suit:30,support:43,sync:44,synchron:43,task:52,taskparamet:0,test:53,thread:54,through:54,track:41,train:[1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,20,21,22,23,24,25,26,54],transit:[15,28],transitioncollect:35,truncatednorm:31,twin:13,type:[28,43],ucb:31,usag:[45,54],vectorobservationspac:38,visual:[41,54],visualizationparamet:0,vizdoom:30,wolperting:14,you:52,your:52}})