mirror of
https://github.com/gryf/coach.git
synced 2025-12-17 11:10:20 +01:00
1 line
59 KiB
JavaScript
1 line
59 KiB
JavaScript
Search.setIndex({docnames:["components/additional_parameters","components/agents/imitation/bc","components/agents/imitation/cil","components/agents/index","components/agents/other/dfp","components/agents/policy_optimization/ac","components/agents/policy_optimization/acer","components/agents/policy_optimization/cppo","components/agents/policy_optimization/ddpg","components/agents/policy_optimization/hac","components/agents/policy_optimization/pg","components/agents/policy_optimization/ppo","components/agents/policy_optimization/sac","components/agents/policy_optimization/td3","components/agents/value_optimization/bs_dqn","components/agents/value_optimization/categorical_dqn","components/agents/value_optimization/double_dqn","components/agents/value_optimization/dqn","components/agents/value_optimization/dueling_dqn","components/agents/value_optimization/mmc","components/agents/value_optimization/n_step","components/agents/value_optimization/naf","components/agents/value_optimization/nec","components/agents/value_optimization/pal","components/agents/value_optimization/qr_dqn","components/agents/value_optimization/rainbow","components/architectures/index","components/core_types","components/data_stores/index","components/environments/index","components/exploration_policies/index","components/filters/index","components/filters/input_filters","components/filters/output_filters","components/memories/index","components/memory_backends/index","components/orchestrators/index","components/spaces","contributing/add_agent","contributing/add_env","dashboard","design/control_flow","design/horizontal_scaling","design/network","dist_usage","features/algorithms","features/benchmarks","features/environments","features/index","index","selecting_an_algorithm","test","usage"],envversion:{"sphinx.domains.c":1,"sphinx.domains.changeset":1,"sphinx.domains.citation":1,"sphinx.domains.cpp":1,"sphinx.domains.javascript":1,"sphinx.domains.math":2,"sphinx.domains.python":1,"sphinx.domains.rst":1,"sphinx.domains.std":1,"sphinx.ext.todo":1,"sphinx.ext.viewcode":1,sphinx:56},filenames:["components/additional_parameters.rst","components/agents/imitation/bc.rst","components/agents/imitation/cil.rst","components/agents/index.rst","components/agents/other/dfp.rst","components/agents/policy_optimization/ac.rst","components/agents/policy_optimization/acer.rst","components/agents/policy_optimization/cppo.rst","components/agents/policy_optimization/ddpg.rst","components/agents/policy_optimization/hac.rst","components/agents/policy_optimization/pg.rst","components/agents/policy_optimization/ppo.rst","components/agents/policy_optimization/sac.rst","components/agents/policy_optimization/td3.rst","components/agents/value_optimization/bs_dqn.rst","components/agents/value_optimization/categorical_dqn.rst","components/agents/value_optimization/double_dqn.rst","components/agents/value_optimization/dqn.rst","components/agents/value_optimization/dueling_dqn.rst","components/agents/value_optimization/mmc.rst","components/agents/value_optimization/n_step.rst","components/agents/value_optimization/naf.rst","components/agents/value_optimization/nec.rst","components/agents/value_optimization/pal.rst","components/agents/value_optimization/qr_dqn.rst","components/agents/value_optimization/rainbow.rst","components/architectures/index.rst","components/core_types.rst","components/data_stores/index.rst","components/environments/index.rst","components/exploration_policies/index.rst","components/filters/index.rst","components/filters/input_filters.rst","components/filters/output_filters.rst","components/memories/index.rst","components/memory_backends/index.rst","components/orchestrators/index.rst","components/spaces.rst","contributing/add_agent.rst","contributing/add_env.rst","dashboard.rst","design/control_flow.rst","design/horizontal_scaling.rst","design/network.rst","dist_usage.rst","features/algorithms.rst","features/benchmarks.rst","features/environments.rst","features/index.rst","index.rst","selecting_an_algorithm.rst","test.rst","usage.rst"],objects:{"rl_coach.agents.acer_agent":{ACERAlgorithmParameters:[6,0,1,""]},"rl_coach.agents.actor_critic_agent":{ActorCriticAlgorithmParameters:[5,0,1,""]},"rl_coach.agents.agent":{Agent:[3,0,1,""]},"rl_coach.agents.agent.Agent":{act:[3,1,1,""],call_memory:[3,1,1,""],choose_action:[3,1,1,""],collect_savers:[3,1,1,""],create_networks:[3,1,1,""],freeze_memory:[3,1,1,""],get_predictions:[3,1,1,""],get_state_embedding:[3,1,1,""],handle_episode_ended:[3,1,1,""],init_environment_dependent_modules:[3,1,1,""],initialize_session_dependent_components:[3,1,1,""],learn_from_batch:[3,1,1,""],load_memory_from_file:[3,1,1,""],log_to_screen:[3,1,1,""],observe:[3,1,1,""],parent:[3,1,1,""],phase:[3,1,1,""],post_training_commands:[3,1,1,""],prepare_batch_for_inference:[3,1,1,""],register_signal:[3,1,1,""],reset_evaluation_state:[3,1,1,""],reset_internal_state:[3,1,1,""],restore_checkpoint:[3,1,1,""],run_off_policy_evaluation:[3,1,1,""],run_pre_network_filter_for_inference:[3,1,1,""],save_checkpoint:[3,1,1,""],set_environment_parameters:[3,1,1,""],set_incoming_directive:[3,1,1,""],set_session:[3,1,1,""],setup_logger:[3,1,1,""],sync:[3,1,1,""],train:[3,1,1,""],update_log:[3,1,1,""],update_step_in_episode_log:[3,1,1,""],update_transition_before_adding_to_replay_buffer:[3,1,1,""]},"rl_coach.agents.bc_agent":{BCAlgorithmParameters:[1,0,1,""]},"rl_coach.agents.categorical_dqn_agent":{CategoricalDQNAlgorithmParameters:[15,0,1,""]},"rl_coach.agents.cil_agent":{CILAlgorithmParameters:[2,0,1,""]},"rl_coach.agents.clipped_ppo_agent":{ClippedPPOAlgorithmParameters:[7,0,1,""]},"rl_coach.agents.ddpg_agent":{DDPGAlgorithmParameters:[8,0,1,""]},"rl_coach.agents.dfp_agent":{DFPAlgorithmParameters:[4,0,1,""]},"rl_coach.agents.dqn_agent":{DQNAgent:[51,0,1,""],DQNAlgorithmParameters:[17,0,1,""]},"rl_coach.agents.dqn_agent.DQNAgent":{act:[51,1,1,""],call_memory:[51,1,1,""],choose_action:[51,1,1,""],collect_savers:[51,1,1,""],create_networks:[51,1,1,""],freeze_memory:[51,1,1,""],get_predictions:[51,1,1,""],get_state_embedding:[51,1,1,""],handle_episode_ended:[51,1,1,""],improve_reward_model:[51,1,1,""],init_environment_dependent_modules:[51,1,1,""],initialize_session_dependent_components:[51,1,1,""],learn_from_batch:[51,1,1,""],load_memory_from_file:[51,1,1,""],log_to_screen:[51,1,1,""],observe:[51,1,1,""],parent:[51,1,1,""],phase:[51,1,1,""],post_training_commands:[51,1,1,""],prepare_batch_for_inference:[51,1,1,""],register_signal:[51,1,1,""],reset_evaluation_state:[51,1,1,""],reset_internal_state:[51,1,1,""],restore_checkpoint:[51,1,1,""],run_off_policy_evaluation:[51,1,1,""],run_pre_network_filter_for_inference:[51,1,1,""],save_checkpoint:[51,1,1,""],set_environment_parameters:[51,1,1,""],set_incoming_directive:[51,1,1,""],set_session:[51,1,1,""],setup_logger:[51,1,1,""],sync:[51,1,1,""],train:[51,1,1,""],update_log:[51,1,1,""],update_step_in_episode_log:[51,1,1,""],update_transition_before_adding_to_replay_buffer:[51,1,1,""]},"rl_coach.agents.mmc_agent":{MixedMonteCarloAlgorithmParameters:[19,0,1,""]},"rl_coach.agents.n_step_q_agent":{NStepQAlgorithmParameters:[20,0,1,""]},"rl_coach.agents.naf_agent":{NAFAlgorithmParameters:[21,0,1,""]},"rl_coach.agents.nec_agent":{NECAlgorithmParameters:[22,0,1,""]},"rl_coach.agents.pal_agent":{PALAlgorithmParameters:[23,0,1,""]},"rl_coach.agents.policy_gradients_agent":{PolicyGradientAlgorithmParameters:[10,0,1,""]},"rl_coach.agents.ppo_agent":{PPOAlgorithmParameters:[11,0,1,""]},"rl_coach.agents.qr_dqn_agent":{QuantileRegressionDQNAlgorithmParameters:[24,0,1,""]},"rl_coach.agents.rainbow_dqn_agent":{RainbowDQNAlgorithmParameters:[25,0,1,""]},"rl_coach.agents.soft_actor_critic_agent":{SoftActorCriticAlgorithmParameters:[12,0,1,""]},"rl_coach.agents.td3_agent":{TD3AlgorithmParameters:[13,0,1,""]},"rl_coach.architectures.architecture":{Architecture:[26,0,1,""]},"rl_coach.architectures.architecture.Architecture":{accumulate_gradients:[26,1,1,""],apply_and_reset_gradients:[26,1,1,""],apply_gradients:[26,1,1,""],collect_savers:[26,1,1,""],construct:[26,1,1,""],get_variable_value:[26,1,1,""],get_weights:[26,1,1,""],parallel_predict:[26,1,1,""],predict:[26,1,1,""],reset_accumulated_gradients:[26,1,1,""],set_variable_value:[26,1,1,""],set_weights:[26,1,1,""],train_on_batch:[26,1,1,""]},"rl_coach.architectures.network_wrapper":{NetworkWrapper:[26,0,1,""]},"rl_coach.architectures.network_wrapper.NetworkWrapper":{apply_gradients_and_sync_networks:[26,1,1,""],apply_gradients_to_global_network:[26,1,1,""],apply_gradients_to_online_network:[26,1,1,""],collect_savers:[26,1,1,""],parallel_prediction:[26,1,1,""],set_is_training:[26,1,1,""],sync:[26,1,1,""],train_and_sync_networks:[26,1,1,""],update_online_network:[26,1,1,""],update_target_network:[26,1,1,""]},"rl_coach.base_parameters":{AgentParameters:[3,0,1,""],DistributedTaskParameters:[0,0,1,""],NetworkParameters:[26,0,1,""],PresetValidationParameters:[0,0,1,""],TaskParameters:[0,0,1,""],VisualizationParameters:[0,0,1,""]},"rl_coach.core_types":{ActionInfo:[27,0,1,""],Batch:[27,0,1,""],EnvResponse:[27,0,1,""],Episode:[27,0,1,""],Transition:[27,0,1,""]},"rl_coach.core_types.Batch":{actions:[27,1,1,""],game_overs:[27,1,1,""],goals:[27,1,1,""],info:[27,1,1,""],info_as_list:[27,1,1,""],n_step_discounted_rewards:[27,1,1,""],next_states:[27,1,1,""],rewards:[27,1,1,""],shuffle:[27,1,1,""],size:[27,1,1,""],slice:[27,1,1,""],states:[27,1,1,""]},"rl_coach.core_types.Episode":{get_first_transition:[27,1,1,""],get_last_transition:[27,1,1,""],get_transition:[27,1,1,""],get_transitions_attribute:[27,1,1,""],insert:[27,1,1,""],is_empty:[27,1,1,""],length:[27,1,1,""],update_discounted_rewards:[27,1,1,""]},"rl_coach.data_stores.nfs_data_store":{NFSDataStore:[28,0,1,""]},"rl_coach.data_stores.s3_data_store":{S3DataStore:[28,0,1,""]},"rl_coach.environments.carla_environment":{CarlaEnvironment:[29,0,1,""]},"rl_coach.environments.control_suite_environment":{ControlSuiteEnvironment:[29,0,1,""]},"rl_coach.environments.doom_environment":{DoomEnvironment:[29,0,1,""]},"rl_coach.environments.environment":{Environment:[29,0,1,""]},"rl_coach.environments.environment.Environment":{action_space:[29,1,1,""],close:[29,1,1,""],get_action_from_user:[29,1,1,""],get_available_keys:[29,1,1,""],get_goal:[29,1,1,""],get_random_action:[29,1,1,""],get_rendered_image:[29,1,1,""],goal_space:[29,1,1,""],handle_episode_ended:[29,1,1,""],last_env_response:[29,1,1,""],phase:[29,1,1,""],render:[29,1,1,""],reset_internal_state:[29,1,1,""],set_goal:[29,1,1,""],state_space:[29,1,1,""],step:[29,1,1,""]},"rl_coach.environments.gym_environment":{GymEnvironment:[29,0,1,""]},"rl_coach.environments.starcraft2_environment":{StarCraft2Environment:[29,0,1,""]},"rl_coach.exploration_policies.additive_noise":{AdditiveNoise:[30,0,1,""]},"rl_coach.exploration_policies.boltzmann":{Boltzmann:[30,0,1,""]},"rl_coach.exploration_policies.bootstrapped":{Bootstrapped:[30,0,1,""]},"rl_coach.exploration_policies.categorical":{Categorical:[30,0,1,""]},"rl_coach.exploration_policies.continuous_entropy":{ContinuousEntropy:[30,0,1,""]},"rl_coach.exploration_policies.e_greedy":{EGreedy:[30,0,1,""]},"rl_coach.exploration_policies.exploration_policy":{ExplorationPolicy:[30,0,1,""]},"rl_coach.exploration_policies.exploration_policy.ExplorationPolicy":{change_phase:[30,1,1,""],get_action:[30,1,1,""],requires_action_values:[30,1,1,""],reset:[30,1,1,""]},"rl_coach.exploration_policies.greedy":{Greedy:[30,0,1,""]},"rl_coach.exploration_policies.ou_process":{OUProcess:[30,0,1,""]},"rl_coach.exploration_policies.parameter_noise":{ParameterNoise:[30,0,1,""]},"rl_coach.exploration_policies.truncated_normal":{TruncatedNormal:[30,0,1,""]},"rl_coach.exploration_policies.ucb":{UCB:[30,0,1,""]},"rl_coach.filters.action":{AttentionDiscretization:[33,0,1,""],BoxDiscretization:[33,0,1,""],BoxMasking:[33,0,1,""],FullDiscreteActionSpaceMap:[33,0,1,""],LinearBoxToBoxMap:[33,0,1,""],PartialDiscreteActionSpaceMap:[33,0,1,""]},"rl_coach.filters.observation":{ObservationClippingFilter:[32,0,1,""],ObservationCropFilter:[32,0,1,""],ObservationMoveAxisFilter:[32,0,1,""],ObservationNormalizationFilter:[32,0,1,""],ObservationRGBToYFilter:[32,0,1,""],ObservationReductionBySubPartsNameFilter:[32,0,1,""],ObservationRescaleSizeByFactorFilter:[32,0,1,""],ObservationRescaleToSizeFilter:[32,0,1,""],ObservationSqueezeFilter:[32,0,1,""],ObservationStackingFilter:[32,0,1,""],ObservationToUInt8Filter:[32,0,1,""]},"rl_coach.filters.reward":{RewardClippingFilter:[32,0,1,""],RewardNormalizationFilter:[32,0,1,""],RewardRescaleFilter:[32,0,1,""]},"rl_coach.memories.backend.redis":{RedisPubSubBackend:[35,0,1,""]},"rl_coach.memories.episodic":{EpisodicExperienceReplay:[34,0,1,""],EpisodicHRLHindsightExperienceReplay:[34,0,1,""],EpisodicHindsightExperienceReplay:[34,0,1,""],SingleEpisodeBuffer:[34,0,1,""]},"rl_coach.memories.non_episodic":{BalancedExperienceReplay:[34,0,1,""],ExperienceReplay:[34,0,1,""],PrioritizedExperienceReplay:[34,0,1,""],QDND:[34,0,1,""],TransitionCollection:[34,0,1,""]},"rl_coach.orchestrators.kubernetes_orchestrator":{Kubernetes:[36,0,1,""]},"rl_coach.spaces":{ActionSpace:[37,0,1,""],AttentionActionSpace:[37,0,1,""],BoxActionSpace:[37,0,1,""],CompoundActionSpace:[37,0,1,""],DiscreteActionSpace:[37,0,1,""],GoalsSpace:[37,0,1,""],ImageObservationSpace:[37,0,1,""],MultiSelectActionSpace:[37,0,1,""],ObservationSpace:[37,0,1,""],PlanarMapsObservationSpace:[37,0,1,""],Space:[37,0,1,""],VectorObservationSpace:[37,0,1,""]},"rl_coach.spaces.ActionSpace":{clip_action_to_space:[37,1,1,""],contains:[37,1,1,""],is_valid_index:[37,1,1,""],sample:[37,1,1,""],sample_with_info:[37,1,1,""]},"rl_coach.spaces.GoalsSpace":{DistanceMetric:[37,0,1,""],clip_action_to_space:[37,1,1,""],contains:[37,1,1,""],distance_from_goal:[37,1,1,""],get_reward_for_goal_and_state:[37,1,1,""],goal_from_state:[37,1,1,""],is_valid_index:[37,1,1,""],sample:[37,1,1,""],sample_with_info:[37,1,1,""]},"rl_coach.spaces.ObservationSpace":{contains:[37,1,1,""],is_valid_index:[37,1,1,""],sample:[37,1,1,""]},"rl_coach.spaces.Space":{contains:[37,1,1,""],is_valid_index:[37,1,1,""],sample:[37,1,1,""]}},objnames:{"0":["py","class","Python class"],"1":["py","method","Python method"]},objtypes:{"0":"py:class","1":"py:method"},terms:{"100x100":33,"160x160":32,"1_0":[15,25],"1st":30,"20x20":33,"210x160":32,"2nd":30,"50k":41,"9_amd64":44,"abstract":[38,42],"boolean":[3,27,37,51],"break":40,"case":[0,3,5,22,26,27,30,37,50,51,52],"class":[0,1,2,3,4,5,6,7,8,10,11,12,13,15,17,19,20,21,22,23,24,25,26,27,28,29,30,32,33,34,35,36,37,38,39,41,45,51],"default":[0,30,52],"enum":[26,29,37],"export":[0,26,44],"final":[8,13,16,17,19,23,41],"float":[3,4,5,6,7,8,10,11,12,13,15,19,22,23,24,26,27,29,30,32,33,34,37,38,51],"function":[0,1,3,6,7,8,11,13,26,29,30,37,38,39,41,43,51],"import":[6,18,30,34,39,50,52],"int":[0,3,4,5,6,7,10,15,20,22,24,25,27,29,30,32,33,34,37,51],"long":43,"new":[0,3,7,8,11,12,13,22,23,26,27,33,41,42,49,50,51],"return":[0,3,8,10,11,13,14,19,22,23,25,26,27,29,30,32,34,37,38,39,41,50,51],"short":[0,41],"static":26,"super":[38,39],"switch":[0,40],"true":[0,3,4,5,6,7,8,11,12,13,22,23,25,26,27,29,30,33,34,37,51],"try":[4,46,50],"while":[0,5,6,8,9,10,11,12,13,26,29,40,43,50,52],AWS:44,Adding:[18,49],And:[39,50],But:[40,50],Doing:50,For:[0,1,2,3,4,7,10,14,15,16,17,20,22,23,26,27,29,30,31,32,33,37,38,39,41,42,43,44,46,51,52],Has:26,Its:51,NFS:[28,44],One:[24,50,52],That:40,The:[0,1,2,3,4,5,6,7,8,10,11,12,13,14,15,19,20,21,22,23,24,25,26,27,28,29,30,32,33,34,35,36,37,38,40,41,42,43,44,46,47,49,50,51,52],Then:[4,7,8,13,14,21,23],There:[7,11,26,30,31,38,39,43,52],These:[1,2,3,24,29,36,42,43,44],Use:[1,2,8,13,21,22],Used:30,Uses:50,Using:[8,13,14,16,17,44],Will:26,With:[30,49],__init__:[29,38,39],_index:[5,20],_nois:13,_render:39,_restart_environment_episod:39,_take_act:39,_update_st:39,a2c:50,a3c:[10,20,40,50],a_i:22,a_t:[4,5,6,8,12,13,14,15,16,17,19,20,21,23,25],a_valu:5,abl:[33,50],about:[3,27,41,51,52],abov:[8,12,13,26,41],abs:[20,34],absolut:30,acceler:21,accept:29,access:[26,38,44],accord:[0,3,4,5,6,8,12,13,14,20,26,27,30,37,40,41,43,51],accordingli:[22,37,41,52],account:[4,7,11,22,23,30],accumul:[3,4,5,6,10,20,22,25,26,32,50,51],accumulate_gradi:26,accumulated_gradi:26,accur:50,acer:[3,50],acer_ag:6,aceralgorithmparamet:6,achiev:[0,4,7,29,32,34,37,46,50,52],acquir:12,across:[10,19,40],act:[3,4,8,13,14,24,37,38,41,51],action:[1,2,3,15,16,17,18,19,20,23,24,25,26,27,29,30,31,34,38,39,41,43,51],action_idx:39,action_penalti:[8,13],action_spac:[29,30],action_space_s:26,action_valu:[27,30],actioninfo:[3,37,41,51],actionspac:[30,37],actiontyp:39,activ:[8,13,26],actor:[3,6,7,8,11,13,30,43,50],actor_critic_ag:5,actorcriticag:38,actorcriticalgorithmparamet:5,actual:[4,5,15,16,17,24,25,30,33,34],adam:[7,26],adam_optimizer_beta1:26,adam_optimizer_beta2:26,adapt:[7,11],add:[8,9,13,21,27,30,32,39,41,44,50],add_rendered_image_to_env_respons:0,added:[0,4,6,7,10,11,22,30,34,38],adding:[3,11,30,38,51],addit:[3,26,27,29,30,32,34,37,39,40,41,43,49,50,51],addition:[26,29,32,38,39,41,46,47,52],additional_fetch:26,additional_simulator_paramet:[29,39],additionali:40,additive_nois:30,additivenoiseparamet:30,address:13,advanc:[25,49],advantag:[3,5,7,11,18,30],affect:[0,14,26],aforement:[16,17,23],after:[0,3,8,10,11,12,20,21,23,25,26,27,29,32,37,51,52],again:30,against:3,agent:[0,1,2,4,5,6,7,8,10,11,12,13,15,17,19,20,21,22,23,24,25,26,27,29,30,31,32,33,37,39,40,43,45,46,49,50,51],agent_param:42,agent_paramet:[3,26,51],agentparamet:[3,26,38],aggreg:41,ahead:[4,50],aim:30,algorithm:[3,27,30,38,40,41,42,46,48,49,51],algorithmparamet:[3,38],all:[0,3,10,14,22,23,26,27,29,30,32,33,37,38,39,40,41,42,43,44,47,51,52],all_action_prob:27,allow:[0,3,4,13,18,26,27,29,30,31,32,33,34,40,41,42,43,49,50,51,52],allow_brak:29,allow_duplicates_in_batch_sampl:34,allow_no_action_to_be_select:37,along:[22,29,30,47],alpha:[6,19,23,34],alreadi:[22,27,39,50],also:[5,6,7,22,23,26,29,37,38,40,46,50,52],altern:[29,39,47],alwai:[26,30,33],amazon:44,amazonaw:44,amount:[8,10,13,19,23,30,41,50],analysi:40,analyz:40,ani:[3,26,27,29,33,34,38,41,42,43,44,51],anoth:[3,18,26,31,51],answer:50,anymor:[3,51],api:[29,43,47,49],appear:[3,51],appli:[0,3,5,8,10,13,20,26,27,30,32,50,51],applic:50,apply_and_reset_gradi:26,apply_gradi:26,apply_gradients_and_sync_network:26,apply_gradients_every_x_episod:[5,10,20],apply_gradients_to_global_network:26,apply_gradients_to_online_network:26,apply_stop_condit:0,appropri:44,approx:[8,12,13],approxim:[12,13,43,50],apt:44,arbitrari:32,architectur:[3,18,38,49,51],architecture_num_q_head:30,area:33,arg:[3,26,44,51],argmax_a:[16,19,23],argument:[3,15,25,26,29,37,41,51],around:[26,27,43],arrai:[3,26,27,29,32,37,39,51],art:[3,45],artifact:44,artifici:34,arxiv:[20,34],aspect:[30,32,40],assign:[0,2,5,6,26,30],assign_kl_coeffici:26,assign_op:26,assum:[27,30,32,34,50],async:[26,42],async_train:26,asynchron:[5,20,26],atari:[17,29,32,44,52],atari_a3c:52,atari_dqn:52,ath:18,atom:[15,24,25],attach:29,attempt:0,attend:33,attent:33,attentionactionspac:33,attentiondiscret:33,attribut:27,attribute_nam:27,author:[29,46,47],auto_select_all_armi:29,autoclean:44,automat:[26,52],autonom:[29,47,49],autoremov:44,auxiliari:[29,47],avail:[4,26,27,29,30,40,42,44,49,50,52],averag:[6,7,11,26,40,41],avg:6,aws:44,axes:[32,40],axi:[32,40],axis_origin:32,axis_target:32,back:[7,42],backend:[26,42,44,49,52],background:52,backpropag:22,backward:26,balanc:2,band:40,bar:6,base1:44,base64:44,base:[7,11,12,19,21,23,29,34,38,41,44,47,50,51],base_paramet:[0,3,26,29,30],baselin:50,basic:[10,27,42,52],batch:[1,2,3,4,5,6,8,10,11,12,13,14,15,16,17,18,20,23,24,25,26,34,38,41,51],batch_siz:26,bc_agent:1,bcalgorithmparamet:1,becaus:41,becom:[8,13,42],been:[18,27,32,46,50],befor:[0,3,5,11,13,25,26,27,32,41,42,43,44,50,51],begin:[0,4,41],behav:37,behavior:[3,32,34,38,46,50,51,52],being:[3,38,49,50,51],bellman:[15,24,25],benchmark:[40,48,49,50],best:[50,52],beta1:26,beta2:26,beta:[6,8,10,13,34],beta_entropi:[5,6,7,10,11],better:[13,18,50],between:[0,1,2,3,6,7,8,10,11,12,13,15,19,20,22,24,25,26,27,29,30,33,34,37,38,40,41,43,49,50],bfg:[7,11],bia:[6,50],big:[11,15,25],bin:[33,44],binari:14,bind:26,binomi:14,bit:32,blizzard:47,blob:[29,32],block:49,blog:49,boilerpl:41,bolling:40,bool:[0,3,4,5,6,7,8,11,12,13,22,23,25,26,27,29,30,34,37,51],boost:[44,50],bootstrap:[3,5,6,7,8,11,13,19,20,22,23,25,27,50],bootstrap_total_return_from_old_polici:[22,27],both:[3,7,26,29,30,33,50,51],bound:[6,7,11,15,25,30,37,50],box2d:44,box:[30,33,37],boxactionspac:33,boxdiscret:33,boxmask:33,breakout:52,breakoutdeterminist:[29,52],bring:11,bucket:44,buffer:[1,2,3,6,12,14,15,16,17,20,22,23,24,25,34,41,50,51,52],build:[31,49,50],builder:44,built:[38,41],bullet:6,button:[40,52],c51:15,cach:44,cadenc:13,calcul:[3,4,5,6,7,8,10,11,13,14,15,16,17,19,20,22,23,24,25,26,27,30,34,38,51],call:[0,3,10,20,26,27,29,41,51],call_memori:[3,51],callabl:37,camera:[29,39],camera_height:29,camera_width:29,cameratyp:[29,39],can:[0,2,3,5,6,7,8,11,12,13,23,26,27,29,30,31,32,33,37,38,39,40,41,43,47,49,51,52],cannot:[3,51],carla:[32,47],carla_environ:29,carlaenviron:29,carlaenvironmentparamet:29,carlo:[3,23],cartpol:[29,39],cartpole_a3c:52,cartpole_clippedppo:[44,52],cartpole_dqn:52,categor:[3,5,6,50],categori:[31,32],categorical_dqn_ag:15,categoricaldqnalgorithmparamet:15,caus:[32,40],cdot:[5,7,8,10,12,13,14,15,16,17,19,21,23,25],central:[26,40],certainti:30,chain:[8,13],challeng:41,chang:[0,3,6,7,8,11,13,14,18,20,23,30,41,44,51],change_phas:30,channel:[29,32],channels_axi:37,check:[0,3,27,37,51],checkpoint:[0,3,26,28,42,44,51,52],checkpoint_dir:[3,51],checkpoint_prefix:[3,51],checkpoint_restore_dir:[0,52],checkpoint_restore_path:0,checkpoint_save_dir:0,checkpoint_save_sec:0,child:26,chmod:44,choic:[38,44],choos:[3,18,23,30,31,33,37,38,41,43,50,51,52],choose_act:[3,38,41,51],chosen:[3,12,23,30,33,38,51],chunk:11,cil:50,cil_ag:2,cilalgorithmparamet:2,classic_control:44,clean:[29,38,44],cli:44,clip:[3,6,8,11,13,26,32,37,50],clip_action_to_spac:37,clip_critic_target:[8,13],clip_gradi:26,clip_high:30,clip_likelihood_ratio_using_epsilon:[7,11],clip_low:30,clip_max:32,clip_min:32,clipbyglobalnorm:26,clipped_ppo_ag:7,clippedppoalgorithmparamet:7,clipping_high:32,clipping_low:32,clone:[3,50],close:29,cmake:44,coach:[0,3,26,28,29,30,31,35,36,38,41,45,46,47,50,52],code:[39,41,50],coeffici:[7,11,26,30,34],collect:[3,7,10,11,20,26,27,34,41,46,49,51,52],collect_sav:[3,26,51],color:32,com:44,combin:[25,43,49,50],comma:0,command:[41,44,52],common:[38,40,44,52],commun:42,compar:[0,11,18,50],complet:[27,30,41],complex:[26,31,41,43,50,52],compon:[3,15,25,26,30,36,38,41,49,51,52],composit:[3,51],compositeag:[3,51],comput:[26,30],concat:26,concentr:41,condit:[0,3],confid:30,config:[29,52],configur:[3,5,10,38,44,51],confus:41,connect:[12,26],connectionist:10,consecut:[8,13,22],consequ:[20,30],consid:[5,6,30,33,40],consist:[8,13,29,32,33,37,41,47],constant:6,constantli:52,constantschedul:34,constrain:33,construct:[12,26,34],consumpt:32,contain:[0,1,2,3,14,26,27,29,37,39,41,51,52],content:44,contin:42,continu:[1,2,5,8,9,10,13,21,30,31,33,37,46],continuous_entropi:30,continuous_exploration_policy_paramet:30,contribut:[4,49],control:[2,3,5,6,7,8,11,26,30,32,40,47,49,50,51],control_suite_environ:29,controlsuiteenviron:29,conveni:[40,52],converg:10,convers:31,convert:[3,27,30,32,37,41,43,51],convolut:[26,43],coordin:33,copi:[8,12,13,14,15,16,17,19,20,21,23,24,25,26,44],core:[3,49,51],core_typ:[3,27,29,37,51,52],correct:[3,6,50],correctli:26,correl:30,correpond:27,correspond:[2,3,4,15,16,26,27,30,32,37,39,51],could:[3,26,37,44,51],count:19,countabl:33,counter:[3,51],counterpart:43,cpu:[0,26],crd:52,creat:[3,20,26,32,39,51,52],create_network:[3,51],create_target_network:26,creation:[3,51],credenti:44,critic:[3,6,7,8,11,13,30,43,50],crop:[32,33],crop_high:32,crop_low:32,cross:[1,15,25],csv:0,ctrl:40,cuda:44,cudnn7:44,curl:44,curr_stat:[3,38,51],current:[0,1,2,3,4,6,7,8,9,10,11,12,13,14,16,17,19,21,22,23,24,26,27,29,30,32,33,37,38,41,49,50,51],custom:[29,30,37,38,41],custom_reward_threshold:29,cycl:41,dai:52,dashboard:[0,3,44,49,51],data:[0,3,10,20,26,34,41,42,44,46,49,50,51,52],data_stor:[28,44],dataset:[3,7,11,50,51,52],date:[22,43,50,52],dcp:[44,52],ddpg:50,ddpg_agent:8,ddpgalgorithmparamet:8,ddqn:[19,23,50],deal:50,debug:[0,40,49],decai:[5,7,11,26],decid:[0,3,4,29,30,38,51],decis:[3,51],declar:0,decod:44,dedic:26,deep:[0,3,5,12,14,16,18,20,21,25,51],deepmind:47,def:[38,39],default_act:37,default_input_filt:39,default_output_filt:39,defin:[0,3,5,6,7,10,11,12,20,22,23,26,27,29,30,32,33,34,37,38,39,41,42,43,46,47,51,52],definit:[3,26,29,37,39,41,51],delai:[3,50],delta:[6,15,22,25],demonstr:[1,2,52],dens:30,densiti:19,depecr:0,depend:[0,3,6,26,32,34,37,39,44,46,50,51],deploi:[36,42],depth:29,descend:50,describ:[3,15,24,32,34,38,41,44,51],descript:[3,33,37,45,52],design:[41,44,49],desir:[33,38],destabil:10,detail:[3,27,45,47,49,52],determin:[2,3,22,27,34,51],determinist:[3,12,50],dev:44,develop:[41,46],deviat:[10,11,30,32,40],devic:26,dfp:50,dfp_agent:4,dfpalgorithmparamet:4,dict:[3,4,26,27,29,30,37,51],dict_siz:34,dictat:4,dictionari:[2,3,26,27,29,34,37,38,51],did:29,differ:[0,1,2,3,4,5,6,7,10,11,14,18,26,29,30,32,37,38,39,40,42,43,49,50,51],differenti:18,difficult:[40,46],difficulti:52,dimens:[27,29,32,33],dimension:[11,33],dir:[0,3,51,52],direct:[3,29,51],directli:[3,5,41,43,51],directori:[0,26,38,40,44,52],disabl:52,disable_fog:29,disappear:29,disassembl:50,discard:[27,32],discount:[8,10,11,13,19,22,23,25,26,27,50],discret:[1,2,4,7,11,14,15,16,17,18,19,20,22,23,24,25,30,31,32,33,37,41],disentangl:41,disk:0,displai:[0,40],distanc:37,distance_from_go:37,distance_metr:37,distancemetr:37,distil:[3,51],distribut:[5,6,10,11,12,15,24,25,26,28,30,35,36,37,43,49,50,52],distributed_coach:42,distributed_coach_synchronization_typ:42,distributedcoachsynchronizationtyp:42,divereg:[7,11],diverg:[6,7,11,25],dnd:[0,22,50],dnd_key_error_threshold:22,dnd_size:22,do_action_hindsight:34,doc:44,docker:44,dockerfil:44,document:47,doe:[14,26,32],doesn:42,doing:[7,11,31],domain:43,don:[4,30,40,50],done:[0,3,7,10,11,13,29,32,39,51,52],doom:[29,39,44,47],doom_basic_bc:52,doom_basic_dqn:52,doom_environ:[29,39,52],doomenviron:[29,39],doomenvironmentparamet:[39,52],doominputfilt:39,doomlevel:29,doomoutputfilt:39,doubl:[3,19,25],doubli:51,down:[26,29,50],download:44,dpkg:44,dqn:[3,19,20,25,29,30,32,33,41,43,50],dqn_agent:[17,51],dqnagent:51,dqnalgorithmparamet:17,drive:[2,29,47,49],driving_benchmark:29,due:32,duel:[3,25],dump:[0,3,51],dump_csv:0,dump_gif:0,dump_in_episode_sign:0,dump_mp4:0,dump_one_value_per_episod:[3,51],dump_one_value_per_step:[3,51],dump_parameters_document:0,dump_signals_to_csv_every_x_episod:0,dure:[3,6,7,10,11,12,14,22,30,40,41,51,52],dynam:[40,46,50],e_greedi:30,each:[0,1,2,3,4,5,6,7,10,11,12,14,16,17,18,20,22,23,24,26,27,29,30,31,32,33,34,37,38,40,41,42,43,44,46,50,51],eas:40,easi:[39,40,49],easier:43,easili:[30,52],echo:44,effect:[0,3,6,7,20,32,41,51],effici:[6,41,50],either:[0,3,5,20,26,30,37,40,43,52],element:[3,14,26,32,37],elf:44,embbed:26,embed:[3,22,26,51],embedd:[26,43],embedding_merger_typ:26,embeddingmergertyp:26,emploi:50,empti:27,emul:6,enabl:[26,43,52],encod:[32,37],encourag:[21,23,41],end:[2,3,10,25,27,29,32,51,52],enforc:33,engin:[29,47],enough:[4,6,22],ensembl:[30,50],ensur:[6,26],enter:[3,51,52],entir:[11,19,22,25,30,33,41],entri:[22,41],entropi:[1,5,6,7,10,11,12,15,25,30,50],enumer:37,env:[27,44],env_param:39,env_respons:[3,51],enviorn:29,environ:[0,3,4,6,18,26,27,30,31,32,33,37,38,41,44,46,48,49,51],environmentparamet:[29,39],envrespons:[0,3,29,51],episod:[0,3,4,5,10,11,14,19,20,25,29,30,38,39,40,41,42,51,52],episode_max_tim:29,episodic_hindsight_experience_replai:34,epoch:[7,51],epsilon:[7,30,34],epsilon_schedul:30,equal:2,equat:[8,12,13,16,17,20,24],error:[13,26,50],escap:52,especi:18,essenti:[20,26,33,39,41,44],estim:[3,5,7,11,14,19,23,30,51],estimate_state_value_using_ga:[5,7,11],eta:[7,11],etc:[0,3,26,29,31,37,38,47,51],evalu:[0,3,12,26,27,30,41,51],evaluate_onli:0,evaluation_epsilon:30,evaluation_nois:30,even:[18,26,29,39,40,41,50],everi:[0,5,6,8,10,12,13,14,15,16,17,19,20,21,23,24,25,52],exact:[22,30,46],exactli:26,exampl:[2,3,4,26,27,29,30,31,32,33,37,38,39,41,43,51,52],except:[20,27],execut:[27,40,41],exercis:13,exhibit:[3,38,51],exist:[22,26],exit:[3,51],expand_dim:27,expect:[0,3,30,46,51],experi:[0,6,8,11,12,13,25,29,34,35,40,41,42,44,49,50,52],experiment_path:[0,29],experiment_suit:29,experimentsuit:29,expert:[1,2,27,50],exploit:[30,41],explor:[3,4,5,6,7,8,9,11,13,14,19,21,22,38,41,49,50],exploration_polici:30,explorationparamet:[3,30,38],exponenti:[6,7,11,25,26],expor:3,export_onnx_graph:0,expos:[40,43,49],extend:[29,30,47],extens:[29,47],extent:52,extern:0,extra:[3,26,27,43,51],extract:[3,21,22,27,32,37,40,41,51],factor:[8,10,11,13,23,25,26,27,30,32],failur:0,faithfulli:40,fake:37,fals:[0,3,8,13,26,27,29,30,33,34,37,39,51],far:[11,32,41,46],faster:[18,50],featur:[8,13,29,43,49,50],feature_minimap_maps_to_us:29,feature_screen_maps_to_us:29,fetch:[26,27],fetched_tensor:26,few:[10,14,15,16,17,19,23,24,25,30,39],field:[46,49],file:[0,3,38,41,51,52],fill:[27,39],filter:[0,3,49,51],find:[16,40,47,49],finish:[22,52],finit:33,first:[0,8,11,13,14,22,24,25,26,27,32,41,43],fit:[13,37],flag:[0,3,26,27,29,51],flexibl:42,flicker:29,flow:[31,49],follow:[2,3,5,6,8,10,12,13,15,16,17,20,21,22,24,25,26,27,29,30,34,38,39,44,46,50,51],footprint:32,forc:[26,29,33,39],force_cpu:26,force_environment_reset:[29,39],force_int_bin:33,forced_attention_s:37,form:[4,20,37,50],format:38,formul:[5,6],forward:[26,30],found:[3,45,52],frac:[6,7,12,15,25],fraction:[7,11],frame:[0,29],frame_skip:29,framework:[0,3,26,38,49,51],framework_typ:0,free:[29,47],freeglut3:44,freez:[3,51],freeze_memori:[3,51],frequenc:13,from:[0,1,2,3,4,5,6,7,8,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,29,30,31,32,33,35,37,38,39,40,41,42,43,44,46,47,49,51,52],full:[3,10,19,33,51],fulldiscreteactionspacemap:33,fulli:26,func:[3,51],futur:[0,3,10,27,50],future_measurements_weight:4,gae:[5,7,11],gae_lambda:[5,7,11],game:[3,27,29,47,49,51,52],game_ov:27,gamma:[5,6,8,12,13,14,15,16,17,19,20,21,22,23,25],gap:[23,50],gather:42,gaussian:[11,12,13,30],gener:[0,5,7,11,14,26,29,30,34,37,38,44,52],general_network:38,get:[3,4,7,8,9,10,11,13,14,16,17,19,21,23,26,27,29,30,37,41,43,44,46,51],get_act:30,get_action_from_us:29,get_available_kei:29,get_first_transit:27,get_goal:29,get_last_env_respons:29,get_last_transit:27,get_output_head:38,get_predict:[3,51],get_random_act:29,get_rendered_imag:[29,39],get_reward_for_goal_and_st:37,get_state_embed:[3,51],get_transit:27,get_transitions_attribut:27,get_variable_valu:26,get_weight:26,gfortran:44,gif:0,git:44,github:[39,44,46,49],given:[0,1,2,3,4,5,8,10,11,13,26,27,29,30,32,33,34,37,38,41,51],given_weight:26,global:[3,26,43,51],global_network:26,glx:44,goal:[1,2,3,4,6,26,27,29,34,41,43,50,51],goal_from_st:37,goal_nam:37,goal_spac:29,goal_vector:4,goals_spac:34,goalsspac:[34,37],goaltorewardconvers:37,going:31,good:[39,40],gpu:[0,26],gracefulli:52,gradient:[3,5,6,7,11,20,22,26,38,50,51],gradientclippingmethod:26,gradients_clipping_method:26,granular:34,graph:0,graphmanag:41,grayscal:[32,37],greedili:41,group:40,grow:25,guidelin:50,gym:[44,47],gym_environ:[29,52],gymenviron:29,gymenvironmentparamet:39,hac:50,had:46,hand:[18,32,41,50],handl:4,handle_episode_end:[3,29,51],handling_targets_after_episode_end:4,handlingtargetsafterepisodeend:4,hard:[40,50],harder:40,has:[0,3,18,22,23,27,30,32,41,43,46,50,51],has_glob:26,has_target:26,hat:[6,7,15,25],have:[0,3,4,6,26,29,30,32,33,34,41,43,46,51],head:[1,2,3,5,6,10,14,18,21,22,26,30,38,43,51],headparamet:26,heads_paramet:26,health_gath:29,heat:6,heatup:[30,41],help:[23,27,40,41,50],here:[39,41],heurist:[11,30],hide:43,hierarch:[37,41],hierarchi:[3,41,50,51],high:[8,11,13,32,33,37,40],high_i:37,high_kl_penalty_coeffici:11,high_x:37,higher:11,highest:[5,6,10,23,30,32,33,37],highli:[0,39,50],hindsight:[9,34,50],hindsight_goal_selection_method:34,hindsight_transitions_per_regular_transit:34,hindsightgoalselectionmethod:34,hold:[14,26,27,34,40,41,43],horizont:[44,49,52],host:44,hostnam:0,hot:37,how:[4,7,11,30,42,44,50,52],hrl:34,html:44,http:[20,34,44],hub:44,huber:24,huber_loss_interv:24,human:[0,29],human_control:29,hyper:[38,46],hyperparamet:38,ident:26,identifi:[26,37],ies:51,ignor:29,imag:[0,26,29,32,33,37,39,43,52],image1:44,imit:[3,27,45,50],impact:26,implement:[3,7,11,26,28,29,30,34,38,39,42,46,50,52],impli:52,implment:36,importance_weight:26,importance_weight_trunc:6,importantli:41,improv:[5,18,25,29,41,50],improve_reward_model:51,includ:[0,3,4,29,31,32,36,43,47,51,52],increas:[11,23,32,50],increment:[3,51],index:[0,2,27,29,32,33,34,37],indic:37,inf:[32,37],infer:[3,26,29,51],infinit:[0,50],info:[3,14,27,37,39,51],info_as_list:27,inform:[3,4,20,27,29,31,40,41,44,47,51],inherit:[3,38,39],init_environment_dependent_modul:[3,51],initi:[3,4,11,23,26,27,38,41,49,51],initial_feed_dict:26,initial_kl_coeffici:11,initialize_session_dependent_compon:[3,51],innov:50,input:[1,2,3,4,8,13,14,16,17,19,21,22,23,26,31,37,41,43,51],input_embedders_paramet:26,input_high:32,input_low:32,input_space_high:33,input_space_low:33,inputembedderparamet:26,inputfilt:41,insert:[22,27],inspect:0,instal:[44,52],instanc:[3,35,37,43],instanti:[3,29,41],instead:[0,3,7,20,23,26,32,33,41,50,51],instruct:52,intact:[14,46],integ:[0,32,33],integr:[39,41,42,49],intel:49,intend:[10,26,30,41],interact:[27,41,42,49,52],interest:[26,40],interfac:[29,40,42,47],intermedi:22,intern:[3,10,20,26,27,31,41,51,52],intersect:50,interv:24,intro:49,introduc:50,invers:[29,47],invok:41,involv:38,is_empti:27,is_valid_index:37,item:27,iter:[3,5,6,8,11,13,18,26,51],its:[0,3,15,25,26,27,30,37,41,44,50,51,52],itself:[26,37,52],job:0,job_typ:0,joint:29,json:0,jump:[4,33],jupyt:38,just:[3,11,23,25,39,41,43,51,52],kapa:24,keep:[17,27,32,52],kei:[2,22,26,27,29,34,38,40,44,50,52],key_error_threshold:34,key_width:34,keyboard:[29,52],keyword:26,kl_coeffici:26,kl_coefficient_ph:26,know:[3,50,51,52],knowledg:[3,41,51],known:[27,40,46,50],kubeconfig:36,kubernet:44,kubernetes_orchestr:36,kubernetesparamet:36,kwarg:[26,29],l2_norm_added_delta:22,l2_regular:26,lack:40,lamb:30,lambda:[5,7,11,30],lane:2,larg:[30,32,47],larger:26,last:[4,6,11,22,27,29,32],last_env_respons:29,lastli:41,later:[0,3,26,51,52],latest:[20,22,41,44],layer:[26,30,34,41,43],lazi:[27,32],lazystack:32,lbfg:26,ld_library_path:44,lead:30,learn:[0,3,4,5,6,8,9,10,12,14,15,16,17,18,21,24,25,26,27,29,30,32,40,41,43,45,46,47,50,51],learn_from_batch:[3,38,41,51],learner:26,learning_r:[26,34],learning_rate_decay_r:26,learning_rate_decay_step:26,least:[43,50],leav:[11,14],left:[2,6,12,50],length:[4,5,7,11,20,22,26,27],less:[18,50],level:[0,3,26,29,39,51,52],levelmanag:[3,41,51],levelselect:29,libatla:44,libav:44,libavformat:44,libbla:44,libboost:44,libbz2:44,libfluidsynth:44,libgl1:44,libglew:44,libgm:44,libgstream:44,libgtk2:44,libgtk:44,libjpeg:44,liblapack:44,libnotifi:44,libopen:44,libosmesa6:44,libportmidi:44,librari:[29,44,47],libsdl1:44,libsdl2:44,libsdl:44,libsm:44,libsmpeg:44,libswscal:44,libtiff:44,libwebkitgtk:44,libwildmidi:44,like:[12,29,37,41,43,44,50],likelihood:[7,11],line:[3,41,51,52],linear:33,linearboxtoboxmap:33,linearli:33,list:[0,3,4,26,27,29,30,32,33,37,38,51,52],load:[0,3,40,42,51,52],load_memory_from_fil:[3,51],load_memory_from_file_path:52,local:[3,43,44,51],locat:[24,27,32,50],log:[0,3,5,6,10,12,51],log_to_screen:[3,51],logger:[0,3,51],look:[39,44],loop:41,loss:[1,2,3,6,7,10,11,15,16,17,24,25,26,30,38,43,51],lot:[30,40,46,50],low:[8,11,13,32,33,37],low_i:37,low_x:37,lower:[0,34,41],lowest:[32,33,37],lstm:43,lumin:32,lvert:[6,15,25],lvl:52,mai:[0,26,45,52],main:[3,38,41,43,45,51,52],mainli:42,major:30,make:[0,3,26,29,38,40,44,46,50,51],manag:[3,26,42,44,51],mandatori:[37,39,43],mani:[3,18,45,46],manner:[11,19,20,23,32,41],manual:44,map:[3,26,29,31,32,33,37,38,51],mark:27,markdown:51,mask:[14,33],masked_target_space_high:33,masked_target_space_low:33,master:[3,41,44,51],match:[2,22,26,37],mathbb:[5,6],mathcal:13,mathop:5,max:[5,6,13,15,20,25,32],max_a:[14,17,22,23],max_action_valu:27,max_episodes_to_achieve_reward:0,max_fps_for_human_control:0,max_kl_diverg:6,max_over_num_fram:29,max_simultaneous_selected_act:37,max_siz:34,max_spe:29,maxim:[4,16],maximum:[0,12,15,17,22,23,27,29,30,32,34,50],mean:[0,2,7,8,9,10,11,12,13,21,26,30,32,33,37,40,50],meant:43,measur:[3,4,26,29,32,37,39,50,51],measurements_nam:37,mechan:[31,42,46,52],memor:50,memori:[3,25,27,32,38,41,42,44,49,50,51],memory_backend:44,memorygranular:34,memoryparamet:[3,38],merg:[26,29],mesa:44,method:[0,5,7,11,13,20,26,32,34],metric:[0,37,40],mid:6,middlewar:[22,26,43],middleware_paramet:26,middlewareparamet:26,midpoint:24,might:[3,10,29,38,43,51],min:[6,7,13,15,23,25],min_:[12,13],min_reward_threshold:0,mind:52,minim:[2,4,15],minimap_s:29,minimum:[0,7,13,32],mitig:50,mix:[3,7,11,22,23,50],mixedmontecarloalgorithmparamet:19,mixer1:44,mixtur:[19,26],mjkei:44,mjpro150:44,mjpro150_linux:44,mkdir:44,mmc:[19,50],mmc_agent:19,mode:[23,26,28,35,36,41,42,44,52],model:[0,19,21,26,49,51,52],modif:50,modifi:6,modul:[3,38,41,42,51],modular:[38,41,43,49],monitor:42,mont:[3,23],monte_carlo_mixing_r:[19,23],more:[3,8,13,20,26,32,38,40,41,43,44,49,51,52],moreov:40,most:[3,10,22,26,27,30,43,46,50,51,52],mostli:[32,41],motiv:41,move:[6,7,11,32,40,46],mp4:0,mse:[2,6,16,17,24],much:[7,11,41,50],mujoco:[29,33,39,44,47],mujoco_kei:44,mujoco_pi:44,multi:[11,26,37,43],multiarrai:[3,51],multidimension:37,multipl:[4,7,11,20,26,29,30,32,33,34,37,40,41,46,49,52],multipli:[4,10,26,32],multiselect:33,multitask:[29,47],must:[26,32,37,46],mxnet:52,n_step:[22,25,27,34],n_step_discounted_reward:27,n_step_q_ag:20,nabla:[6,8,13],nabla_:[8,12,13],nabla_a:[8,13],naf:50,naf_ag:21,nafalgorithmparamet:21,name:[3,26,27,29,32,37,38,44,51,52],namespac:36,nasm:44,nativ:[0,29,39,47],native_rend:0,navig:3,ndarrai:[3,26,27,29,30,32,33,37,39,51],nearest:22,neat:40,nec:[0,50],nec_ag:22,necalgorithmparamet:22,necessari:[3,22,26,51],necessarili:32,need:[0,3,6,25,26,29,30,37,38,41,46,50,51,52],neg:[4,32],neighbor:22,neon_compon:38,nervanasystem:44,network:[0,3,26,30,38,41,46,49,50,51,52],network_input_tupl:26,network_nam:[3,51],network_param:30,network_paramet:26,network_wrapp:[3,26,51],networkparamet:[3,26,30,38],networkwrapp:[3,51],neural:[3,19,26,43,46],never:26,new_value_shift_coeffici:[22,34],new_weight:26,newli:[23,39,50],next:[0,3,8,13,16,17,21,23,24,27,29,41,51,52],next_stat:27,nfs_data_stor:28,nfsdatastoreparamet:28,nice:52,no_accumul:26,node:[26,43],nois:[8,9,13,21,30,41,50],noise_as_percentage_from_action_spac:30,noise_schedul:30,noisi:[10,25,30],non_episod:34,none:[0,3,7,8,11,13,26,27,29,30,32,33,37,39,51],norm:26,norm_unclipped_grad:26,norm_unclippsed_grad:26,normal:[3,4,10,30,31,32,37],note:[22,26,30,51],notebook:38,notic:[26,50],notori:[40,46,50],now:[7,39],nstepqalgorithmparamet:20,nth:25,num_act:[22,34,37],num_bins_per_dimens:33,num_class:34,num_consecutive_playing_step:[3,8,13,51],num_consecutive_training_step:[3,51],num_gpu:0,num_neighbor:34,num_predicted_steps_ahead:4,num_speedup_step:29,num_steps_between_copying_online_weights_to_target:[8,12,13,20],num_steps_between_gradient_upd:[5,6,10,20],num_task:0,num_training_task:0,num_transitions_to_start_replai:6,num_work:0,number:[0,2,4,5,6,8,10,12,13,14,15,20,22,24,25,26,27,29,30,32,33,34,40,47,51,52],number_of_knn:22,numpi:[3,26,27,29,30,32,33,37,39,51],nvidia:44,object:[0,3,25,26,29,30,32,34,41,51],observ:[0,3,4,11,26,27,29,31,39,41,51],observation_reduction_by_sub_parts_name_filt:32,observation_space_s:26,observation_space_typ:29,observation_stat:32,observation_typ:29,observationspac:37,observationspacetyp:29,observationtyp:29,off:[3,6,12,42,50,51],offer:[29,47],often:[40,41,43],old:[7,11,26,50],old_weight:26,onc:[0,7,10,11,14,15,16,17,19,20,23,24,25,26,37,52],one:[0,3,6,18,22,23,26,27,29,30,31,34,37,39,40,43,50,51],ones:[39,50],onli:[0,3,4,5,6,7,10,11,14,15,17,18,20,22,24,25,26,27,29,30,32,33,39,41,50,51,52],onlin:[8,12,13,14,15,16,17,19,20,21,22,23,24,25,26,41,43],online_network:26,onnx:[0,26],onto:31,open:[0,29,47],openai:[44,47],opencv:44,oper:[23,26,32],optim:[3,4,6,26,45,50],optimization_epoch:7,optimizer_epsilon:26,optimizer_typ:26,option:[6,10,26,29,33,37,38,40,42,43,52],orchestr:[42,44,49],order:[0,3,5,6,7,8,10,11,12,13,16,17,18,20,21,22,23,24,26,27,31,32,33,40,41,43,46,50,51],org:[20,34],origin:[20,32,33,46],ornstein:[8,9,30],other:[0,2,10,18,23,26,29,31,32,34,40,41,50],otherwis:[11,14,26,29,30,37],ou_process:30,our:7,out:[2,16,17,30,31,33,40,44,49,50,52],outcom:[30,41],output:[0,4,6,8,13,14,15,21,22,26,30,31,32,37,38,43],output_0_0:26,output_observation_spac:32,outputfilt:41,outsid:[4,30],over:[3,7,10,11,20,22,25,26,27,30,32,33,40,41,50,51],overestim:[8,13,50],overfit:11,overhead:0,overlai:40,overrid:[3,51],override_existing_kei:34,overriden:38,overview:41,overwhelm:41,overwritten:26,own:[26,38],p_j:[15,25],page:[3,46],pair:[0,37],pal:[23,50],pal_ag:23,pal_alpha:23,palalgorithmparamet:23,paper:[5,10,12,15,20,22,24,29,34,46],parallel:[6,26,40,43],parallel_predict:26,param:[3,26,27,28,29,30,35,36,38,39,51],paramet:[2,3,4,5,6,7,8,10,11,12,13,15,19,20,22,23,24,25,26,27,28,29,30,32,33,34,35,36,37,38,39,46,49,51,52],parameter_nois:30,parameters_server_host:0,parent:[3,26,51],parent_path_suffix:[3,26,51],parmet:3,pars:41,part:[0,3,14,26,27,30,32,33,42,43,46,50,51],part_nam:32,partial:33,partialdiscreteactionspacemap:33,particular:4,particularli:[29,30,37,46,50],pass:[0,4,8,9,13,21,22,26,29,30,31,39,40,41,43,52],patamet:22,patchelf:44,patchelf_0:44,path:[0,3,26,38,39,44,51,52],pattern:41,pdf:34,penal:[7,8,11,13],penalti:11,pendulum_hac:39,pendulum_with_go:39,pendulumwithgo:39,per:[0,3,4,37,38,41,51],percentag:30,percentil:30,perceptron:43,perform:[0,3,6,26,27,32,34,39,40,41,50,51],period:[43,52],persist:3,persistent_advantage_learn:23,perspect:15,phase:[3,6,7,8,9,11,12,13,26,29,30,41,51],phi:[15,25],physic:[29,47],pi_:[6,7,12],pick:[12,29],pickl:52,pickledreplaybuff:52,pip3:44,pip:44,pixel:29,place:[33,40,41],placehold:[26,30],plai:[0,3,10,14,16,17,20,30,38,40,51],plain:43,planarmap:29,planarmapsobservationspac:32,platform:[29,47],pleas:[20,46],plu:26,plugin:44,point:[32,37,41,42],polici:[1,3,4,5,6,9,12,14,20,21,22,28,38,41,42,43,44,45,49,50,51],policy_gradient_rescal:[5,7,10,11],policy_gradients_ag:10,policygradientalgorithmparamet:10,policygradientrescal:[5,7,10,11],policyoptimizationag:38,popul:41,popular:[29,47],port:0,posit:[4,32],possibl:[2,3,4,22,30,33,37,40,43,49,50,51,52],post:[31,49],post_training_command:[3,51],power:[29,47],ppo:[7,11,50],ppo_ag:11,ppoalgorithmparamet:11,pre:[8,13,30,31],predefin:[14,23,30,52],predict:[1,2,3,5,6,7,8,11,12,13,14,15,16,17,23,24,25,26,30,43,50,51],prediction_typ:[3,51],predictiontyp:[3,51],prefect:50,prefer:26,prefix:[3,51],prep:44,prepar:[3,51],prepare_batch_for_infer:[3,51],present:[18,22,26,29,32,50],preset:[0,5,38,39,41,42,44,52],press:[40,52],prevent:[8,11,13,41],previou:32,previous:[11,26],print:[0,3,52],print_networks_summari:0,priorit:[25,34],prioriti:[25,34],privat:37,probabilit:[5,6],probabl:[3,5,6,10,14,15,25,27,30,38,50,51],problem:50,procedur:6,process:[0,3,8,9,26,30,31,32,33,38,40,41,43,46,49,51],produc:26,progress:26,project:[15,25],propag:7,propagate_updates_to_dnd:22,properti:[3,26,27,29,34,38,39,44,51],proport:34,provid:[26,42],proxi:41,proxim:3,pub:[35,36,44],publish:46,purpos:[0,3,10],pursuit:2,push:[3,51],pybullet:[29,47],pygam:[0,44],pytest:44,python3:44,python:[29,34,38,44,47,49],q_i:12,qr_dqn_agent:24,quad:6,qualiti:29,quantil:[3,50],quantileregressiondqnalgorithmparamet:24,queri:[22,26,41,50],question:50,quit:40,r_i:[5,20],r_t:[4,6,7,25],rainbow:[3,38,50],rainbow_ag:38,rainbow_dqn_ag:25,rainbowag:38,rainbowagentparamet:38,rainbowalgorithmparamet:38,rainbowdqnalgorithmparamet:25,rainbowexplorationparamet:38,rainbowmemoryparamet:38,rainbownetworkparamet:38,rais:[3,27,51],ramp:[38,41],random:[0,20,29,30,37,41,46],random_initialization_step:29,randomli:[27,41],rang:[4,7,8,11,13,15,25,29,32,33,37,50],rare:22,rate:[0,6,19,22,26,29,43],rate_for_copying_weights_to_target:[6,8,12,13],rather:[4,12,40],ratio:[6,7,11,19,32],ratio_of_replai:6,raw:[29,47],reach:[0,11,37],read:[0,28],read_csv_tri:0,readabl:41,readm:44,real:3,reason:[32,46],rebuild_on_every_upd:34,receiv:[26,27],recent:[3,25,26,50,51],recommend:39,redi:[35,36,44],redispubsub:44,redispubsubmemorybackendparamet:35,reduc:[1,2,10,11,23,26,32,41,50],reduct:32,reduction_method:32,reductionmethod:32,redund:32,refer:[2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,42,44],referenc:3,regard:[3,51],region:[6,50],regist:[3,51],register_sign:[3,51],registri:44,regress:[2,3,50],regula:[6,7,11],regular:[5,7,10,11,20,22,26,30,33,34,50],regularli:26,reinforc:[3,5,8,9,10,12,15,16,17,18,20,23,24,25,29,30,40,41,43,45,46,47,50],relat:[26,44],relationship:50,releas:[0,49,50],relev:[3,14,30,32,51],remov:[0,32],render:[0,3,29,39],reorder:32,repeat:[29,41],replac:[30,32,34,44],replace_mse_with_huber_loss:26,replai:[1,2,3,6,8,12,13,14,15,16,17,20,22,23,24,25,34,41,50,51,52],replay_buff:52,replicated_devic:26,repo:39,repositori:49,repres:[0,7,11,15,25,26,27,29,30,33,37,52],represent:43,reproduc:[41,46],request:[3,26,51],requir:[3,26,28,30,32,40,43,44,50,51],requires_action_valu:30,rescal:[4,5,7,10,11,26,31,32],rescale_factor:32,research:[29,46,47],reset:[3,22,26,29,30,39,51],reset_accumulated_gradi:26,reset_evaluation_st:[3,51],reset_gradi:26,reset_internal_st:[3,29,51],resourc:[42,44],respect:[8,13,27,29],respons:[3,27,29,41,51],rest:[26,27,33,44],restart:39,restor:[0,3,51],restore_checkpoint:[3,51],result:[3,4,13,15,16,17,18,24,25,26,32,33,46,50,51,52],ret:6,retrac:6,retri:0,retriev:[22,34],return_additional_data:34,reus:41,reusabl:43,reward:[0,1,2,3,4,8,10,13,19,20,25,26,27,29,31,37,39,40,41,50,51],reward_test_level:0,reward_typ:37,rgb:[29,32,37],rho:[6,8,13],rho_t:6,right:[2,3,6,12,30,33,40,50,51],rl_coach:[0,1,2,3,4,5,6,7,8,10,11,12,13,15,17,19,20,21,22,23,24,25,26,27,28,29,30,32,33,34,35,36,37,39,44,51,52],rms_prop_optimizer_decai:26,rmsprop:26,roboschool:[29,47],robot:[29,37,47,49],roboti:44,robust:51,rollout:[28,35,36,42,44,52],root:[40,44],rule:[8,13,14],run:[0,3,4,8,10,11,12,13,14,16,17,22,23,26,29,30,32,51,52],run_off_policy_evalu:[3,51],run_pre_network_filter_for_infer:[3,51],runphas:[3,51],runtim:44,rvert:[15,25],rvert_2:6,s3_bucket_nam:44,s3_creds_fil:44,s3_data_stor:28,s3_end_point:44,s3datastoreparamet:28,s_t:[4,5,6,8,12,13,14,15,16,17,19,20,21,23,25],sac:50,sai:50,same:[3,4,7,10,13,19,20,23,26,29,33,34,40,43,46,50,51],sampl:[1,2,3,5,6,8,10,11,12,13,14,15,16,17,19,20,23,24,25,26,30,34,37,41,44,51],sample_with_info:37,satur:[8,13],save:[0,3,25,26,30,44,51,52],save_checkpoint:[3,51],saver:[3,26,51],savercollect:[3,26,51],scale:[4,10,26,32,40,44,49,52],scale_down_gradients_by_number_of_workers_for_sync_train:26,scale_measurements_target:4,scaler:26,schedul:[7,30,34,41,42,44,52],scheme:[5,30,41,50],schulman:11,sci:44,scienc:46,scipi:[32,44],scope:26,scratch:50,scratchpad:0,screen:[3,29,39,52],screen_siz:29,script:41,second:[0,26,40,50,52],section:[44,45,47],see:[3,29,32,44,46,47,50,51,52],seed:[0,29,46],seen:[4,22,23,29,32,41,46,50],segment:[29,37],select:[5,14,22,26,27,30,32,33,37,39,40,41,49,52],self:[3,26,38,39,51],send:[39,43],separ:[0,3,18,32,33,43,45,50],separate_actions_for_throttle_and_brak:29,seper:10,sequenti:[4,27,34],serv:[7,10,43],server:0,server_height:29,server_width:29,sess:[3,26,51],session:[3,26,51],set:[0,2,3,4,5,6,7,8,11,13,15,16,17,19,22,23,25,26,27,29,30,32,33,37,38,42,46,47,49,50,51,52],set_environment_paramet:[3,51],set_goal:29,set_incoming_direct:[3,51],set_is_train:26,set_sess:[3,51],set_variable_valu:26,set_weight:26,setup:[3,44,51],setup_logg:[3,51],setuptool:44,sever:[0,3,7,10,11,14,26,29,30,32,38,39,40,41,43,47,50,51,52],shape:[26,32,37],share:[0,3,26,34,43,51],shared_memory_scratchpad:0,shared_optim:26,shift:[33,41],shine:40,should:[0,3,4,7,11,14,20,23,26,27,29,32,34,37,38,39,42,51,52],should_dump:0,shouldn:14,show:46,shown:46,shuffl:[3,27,51],side:[3,51],sigma:[13,30],signal:[3,41,51],signal_nam:[3,51],significantli:18,sim:[6,12],similar:[7,18,20,27,29,33,50],simpl:[10,34,38,39,43,49,50,52],simplest:50,simplif:50,simplifi:[7,40,43],simul:[29,39,47,52],simultan:7,sinc:[3,7,8,10,13,20,22,23,25,26,30,32,51],singl:[3,4,5,6,7,11,14,18,19,20,26,27,29,30,33,37,40,41,43,51],size:[26,27,30,32,33,34,37],skill:50,skip:[29,41],slave:[3,51],slice:27,slow:[26,50,52],slower:[0,13,18,26],slowli:[8,13],small:[7,13,22,34],smaller:30,smooth:[40,50],soft:[3,8,11,13,21,50],soft_actor_critic_ag:12,softactorcriticalgorithmparamet:12,softmax:[26,30],softmax_temperatur:26,softwar:44,solut:50,solv:[32,39,47,49],some:[0,3,11,26,27,30,32,38,39,40,43,46,50,51,52],sort:24,sourc:[0,1,2,3,4,5,6,7,8,10,11,12,13,15,17,19,20,21,22,23,24,25,26,27,28,29,30,32,33,34,35,36,37,39,44,47,51],space:[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,29,30,31,32,33,34,41,49,51],spacesdefinit:[3,26,51],spatial:50,spawn:[42,44],special:18,specif:[0,3,14,18,22,26,27,38,41,52],specifi:[0,26,29,30,32,39,42,52],speed:[26,32,50],speedup:52,spread:[32,33],squar:32,squeeze_list:26,squeeze_output:26,src:44,stabil:[6,20,26,50],stabl:[43,50],stack:[3,31,32,37,51],stack_siz:[26,32],stacking_axi:32,stage:43,stai:46,standard:[7,10,11,14,30,32,40],starcraft2_environ:29,starcraft2environ:29,starcraft:[37,47],starcraftobservationtyp:29,start:[3,6,8,11,12,13,18,23,27,32,33,39,44,51],state:[1,2,3,4,5,6,7,8,9,10,11,12,13,14,16,17,18,19,20,21,22,23,24,25,26,27,29,32,34,37,38,39,41,43,45,50,51],state_key_with_the_class_index:[2,34],state_spac:29,state_valu:27,statist:[3,10,32,49,51],std:12,stdev:30,steep:30,step:[0,3,4,5,6,7,8,10,11,12,13,14,15,16,17,19,21,22,23,24,25,26,27,29,30,32,38,39,40,41,50,51,52],stepmethod:[8,12,13,20],stochast:[12,41,50],stop:[0,29],store:[0,3,22,25,27,29,32,34,40,41,42,44,49,51,52],store_transitions_only_when_episodes_are_termin:25,str:[0,2,3,4,20,26,27,29,30,32,33,37,51],strategi:[29,47],stream:[18,42],strict:46,string:[0,26,29],structur:[0,3,27,34,38,41,51],stuff:26,style:30,sub:[33,34,35,36,37,38,41,44,52],sub_spac:37,subset:[40,46,50],subtract:23,succeed:29,success:[0,29,50],suffer:40,suffici:27,suffix:[3,26,51],suggest:38,suit:[0,47],suitabl:[42,52],sum:[4,7,10,19,26,27],sum_:[5,12,15,19,20,22,25],summari:[0,3,51],supervis:50,suppli:[3,51],support:[0,3,26,29,30,40,43,44,45,47,49,52],sure:[0,3,44,46,51],surrog:7,swig:44,swingup:29,symbol:26,sync:[3,26,41,42,51],synchron:[0,26,41,43],t_max:[10,20],tag:44,take:[0,3,10,11,18,22,23,26,29,30,31,39,40,41,51],taken:[1,2,4,5,6,7,8,11,12,13,15,18,22,23,24,25,26,27,29,30],tanh:[8,13],tar:44,target:[0,1,2,3,4,5,6,7,8,11,12,13,14,15,16,17,19,20,21,22,23,24,25,26,29,32,33,37,38,41,43,51],target_act:33,target_kl_diverg:11,target_network:26,target_success_r:29,targets_horizon:20,task:[0,1,2,29,32,38,40,47],task_index:0,tau:12,td3:50,td3_agent:13,td3algorithmparamet:13,techniqu:[7,11,49,50],technolog:42,teh:26,temperatur:[26,30],temperature_schedul:30,tensor:[3,26,51],tensorboard:0,tensorflow:[0,3,26,51,52],tensorflow_support:26,term:[6,7,11],termin:[3,8,13,27,41,51],test:[0,3,5,6,8,9,10,11,12,13,26,38,46,49,52],test_using_a_trace_test:0,text:6,textrm:41,than:[0,3,11,13,26,30,40,43,51],thei:[3,22,23,26,30,40,41,42,50,51,52],them:[4,5,10,20,26,27,29,32,37,39,40,43],therefor:[0,8,13,26,31,50],theta:[6,7,8,12,13,15,25,30],theta_:[6,7],thi:[0,3,4,5,6,7,8,10,11,13,14,18,20,22,25,26,27,29,30,31,32,33,34,35,37,38,39,40,41,42,43,44,46,50,51,52],thing:40,those:[0,3,8,13,14,16,17,18,22,27,30,33,41,43,45,50,51],thousand:[11,14,15,16,17,19,23,24,25],thread:26,three:[3,42,43,44,45],threshold:[11,22,32],through:[0,3,4,8,9,10,11,13,14,22,23,26,38,39,41,43,51],tild:[8,12,13],time:[0,4,23,26,30,33,34,40,43,50],time_limit:39,timestep:[4,10],timid:44,tmp:0,togeth:[3,20,27,41,51],toggl:40,too:11,tool:[40,44,50],top:[26,29,31,32,34,39,40,50],torqu:29,total:[0,3,10,11,19,22,23,27,34,38,40,50,51],total_loss:26,total_return:27,trace:0,trace_max_env_step:0,trace_test_level:0,tradeoff:30,train:[0,3,18,26,30,35,36,38,39,40,41,42,43,46,49,50,51],train_and_sync_network:26,train_on_batch:26,train_to_eval_ratio:34,trainer:[28,42],transfer:[29,35,47],transit:[1,2,3,4,5,6,8,10,11,12,13,15,16,17,20,22,23,24,25,34,38,41,42,51],transition_idx:27,tri:50,trick:46,tricki:40,trigger:[29,44],truncat:6,truncated_norm:30,trust:[6,50],ttf2:44,tune:30,tupl:[1,2,3,8,13,26,27,29,34,37,38],turn:[2,50],tutori:[38,39],tweak:[3,51],twin:3,two:[8,10,13,20,26,29,30,31,32,33,37,39,42,43,50,52],txt:44,type:[0,3,10,18,26,29,32,37,38,41,43,49,50,51,52],typic:[7,11,26,50,52],ubuntu16:44,uhlenbeck:[8,9,30],uint8:32,unbound:37,uncertain:30,uncertainti:30,unchang:11,unclip:[3,38,51],uncorrel:20,undeploi:42,under:[3,26,38,52],underbrac:5,understand:52,unifi:7,uniformli:[29,30,33,37],union:[3,27,29,30,33,37,51],uniqu:26,unit:40,unlik:11,unmask:33,unnecessari:0,unshar:[3,51],unsign:32,unspecifi:26,unstabl:[40,46],until:[0,6,10,11,22,25,30],unus:26,unzip:44,updat:[3,6,7,8,10,11,12,13,14,15,16,17,18,20,21,22,23,24,25,26,27,30,38,39,40,41,43,44,50,51],update_discounted_reward:27,update_filter_internal_st:[3,51],update_log:[3,51],update_online_network:26,update_step_in_episode_log:[3,51],update_target_network:26,update_transition_before_adding_to_replay_buff:[3,51],upgrad:44,upon:[3,5,38,51],upper:[6,30],usag:[33,49],use:[0,1,2,3,4,5,6,8,9,10,12,13,14,16,17,21,26,27,28,29,30,32,33,34,37,38,39,41,43,44,49,50,51,52],use_accumulated_reward_as_measur:4,use_cpu:0,use_deterministic_for_evalu:12,use_full_action_spac:29,use_kl_regular:[7,11],use_non_zero_discount_for_terminal_st:[8,13],use_separate_networks_per_head:26,use_target_network_for_evalu:[8,13],use_trust_region_optim:6,used:[0,2,3,5,6,7,8,10,11,12,13,14,15,19,20,21,22,23,24,26,29,30,32,33,34,35,36,38,39,41,42,43,46,51,52],useful:[0,3,4,25,26,30,32,37,46,50,51,52],user:[26,29,30,40,41,44],userguid:44,uses:[0,1,7,11,18,27,28,30,36,41,42,44,46,50,52],using:[0,3,5,6,7,8,10,11,12,13,16,17,19,20,21,22,23,25,26,28,29,30,32,35,38,39,40,42,47,50,51,52],usr:44,usual:[32,41],util:[3,40,51],v_max:15,v_min:15,val:[3,37,51],valid:[0,37],valu:[0,2,3,4,5,6,7,8,11,12,13,14,15,16,17,18,20,21,22,23,25,26,27,29,30,32,33,34,37,38,41,43,44,45,50,51],valuabl:40,value_targets_mix_fract:[7,11],valueexcept:[3,51],valueoptimizationag:38,van:4,vari:43,variabl:[26,29,44],variable_scop:26,varianc:[10,30,40,50],variant:[30,34,50],variou:[3,27,34,49],vector:[3,4,8,9,11,13,14,26,29,32,37,39,43,50,51],vectorobservationspac:32,verbos:29,veri:[0,7,8,10,13,18,22,40,50,52],version:[7,11,27],versu:26,vert:12,vertic:26,via:[2,14],video:[0,3,29],video_dump_method:0,view:40,viewabl:[3,51],visit:46,visual:[0,3,29,47,49],visualization_paramet:29,visualizationparamet:[3,29],vizdoom:[44,47],vote:30,wai:[3,7,11,30,33,39,41,43,49,50,51,52],wait:[5,26,42],walk:39,want:[3,4,25,26,32,33,34,51],warn:[30,32,33],wasn:27,weather_id:29,websit:[29,49],weight:[4,5,6,7,8,11,12,13,14,15,16,17,19,20,21,22,23,24,25,26,30,41,43,50],well:[22,26,30,37,50],went:11,were:[4,15,16,17,18,22,24,25,26,27,33,46],west:44,wget:44,what:[11,50],whatev:[3,51],when:[0,3,4,5,6,7,8,9,10,11,12,13,22,26,27,28,29,30,32,35,36,38,39,40,51,52],whenev:42,where:[2,3,4,5,6,7,11,14,15,18,20,22,23,25,26,27,29,30,32,33,37,40,50,51],whether:30,which:[0,1,2,3,5,6,7,8,10,11,12,13,14,18,20,21,22,23,24,26,27,28,29,30,32,34,35,36,37,38,39,40,41,42,43,45,46,47,49,50,51,52],who:41,why:[40,41],window:[32,33],wise:32,within:[0,7,11,21,30,37,40],without:[5,11,33,34,40,50,52],won:[4,26],wont:26,work:[3,20,26,30,32,33,40,41,50,51,52],workaround:0,workdir:44,worker:[0,20,26,28,32,34,35,36,40,42,43,44,50,52],worker_devic:26,worker_host:0,wors:50,would:[26,44,50],wrap:[29,32,41,47],wrapper:[3,26,27,29,37,43,51],write:[0,3,51],written:[3,25,28,51],www:44,xdist:44,y_t:[8,12,13,14,16,17,19,21,22,23],year:50,yet:[18,39],you:[4,32,34,38,39,44,49,52],your:[38,39,44,52],yuv:32,z_i:[15,25],z_j:[15,25],zero:[2,13,16,17],zip:44,zlib1g:44},titles:["Additional Parameters","Behavioral Cloning","Conditional Imitation Learning","Agents","Direct Future Prediction","Actor-Critic","ACER","Clipped Proximal Policy Optimization","Deep Deterministic Policy Gradient","Hierarchical Actor Critic","Policy Gradient","Proximal Policy Optimization","Soft Actor-Critic","Twin Delayed Deep Deterministic Policy Gradient","Bootstrapped DQN","Categorical DQN","Double DQN","Deep Q Networks","Dueling DQN","Mixed Monte Carlo","N-Step Q Learning","Normalized Advantage Functions","Neural Episodic Control","Persistent Advantage Learning","Quantile Regression DQN","Rainbow","Architectures","Core Types","Data Stores","Environments","Exploration Policies","Filters","Input Filters","Output Filters","Memories","Memory Backends","Orchestrators","Spaces","Adding a New Agent","Adding a New Environment","Coach Dashboard","Control Flow","Distributed Coach - Horizontal Scale-Out","Network Design","Usage - Distributed Coach","Algorithms","Benchmarks","Environments","Features","Reinforcement Learning Coach","Selecting an Algorithm","test","Usage"],titleterms:{"final":22,"function":21,"new":[38,39],"switch":52,Adding:[38,39],Using:39,acer:6,across:50,action:[4,5,6,7,8,9,10,11,12,13,14,21,22,33,37,50],actioninfo:27,actor:[5,9,12],addit:[0,52],additivenois:30,advantag:[21,23],agent:[3,38,41,52],algorithm:[1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,19,20,21,22,23,24,25,45,50,52],api:39,architectur:26,attentionactionspac:37,backend:35,balancedexperiencereplai:34,batch:27,behavior:1,benchmark:46,between:52,blizzard:29,boltzmann:30,bootstrap:[14,30],boxactionspac:37,build:44,can:50,carla:29,carlo:19,categor:[15,30],choos:[4,5,6,7,8,9,10,11,12,13,14,21,22],clip:7,clone:[1,44],coach:[39,40,42,44,49],collect:50,compar:40,compoundactionspac:37,condit:2,config:44,contain:44,continu:[7,11,12,50],continuousentropi:30,control:[22,29,41],copi:43,core:27,creat:44,critic:[5,9,12],dashboard:40,data:28,deep:[8,13,17,52],deepmind:29,delai:13,demonstr:50,descript:[1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25],design:43,determinist:[8,13],direct:4,discret:[5,6,10,50],discreteactionspac:37,distribut:[42,44],distributedtaskparamet:0,doe:50,doubl:16,dqn:[14,15,16,18,24],duel:18,dump:52,egreedi:30,environ:[29,39,47,50,52],envrespons:27,episod:[22,27,34],episodicexperiencereplai:34,episodichindsightexperiencereplai:34,episodichrlhindsightexperiencereplai:34,evalu:52,experiencereplai:34,explor:30,explorationpolici:30,featur:48,file:44,filter:[31,32,33],flag:52,flow:41,framework:52,from:50,futur:4,gener:18,gif:52,goal:37,gradient:[8,10,13],graph:41,greedi:30,gym:[29,39],have:50,hierarch:9,horizont:42,human:[50,52],imag:44,imageobservationspac:37,imit:[2,52],implement:44,input:32,interfac:44,keep:43,kubernet:36,learn:[2,20,23,49,52],level:41,manag:41,memori:[34,35],mix:19,mont:19,more:50,multi:52,multipl:50,multiselectactionspac:37,network:[1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,43],networkwrapp:26,neural:22,nfsdatastor:28,node:[50,52],non:34,normal:21,observ:[32,37],observationclippingfilt:32,observationcropfilt:32,observationmoveaxisfilt:32,observationnormalizationfilt:32,observationreductionbysubpartsnamefilt:32,observationrescalesizebyfactorfilt:32,observationrescaletosizefilt:32,observationrgbtoyfilt:32,observationsqueezefilt:32,observationstackingfilt:32,observationtouint8filt:32,openai:[29,39],optim:[7,11],orchestr:36,ouprocess:30,out:42,output:33,pain:50,parallel:50,paramet:0,parameternois:30,persist:23,plai:52,planarmapsobservationspac:37,polici:[7,8,10,11,13,30],predict:4,prerequisit:44,presetvalidationparamet:0,prioritizedexperiencereplai:34,process:50,proxim:[7,11],push:44,qdnd:34,quantil:24,rainbow:25,redispubsubbackend:35,regress:24,reinforc:49,render:52,repositori:44,reward:32,rewardclippingfilt:32,rewardnormalizationfilt:32,rewardrescalefilt:32,run:[40,44],s3datastor:28,sampl:50,scale:42,select:50,signal:40,simul:50,singl:52,singleepisodebuff:34,soft:12,solv:50,space:[37,50],starcraft:29,statist:40,step:20,store:[14,28],structur:[1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25],suit:29,support:42,sync:43,synchron:42,task:50,taskparamet:0,test:51,thread:52,through:52,track:40,train:[1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,19,20,21,22,23,24,25,52],transit:[14,27],transitioncollect:34,truncatednorm:30,twin:13,type:[27,42],ucb:30,usag:[44,52],vectorobservationspac:37,visual:[40,52],visualizationparamet:0,vizdoom:29,you:50,your:50}}) |