1
0
mirror of https://github.com/gryf/coach.git synced 2025-12-17 11:10:20 +01:00
Files
coach/docs/searchindex.js
shadiendrawis 2b5d1dabe6 ACER algorithm (#184)
* initial ACER commit

* Code cleanup + several fixes

* Q-retrace bug fix + small clean-ups

* added documentation for acer

* ACER benchmarks

* update benchmarks table

* Add nightly running of golden and trace tests. (#202)

Resolves #200

* comment out nightly trace tests until values reset.

* remove redundant observe ignore (#168)

* ensure nightly test env containers exist. (#205)

Also bump integration test timeout

* wxPython removal (#207)

Replacing wxPython with Python's Tkinter.
Also removing the option to choose multiple files as it is unused and causes errors, and fixing the load file/directory spinner.

* Create CONTRIBUTING.md (#210)

* Create CONTRIBUTING.md.  Resolves #188

* run nightly golden tests sequentially. (#217)

Should reduce resource requirements and potential CPU contention but increases
overall execution time.

* tests: added new setup configuration + test args (#211)

- added utils for future tests and conftest
- added test args

* new docs build

* golden test update
2019-02-20 23:52:34 +02:00

1 line
57 KiB
JavaScript

Search.setIndex({docnames:["components/additional_parameters","components/agents/imitation/bc","components/agents/imitation/cil","components/agents/index","components/agents/other/dfp","components/agents/policy_optimization/ac","components/agents/policy_optimization/acer","components/agents/policy_optimization/cppo","components/agents/policy_optimization/ddpg","components/agents/policy_optimization/hac","components/agents/policy_optimization/pg","components/agents/policy_optimization/ppo","components/agents/value_optimization/bs_dqn","components/agents/value_optimization/categorical_dqn","components/agents/value_optimization/double_dqn","components/agents/value_optimization/dqn","components/agents/value_optimization/dueling_dqn","components/agents/value_optimization/mmc","components/agents/value_optimization/n_step","components/agents/value_optimization/naf","components/agents/value_optimization/nec","components/agents/value_optimization/pal","components/agents/value_optimization/qr_dqn","components/agents/value_optimization/rainbow","components/architectures/index","components/core_types","components/data_stores/index","components/environments/index","components/exploration_policies/index","components/filters/index","components/filters/input_filters","components/filters/output_filters","components/memories/index","components/memory_backends/index","components/orchestrators/index","components/spaces","contributing/add_agent","contributing/add_env","dashboard","design/control_flow","design/horizontal_scaling","design/network","dist_usage","features/algorithms","features/benchmarks","features/environments","features/index","index","selecting_an_algorithm","test","usage"],envversion:{"sphinx.domains.c":1,"sphinx.domains.changeset":1,"sphinx.domains.cpp":1,"sphinx.domains.javascript":1,"sphinx.domains.math":2,"sphinx.domains.python":1,"sphinx.domains.rst":1,"sphinx.domains.std":1,"sphinx.ext.todo":1,"sphinx.ext.viewcode":1,sphinx:55},filenames:["components/additional_parameters.rst","components/agents/imitation/bc.rst","components/agents/imitation/cil.rst","components/agents/index.rst","components/agents/other/dfp.rst","components/agents/policy_optimization/ac.rst","components/agents/policy_optimization/acer.rst","components/agents/policy_optimization/cppo.rst","components/agents/policy_optimization/ddpg.rst","components/agents/policy_optimization/hac.rst","components/agents/policy_optimization/pg.rst","components/agents/policy_optimization/ppo.rst","components/agents/value_optimization/bs_dqn.rst","components/agents/value_optimization/categorical_dqn.rst","components/agents/value_optimization/double_dqn.rst","components/agents/value_optimization/dqn.rst","components/agents/value_optimization/dueling_dqn.rst","components/agents/value_optimization/mmc.rst","components/agents/value_optimization/n_step.rst","components/agents/value_optimization/naf.rst","components/agents/value_optimization/nec.rst","components/agents/value_optimization/pal.rst","components/agents/value_optimization/qr_dqn.rst","components/agents/value_optimization/rainbow.rst","components/architectures/index.rst","components/core_types.rst","components/data_stores/index.rst","components/environments/index.rst","components/exploration_policies/index.rst","components/filters/index.rst","components/filters/input_filters.rst","components/filters/output_filters.rst","components/memories/index.rst","components/memory_backends/index.rst","components/orchestrators/index.rst","components/spaces.rst","contributing/add_agent.rst","contributing/add_env.rst","dashboard.rst","design/control_flow.rst","design/horizontal_scaling.rst","design/network.rst","dist_usage.rst","features/algorithms.rst","features/benchmarks.rst","features/environments.rst","features/index.rst","index.rst","selecting_an_algorithm.rst","test.rst","usage.rst"],objects:{"rl_coach.agents.acer_agent":{ACERAlgorithmParameters:[6,0,1,""]},"rl_coach.agents.actor_critic_agent":{ActorCriticAlgorithmParameters:[5,0,1,""]},"rl_coach.agents.agent":{Agent:[3,0,1,""]},"rl_coach.agents.agent.Agent":{act:[3,1,1,""],call_memory:[3,1,1,""],choose_action:[3,1,1,""],collect_savers:[3,1,1,""],create_networks:[3,1,1,""],emulate_act_on_trainer:[3,1,1,""],emulate_observe_on_trainer:[3,1,1,""],get_predictions:[3,1,1,""],get_state_embedding:[3,1,1,""],handle_episode_ended:[3,1,1,""],init_environment_dependent_modules:[3,1,1,""],learn_from_batch:[3,1,1,""],log_to_screen:[3,1,1,""],observe:[3,1,1,""],parent:[3,2,1,""],phase:[3,2,1,""],post_training_commands:[3,1,1,""],prepare_batch_for_inference:[3,1,1,""],register_signal:[3,1,1,""],reset_evaluation_state:[3,1,1,""],reset_internal_state:[3,1,1,""],restore_checkpoint:[3,1,1,""],run_pre_network_filter_for_inference:[3,1,1,""],save_checkpoint:[3,1,1,""],set_environment_parameters:[3,1,1,""],set_incoming_directive:[3,1,1,""],set_session:[3,1,1,""],setup_logger:[3,1,1,""],sync:[3,1,1,""],train:[3,1,1,""],update_log:[3,1,1,""],update_step_in_episode_log:[3,1,1,""],update_transition_before_adding_to_replay_buffer:[3,1,1,""]},"rl_coach.agents.bc_agent":{BCAlgorithmParameters:[1,0,1,""]},"rl_coach.agents.categorical_dqn_agent":{CategoricalDQNAlgorithmParameters:[13,0,1,""]},"rl_coach.agents.cil_agent":{CILAlgorithmParameters:[2,0,1,""]},"rl_coach.agents.clipped_ppo_agent":{ClippedPPOAlgorithmParameters:[7,0,1,""]},"rl_coach.agents.ddpg_agent":{DDPGAlgorithmParameters:[8,0,1,""]},"rl_coach.agents.dfp_agent":{DFPAlgorithmParameters:[4,0,1,""]},"rl_coach.agents.dqn_agent":{DQNAgent:[49,0,1,""],DQNAlgorithmParameters:[15,0,1,""]},"rl_coach.agents.dqn_agent.DQNAgent":{act:[49,1,1,""],call_memory:[49,1,1,""],choose_action:[49,1,1,""],collect_savers:[49,1,1,""],create_networks:[49,1,1,""],emulate_act_on_trainer:[49,1,1,""],emulate_observe_on_trainer:[49,1,1,""],get_predictions:[49,1,1,""],get_state_embedding:[49,1,1,""],handle_episode_ended:[49,1,1,""],init_environment_dependent_modules:[49,1,1,""],learn_from_batch:[49,1,1,""],log_to_screen:[49,1,1,""],observe:[49,1,1,""],parent:[49,2,1,""],phase:[49,2,1,""],post_training_commands:[49,1,1,""],prepare_batch_for_inference:[49,1,1,""],register_signal:[49,1,1,""],reset_evaluation_state:[49,1,1,""],reset_internal_state:[49,1,1,""],restore_checkpoint:[49,1,1,""],run_pre_network_filter_for_inference:[49,1,1,""],save_checkpoint:[49,1,1,""],set_environment_parameters:[49,1,1,""],set_incoming_directive:[49,1,1,""],set_session:[49,1,1,""],setup_logger:[49,1,1,""],sync:[49,1,1,""],train:[49,1,1,""],update_log:[49,1,1,""],update_step_in_episode_log:[49,1,1,""],update_transition_before_adding_to_replay_buffer:[49,1,1,""]},"rl_coach.agents.mmc_agent":{MixedMonteCarloAlgorithmParameters:[17,0,1,""]},"rl_coach.agents.n_step_q_agent":{NStepQAlgorithmParameters:[18,0,1,""]},"rl_coach.agents.naf_agent":{NAFAlgorithmParameters:[19,0,1,""]},"rl_coach.agents.nec_agent":{NECAlgorithmParameters:[20,0,1,""]},"rl_coach.agents.pal_agent":{PALAlgorithmParameters:[21,0,1,""]},"rl_coach.agents.policy_gradients_agent":{PolicyGradientAlgorithmParameters:[10,0,1,""]},"rl_coach.agents.ppo_agent":{PPOAlgorithmParameters:[11,0,1,""]},"rl_coach.agents.qr_dqn_agent":{QuantileRegressionDQNAlgorithmParameters:[22,0,1,""]},"rl_coach.agents.rainbow_dqn_agent":{RainbowDQNAlgorithmParameters:[23,0,1,""]},"rl_coach.architectures.architecture":{Architecture:[24,0,1,""]},"rl_coach.architectures.architecture.Architecture":{accumulate_gradients:[24,1,1,""],apply_and_reset_gradients:[24,1,1,""],apply_gradients:[24,1,1,""],collect_savers:[24,1,1,""],construct:[24,3,1,""],get_variable_value:[24,1,1,""],get_weights:[24,1,1,""],parallel_predict:[24,3,1,""],predict:[24,1,1,""],reset_accumulated_gradients:[24,1,1,""],set_variable_value:[24,1,1,""],set_weights:[24,1,1,""],train_on_batch:[24,1,1,""]},"rl_coach.architectures.network_wrapper":{NetworkWrapper:[24,0,1,""]},"rl_coach.architectures.network_wrapper.NetworkWrapper":{apply_gradients_and_sync_networks:[24,1,1,""],apply_gradients_to_global_network:[24,1,1,""],apply_gradients_to_online_network:[24,1,1,""],collect_savers:[24,1,1,""],parallel_prediction:[24,1,1,""],set_is_training:[24,1,1,""],sync:[24,1,1,""],train_and_sync_networks:[24,1,1,""],update_online_network:[24,1,1,""],update_target_network:[24,1,1,""]},"rl_coach.base_parameters":{AgentParameters:[3,0,1,""],DistributedTaskParameters:[0,0,1,""],NetworkParameters:[24,0,1,""],PresetValidationParameters:[0,0,1,""],TaskParameters:[0,0,1,""],VisualizationParameters:[0,0,1,""]},"rl_coach.core_types":{ActionInfo:[25,0,1,""],Batch:[25,0,1,""],EnvResponse:[25,0,1,""],Episode:[25,0,1,""],Transition:[25,0,1,""]},"rl_coach.core_types.Batch":{actions:[25,1,1,""],game_overs:[25,1,1,""],goals:[25,1,1,""],info:[25,1,1,""],info_as_list:[25,1,1,""],n_step_discounted_rewards:[25,1,1,""],next_states:[25,1,1,""],rewards:[25,1,1,""],shuffle:[25,1,1,""],size:[25,2,1,""],slice:[25,1,1,""],states:[25,1,1,""]},"rl_coach.core_types.Episode":{get_first_transition:[25,1,1,""],get_last_transition:[25,1,1,""],get_transition:[25,1,1,""],get_transitions_attribute:[25,1,1,""],insert:[25,1,1,""],is_empty:[25,1,1,""],length:[25,1,1,""],update_discounted_rewards:[25,1,1,""]},"rl_coach.data_stores.nfs_data_store":{NFSDataStore:[26,0,1,""]},"rl_coach.data_stores.s3_data_store":{S3DataStore:[26,0,1,""]},"rl_coach.environments.carla_environment":{CarlaEnvironment:[27,0,1,""]},"rl_coach.environments.control_suite_environment":{ControlSuiteEnvironment:[27,0,1,""]},"rl_coach.environments.doom_environment":{DoomEnvironment:[27,0,1,""]},"rl_coach.environments.environment":{Environment:[27,0,1,""]},"rl_coach.environments.environment.Environment":{action_space:[27,2,1,""],close:[27,1,1,""],get_action_from_user:[27,1,1,""],get_available_keys:[27,1,1,""],get_goal:[27,1,1,""],get_random_action:[27,1,1,""],get_rendered_image:[27,1,1,""],goal_space:[27,2,1,""],handle_episode_ended:[27,1,1,""],last_env_response:[27,2,1,""],phase:[27,2,1,""],render:[27,1,1,""],reset_internal_state:[27,1,1,""],set_goal:[27,1,1,""],state_space:[27,2,1,""],step:[27,1,1,""]},"rl_coach.environments.gym_environment":{GymEnvironment:[27,0,1,""]},"rl_coach.environments.starcraft2_environment":{StarCraft2Environment:[27,0,1,""]},"rl_coach.exploration_policies.additive_noise":{AdditiveNoise:[28,0,1,""]},"rl_coach.exploration_policies.boltzmann":{Boltzmann:[28,0,1,""]},"rl_coach.exploration_policies.bootstrapped":{Bootstrapped:[28,0,1,""]},"rl_coach.exploration_policies.categorical":{Categorical:[28,0,1,""]},"rl_coach.exploration_policies.continuous_entropy":{ContinuousEntropy:[28,0,1,""]},"rl_coach.exploration_policies.e_greedy":{EGreedy:[28,0,1,""]},"rl_coach.exploration_policies.exploration_policy":{ExplorationPolicy:[28,0,1,""]},"rl_coach.exploration_policies.exploration_policy.ExplorationPolicy":{change_phase:[28,1,1,""],get_action:[28,1,1,""],requires_action_values:[28,1,1,""],reset:[28,1,1,""]},"rl_coach.exploration_policies.greedy":{Greedy:[28,0,1,""]},"rl_coach.exploration_policies.ou_process":{OUProcess:[28,0,1,""]},"rl_coach.exploration_policies.parameter_noise":{ParameterNoise:[28,0,1,""]},"rl_coach.exploration_policies.truncated_normal":{TruncatedNormal:[28,0,1,""]},"rl_coach.exploration_policies.ucb":{UCB:[28,0,1,""]},"rl_coach.filters.action":{AttentionDiscretization:[31,0,1,""],BoxDiscretization:[31,0,1,""],BoxMasking:[31,0,1,""],FullDiscreteActionSpaceMap:[31,0,1,""],LinearBoxToBoxMap:[31,0,1,""],PartialDiscreteActionSpaceMap:[31,0,1,""]},"rl_coach.filters.observation":{ObservationClippingFilter:[30,0,1,""],ObservationCropFilter:[30,0,1,""],ObservationMoveAxisFilter:[30,0,1,""],ObservationNormalizationFilter:[30,0,1,""],ObservationRGBToYFilter:[30,0,1,""],ObservationReductionBySubPartsNameFilter:[30,0,1,""],ObservationRescaleSizeByFactorFilter:[30,0,1,""],ObservationRescaleToSizeFilter:[30,0,1,""],ObservationSqueezeFilter:[30,0,1,""],ObservationStackingFilter:[30,0,1,""],ObservationToUInt8Filter:[30,0,1,""]},"rl_coach.filters.reward":{RewardClippingFilter:[30,0,1,""],RewardNormalizationFilter:[30,0,1,""],RewardRescaleFilter:[30,0,1,""]},"rl_coach.memories.backend.redis":{RedisPubSubBackend:[33,0,1,""]},"rl_coach.memories.episodic":{EpisodicExperienceReplay:[32,0,1,""],EpisodicHRLHindsightExperienceReplay:[32,0,1,""],EpisodicHindsightExperienceReplay:[32,0,1,""],SingleEpisodeBuffer:[32,0,1,""]},"rl_coach.memories.non_episodic":{BalancedExperienceReplay:[32,0,1,""],ExperienceReplay:[32,0,1,""],PrioritizedExperienceReplay:[32,0,1,""],QDND:[32,0,1,""],TransitionCollection:[32,0,1,""]},"rl_coach.orchestrators.kubernetes_orchestrator":{Kubernetes:[34,0,1,""]},"rl_coach.spaces":{ActionSpace:[35,0,1,""],AttentionActionSpace:[35,0,1,""],BoxActionSpace:[35,0,1,""],CompoundActionSpace:[35,0,1,""],DiscreteActionSpace:[35,0,1,""],GoalsSpace:[35,0,1,""],ImageObservationSpace:[35,0,1,""],MultiSelectActionSpace:[35,0,1,""],ObservationSpace:[35,0,1,""],PlanarMapsObservationSpace:[35,0,1,""],Space:[35,0,1,""],VectorObservationSpace:[35,0,1,""]},"rl_coach.spaces.ActionSpace":{clip_action_to_space:[35,1,1,""],contains:[35,1,1,""],is_valid_index:[35,1,1,""],sample:[35,1,1,""],sample_with_info:[35,1,1,""]},"rl_coach.spaces.GoalsSpace":{DistanceMetric:[35,0,1,""],clip_action_to_space:[35,1,1,""],contains:[35,1,1,""],distance_from_goal:[35,1,1,""],get_reward_for_goal_and_state:[35,1,1,""],goal_from_state:[35,1,1,""],is_valid_index:[35,1,1,""],sample:[35,1,1,""],sample_with_info:[35,1,1,""]},"rl_coach.spaces.ObservationSpace":{contains:[35,1,1,""],is_valid_index:[35,1,1,""],sample:[35,1,1,""]},"rl_coach.spaces.Space":{contains:[35,1,1,""],is_valid_index:[35,1,1,""],sample:[35,1,1,""]}},objnames:{"0":["py","class","Python class"],"1":["py","method","Python method"],"2":["py","attribute","Python attribute"],"3":["py","staticmethod","Python static method"]},objtypes:{"0":"py:class","1":"py:method","2":"py:attribute","3":"py:staticmethod"},terms:{"100x100":31,"160x160":30,"1_0":[13,23],"1st":28,"20x20":31,"210x160":30,"2nd":28,"50k":39,"9_amd64":42,"abstract":[36,40],"boolean":[3,25,35,49],"break":38,"case":[0,3,5,20,24,25,28,35,48,49,50],"class":[0,1,2,3,4,5,6,7,8,10,11,13,15,17,18,19,20,21,22,23,24,25,26,27,28,30,31,32,33,34,35,36,37,39,43,49],"default":[0,28,50],"enum":[24,27,35],"export":[0,24,42],"final":[8,14,15,17,21,39],"float":[3,4,5,6,7,8,10,11,13,17,20,21,22,24,25,27,28,30,31,32,35,36,49],"function":[0,1,3,6,7,8,11,24,27,28,35,36,37,39,41,49],"import":[6,16,28,32,37,48,50],"int":[0,3,4,5,6,7,10,13,18,20,22,23,25,27,28,30,31,32,35,49],"long":41,"new":[0,3,7,8,11,20,21,24,25,31,39,40,47,48,49],"return":[0,3,8,10,11,12,17,20,21,23,24,25,27,28,30,32,35,36,37,39,48,49],"short":[0,39],"static":24,"super":[36,37],"switch":38,"true":[0,3,4,5,6,7,8,11,20,21,23,24,25,27,28,31,32,35,49],"try":[4,44,48],"while":[0,5,6,8,9,10,11,24,27,38,41,48,50],AWS:42,Adding:[16,47],And:[37,48],But:[38,48],Doing:48,For:[0,1,2,3,4,7,10,12,13,14,15,18,20,21,24,25,27,28,29,30,31,35,36,37,39,40,41,42,44,49,50],Has:24,Its:49,NFS:[26,42],One:[22,50],That:38,The:[0,1,2,3,4,5,6,7,8,10,11,12,13,17,18,19,20,21,22,23,24,25,26,27,28,30,31,32,33,34,35,36,38,39,40,41,42,44,45,47,48,49,50],Then:[4,7,8,12,19,21],There:[7,11,24,28,29,36,37,41,50],These:[1,2,3,22,27,34,40,41,42],Use:[1,2,8,19,20],Used:28,Uses:48,Using:[8,12,14,15,42],Will:24,With:[28,47],__init__:[27,36,37],_index:[5,18],_render:37,_restart_environment_episod:37,_take_act:37,_update_st:37,a2c:48,a3c:[10,18,38,48],a_i:20,a_t:[4,5,6,8,12,13,14,15,17,18,19,21,23],a_valu:5,abl:[31,48],about:[3,25,39,49,50],abov:[8,24,39],abs:[18,32],absolut:28,acceler:19,accept:27,access:[24,36,42],accord:[0,3,4,5,6,8,12,18,24,25,28,35,38,39,41,49],accordingli:[20,35,39,50],account:[4,7,11,20,21,28],accumul:[3,4,5,6,10,18,20,23,24,30,48,49],accumulate_gradi:24,accumulated_gradi:24,accur:48,acer:[3,48],acer_ag:6,aceralgorithmparamet:6,achiev:[0,4,7,27,30,32,35,44,48,50],across:[10,17,38],act:[3,4,8,12,22,35,36,39,49],action:[1,2,3,13,14,15,16,17,18,21,22,23,24,25,27,28,29,32,36,37,39,41,49],action_idx:37,action_intrinsic_reward:25,action_penalti:8,action_spac:[27,28],action_space_s:24,action_valu:[25,28],actioninfo:[3,35,39,49],actionspac:[28,35],actiontyp:37,activ:[8,24],actor:[3,6,7,8,11,28,41,48],actor_critic_ag:5,actorcriticag:36,actorcriticalgorithmparamet:5,actual:[4,5,13,14,15,22,23,28,31,32],adam:[7,24],adam_optimizer_beta1:24,adam_optimizer_beta2:24,adapt:[7,11],add:[8,9,19,25,28,30,37,39,42],add_rendered_image_to_env_respons:0,added:[0,4,6,7,10,11,20,28,32,36],adding:[3,11,28,36,49],addit:[3,24,25,27,28,30,32,35,37,38,39,41,47,48,49],addition:[24,27,30,36,37,39,44,45,50],additional_fetch:24,additional_simulator_paramet:[27,37],additionali:38,additive_nois:28,additivenoiseparamet:28,advanc:[23,47],advantag:[3,5,7,11,16,28],affect:[0,12,24],aforement:[14,15,21],after:[0,3,8,10,11,18,19,21,23,24,25,27,30,35,49,50],again:28,agent:[0,1,2,4,5,6,7,8,10,11,13,15,17,18,19,20,21,22,23,24,25,27,28,29,30,31,35,37,38,41,43,44,47,48,49],agent_param:40,agent_paramet:[3,24,49],agentparamet:[3,24,36],aggreg:39,ahead:[4,48],aim:28,algorithm:[3,25,28,36,38,39,40,44,46,47,49],algorithmparamet:[3,36],all:[0,3,10,12,20,21,24,25,27,28,30,31,35,36,37,38,39,40,41,42,45,49,50],all_action_prob:25,allow:[0,3,4,16,24,25,27,28,29,30,31,32,38,39,40,41,47,48,49,50],allow_brak:27,allow_duplicates_in_batch_sampl:32,allow_no_action_to_be_select:35,along:[20,27,28,45],alpha:[6,17,21,32],alreadi:[20,25,37,48],also:[5,6,7,20,21,24,27,35,36,38,44,48,50],altern:[27,37,45],alwai:[24,28,31],amazon:42,amazonaw:42,amount:[8,10,17,21,28,39,48],analysi:38,analyz:38,ani:[3,24,25,27,31,32,36,39,40,41,42,49],anoth:[3,16,24,29,49],answer:48,api:[27,41,45,47],appear:[3,49],appli:[0,3,5,8,10,18,24,25,28,30,48,49],applic:48,apply_and_reset_gradi:24,apply_gradi:24,apply_gradients_and_sync_network:24,apply_gradients_every_x_episod:[5,10,18],apply_gradients_to_global_network:24,apply_gradients_to_online_network:24,apply_stop_condit:0,appropri:42,approx:8,approxim:[41,48],apt:42,arbitrari:30,architectur:[3,16,36,47,49],architecture_num_q_head:28,area:31,arg:[3,24,42,49],argmax_a:[14,17,21],argument:[3,13,23,24,27,35,39,49],around:[24,25,41],arrai:[3,24,25,27,30,35,37,49],art:[3,43],artifact:42,artifici:32,arxiv:[18,32],aspect:[28,30,38],assign:[0,2,5,6,24,28],assign_kl_coeffici:24,assign_op:24,assum:[25,28,30,32,48],async:[24,40],async_train:24,asynchron:[5,18,24],atari:[15,27,30,42,50],atari_a3c:50,atari_dqn:50,ath:16,atom:[13,22,23],attach:27,attend:31,attent:31,attentionactionspac:31,attentiondiscret:31,attribut:25,attribute_nam:25,author:[27,44,45],auto_select_all_armi:27,autoclean:42,automat:[24,50],autonom:[27,45,47],autoremov:42,auxiliari:[27,45],avail:[4,24,25,27,38,40,42,47,48,50],averag:[6,7,11,24,38,39],avg:6,aws:42,axes:[30,38],axi:[30,38],axis_origin:30,axis_target:30,back:[7,40],backend:[24,40,42,47,50],background:50,backpropag:20,backward:24,balanc:2,band:38,bar:6,base1:42,base64:42,base:[7,11,17,19,21,27,32,36,39,42,45,48],base_paramet:[0,3,24,27,28],baselin:48,basic:[10,25,40,50],batch:[1,2,3,4,5,6,8,10,11,12,13,14,15,16,18,21,22,23,24,32,36,39,49],batch_siz:24,bc_agent:1,bcalgorithmparamet:1,becaus:39,becom:[8,40],been:[16,25,30,44,48],befor:[3,5,11,23,24,25,30,39,40,41,42,48,49],begin:[0,4,39],behav:35,behavior:[3,30,32,36,44,48,49,50],being:[3,36,47,48,49],bellman:[13,22,23],benchmark:[38,46,47,48],best:[48,50],beta1:24,beta2:24,beta:[6,8,10,32],beta_entropi:[5,6,7,10,11],better:[16,48],between:[0,1,2,3,6,7,8,10,11,13,17,18,20,22,23,24,25,27,28,31,32,35,36,38,39,41,47,48],bfg:[7,11],bia:[6,48],big:[11,13,23],bilinear:30,bin:[31,42],binari:12,bind:24,binomi:12,bit:30,blizzard:45,blob:[27,30],block:47,blog:47,boilerpl:39,bolling:38,bool:[0,3,4,5,6,7,8,11,20,21,23,24,25,27,28,32,35,49],boost:[42,48],bootstrap:[3,5,6,7,8,11,17,18,20,21,23,25,48],bootstrap_total_return_from_old_polici:[20,25],both:[3,7,24,27,28,31,48,49],bound:[6,7,11,13,23,28,35,48],box2d:42,box:[28,31,35],boxactionspac:31,boxdiscret:31,boxmask:31,breakout:50,breakoutdeterminist:[27,50],bring:11,bucket:42,buffer:[1,2,3,6,12,13,14,15,18,20,21,22,23,32,39,48,49,50],build:[29,47,48],builder:42,built:[36,39],bullet:6,button:[38,50],c51:13,cach:42,calcul:[3,4,5,6,7,8,10,11,12,13,14,15,17,18,20,21,22,23,24,25,28,32,36,49],call:[0,3,10,18,24,25,27,39,49],call_memori:[3,49],callabl:35,camera:[27,37],camera_height:27,camera_width:27,cameratyp:[27,37],can:[0,2,3,5,6,7,8,11,21,24,25,27,28,29,30,31,35,36,37,38,39,41,45,47,49,50],cannot:[3,49],carla:[30,45],carla_environ:27,carlaenviron:27,carlaenvironmentparamet:27,carlo:[3,21],cartpol:[27,37],cartpole_a3c:50,cartpole_clippedppo:[42,50],cartpole_dqn:50,categor:[3,5,6,48],categori:[29,30],categorical_dqn_ag:13,categoricaldqnalgorithmparamet:13,caus:[30,38],cdot:[5,7,8,10,12,13,14,15,17,19,21,23],central:[24,38],chain:8,challeng:39,chang:[0,3,6,7,8,11,12,16,18,21,28,39,42,49],change_phas:28,channel:[27,30],channels_axi:35,check:[0,3,25,35,49],checkpoint:[0,3,24,26,40,42,49,50],checkpoint_dir:[3,49],checkpoint_prefix:[3,49],checkpoint_restore_dir:[0,50],checkpoint_save_dir:0,checkpoint_save_sec:0,child:24,chmod:42,choic:[36,42],choos:[3,16,21,28,29,31,35,36,39,41,48,49,50],choose_act:[3,36,39,49],chosen:[3,21,28,31,36,49],chunk:11,cil:48,cil_ag:2,cilalgorithmparamet:2,classic_control:42,clean:[27,36,42],cli:42,clip:[3,6,8,11,24,30,35,48],clip_action_to_spac:35,clip_critic_target:8,clip_gradi:24,clip_high:28,clip_likelihood_ratio_using_epsilon:[7,11],clip_low:28,clip_max:30,clip_min:30,clipbyglobalnorm:24,clipped_ppo_ag:7,clippedppoalgorithmparamet:7,clipping_high:30,clipping_low:30,clone:[3,48],close:27,cmake:42,coach:[0,3,24,26,27,28,29,33,34,36,39,43,44,45,48,50],code:[37,39,48],coeffici:[7,11,24,28,32],collect:[3,7,10,11,18,24,25,32,39,44,47,49,50],collect_sav:[3,24,49],color:30,com:42,combin:[23,41,47,48],comma:0,command:[39,42,50],common:[36,38,42,50],commun:40,compar:[0,11,16,48],complet:[25,28,39],complex:[24,29,39,41,48,50],compon:[3,13,23,24,28,34,36,39,47,49,50],composit:[3,49],compositeag:[3,49],comput:[24,28],concat:24,concentr:39,condit:[0,3],confid:28,config:[27,50],configur:[3,5,10,36,42,49],confus:39,connect:24,connectionist:10,consecut:[8,20],consequ:[18,28],consid:[5,6,31,38],consist:[8,27,30,31,35,39,45],constant:6,constantli:50,constantschedul:32,constrain:31,construct:[24,32],consumpt:30,contain:[0,1,2,3,12,24,25,27,35,37,39,49,50],content:42,contin:40,continu:[1,2,5,8,9,10,19,28,29,31,35,44],continuous_entropi:28,continuous_exploration_policy_paramet:28,contribut:[4,47],control:[2,3,5,6,7,8,11,24,28,30,38,45,47,48,49],control_suite_environ:27,controlsuiteenviron:27,conveni:[38,50],converg:10,convers:29,convert:[3,25,28,30,35,39,41,49],convolut:[24,41],coordin:31,copi:[8,12,13,14,15,17,18,19,21,22,23,24,42],core:[3,47,49],core_typ:[3,25,27,35,49],correct:[3,6,48],correctli:24,correl:28,correpond:25,correspond:[2,3,4,13,14,24,25,28,30,35,37,49],could:[3,24,35,42,49],count:17,countabl:31,counter:[3,49],counterpart:41,cpu:[0,24],crd:50,creat:[3,18,24,30,37,49,50],create_network:[3,49],create_target_network:24,creation:[3,49],credenti:42,critic:[3,6,7,8,11,28,41,48],crop:[30,31],crop_high:30,crop_low:30,cross:[1,13,23],csv:0,ctrl:38,cuda:42,cudnn7:42,curl:42,curr_stat:[3,36,49],current:[0,1,2,3,4,6,7,8,9,10,11,12,14,15,17,19,20,21,22,24,25,27,28,30,31,35,36,39,47,48,49],custom:[27,28,35,36,39],custom_reward_threshold:27,cycl:39,dai:50,dashboard:[0,3,42,47,49],data:[0,10,18,24,32,39,40,42,44,47,48,50],data_stor:[26,42],dataset:[7,11,48,50],date:[20,41,48,50],dcp:[42,50],ddpg:48,ddpg_agent:8,ddpgalgorithmparamet:8,ddqn:[17,21,48],deal:48,debug:[0,38,47],decai:[5,7,11,24],decid:[0,3,4,27,36,49],decis:[3,49],decod:42,dedic:24,deep:[0,3,5,12,14,16,18,19,23,49],deepmind:45,def:[36,37],default_act:35,default_input_filt:37,default_output_filt:37,defin:[0,3,5,6,7,10,11,18,20,21,24,25,27,28,30,31,32,35,36,37,39,40,41,44,45,49,50],definit:[3,24,27,35,37,39,49],delai:48,delta:[6,13,20,23],demonstr:[1,2,50],dens:28,densiti:17,depend:[0,3,6,24,30,32,35,37,42,44,48,49],deploi:[34,40],depth:27,descend:48,describ:[3,13,22,30,32,36,39,42,49],descript:[3,31,35,43,50],design:[39,42,47],desir:[31,36],destabil:10,detail:[3,25,43,45,47,50],determin:[2,3,20,25,32,49],determinist:[3,48],dev:42,develop:[39,44],deviat:[10,11,28,30,38],devic:24,dfp:48,dfp_agent:4,dfpalgorithmparamet:4,dict:[3,4,24,25,27,28,35,49],dict_siz:32,dictat:4,dictionari:[2,3,24,25,27,32,35,36,49],did:27,differ:[0,1,2,3,4,5,6,7,10,11,12,16,24,27,28,30,35,36,37,38,40,41,47,48,49],differenti:16,difficult:[38,44],difficulti:50,dimens:[25,27,30,31],dimension:[11,31],dir:[3,49,50],direct:[3,27,49],directli:[3,5,39,41,49],directori:[0,24,36,38,42,50],disabl:50,disable_fog:27,disappear:27,disassembl:48,discard:[25,30],discount:[8,10,11,17,20,21,23,24,25,48],discret:[1,2,4,7,11,12,13,14,15,16,17,18,20,21,22,23,28,29,30,31,35,39],disentangl:39,disk:0,displai:[0,38],distanc:35,distance_from_go:35,distance_metr:35,distancemetr:35,distil:[3,49],distribut:[3,5,6,10,11,13,22,23,24,26,28,33,34,35,41,47,48,49,50],distributed_coach:40,distributed_coach_synchronization_typ:40,distributedcoachsynchronizationtyp:40,divereg:[7,11],diverg:[6,7,11,23],dnd:[0,20,48],dnd_key_error_threshold:20,dnd_size:20,do_action_hindsight:32,doc:42,docker:42,dockerfil:42,document:45,doe:[12,24,30],doesn:40,doing:[7,11,29],domain:41,don:[4,28,38,48],done:[0,3,7,10,11,27,30,37,49,50],doom:[27,37,42,45],doom_basic_bc:50,doom_basic_dqn:50,doom_environ:[27,37,50],doomenviron:[27,37],doomenvironmentparamet:[37,50],doominputfilt:37,doomlevel:27,doomoutputfilt:37,doubl:[3,17,23],down:[24,27],download:42,dpkg:42,dqn:[3,17,18,23,27,28,30,31,39,41,48],dqn_agent:[15,49],dqnagent:49,dqnalgorithmparamet:15,drive:[2,27,45,47],driving_benchmark:27,due:30,duel:[3,23],dump:[0,3,49],dump_csv:0,dump_gif:0,dump_in_episode_sign:0,dump_mp4:0,dump_one_value_per_episod:[3,49],dump_one_value_per_step:[3,49],dump_parameters_document:0,dump_signals_to_csv_every_x_episod:0,dure:[3,6,7,10,11,12,20,28,38,39,49,50],dynam:[38,44,48],e_greedi:28,each:[0,1,2,3,4,5,6,7,10,11,12,14,15,16,18,20,21,22,24,25,27,28,29,30,31,32,35,36,38,39,40,41,42,44,48,49],eas:38,easi:[37,38,47],easier:41,easili:[28,50],echo:42,effect:[0,3,6,7,18,30,39,49],effici:[6,39,48],either:[0,3,5,18,24,28,35,38,41,50],element:[3,12,24,30,35],elf:42,embbed:24,embed:[3,20,24,49],embedd:[24,41],embedding_merger_typ:24,embeddingmergertyp:24,emploi:48,empti:25,emul:[3,6,49],emulate_act_on_train:[3,49],emulate_observe_on_train:[3,49],enabl:[24,41,50],encod:[30,35],encourag:[19,21,39],end:[2,3,10,23,25,27,30,49,50],enforc:31,engin:[27,45],enough:[4,6,20],ensembl:[28,48],ensur:[6,24],enter:[3,49,50],entir:[11,17,20,23,28,31,39],entri:[20,39],entropi:[1,5,6,7,10,11,13,23,28],enumer:35,env:[25,42],env_param:37,env_respons:[3,49],enviorn:27,environ:[0,3,4,6,16,24,25,28,29,30,31,35,36,39,42,44,46,47,49],environmentparamet:[27,37],envrespons:[0,3,27,49],episod:[0,3,4,5,10,11,12,17,18,23,27,28,36,37,38,39,40,49,50],episode_max_tim:27,episodic_hindsight_experience_replai:32,epoch:7,epsilon:[7,28,32],epsilon_schedul:28,equal:2,equat:[8,14,15,18,22],error:[24,48],escap:50,especi:16,essenti:[18,24,31,37,39,42],estim:[5,7,11,12,17,21,28],estimate_state_value_using_ga:[5,7,11],eta:[7,11],etc:[0,3,24,27,29,35,36,45,49],evalu:[0,3,24,25,28,39,49],evaluate_onli:0,evaluation_epsilon:28,evaluation_noise_percentag:28,even:[16,24,27,37,38,39,48],everi:[0,5,6,8,10,12,13,14,15,17,18,19,21,22,23,50],exact:[20,28,44],exactli:24,exampl:[2,3,4,24,25,27,28,29,30,31,35,36,37,39,41,49,50],except:[18,25],execut:[25,38,39],exhibit:[3,36,49],exist:[20,24],exit:[3,49],expand_dim:25,expect:[0,3,28,44,49],experi:[0,6,8,11,23,27,32,33,38,39,40,42,47,48,50],experiment_path:[0,27],experiment_suit:27,experimentsuit:27,expert:[1,2,25,48],exploit:[28,39],explor:[3,4,5,6,7,8,9,11,12,17,19,20,36,39,47,48],exploration_polici:28,explorationparamet:[3,28,36],exponenti:[6,7,11,23,24],expor:3,export_onnx_graph:0,expos:[38,41,47],extend:[27,28,45],extens:[27,45],extent:50,extern:0,extra:[24,25,41],extract:[3,19,20,25,30,35,38,39,49],factor:[8,10,11,21,23,24,25,28,30],faithfulli:38,fake:35,fals:[0,3,8,24,25,27,28,31,32,35,37,49],far:[11,30,39,44],faster:[16,48],featur:[8,27,41,47,48],feature_minimap_maps_to_us:27,feature_screen_maps_to_us:27,fetch:[24,25],fetched_tensor:24,few:[10,12,13,14,15,17,21,22,23,28,37],field:[44,47],file:[0,3,36,39,49,50],fill:[25,37],filter:[0,3,47,49],find:[14,38,45,47],finish:[20,50],finit:31,first:[0,8,11,12,20,22,23,24,25,30,39,41],fit:35,flag:[0,3,24,25,27,49],flexibl:40,flicker:27,flow:[29,47],follow:[2,3,5,6,8,10,13,14,15,18,19,20,22,23,24,25,27,28,32,36,37,42,44,48,49],footprint:30,forc:[24,27,31,37],force_cpu:24,force_environment_reset:[27,37],force_int_bin:31,forced_attention_s:35,form:[4,18,35,48],format:36,formul:[5,6],forward:[24,28],found:[3,43,50],frac:[6,7,13,23],fraction:[7,11],frame:[0,27],frame_skip:27,framework:[0,3,24,36,47,49],framework_typ:0,free:[27,45],freeglut3:42,from:[0,1,2,3,4,5,6,7,8,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,27,28,29,30,31,33,35,36,37,38,39,40,41,42,44,45,47,49,50],full:[3,10,17,31,49],fulldiscreteactionspacemap:31,fulli:24,func:[3,49],futur:[0,3,10,25,48],future_measurements_weight:4,gae:[5,7,11],gae_lambda:[5,7,11],game:[3,25,27,45,47,49,50],game_ov:25,gamma:[5,6,8,12,13,14,15,17,18,19,20,21,23],gap:[21,48],gather:40,gaussian:[11,28],gener:[0,5,7,11,12,24,27,28,32,35,36,42,50],general_network:36,get:[3,4,7,8,9,10,11,12,14,15,17,19,21,24,25,27,28,35,39,41,42,44,49],get_act:28,get_action_from_us:27,get_available_kei:27,get_first_transit:25,get_goal:27,get_last_env_respons:27,get_last_transit:25,get_output_head:36,get_predict:[3,49],get_random_act:27,get_rendered_imag:[27,37],get_reward_for_goal_and_st:35,get_state_embed:[3,49],get_transit:25,get_transitions_attribut:25,get_variable_valu:24,get_weight:24,gfortran:42,gif:0,git:42,github:[37,42,44,47],given:[0,1,2,3,4,5,8,10,11,24,25,27,28,30,31,32,35,36,39,49],given_weight:24,global:[3,24,41,49],global_network:24,glx:42,goal:[1,2,3,4,6,24,25,27,32,39,41,48,49],goal_from_st:35,goal_nam:35,goal_spac:27,goal_vector:4,goals_spac:32,goalsspac:[32,35],goaltorewardconvers:35,going:29,good:[37,38],gpu:[0,24],gracefulli:50,gradient:[3,5,6,7,11,18,20,24,36,48,49],gradientclippingmethod:24,gradients_clipping_method:24,granular:32,graph:0,graphmanag:39,grayscal:[30,35],greedili:39,group:38,grow:23,guidelin:48,gym:[42,45],gym_environ:[27,50],gymenviron:27,gymenvironmentparamet:37,hac:48,had:44,hand:[16,30,39,48],handl:4,handle_episode_end:[3,27,49],handling_targets_after_episode_end:4,handlingtargetsafterepisodeend:4,hard:[38,48],harder:38,has:[0,3,16,20,21,25,28,30,39,41,44,48,49],has_glob:24,has_target:24,hat:[6,7,13,23],have:[0,3,4,6,24,27,28,30,31,32,39,41,44,49],head:[1,2,3,5,6,10,12,16,19,20,24,28,36,41,49],headparamet:24,heads_paramet:24,health_gath:27,heat:6,heatup:[28,39],help:[21,25,38,39,48],here:[37,39],heurist:[11,28],hide:41,hierarch:[35,39],hierarchi:[3,39,48,49],high:[8,11,30,31,35,38],high_i:35,high_kl_penalty_coeffici:11,high_x:35,higher:11,highest:[5,6,10,21,28,30,31,35],highli:[0,37,48],hindsight:[9,32,48],hindsight_goal_selection_method:32,hindsight_transitions_per_regular_transit:32,hindsightgoalselectionmethod:32,hold:[12,24,25,32,38,39,41],horizont:[42,47,50],host:42,hostnam:0,hot:35,how:[4,7,11,28,40,42,48,50],hrl:32,html:42,http:[18,32,42],hub:42,huber:22,huber_loss_interv:22,human:[0,27],human_control:27,hyper:[36,44],hyperparamet:36,ident:24,identifi:[24,35],ignor:27,imag:[0,24,27,30,31,35,37,41,50],image1:42,imit:[3,25,43,48],impact:24,implement:[3,7,11,24,26,27,28,32,36,37,40,44,48,50],impli:50,implment:34,importance_weight:24,importance_weight_trunc:6,importantli:39,improv:[5,16,23,27,39,48],includ:[0,3,4,27,29,30,34,41,45,49,50],increas:[11,21,30,48],increment:[3,49],index:[0,2,25,27,30,31,32,35],indic:35,inf:[30,35],infer:[3,24,27,49],infinit:48,info:[3,12,25,35,37,49],info_as_list:25,inform:[3,4,18,25,27,29,38,39,42,45,49],inherit:[3,36,37],init_environment_dependent_modul:[3,49],initi:[3,4,11,21,24,25,36,39,47,49],initial_feed_dict:24,initial_kl_coeffici:11,innov:48,input:[1,2,3,4,8,12,14,15,17,19,20,21,24,29,35,39,41,49],input_embedders_paramet:24,input_high:30,input_low:30,input_space_high:31,input_space_low:31,inputembedderparamet:24,inputfilt:39,insert:[20,25],inspect:0,instal:[42,50],instanc:[3,33,35,41],instanti:[3,27,39],instead:[0,3,7,18,21,24,30,31,39,48,49],instruct:50,intact:[12,44],integ:[0,30,31],integr:[37,39,40,47],intel:47,intend:[10,24,28,39],interact:[25,39,40,47,50],interest:[24,38],interfac:[27,38,40,45],intermedi:20,intern:[3,10,18,24,25,29,39,49,50],interpol:30,intersect:48,interv:22,intrins:25,intro:47,introduc:48,invers:[27,45],invok:39,involv:36,is_empti:25,is_valid_index:35,item:25,iter:[3,5,6,8,11,16,24,49],its:[0,3,13,23,24,25,28,35,39,42,48,49,50],itself:[24,35,50],job:0,job_typ:0,joint:27,json:0,jump:[4,31],jupyt:36,just:[3,11,21,23,37,39,41,49,50],kapa:22,keep:[15,25,30,50],kei:[2,20,24,25,27,32,36,38,42,50],key_error_threshold:32,key_width:32,keyboard:[27,50],keyword:24,kl_coeffici:24,kl_coefficient_ph:24,know:[3,48,49,50],knowledg:[3,39,49],known:[25,38,44,48],kubeconfig:34,kubernet:42,kubernetes_orchestr:34,kubernetesparamet:34,kwarg:[24,27],l2_norm_added_delta:20,l2_regular:24,lack:38,lamb:28,lambda:[5,7,11,28],lane:2,larg:[28,30,45],larger:24,last:[4,6,11,20,25,27,30],last_env_respons:27,lastli:39,later:[0,3,24,49,50],latest:[18,20,39,42],layer:[24,28,32,39,41],lazi:[25,30],lazystack:30,lbfg:24,ld_library_path:42,lead:28,learn:[0,3,4,5,6,8,9,10,12,13,14,15,16,19,22,23,24,25,27,28,30,38,39,41,43,44,45,48,49],learn_from_batch:[3,36,39,49],learner:24,learning_r:[24,32],learning_rate_decay_r:24,learning_rate_decay_step:24,least:[41,48],leav:[11,12],left:[2,6,48],length:[4,5,7,11,18,20,24,25],less:[16,48],level:[0,3,24,27,37,49,50],levelmanag:[3,39,49],levelselect:27,libatla:42,libav:42,libavformat:42,libbla:42,libboost:42,libbz2:42,libfluidsynth:42,libgl1:42,libglew:42,libgm:42,libgstream:42,libgtk2:42,libgtk:42,libjpeg:42,liblapack:42,libnotifi:42,libopen:42,libosmesa6:42,libportmidi:42,librari:[27,42,45],libsdl1:42,libsdl2:42,libsdl:42,libsm:42,libsmpeg:42,libswscal:42,libtiff:42,libwebkitgtk:42,libwildmidi:42,like:[27,35,39,41,42,48],likelihood:[7,11],line:[3,39,49,50],linear:31,linearboxtoboxmap:31,linearli:31,list:[0,3,4,24,25,27,28,30,31,35,36,49,50],load:[0,38,40,50],load_memory_from_file_path:50,local:[3,41,42,49],locat:[22,25,30,48],log:[0,3,5,6,10,49],log_to_screen:[3,49],logger:[0,3,49],look:[37,42],loop:39,loss:[1,2,3,6,7,10,11,13,14,15,22,23,24,28,36,41,49],lot:[28,38,44,48],low:[8,11,30,31,35],low_i:35,low_x:35,lower:[0,32,39],lowest:[30,31,35],lstm:41,lumin:30,lvert:[6,13,23],lvl:50,mai:[0,24,43,50],main:[3,36,39,41,43,49,50],mainli:40,major:28,make:[0,3,24,27,36,38,42,44,48,49],manag:[3,24,40,42,49],mandatori:[35,37,41],mani:[3,16,43,44],manner:[11,17,18,21,30,39],manual:42,map:[3,24,27,29,30,31,35,36,49],mark:25,markdown:49,mask:[12,31],masked_target_space_high:31,masked_target_space_low:31,master:[3,39,42,49],match:[2,20,24,35],mathbb:[5,6],mathop:5,max:[5,6,13,18,23,30],max_a:[12,15,20,21],max_action_valu:25,max_episodes_to_achieve_reward:0,max_fps_for_human_control:0,max_kl_diverg:6,max_over_num_fram:27,max_simultaneous_selected_act:35,max_siz:32,max_spe:27,maxim:[4,14],maximum:[0,13,15,20,21,25,27,28,30,32],mean:[0,2,7,8,9,10,11,19,24,28,30,31,35,38,48],meant:41,measur:[3,4,24,27,30,35,37,48,49],measurements_nam:35,mechan:[29,40,44,50],memor:48,memori:[3,23,25,30,36,39,40,42,47,48,49],memory_backend:42,memorygranular:32,memoryparamet:[3,36],merg:[24,27],mesa:42,method:[0,5,7,11,18,24,30,32],metric:[0,35,38],mid:6,middlewar:[20,24,41],middleware_paramet:24,middlewareparamet:24,midpoint:22,might:[3,10,27,36,41,49],min:[6,7,13,21,23],min_reward_threshold:0,mind:50,minim:[2,4,13],minimap_s:27,minimum:[0,7,30],mix:[3,7,11,20,21,48],mixedmontecarloalgorithmparamet:17,mixer1:42,mixtur:[17,24],mjkei:42,mjpro150:42,mjpro150_linux:42,mkdir:42,mmc:[17,48],mmc_agent:17,mode:[21,24,26,33,34,39,40,42,50],model:[0,17,19,24,47,50],modif:48,modifi:6,modul:[3,36,39,40,49],modular:[36,39,41,47],monitor:40,mont:[3,21],monte_carlo_mixing_r:[17,21],more:[3,8,18,24,30,36,38,39,41,42,47,49,50],moreov:38,most:[3,10,20,24,25,28,41,44,48,49,50],mostli:[30,39],motiv:39,move:[6,7,11,30,38,44],mp4:0,mse:[2,6,14,15,22],much:[7,11,39,48],mujoco:[27,31,37,42,45],mujoco_kei:42,mujoco_pi:42,multi:[11,24,35,41],multiarrai:[3,49],multidimension:35,multipl:[4,7,11,18,24,27,28,30,31,32,35,38,39,44,47,50],multipli:[4,10,24,30],multiselect:31,multitask:[27,45],must:[24,30,35,44],mxnet:50,n_step:[20,23,25,32],n_step_discounted_reward:25,n_step_q_ag:18,nabla:[6,8],nabla_:8,nabla_a:8,naf:48,naf_ag:19,nafalgorithmparamet:19,name:[3,24,25,27,30,35,36,42,49,50],namespac:34,nasm:42,nativ:[0,27,37,45],native_rend:0,navig:3,ndarrai:[3,24,25,27,28,30,31,35,37,49],nearest:20,neat:38,nec:[0,48],nec_ag:20,necalgorithmparamet:20,necessari:[3,20,24,49],necessarili:30,need:[0,3,6,23,24,27,28,35,36,39,44,48,49,50],neg:[4,30],neighbor:20,neon_compon:36,nervanasystem:42,network:[0,3,24,28,36,39,44,47,48,49,50],network_input_tupl:24,network_nam:[3,49],network_param:28,network_paramet:24,network_wrapp:[3,24,49],networkparamet:[3,24,28,36],networkwrapp:[3,49],neural:[3,17,24,41,44],never:24,new_value_shift_coeffici:[20,32],new_weight:24,newli:[21,37,48],next:[3,8,14,15,19,21,22,25,27,39,49,50],next_stat:25,nfs_data_stor:26,nfsdatastoreparamet:26,nice:50,no_accumul:24,node:[24,41],nois:[8,9,19,28,39],noise_percentage_schedul:28,noisi:[10,23,28],non_episod:32,none:[0,3,7,8,11,24,25,27,28,30,31,35,37,49],norm:24,norm_unclipped_grad:24,norm_unclippsed_grad:24,normal:[3,4,10,28,29,30,35],note:[20,24,28,49],notebook:36,notic:[24,48],notori:[38,44,48],now:[7,37],nstepqalgorithmparamet:18,nth:23,num_act:[20,32,35],num_bins_per_dimens:31,num_class:32,num_consecutive_playing_step:[3,8,49],num_consecutive_training_step:[3,49],num_gpu:0,num_neighbor:32,num_predicted_steps_ahead:4,num_speedup_step:27,num_steps_between_copying_online_weights_to_target:[8,18],num_steps_between_gradient_upd:[5,6,10,18],num_task:0,num_training_task:0,num_transitions_to_start_replai:6,num_work:0,number:[0,2,4,5,6,8,10,12,13,18,20,22,23,24,25,27,28,30,31,32,38,45,50],number_of_knn:20,numpi:[3,24,25,27,28,30,31,35,37,49],nvidia:42,object:[0,3,23,24,27,28,30,32,39,49],observ:[0,3,4,11,24,25,27,29,37,39,49],observation_reduction_by_sub_parts_name_filt:30,observation_rescale_size_by_factor_filt:30,observation_rescale_to_size_filt:30,observation_space_s:24,observation_space_typ:27,observation_stat:30,observation_typ:27,observationspac:35,observationspacetyp:27,observationtyp:27,obtain:[3,49],off:[6,40,48],offer:[27,45],often:[38,39,41],old:[7,11,24,48],old_weight:24,onc:[0,7,10,11,12,13,14,15,17,18,21,22,23,24,35,50],one:[0,3,6,16,20,21,24,25,27,28,29,32,35,37,38,41,48,49],ones:[37,48],onli:[0,3,4,5,6,7,10,11,12,13,15,16,18,20,22,23,24,25,27,28,30,31,37,39,48,49,50],onlin:[8,12,13,14,15,17,18,19,20,21,22,23,24,39,41],online_network:24,onnx:[0,24],onto:29,open:[0,27,45],openai:[42,45],opencv:42,oper:[21,24,30],optim:[3,4,6,24,43,48],optimization_epoch:7,optimizer_epsilon:24,optimizer_typ:24,option:[6,10,24,27,31,35,36,38,40,41,50],orchestr:[40,42,47],order:[0,3,5,6,7,8,10,11,14,15,16,18,19,20,21,22,24,25,29,30,31,38,39,41,44,48,49],org:[18,32],origin:[18,30,31,44],ornstein:[8,9,28],other:[0,2,10,16,21,24,27,29,30,32,38,39,48],otherwis:[11,12,24,27,28,35],ou_process:28,our:7,out:[2,14,15,28,29,31,38,42,47,48,50],outcom:[28,39],output:[0,4,6,8,12,13,19,20,24,28,29,30,35,36,41],output_0_0:24,output_observation_spac:30,outputfilt:39,outsid:[4,28],over:[3,7,10,11,18,20,23,24,25,28,30,31,38,39,48,49],overestim:8,overfit:11,overhead:0,overlai:38,override_existing_kei:32,overriden:36,overview:39,overwhelm:39,overwritten:24,own:[24,36],p_j:[13,23],page:[3,44],pair:[0,35],pal:[21,48],pal_ag:21,pal_alpha:21,palalgorithmparamet:21,paper:[5,10,13,18,20,22,27,32,44],parallel:[6,24,38,41],parallel_predict:24,param:[3,24,25,26,27,28,33,34,36,37,49],paramet:[2,3,4,5,6,7,8,10,11,13,17,18,20,21,22,23,24,25,26,27,28,30,31,32,33,34,35,36,37,44,47,49,50],parameter_nois:28,parameters_server_host:0,parent:[3,24,49],parent_path_suffix:[3,24,49],parmet:3,pars:39,part:[0,12,24,25,28,30,31,40,41,44,48],part_nam:30,partial:31,partialdiscreteactionspacemap:31,particular:4,particularli:[27,28,35,44,48],pass:[0,4,8,9,19,20,24,27,28,29,37,38,39,41,50],patamet:20,patchelf:42,patchelf_0:42,path:[0,3,24,36,37,42,49,50],pattern:39,pdf:32,penal:[7,8,11],penalti:11,pendulum_hac:37,pendulum_with_go:37,pendulumwithgo:37,per:[0,3,4,35,36,39,49],percentag:28,percentil:28,perceptron:41,perform:[0,3,6,24,25,30,32,37,38,39,48,49],period:[41,50],persist:3,persistent_advantage_learn:21,perspect:13,phase:[3,6,7,8,9,11,24,27,28,39,49],phi:[13,23],physic:[27,45],pi_:[6,7],pick:27,pickl:50,pip3:42,pip:42,pixel:27,place:[31,38,39],placehold:[24,28],plai:[0,3,10,12,14,15,18,28,36,38,49],plain:41,planarmap:27,planarmapsobservationspac:30,platform:[27,45],pleas:[18,44],plu:24,plugin:42,point:[30,35,39,40],polici:[1,3,4,5,6,9,12,18,19,20,26,36,39,40,41,42,43,47,48],policy_gradient_rescal:[5,7,10,11],policy_gradients_ag:10,policygradientalgorithmparamet:10,policygradientrescal:[5,7,10,11],policyoptimizationag:36,popul:39,popular:[27,45],port:0,posit:[4,30],possibl:[2,3,4,20,28,31,35,38,41,47,48,49,50],post:[29,47],post_training_command:[3,49],power:[27,45],ppo:[7,11,48],ppo_ag:11,ppoalgorithmparamet:11,pre:[8,28,29],predefin:[12,21,28,50],predict:[1,2,3,5,6,7,8,11,12,13,14,15,21,22,23,24,28,41,48,49],prediction_typ:[3,49],predictiontyp:[3,49],prefect:48,prefer:24,prefix:[3,49],prep:42,prepar:[3,49],prepare_batch_for_infer:[3,49],present:[16,20,27,30,48],preset:[0,5,36,37,39,40,42,50],press:[38,50],prevent:[8,11,39],previou:30,previous:[11,24],print:[0,3,50],print_networks_summari:0,priorit:[23,32],prioriti:[23,32],privat:35,probabilit:[5,6],probabl:[3,5,6,10,12,13,23,25,28,36,48,49],procedur:6,process:[0,3,8,9,24,28,29,30,31,36,38,39,41,44,47,49],produc:24,progress:24,project:[13,23],propag:7,propagate_updates_to_dnd:20,properti:[24,32,36,37,42],proport:32,provid:[24,40],proxi:39,proxim:3,pub:[33,34,42],publish:44,purpos:[0,3,10],pursuit:2,pybullet:[27,45],pygam:[0,42],pytest:42,python3:42,python:[27,32,36,42,45,47],qr_dqn_agent:22,quad:6,qualiti:27,quantil:[3,48],quantileregressiondqnalgorithmparamet:22,queri:[20,24,39,48],question:48,quit:38,r_i:[5,18],r_t:[4,6,7,23],rainbow:[3,36,48],rainbow_ag:36,rainbow_dqn_ag:23,rainbowag:36,rainbowagentparamet:36,rainbowalgorithmparamet:36,rainbowdqnalgorithmparamet:23,rainbowexplorationparamet:36,rainbowmemoryparamet:36,rainbownetworkparamet:36,rais:[3,25,49],ramp:[36,39],random:[0,18,27,28,35,39,44],random_initialization_step:27,randomli:[25,39],rang:[4,7,8,11,13,23,27,28,30,31,35,48],rare:20,rate:[0,6,17,20,24,27,41],rate_for_copying_weights_to_target:[6,8],rather:[4,38],ratio:[6,7,11,17,30],ratio_of_replai:6,raw:[27,45],reach:[0,11,35],read:26,readabl:39,readm:42,real:3,reason:[30,44],rebuild_on_every_upd:32,receiv:[24,25],recent:[3,23,24,48,49],recommend:37,redi:[33,34,42],redispubsub:42,redispubsubmemorybackendparamet:33,reduc:[1,2,10,11,21,24,30,39,48],reduct:30,reduction_method:30,reductionmethod:30,redund:30,refer:[2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,40,42],referenc:3,regard:[3,49],region:[6,48],regist:[3,49],register_sign:[3,49],registri:42,regress:[2,3,48],regula:[6,7,11],regular:[5,7,10,11,18,20,24,28,31,32,48],regularli:24,reinforc:[3,5,8,9,10,13,14,15,16,18,21,22,23,27,28,38,39,41,43,44,45,48],rel:28,relat:[24,42],relationship:48,releas:[47,48],relev:[3,12,28,30,49],remov:30,render:[0,3,27,37],reorder:30,repeat:[27,39],replac:[28,30,32,42],replace_mse_with_huber_loss:24,replai:[1,2,3,6,8,12,13,14,15,18,20,21,22,23,32,39,48,49,50],replay_buff:50,replicated_devic:24,repo:37,repositori:47,repres:[0,7,11,13,23,24,25,27,28,31,35,50],represent:41,reproduc:[39,44],request:[3,24,49],requir:[3,24,26,28,30,38,41,42,48,49],requires_action_valu:28,rescal:[4,5,7,10,11,24,29,30],rescale_factor:30,rescaleinterpolationtyp:30,rescaling_interpolation_typ:30,research:[27,44,45],reset:[3,20,24,27,28,37,49],reset_accumulated_gradi:24,reset_evaluation_st:[3,49],reset_gradi:24,reset_internal_st:[3,27,49],resourc:[40,42],respect:[8,25,27],respons:[3,25,27,39,49],rest:[24,25,31,42],restart:37,restor:[0,3,49],restore_checkpoint:[3,49],result:[3,4,13,14,15,16,22,23,24,30,31,44,48,49,50],ret:6,retrac:6,retriev:[20,32],return_additional_data:32,reus:39,reusabl:41,reward:[0,1,2,3,4,8,10,17,18,23,24,25,27,29,35,37,38,39,48,49],reward_test_level:0,reward_typ:35,rgb:[27,30,35],rho:[6,8],rho_t:6,right:[2,3,6,28,31,38,48,49],rl_coach:[0,1,2,3,4,5,6,7,8,10,11,13,15,17,18,19,20,21,22,23,24,25,26,27,28,30,31,32,33,34,35,37,42,49,50],rms_prop_optimizer_decai:24,rmsprop:24,roboschool:[27,45],robot:[27,35,45,47],roboti:42,rollout:[3,26,33,34,40,42,49,50],root:[38,42],rule:[8,12],run:[0,3,4,8,10,11,12,14,15,20,21,24,27,28,30,49,50],run_pre_network_filter_for_infer:[3,49],runphas:[3,49],runtim:42,rvert:[13,23],rvert_2:6,s3_bucket_nam:42,s3_creds_fil:42,s3_data_stor:26,s3_end_point:42,s3datastoreparamet:26,s_t:[4,5,6,8,12,13,14,15,17,18,19,21,23],sai:48,same:[3,4,7,10,17,18,21,24,27,31,32,38,41,44,48,49],sampl:[1,2,3,5,6,8,10,11,12,13,14,15,17,18,21,22,23,24,28,32,35,39,42,49],sample_with_info:35,satur:8,save:[0,3,23,24,28,42,49,50],save_checkpoint:[3,49],saver:[3,24,49],savercollect:[3,24,49],scale:[4,10,24,30,38,42,47,50],scale_down_gradients_by_number_of_workers_for_sync_train:24,scale_measurements_target:4,scaler:24,schedul:[7,28,32,39,40,42,50],scheme:[5,28,39,48],schulman:11,sci:42,scienc:44,scipi:[30,42],scope:24,scratch:48,scratchpad:0,screen:[3,27,37,50],screen_siz:27,script:39,second:[0,24,38,48,50],section:[42,43,45],see:[3,27,30,42,44,45,48,49,50],seed:[0,27,44],seen:[4,20,21,27,30,39,44,48],segment:[27,35],select:[5,12,20,24,25,28,30,31,35,37,38,39,47,50],self:[3,24,36,37,49],send:[37,41],separ:[0,3,16,30,31,41,43,48],separate_actions_for_throttle_and_brak:27,seper:10,sequenti:[4,25,32],serv:[7,10,41],server:0,server_height:27,server_width:27,sess:[3,24,49],session:[3,24,49],set:[0,2,3,4,5,6,7,8,11,13,14,15,17,20,21,23,24,25,27,28,30,31,35,36,40,44,45,47,48,49,50],set_environment_paramet:[3,49],set_goal:27,set_incoming_direct:[3,49],set_is_train:24,set_sess:[3,49],set_variable_valu:24,set_weight:24,setup:[3,42,49],setup_logg:[3,49],setuptool:42,sever:[0,3,7,10,11,12,24,27,28,30,36,37,38,39,41,45,48,49,50],shape:[24,30,35],share:[0,3,24,32,41,49],shared_memory_scratchpad:0,shared_optim:24,shift:[31,39],shine:38,should:[0,3,4,7,11,12,18,21,24,25,27,30,32,35,36,37,40,49,50],should_dump:0,shouldn:12,show:44,shown:44,shuffl:25,side:[3,49],sigma:28,signal:[3,39,49],signal_nam:[3,49],significantli:16,sim:6,similar:[7,16,18,25,27,31,48],simpl:[10,32,36,37,41,47,48,50],simplest:48,simplif:48,simplifi:[7,38,41],simul:[27,37,45,50],simultan:7,sinc:[3,7,8,10,18,20,21,23,24,28,30,49],singl:[3,4,5,6,7,11,12,16,17,18,24,25,27,28,31,35,38,39,41,49],size:[24,25,28,30,31,32,35],skill:48,skip:[27,39],slave:[3,49],slice:25,slow:[24,50],slower:[0,16,24],slowli:8,small:[7,20,32],smaller:28,smooth:38,soft:[8,11,19],softmax:28,softwar:42,solut:48,solv:[30,37,45,47],some:[0,3,11,24,25,28,30,36,37,38,41,44,48,49,50],sort:22,sourc:[0,1,2,3,4,5,6,7,8,10,11,13,15,17,18,19,20,21,22,23,24,25,26,27,28,30,31,32,33,34,35,37,42,45,49],space:[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,27,28,29,30,31,32,39,47,49],spacesdefinit:[3,24,49],spatial:48,spawn:[40,42],special:16,specif:[0,3,12,16,20,24,25,36,39,50],specifi:[0,24,27,28,30,37,40,50],speed:[24,30,48],speedup:50,spread:[30,31],squar:30,squeeze_list:24,squeeze_output:24,src:42,stabil:[6,18,24,48],stabl:[41,48],stack:[3,29,30,35,49],stack_siz:[24,30],stacking_axi:30,stage:41,stai:44,standard:[7,10,11,12,28,30,38],starcraft2_environ:27,starcraft2environ:27,starcraft:[35,45],starcraftobservationtyp:27,start:[3,6,8,11,16,21,25,30,31,37,42,49],state:[1,2,3,4,5,6,7,8,9,10,11,12,14,15,16,17,18,19,20,21,22,23,24,25,27,30,32,35,36,37,39,41,43,48,49],state_key_with_the_class_index:[2,32],state_spac:27,state_valu:25,statist:[3,10,30,47,49],stdev:28,steep:28,step:[0,3,4,5,6,7,8,10,11,12,13,14,15,17,19,20,21,22,23,24,25,27,28,30,36,37,38,39,48,49,50],stepmethod:[8,18],stochast:39,stop:[0,27],store:[0,3,20,23,25,27,30,32,38,39,40,42,47,49,50],store_transitions_only_when_episodes_are_termin:23,str:[0,2,3,4,18,24,25,27,28,30,31,35,49],strategi:[27,45],stream:[16,40],strict:44,string:[0,24,27],structur:[0,3,25,32,36,39,49],stuff:24,style:28,sub:[31,32,33,34,35,36,39,42,50],sub_spac:35,subset:[38,44,48],subtract:21,succeed:27,success:[0,27,48],suffer:38,suffici:25,suffix:[3,24,49],suggest:36,suit:[0,45],suitabl:[40,50],sum:[4,7,10,17,24,25],sum_:[5,13,17,18,20,23],summari:[0,3,49],supervis:48,suppli:[3,49],support:[0,3,24,27,28,38,41,42,43,45,47,50],sure:[0,42,44],surrog:7,swig:42,swingup:27,symbol:24,sync:[3,24,39,40,49],synchron:[0,24,39,41],t_max:[10,18],tag:42,take:[0,10,11,16,20,21,24,27,28,29,37,38,39],taken:[1,2,4,5,6,7,8,11,13,16,20,21,22,23,24,25,27,28],tanh:8,tar:42,target:[0,1,2,3,4,5,6,7,8,11,12,13,14,15,17,18,19,20,21,22,23,24,27,30,31,35,36,39,41,49],target_act:31,target_kl_diverg:11,target_network:24,target_success_r:27,targets_horizon:18,task:[0,1,2,27,30,36,38,45],task_index:0,techniqu:[7,11,47,48],technolog:40,teh:24,temperatur:28,temperature_schedul:28,tensor:[3,24,49],tensorboard:0,tensorflow:[0,3,24,49,50],tensorflow_support:24,term:[6,7,11],termin:[3,8,25,39,49],test:[0,3,5,6,8,9,10,11,24,36,44,47,50],test_using_a_trace_test:0,text:6,textrm:39,than:[0,3,11,24,28,38,41,49],thei:[3,20,21,24,28,38,39,40,48,49,50],them:[4,5,10,18,24,25,27,30,35,37,38,41],therefor:[0,8,24,29,48],theta:[6,7,8,13,23,28],theta_:[6,7],thi:[0,3,4,5,6,7,8,10,11,12,16,18,20,23,24,25,27,28,29,30,31,32,33,35,36,37,38,39,40,41,42,44,48,49,50],thing:38,those:[0,3,8,12,14,15,16,20,25,28,31,39,41,43,48,49],thousand:[11,12,13,14,15,17,21,22,23],thread:24,three:[3,40,41,42,43],threshold:[11,20,30],through:[0,3,4,8,9,10,11,12,20,21,24,36,37,39,41,49],tild:8,time:[0,4,21,24,28,31,32,38,41,48],time_limit:37,timestep:[4,10],timid:42,tmp:0,togeth:[3,18,25,39,49],toggl:38,too:11,tool:[38,42,48],top:[24,27,29,30,32,37,38,48],torqu:27,total:[0,3,10,11,17,20,21,25,32,36,38,48,49],total_loss:24,total_return:25,trace:0,trace_max_env_step:0,trace_test_level:0,tradeoff:28,train:[0,3,16,24,28,33,34,36,37,38,39,40,41,44,47,48,49],train_and_sync_network:24,train_on_batch:24,trainer:[26,40],transfer:[27,33,45],transit:[1,2,3,4,5,6,8,10,11,13,14,15,18,20,21,22,23,32,36,39,40,49],transition_idx:25,tri:48,trick:44,tricki:38,trigger:[27,42],truncat:6,truncated_norm:28,trust:[6,48],ttf2:42,tune:28,tupl:[1,2,3,8,24,25,27,32,35,36],turn:[2,48],tutori:[36,37],tweak:[3,49],two:[8,10,18,24,27,28,29,30,31,35,37,40,41,50],txt:42,type:[0,3,10,16,24,27,30,35,36,39,41,47,48,49,50],typic:[7,11,24,48,50],ubuntu16:42,uhlenbeck:[8,9,28],uint8:30,unbound:35,uncertain:28,uncertainti:28,unchang:11,unclip:[3,36,49],uncorrel:18,undeploi:40,under:[3,24,36,50],underbrac:5,understand:50,unifi:7,uniformli:[27,28,31,35],union:[3,25,27,28,31,35,49],uniqu:24,unit:38,unlik:11,unmask:31,unnecessari:0,unshar:[3,49],unsign:30,unspecifi:24,unstabl:[38,44],until:[0,6,10,11,20,23,28],unus:24,unzip:42,updat:[3,6,7,8,10,11,12,13,14,15,16,18,19,20,21,22,23,24,25,28,36,37,38,39,41,42,48,49],update_discounted_reward:25,update_filter_internal_st:[3,49],update_log:[3,49],update_online_network:24,update_step_in_episode_log:[3,49],update_target_network:24,update_transition_before_adding_to_replay_buff:[3,49],upgrad:42,upon:[3,5,36,49],upper:[6,28],usag:[31,47],use:[0,1,2,3,4,5,6,8,9,10,12,14,15,19,24,25,26,27,28,30,31,32,35,36,37,39,41,42,47,48,49,50],use_accumulated_reward_as_measur:4,use_cpu:0,use_full_action_spac:27,use_kl_regular:[7,11],use_non_zero_discount_for_terminal_st:8,use_separate_networks_per_head:24,use_target_network_for_evalu:8,use_trust_region_optim:6,used:[0,2,3,5,6,7,8,10,11,12,13,17,18,19,20,21,22,24,27,28,30,31,32,33,34,36,37,39,40,41,44,49,50],useful:[0,3,4,23,24,28,30,35,44,48,49,50],user:[24,27,28,38,39,42],userguid:42,uses:[0,1,7,11,16,25,26,28,34,39,40,42,44,48,50],using:[0,3,5,6,7,8,10,11,14,15,17,18,19,20,21,23,24,26,27,28,30,33,36,37,38,40,45,48,49,50],usr:42,usual:[30,39],util:[3,38,49],v_max:13,v_min:13,val:[3,35,49],valid:[0,35],valu:[0,2,3,4,5,6,7,8,11,12,13,14,15,16,18,19,20,21,23,24,25,27,28,30,31,32,35,36,39,41,42,43,48,49],valuabl:38,value_targets_mix_fract:[7,11],valueexcept:[3,49],valueoptimizationag:36,van:4,vari:41,variabl:[24,27,42],variable_scop:24,varianc:[10,28,38,48],variant:[28,32,48],variou:[3,25,32,47],vector:[3,4,8,9,11,12,24,27,30,35,37,41,48,49],vectorobservationspac:30,verbos:27,veri:[0,7,8,10,16,20,38,48,50],version:[7,11,25],versu:24,vertic:24,via:[2,12],video:[0,3,27],video_dump_method:0,view:38,viewabl:[3,49],visit:44,visual:[0,3,27,45,47],visualization_paramet:27,visualizationparamet:[3,27],vizdoom:[42,45],vote:28,wai:[3,7,11,28,31,37,39,41,47,49,50],wait:[5,24,40],walk:37,want:[3,4,23,24,25,30,31,32,49],warn:[28,30,31],wasn:25,weather_id:27,websit:[27,47],weight:[4,5,6,7,8,11,12,13,14,15,17,18,19,20,21,22,23,24,28,39,41,48],well:[20,24,28,35,48],went:11,were:[4,13,14,15,16,20,22,23,24,25,31,44],west:42,wget:42,what:[11,48],when:[0,3,4,5,6,7,8,9,10,11,20,24,25,26,27,28,30,33,34,36,37,38,49,50],whenev:40,where:[2,3,4,5,6,7,11,12,13,16,18,20,21,23,24,25,27,28,30,31,35,38,48,49],which:[0,1,2,3,5,6,7,8,10,11,12,16,18,19,20,21,22,24,25,26,27,28,30,32,33,34,35,36,37,38,39,40,41,43,44,45,47,48,49,50],who:39,why:[38,39],window:[30,31],wise:30,within:[0,7,11,19,28,35,38],without:[5,11,31,32,38,48,50],won:[4,24],wont:24,work:[3,18,24,28,30,31,38,39,48,49,50],workaround:0,workdir:42,worker:[0,3,18,24,26,30,32,33,34,38,40,41,42,48,49,50],worker_devic:24,worker_host:0,wors:48,would:[24,42,48],wrap:[27,30,39,45],wrapper:[3,24,25,27,35,41,49],write:[0,3,49],written:[3,23,26,49],www:42,xdist:42,y_t:[8,12,14,15,17,19,20,21],year:48,yet:[16,37],you:[4,30,32,36,37,42,47,50],your:[36,37,42,50],yuv:30,z_i:[13,23],z_j:[13,23],zero:[2,14,15],zip:42,zlib1g:42},titles:["Additional Parameters","Behavioral Cloning","Conditional Imitation Learning","Agents","Direct Future Prediction","Actor-Critic","ACER","Clipped Proximal Policy Optimization","Deep Deterministic Policy Gradient","Hierarchical Actor Critic","Policy Gradient","Proximal Policy Optimization","Bootstrapped DQN","Categorical DQN","Double DQN","Deep Q Networks","Dueling DQN","Mixed Monte Carlo","N-Step Q Learning","Normalized Advantage Functions","Neural Episodic Control","Persistent Advantage Learning","Quantile Regression DQN","Rainbow","Architectures","Core Types","Data Stores","Environments","Exploration Policies","Filters","Input Filters","Output Filters","Memories","Memory Backends","Orchestrators","Spaces","Adding a New Agent","Adding a New Environment","Coach Dashboard","Control Flow","Distributed Coach - Horizontal Scale-Out","Network Design","Usage - Distributed Coach","Algorithms","Benchmarks","Environments","Features","Reinforcement Learning Coach","Selecting an Algorithm","test","Usage"],titleterms:{"final":20,"function":19,"new":[36,37],"switch":50,Adding:[36,37],Using:37,acer:6,across:48,action:[4,5,6,7,8,9,10,11,12,19,20,31,35,48],actioninfo:25,actor:[5,9],addit:[0,50],additivenois:28,advantag:[19,21],agent:[3,36,39,50],algorithm:[1,2,4,5,6,7,8,9,10,11,12,13,14,15,17,18,19,20,21,22,23,43,48,50],api:37,architectur:24,attentionactionspac:35,backend:33,balancedexperiencereplai:32,batch:25,behavior:1,benchmark:44,between:50,blizzard:27,boltzmann:28,bootstrap:[12,28],boxactionspac:35,build:42,can:48,carla:27,carlo:17,categor:[13,28],choos:[4,5,6,7,8,9,10,11,12,19,20],clip:7,clone:[1,42],coach:[37,38,40,42,47],collect:48,compar:38,compoundactionspac:35,condit:2,config:42,contain:42,continu:[7,11,48],continuousentropi:28,control:[20,27,39],copi:41,core:25,creat:42,critic:[5,9],dashboard:38,data:26,deep:[8,15,50],deepmind:27,demonstr:48,descript:[1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23],design:41,determinist:8,direct:4,discret:[5,6,10,48],discreteactionspac:35,distribut:[40,42],distributedtaskparamet:0,doe:48,doubl:14,dqn:[12,13,14,16,22],duel:16,dump:50,egreedi:28,environ:[27,37,45,48,50],envrespons:25,episod:[20,25,32],episodicexperiencereplai:32,episodichindsightexperiencereplai:32,episodichrlhindsightexperiencereplai:32,evalu:50,experiencereplai:32,explor:28,explorationpolici:28,featur:46,file:42,filter:[29,30,31],flag:50,flow:39,framework:50,from:48,futur:4,gener:16,gif:50,goal:35,gradient:[8,10],graph:39,greedi:28,gym:[27,37],have:48,hierarch:9,horizont:40,human:[48,50],imag:42,imageobservationspac:35,imit:[2,50],implement:42,input:30,interfac:42,keep:41,kubernet:34,learn:[2,18,21,47,50],level:39,manag:39,memori:[32,33],mix:17,mont:17,more:48,multi:50,multipl:48,multiselectactionspac:35,network:[1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,41],networkwrapp:24,neural:20,nfsdatastor:26,node:[48,50],non:32,normal:19,observ:[30,35],observationclippingfilt:30,observationcropfilt:30,observationmoveaxisfilt:30,observationnormalizationfilt:30,observationreductionbysubpartsnamefilt:30,observationrescalesizebyfactorfilt:30,observationrescaletosizefilt:30,observationrgbtoyfilt:30,observationsqueezefilt:30,observationstackingfilt:30,observationtouint8filt:30,openai:[27,37],optim:[7,11],orchestr:34,ouprocess:28,out:40,output:31,pain:48,parallel:48,paramet:0,parameternois:28,persist:21,plai:50,planarmapsobservationspac:35,polici:[7,8,10,11,28],predict:4,prerequisit:42,presetvalidationparamet:0,prioritizedexperiencereplai:32,process:48,proxim:[7,11],push:42,qdnd:32,quantil:22,rainbow:23,redispubsubbackend:33,regress:22,reinforc:47,render:50,repositori:42,reward:30,rewardclippingfilt:30,rewardnormalizationfilt:30,rewardrescalefilt:30,run:[38,42],s3datastor:26,sampl:48,scale:40,select:48,signal:38,simul:48,singl:50,singleepisodebuff:32,solv:48,space:[35,48],starcraft:27,statist:38,step:18,store:[12,26],structur:[1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23],suit:27,support:40,sync:41,synchron:40,task:48,taskparamet:0,test:49,thread:50,through:50,track:38,train:[1,2,4,5,6,7,8,9,10,11,12,13,14,15,17,18,19,20,21,22,23,50],transit:[12,25],transitioncollect:32,truncatednorm:28,type:[25,40],ucb:28,usag:[42,50],vectorobservationspac:35,visual:[38,50],visualizationparamet:0,vizdoom:27,you:48,your:48}})