1
0
mirror of https://github.com/gryf/coach.git synced 2025-12-17 19:20:19 +01:00
Files
coach/docs/searchindex.js
guyk1971 74db141d5e SAC algorithm (#282)
* SAC algorithm

* SAC - updates to agent (learn_from_batch), sac_head and sac_q_head to fix problem in gradient calculation. Now SAC agents is able to train.
gym_environment - fixing an error in access to gym.spaces

* Soft Actor Critic - code cleanup

* code cleanup

* V-head initialization fix

* SAC benchmarks

* SAC Documentation

* typo fix

* documentation fixes

* documentation and version update

* README typo
2019-05-01 18:37:49 +03:00

1 line
58 KiB
JavaScript

Search.setIndex({docnames:["components/additional_parameters","components/agents/imitation/bc","components/agents/imitation/cil","components/agents/index","components/agents/other/dfp","components/agents/policy_optimization/ac","components/agents/policy_optimization/acer","components/agents/policy_optimization/cppo","components/agents/policy_optimization/ddpg","components/agents/policy_optimization/hac","components/agents/policy_optimization/pg","components/agents/policy_optimization/ppo","components/agents/policy_optimization/sac","components/agents/value_optimization/bs_dqn","components/agents/value_optimization/categorical_dqn","components/agents/value_optimization/double_dqn","components/agents/value_optimization/dqn","components/agents/value_optimization/dueling_dqn","components/agents/value_optimization/mmc","components/agents/value_optimization/n_step","components/agents/value_optimization/naf","components/agents/value_optimization/nec","components/agents/value_optimization/pal","components/agents/value_optimization/qr_dqn","components/agents/value_optimization/rainbow","components/architectures/index","components/core_types","components/data_stores/index","components/environments/index","components/exploration_policies/index","components/filters/index","components/filters/input_filters","components/filters/output_filters","components/memories/index","components/memory_backends/index","components/orchestrators/index","components/spaces","contributing/add_agent","contributing/add_env","dashboard","design/control_flow","design/horizontal_scaling","design/network","dist_usage","features/algorithms","features/benchmarks","features/environments","features/index","index","selecting_an_algorithm","test","usage"],envversion:{"sphinx.domains.c":1,"sphinx.domains.changeset":1,"sphinx.domains.cpp":1,"sphinx.domains.javascript":1,"sphinx.domains.math":2,"sphinx.domains.python":1,"sphinx.domains.rst":1,"sphinx.domains.std":1,"sphinx.ext.todo":1,"sphinx.ext.viewcode":1,sphinx:55},filenames:["components/additional_parameters.rst","components/agents/imitation/bc.rst","components/agents/imitation/cil.rst","components/agents/index.rst","components/agents/other/dfp.rst","components/agents/policy_optimization/ac.rst","components/agents/policy_optimization/acer.rst","components/agents/policy_optimization/cppo.rst","components/agents/policy_optimization/ddpg.rst","components/agents/policy_optimization/hac.rst","components/agents/policy_optimization/pg.rst","components/agents/policy_optimization/ppo.rst","components/agents/policy_optimization/sac.rst","components/agents/value_optimization/bs_dqn.rst","components/agents/value_optimization/categorical_dqn.rst","components/agents/value_optimization/double_dqn.rst","components/agents/value_optimization/dqn.rst","components/agents/value_optimization/dueling_dqn.rst","components/agents/value_optimization/mmc.rst","components/agents/value_optimization/n_step.rst","components/agents/value_optimization/naf.rst","components/agents/value_optimization/nec.rst","components/agents/value_optimization/pal.rst","components/agents/value_optimization/qr_dqn.rst","components/agents/value_optimization/rainbow.rst","components/architectures/index.rst","components/core_types.rst","components/data_stores/index.rst","components/environments/index.rst","components/exploration_policies/index.rst","components/filters/index.rst","components/filters/input_filters.rst","components/filters/output_filters.rst","components/memories/index.rst","components/memory_backends/index.rst","components/orchestrators/index.rst","components/spaces.rst","contributing/add_agent.rst","contributing/add_env.rst","dashboard.rst","design/control_flow.rst","design/horizontal_scaling.rst","design/network.rst","dist_usage.rst","features/algorithms.rst","features/benchmarks.rst","features/environments.rst","features/index.rst","index.rst","selecting_an_algorithm.rst","test.rst","usage.rst"],objects:{"rl_coach.agents.acer_agent":{ACERAlgorithmParameters:[6,0,1,""]},"rl_coach.agents.actor_critic_agent":{ActorCriticAlgorithmParameters:[5,0,1,""]},"rl_coach.agents.agent":{Agent:[3,0,1,""]},"rl_coach.agents.agent.Agent":{act:[3,1,1,""],call_memory:[3,1,1,""],choose_action:[3,1,1,""],collect_savers:[3,1,1,""],create_networks:[3,1,1,""],get_predictions:[3,1,1,""],get_state_embedding:[3,1,1,""],handle_episode_ended:[3,1,1,""],init_environment_dependent_modules:[3,1,1,""],learn_from_batch:[3,1,1,""],log_to_screen:[3,1,1,""],observe:[3,1,1,""],parent:[3,2,1,""],phase:[3,2,1,""],post_training_commands:[3,1,1,""],prepare_batch_for_inference:[3,1,1,""],register_signal:[3,1,1,""],reset_evaluation_state:[3,1,1,""],reset_internal_state:[3,1,1,""],restore_checkpoint:[3,1,1,""],run_off_policy_evaluation:[3,1,1,""],run_pre_network_filter_for_inference:[3,1,1,""],save_checkpoint:[3,1,1,""],set_environment_parameters:[3,1,1,""],set_incoming_directive:[3,1,1,""],set_session:[3,1,1,""],setup_logger:[3,1,1,""],sync:[3,1,1,""],train:[3,1,1,""],update_log:[3,1,1,""],update_step_in_episode_log:[3,1,1,""],update_transition_before_adding_to_replay_buffer:[3,1,1,""]},"rl_coach.agents.bc_agent":{BCAlgorithmParameters:[1,0,1,""]},"rl_coach.agents.categorical_dqn_agent":{CategoricalDQNAlgorithmParameters:[14,0,1,""]},"rl_coach.agents.cil_agent":{CILAlgorithmParameters:[2,0,1,""]},"rl_coach.agents.clipped_ppo_agent":{ClippedPPOAlgorithmParameters:[7,0,1,""]},"rl_coach.agents.ddpg_agent":{DDPGAlgorithmParameters:[8,0,1,""]},"rl_coach.agents.dfp_agent":{DFPAlgorithmParameters:[4,0,1,""]},"rl_coach.agents.dqn_agent":{DQNAgent:[50,0,1,""],DQNAlgorithmParameters:[16,0,1,""]},"rl_coach.agents.dqn_agent.DQNAgent":{act:[50,1,1,""],call_memory:[50,1,1,""],choose_action:[50,1,1,""],collect_savers:[50,1,1,""],create_networks:[50,1,1,""],get_predictions:[50,1,1,""],get_state_embedding:[50,1,1,""],handle_episode_ended:[50,1,1,""],improve_reward_model:[50,1,1,""],init_environment_dependent_modules:[50,1,1,""],learn_from_batch:[50,1,1,""],log_to_screen:[50,1,1,""],observe:[50,1,1,""],parent:[50,2,1,""],phase:[50,2,1,""],post_training_commands:[50,1,1,""],prepare_batch_for_inference:[50,1,1,""],register_signal:[50,1,1,""],reset_evaluation_state:[50,1,1,""],reset_internal_state:[50,1,1,""],restore_checkpoint:[50,1,1,""],run_off_policy_evaluation:[50,1,1,""],run_pre_network_filter_for_inference:[50,1,1,""],save_checkpoint:[50,1,1,""],set_environment_parameters:[50,1,1,""],set_incoming_directive:[50,1,1,""],set_session:[50,1,1,""],setup_logger:[50,1,1,""],sync:[50,1,1,""],train:[50,1,1,""],update_log:[50,1,1,""],update_step_in_episode_log:[50,1,1,""],update_transition_before_adding_to_replay_buffer:[50,1,1,""]},"rl_coach.agents.mmc_agent":{MixedMonteCarloAlgorithmParameters:[18,0,1,""]},"rl_coach.agents.n_step_q_agent":{NStepQAlgorithmParameters:[19,0,1,""]},"rl_coach.agents.naf_agent":{NAFAlgorithmParameters:[20,0,1,""]},"rl_coach.agents.nec_agent":{NECAlgorithmParameters:[21,0,1,""]},"rl_coach.agents.pal_agent":{PALAlgorithmParameters:[22,0,1,""]},"rl_coach.agents.policy_gradients_agent":{PolicyGradientAlgorithmParameters:[10,0,1,""]},"rl_coach.agents.ppo_agent":{PPOAlgorithmParameters:[11,0,1,""]},"rl_coach.agents.qr_dqn_agent":{QuantileRegressionDQNAlgorithmParameters:[23,0,1,""]},"rl_coach.agents.rainbow_dqn_agent":{RainbowDQNAlgorithmParameters:[24,0,1,""]},"rl_coach.agents.soft_actor_critic_agent":{SoftActorCriticAlgorithmParameters:[12,0,1,""]},"rl_coach.architectures.architecture":{Architecture:[25,0,1,""]},"rl_coach.architectures.architecture.Architecture":{accumulate_gradients:[25,1,1,""],apply_and_reset_gradients:[25,1,1,""],apply_gradients:[25,1,1,""],collect_savers:[25,1,1,""],construct:[25,3,1,""],get_variable_value:[25,1,1,""],get_weights:[25,1,1,""],parallel_predict:[25,3,1,""],predict:[25,1,1,""],reset_accumulated_gradients:[25,1,1,""],set_variable_value:[25,1,1,""],set_weights:[25,1,1,""],train_on_batch:[25,1,1,""]},"rl_coach.architectures.network_wrapper":{NetworkWrapper:[25,0,1,""]},"rl_coach.architectures.network_wrapper.NetworkWrapper":{apply_gradients_and_sync_networks:[25,1,1,""],apply_gradients_to_global_network:[25,1,1,""],apply_gradients_to_online_network:[25,1,1,""],collect_savers:[25,1,1,""],parallel_prediction:[25,1,1,""],set_is_training:[25,1,1,""],sync:[25,1,1,""],train_and_sync_networks:[25,1,1,""],update_online_network:[25,1,1,""],update_target_network:[25,1,1,""]},"rl_coach.base_parameters":{AgentParameters:[3,0,1,""],DistributedTaskParameters:[0,0,1,""],NetworkParameters:[25,0,1,""],PresetValidationParameters:[0,0,1,""],TaskParameters:[0,0,1,""],VisualizationParameters:[0,0,1,""]},"rl_coach.core_types":{ActionInfo:[26,0,1,""],Batch:[26,0,1,""],EnvResponse:[26,0,1,""],Episode:[26,0,1,""],Transition:[26,0,1,""]},"rl_coach.core_types.Batch":{actions:[26,1,1,""],game_overs:[26,1,1,""],goals:[26,1,1,""],info:[26,1,1,""],info_as_list:[26,1,1,""],n_step_discounted_rewards:[26,1,1,""],next_states:[26,1,1,""],rewards:[26,1,1,""],shuffle:[26,1,1,""],size:[26,2,1,""],slice:[26,1,1,""],states:[26,1,1,""]},"rl_coach.core_types.Episode":{get_first_transition:[26,1,1,""],get_last_transition:[26,1,1,""],get_transition:[26,1,1,""],get_transitions_attribute:[26,1,1,""],insert:[26,1,1,""],is_empty:[26,1,1,""],length:[26,1,1,""],update_discounted_rewards:[26,1,1,""]},"rl_coach.data_stores.nfs_data_store":{NFSDataStore:[27,0,1,""]},"rl_coach.data_stores.s3_data_store":{S3DataStore:[27,0,1,""]},"rl_coach.environments.carla_environment":{CarlaEnvironment:[28,0,1,""]},"rl_coach.environments.control_suite_environment":{ControlSuiteEnvironment:[28,0,1,""]},"rl_coach.environments.doom_environment":{DoomEnvironment:[28,0,1,""]},"rl_coach.environments.environment":{Environment:[28,0,1,""]},"rl_coach.environments.environment.Environment":{action_space:[28,2,1,""],close:[28,1,1,""],get_action_from_user:[28,1,1,""],get_available_keys:[28,1,1,""],get_goal:[28,1,1,""],get_random_action:[28,1,1,""],get_rendered_image:[28,1,1,""],goal_space:[28,2,1,""],handle_episode_ended:[28,1,1,""],last_env_response:[28,2,1,""],phase:[28,2,1,""],render:[28,1,1,""],reset_internal_state:[28,1,1,""],set_goal:[28,1,1,""],state_space:[28,2,1,""],step:[28,1,1,""]},"rl_coach.environments.gym_environment":{GymEnvironment:[28,0,1,""]},"rl_coach.environments.starcraft2_environment":{StarCraft2Environment:[28,0,1,""]},"rl_coach.exploration_policies.additive_noise":{AdditiveNoise:[29,0,1,""]},"rl_coach.exploration_policies.boltzmann":{Boltzmann:[29,0,1,""]},"rl_coach.exploration_policies.bootstrapped":{Bootstrapped:[29,0,1,""]},"rl_coach.exploration_policies.categorical":{Categorical:[29,0,1,""]},"rl_coach.exploration_policies.continuous_entropy":{ContinuousEntropy:[29,0,1,""]},"rl_coach.exploration_policies.e_greedy":{EGreedy:[29,0,1,""]},"rl_coach.exploration_policies.exploration_policy":{ExplorationPolicy:[29,0,1,""]},"rl_coach.exploration_policies.exploration_policy.ExplorationPolicy":{change_phase:[29,1,1,""],get_action:[29,1,1,""],requires_action_values:[29,1,1,""],reset:[29,1,1,""]},"rl_coach.exploration_policies.greedy":{Greedy:[29,0,1,""]},"rl_coach.exploration_policies.ou_process":{OUProcess:[29,0,1,""]},"rl_coach.exploration_policies.parameter_noise":{ParameterNoise:[29,0,1,""]},"rl_coach.exploration_policies.truncated_normal":{TruncatedNormal:[29,0,1,""]},"rl_coach.exploration_policies.ucb":{UCB:[29,0,1,""]},"rl_coach.filters.action":{AttentionDiscretization:[32,0,1,""],BoxDiscretization:[32,0,1,""],BoxMasking:[32,0,1,""],FullDiscreteActionSpaceMap:[32,0,1,""],LinearBoxToBoxMap:[32,0,1,""],PartialDiscreteActionSpaceMap:[32,0,1,""]},"rl_coach.filters.observation":{ObservationClippingFilter:[31,0,1,""],ObservationCropFilter:[31,0,1,""],ObservationMoveAxisFilter:[31,0,1,""],ObservationNormalizationFilter:[31,0,1,""],ObservationRGBToYFilter:[31,0,1,""],ObservationReductionBySubPartsNameFilter:[31,0,1,""],ObservationRescaleSizeByFactorFilter:[31,0,1,""],ObservationRescaleToSizeFilter:[31,0,1,""],ObservationSqueezeFilter:[31,0,1,""],ObservationStackingFilter:[31,0,1,""],ObservationToUInt8Filter:[31,0,1,""]},"rl_coach.filters.reward":{RewardClippingFilter:[31,0,1,""],RewardNormalizationFilter:[31,0,1,""],RewardRescaleFilter:[31,0,1,""]},"rl_coach.memories.backend.redis":{RedisPubSubBackend:[34,0,1,""]},"rl_coach.memories.episodic":{EpisodicExperienceReplay:[33,0,1,""],EpisodicHRLHindsightExperienceReplay:[33,0,1,""],EpisodicHindsightExperienceReplay:[33,0,1,""],SingleEpisodeBuffer:[33,0,1,""]},"rl_coach.memories.non_episodic":{BalancedExperienceReplay:[33,0,1,""],ExperienceReplay:[33,0,1,""],PrioritizedExperienceReplay:[33,0,1,""],QDND:[33,0,1,""],TransitionCollection:[33,0,1,""]},"rl_coach.orchestrators.kubernetes_orchestrator":{Kubernetes:[35,0,1,""]},"rl_coach.spaces":{ActionSpace:[36,0,1,""],AttentionActionSpace:[36,0,1,""],BoxActionSpace:[36,0,1,""],CompoundActionSpace:[36,0,1,""],DiscreteActionSpace:[36,0,1,""],GoalsSpace:[36,0,1,""],ImageObservationSpace:[36,0,1,""],MultiSelectActionSpace:[36,0,1,""],ObservationSpace:[36,0,1,""],PlanarMapsObservationSpace:[36,0,1,""],Space:[36,0,1,""],VectorObservationSpace:[36,0,1,""]},"rl_coach.spaces.ActionSpace":{clip_action_to_space:[36,1,1,""],contains:[36,1,1,""],is_valid_index:[36,1,1,""],sample:[36,1,1,""],sample_with_info:[36,1,1,""]},"rl_coach.spaces.GoalsSpace":{DistanceMetric:[36,0,1,""],clip_action_to_space:[36,1,1,""],contains:[36,1,1,""],distance_from_goal:[36,1,1,""],get_reward_for_goal_and_state:[36,1,1,""],goal_from_state:[36,1,1,""],is_valid_index:[36,1,1,""],sample:[36,1,1,""],sample_with_info:[36,1,1,""]},"rl_coach.spaces.ObservationSpace":{contains:[36,1,1,""],is_valid_index:[36,1,1,""],sample:[36,1,1,""]},"rl_coach.spaces.Space":{contains:[36,1,1,""],is_valid_index:[36,1,1,""],sample:[36,1,1,""]}},objnames:{"0":["py","class","Python class"],"1":["py","method","Python method"],"2":["py","attribute","Python attribute"],"3":["py","staticmethod","Python static method"]},objtypes:{"0":"py:class","1":"py:method","2":"py:attribute","3":"py:staticmethod"},terms:{"100x100":32,"160x160":31,"1_0":[14,24],"1st":29,"20x20":32,"210x160":31,"2nd":29,"50k":40,"9_amd64":43,"abstract":[37,41],"boolean":[3,26,36,50],"break":39,"case":[0,3,5,21,25,26,29,36,49,50,51],"class":[0,1,2,3,4,5,6,7,8,10,11,12,14,16,18,19,20,21,22,23,24,25,26,27,28,29,31,32,33,34,35,36,37,38,40,44,50],"default":[0,29,51],"enum":[25,28,36],"export":[0,25,43],"final":[8,15,16,18,22,40],"float":[3,4,5,6,7,8,10,11,12,14,18,21,22,23,25,26,28,29,31,32,33,36,37,50],"function":[0,1,3,6,7,8,11,25,28,29,36,37,38,40,42,50],"import":[6,17,29,33,38,49,51],"int":[0,3,4,5,6,7,10,14,19,21,23,24,26,28,29,31,32,33,36,50],"long":42,"new":[0,3,7,8,11,12,21,22,25,26,32,40,41,48,49,50],"return":[0,3,8,10,11,13,18,21,22,24,25,26,28,29,31,33,36,37,38,40,49,50],"short":[0,40],"static":25,"super":[37,38],"switch":[0,39],"true":[0,3,4,5,6,7,8,11,12,21,22,24,25,26,28,29,32,33,36,50],"try":[4,45,49],"while":[0,5,6,8,9,10,11,12,25,28,39,42,49,51],AWS:43,Adding:[17,48],And:[38,49],But:[39,49],Doing:49,For:[0,1,2,3,4,7,10,13,14,15,16,19,21,22,25,26,28,29,30,31,32,36,37,38,40,41,42,43,45,50,51],Has:25,Its:50,NFS:[27,43],One:[23,49,51],That:39,The:[0,1,2,3,4,5,6,7,8,10,11,12,13,14,18,19,20,21,22,23,24,25,26,27,28,29,31,32,33,34,35,36,37,39,40,41,42,43,45,46,48,49,50,51],Then:[4,7,8,13,20,22],There:[7,11,25,29,30,37,38,42,51],These:[1,2,3,23,28,35,41,42,43],Use:[1,2,8,20,21],Used:29,Uses:49,Using:[8,13,15,16,43],Will:25,With:[29,48],__init__:[28,37,38],_index:[5,19],_render:38,_restart_environment_episod:38,_take_act:38,_update_st:38,a2c:49,a3c:[10,19,39,49],a_i:21,a_t:[4,5,6,8,12,13,14,15,16,18,19,20,22,24],a_valu:5,abl:[32,49],about:[3,26,40,50,51],abov:[8,12,25,40],abs:[19,33],absolut:29,acceler:20,accept:28,access:[25,37,43],accord:[0,3,4,5,6,8,12,13,19,25,26,29,36,39,40,42,50],accordingli:[21,36,40,51],account:[4,7,11,21,22,29],accumul:[3,4,5,6,10,19,21,24,25,31,49,50],accumulate_gradi:25,accumulated_gradi:25,accur:49,acer:[3,49],acer_ag:6,aceralgorithmparamet:6,achiev:[0,4,7,28,31,33,36,45,49,51],acquir:12,across:[10,18,39],act:[3,4,8,13,23,36,37,40,50],action:[1,2,3,14,15,16,17,18,19,22,23,24,25,26,28,29,30,33,37,38,40,42,50],action_idx:38,action_penalti:8,action_spac:[28,29],action_space_s:25,action_valu:[26,29],actioninfo:[3,36,40,50],actionspac:[29,36],actiontyp:38,activ:[8,25],actor:[3,6,7,8,11,29,42,49],actor_critic_ag:5,actorcriticag:37,actorcriticalgorithmparamet:5,actual:[4,5,14,15,16,23,24,29,32,33],adam:[7,25],adam_optimizer_beta1:25,adam_optimizer_beta2:25,adapt:[7,11],add:[8,9,20,26,29,31,38,40,43],add_rendered_image_to_env_respons:0,added:[0,4,6,7,10,11,21,29,33,37],adding:[3,11,29,37,50],addit:[3,25,26,28,29,31,33,36,38,39,40,42,48,49,50],addition:[25,28,31,37,38,40,45,46,51],additional_fetch:25,additional_simulator_paramet:[28,38],additionali:39,additive_nois:29,additivenoiseparamet:29,advanc:[24,48],advantag:[3,5,7,11,17,29],affect:[0,13,25],aforement:[15,16,22],after:[0,3,8,10,11,12,19,20,22,24,25,26,28,31,36,50,51],again:29,against:3,agent:[0,1,2,4,5,6,7,8,10,11,12,14,16,18,19,20,21,22,23,24,25,26,28,29,30,31,32,36,38,39,42,44,45,48,49,50],agent_param:41,agent_paramet:[3,25,50],agentparamet:[3,25,37],aggreg:40,ahead:[4,49],aim:29,algorithm:[3,26,29,37,39,40,41,45,47,48,50],algorithmparamet:[3,37],all:[0,3,10,13,21,22,25,26,28,29,31,32,36,37,38,39,40,41,42,43,46,50,51],all_action_prob:26,allow:[0,3,4,17,25,26,28,29,30,31,32,33,39,40,41,42,48,49,50,51],allow_brak:28,allow_duplicates_in_batch_sampl:33,allow_no_action_to_be_select:36,along:[21,28,29,46],alpha:[6,18,22,33],alreadi:[21,26,38,49],also:[5,6,7,21,22,25,28,36,37,39,45,49,51],altern:[28,38,46],alwai:[25,29,32],amazon:43,amazonaw:43,amount:[8,10,18,22,29,40,49],analysi:39,analyz:39,ani:[3,25,26,28,32,33,37,40,41,42,43,50],anoth:[3,17,25,30,50],answer:49,api:[28,42,46,48],appear:[3,50],appli:[0,3,5,8,10,19,25,26,29,31,49,50],applic:49,apply_and_reset_gradi:25,apply_gradi:25,apply_gradients_and_sync_network:25,apply_gradients_every_x_episod:[5,10,19],apply_gradients_to_global_network:25,apply_gradients_to_online_network:25,apply_stop_condit:0,appropri:43,approx:[8,12],approxim:[12,42,49],apt:43,arbitrari:31,architectur:[3,17,37,48,50],architecture_num_q_head:29,area:32,arg:[3,25,43,50],argmax_a:[15,18,22],argument:[3,14,24,25,28,36,40,50],around:[25,26,42],arrai:[3,25,26,28,31,36,38,50],art:[3,44],artifact:43,artifici:33,arxiv:[19,33],aspect:[29,31,39],assign:[0,2,5,6,25,29],assign_kl_coeffici:25,assign_op:25,assum:[26,29,31,33,49],async:[25,41],async_train:25,asynchron:[5,19,25],atari:[16,28,31,43,51],atari_a3c:51,atari_dqn:51,ath:17,atom:[14,23,24],attach:28,attend:32,attent:32,attentionactionspac:32,attentiondiscret:32,attribut:26,attribute_nam:26,author:[28,45,46],auto_select_all_armi:28,autoclean:43,automat:[25,51],autonom:[28,46,48],autoremov:43,auxiliari:[28,46],avail:[4,25,26,28,39,41,43,48,49,51],averag:[6,7,11,25,39,40],avg:6,aws:43,axes:[31,39],axi:[31,39],axis_origin:31,axis_target:31,back:[7,41],backend:[25,41,43,48,51],background:51,backpropag:21,backward:25,balanc:2,band:39,bar:6,base1:43,base64:43,base:[7,11,12,18,20,22,28,33,37,40,43,46,49,50],base_paramet:[0,3,25,28,29],baselin:49,basic:[10,26,41,51],batch:[1,2,3,4,5,6,8,10,11,12,13,14,15,16,17,19,22,23,24,25,33,37,40,50],batch_siz:25,bc_agent:1,bcalgorithmparamet:1,becaus:40,becom:[8,41],been:[17,26,31,45,49],befor:[3,5,11,24,25,26,31,40,41,42,43,49,50],begin:[0,4,40],behav:36,behavior:[3,31,33,37,45,49,50,51],being:[3,37,48,49,50],bellman:[14,23,24],benchmark:[39,47,48,49],best:[49,51],beta1:25,beta2:25,beta:[6,8,10,33],beta_entropi:[5,6,7,10,11],better:[17,49],between:[0,1,2,3,6,7,8,10,11,12,14,18,19,21,23,24,25,26,28,29,32,33,36,37,39,40,42,48,49],bfg:[7,11],bia:[6,49],big:[11,14,24],bilinear:31,bin:[32,43],binari:13,bind:25,binomi:13,bit:31,blizzard:46,blob:[28,31],block:48,blog:48,boilerpl:40,bolling:39,bool:[0,3,4,5,6,7,8,11,12,21,22,24,25,26,28,29,33,36,50],boost:[43,49],bootstrap:[3,5,6,7,8,11,18,19,21,22,24,26,49],bootstrap_total_return_from_old_polici:[21,26],both:[3,7,25,28,29,32,49,50],bound:[6,7,11,14,24,29,36,49],box2d:43,box:[29,32,36],boxactionspac:32,boxdiscret:32,boxmask:32,breakout:51,breakoutdeterminist:[28,51],bring:11,bucket:43,buffer:[1,2,3,6,12,13,14,15,16,19,21,22,23,24,33,40,49,50,51],build:[30,48,49],builder:43,built:[37,40],bullet:6,button:[39,51],c51:14,cach:43,calcul:[3,4,5,6,7,8,10,11,13,14,15,16,18,19,21,22,23,24,25,26,29,33,37,50],call:[0,3,10,19,25,26,28,40,50],call_memori:[3,50],callabl:36,camera:[28,38],camera_height:28,camera_width:28,cameratyp:[28,38],can:[0,2,3,5,6,7,8,11,12,22,25,26,28,29,30,31,32,36,37,38,39,40,42,46,48,50,51],cannot:[3,50],carla:[31,46],carla_environ:28,carlaenviron:28,carlaenvironmentparamet:28,carlo:[3,22],cartpol:[28,38],cartpole_a3c:51,cartpole_clippedppo:[43,51],cartpole_dqn:51,categor:[3,5,6,49],categori:[30,31],categorical_dqn_ag:14,categoricaldqnalgorithmparamet:14,caus:[31,39],cdot:[5,7,8,10,12,13,14,15,16,18,20,22,24],central:[25,39],chain:8,challeng:40,chang:[0,3,6,7,8,11,13,17,19,22,29,40,43,50],change_phas:29,channel:[28,31],channels_axi:36,check:[0,3,26,36,50],checkpoint:[0,3,25,27,41,43,50,51],checkpoint_dir:[3,50],checkpoint_prefix:[3,50],checkpoint_restore_dir:[0,51],checkpoint_restore_path:0,checkpoint_save_dir:0,checkpoint_save_sec:0,child:25,chmod:43,choic:[37,43],choos:[3,17,22,29,30,32,36,37,40,42,49,50,51],choose_act:[3,37,40,50],chosen:[3,12,22,29,32,37,50],chunk:11,cil:49,cil_ag:2,cilalgorithmparamet:2,classic_control:43,clean:[28,37,43],cli:43,clip:[3,6,8,11,25,31,36,49],clip_action_to_spac:36,clip_critic_target:8,clip_gradi:25,clip_high:29,clip_likelihood_ratio_using_epsilon:[7,11],clip_low:29,clip_max:31,clip_min:31,clipbyglobalnorm:25,clipped_ppo_ag:7,clippedppoalgorithmparamet:7,clipping_high:31,clipping_low:31,clone:[3,49],close:28,cmake:43,coach:[0,3,25,27,28,29,30,34,35,37,40,44,45,46,49,51],code:[38,40,49],coeffici:[7,11,25,29,33],collect:[3,7,10,11,19,25,26,33,40,45,48,50,51],collect_sav:[3,25,50],color:31,com:43,combin:[24,42,48,49],comma:0,command:[40,43,51],common:[37,39,43,51],commun:41,compar:[0,11,17,49],complet:[26,29,40],complex:[25,30,40,42,49,51],compon:[3,14,24,25,29,35,37,40,48,50,51],composit:[3,50],compositeag:[3,50],comput:[25,29],concat:25,concentr:40,condit:[0,3],confid:29,config:[28,51],configur:[3,5,10,37,43,50],confus:40,connect:[12,25],connectionist:10,consecut:[8,21],consequ:[19,29],consid:[5,6,32,39],consist:[8,28,31,32,36,40,46],constant:6,constantli:51,constantschedul:33,constrain:32,construct:[12,25,33],consumpt:31,contain:[0,1,2,3,13,25,26,28,36,38,40,50,51],content:43,contin:41,continu:[1,2,5,8,9,10,20,29,30,32,36,45],continuous_entropi:29,continuous_exploration_policy_paramet:29,contribut:[4,48],control:[2,3,5,6,7,8,11,25,29,31,39,46,48,49,50],control_suite_environ:28,controlsuiteenviron:28,conveni:[39,51],converg:10,convers:30,convert:[3,26,29,31,36,40,42,50],convolut:[25,42],coordin:32,copi:[8,12,13,14,15,16,18,19,20,22,23,24,25,43],core:48,core_typ:[3,26,28,36,50,51],correct:[3,6,49],correctli:25,correl:29,correpond:26,correspond:[2,3,4,14,15,25,26,29,31,36,38,50],could:[3,25,36,43,50],count:18,countabl:32,counter:[3,50],counterpart:42,cpu:[0,25],crd:51,creat:[3,19,25,31,38,50,51],create_network:[3,50],create_target_network:25,creation:[3,50],credenti:43,critic:[3,6,7,8,11,29,42,49],crop:[31,32],crop_high:31,crop_low:31,cross:[1,14,24],csv:0,ctrl:39,cuda:43,cudnn7:43,curl:43,curr_stat:[3,37,50],current:[0,1,2,3,4,6,7,8,9,10,11,12,13,15,16,18,20,21,22,23,25,26,28,29,31,32,36,37,40,48,49,50],custom:[28,29,36,37,40],custom_reward_threshold:28,cycl:40,dai:51,dashboard:[0,3,43,48,50],data:[0,10,19,25,33,40,41,43,45,48,49,51],data_stor:[27,43],dataset:[3,7,11,49,50,51],date:[21,42,49,51],dcp:[43,51],ddpg:49,ddpg_agent:8,ddpgalgorithmparamet:8,ddqn:[18,22,49],deal:49,debug:[0,39,48],decai:[5,7,11,25],decid:[0,3,4,28,37,50],decis:[3,50],decod:43,dedic:25,deep:[0,3,5,12,13,15,17,19,20,24,50],deepmind:46,def:[37,38],default_act:36,default_input_filt:38,default_output_filt:38,defin:[0,3,5,6,7,10,11,12,19,21,22,25,26,28,29,31,32,33,36,37,38,40,41,42,45,46,50,51],definit:[3,25,28,36,38,40,50],delai:49,delta:[6,14,21,24],demonstr:[1,2,51],dens:29,densiti:18,depecr:0,depend:[0,3,6,25,31,33,36,38,43,45,49,50],deploi:[35,41],depth:28,descend:49,describ:[3,14,23,31,33,37,40,43,50],descript:[3,32,36,44,51],design:[40,43,48],desir:[32,37],destabil:10,detail:[3,26,44,46,48,51],determin:[2,3,21,26,33,50],determinist:[3,12,49],dev:43,develop:[40,45],deviat:[10,11,29,31,39],devic:25,dfp:49,dfp_agent:4,dfpalgorithmparamet:4,dict:[3,4,25,26,28,29,36,50],dict_siz:33,dictat:4,dictionari:[2,3,25,26,28,33,36,37,50],did:28,differ:[0,1,2,3,4,5,6,7,10,11,13,17,25,28,29,31,36,37,38,39,41,42,48,49,50],differenti:17,difficult:[39,45],difficulti:51,dimens:[26,28,31,32],dimension:[11,32],dir:[0,3,50,51],direct:[3,28,50],directli:[3,5,40,42,50],directori:[0,25,37,39,43,51],disabl:51,disable_fog:28,disappear:28,disassembl:49,discard:[26,31],discount:[8,10,11,18,21,22,24,25,26,49],discret:[1,2,4,7,11,13,14,15,16,17,18,19,21,22,23,24,29,30,31,32,36,40],disentangl:40,disk:0,displai:[0,39],distanc:36,distance_from_go:36,distance_metr:36,distancemetr:36,distil:[3,50],distribut:[5,6,10,11,12,14,23,24,25,27,29,34,35,36,42,48,49,51],distributed_coach:41,distributed_coach_synchronization_typ:41,distributedcoachsynchronizationtyp:41,divereg:[7,11],diverg:[6,7,11,24],dnd:[0,21,49],dnd_key_error_threshold:21,dnd_size:21,do_action_hindsight:33,doc:43,docker:43,dockerfil:43,document:46,doe:[13,25,31],doesn:41,doing:[7,11,30],domain:42,don:[4,29,39,49],done:[0,3,7,10,11,28,31,38,50,51],doom:[28,38,43,46],doom_basic_bc:51,doom_basic_dqn:51,doom_environ:[28,38,51],doomenviron:[28,38],doomenvironmentparamet:[38,51],doominputfilt:38,doomlevel:28,doomoutputfilt:38,doubl:[3,18,24],doubli:50,down:[25,28],download:43,dpkg:43,dqn:[3,18,19,24,28,29,31,32,40,42,49],dqn_agent:[16,50],dqnagent:50,dqnalgorithmparamet:16,drive:[2,28,46,48],driving_benchmark:28,due:31,duel:[3,24],dump:[0,3,50],dump_csv:0,dump_gif:0,dump_in_episode_sign:0,dump_mp4:0,dump_one_value_per_episod:[3,50],dump_one_value_per_step:[3,50],dump_parameters_document:0,dump_signals_to_csv_every_x_episod:0,dure:[3,6,7,10,11,12,13,21,29,39,40,50,51],dynam:[39,45,49],e_greedi:29,each:[0,1,2,3,4,5,6,7,10,11,12,13,15,16,17,19,21,22,23,25,26,28,29,30,31,32,33,36,37,39,40,41,42,43,45,49,50],eas:39,easi:[38,39,48],easier:42,easili:[29,51],echo:43,effect:[0,3,6,7,19,31,40,50],effici:[6,40,49],either:[0,3,5,19,25,29,36,39,42,51],element:[3,13,25,31,36],elf:43,embbed:25,embed:[3,21,25,50],embedd:[25,42],embedding_merger_typ:25,embeddingmergertyp:25,emploi:49,empti:26,emul:6,enabl:[25,42,51],encod:[31,36],encourag:[20,22,40],end:[2,3,10,24,26,28,31,50,51],enforc:32,engin:[28,46],enough:[4,6,21],ensembl:[29,49],ensur:[6,25],enter:[3,50,51],entir:[11,18,21,24,29,32,40],entri:[21,40],entropi:[1,5,6,7,10,11,12,14,24,29,49],enumer:36,env:[26,43],env_param:38,env_respons:[3,50],enviorn:28,environ:[0,3,4,6,17,25,26,29,30,31,32,36,37,40,43,45,47,48,50],environmentparamet:[28,38],envrespons:[0,3,28,50],episod:[0,3,4,5,10,11,13,18,19,24,28,29,37,38,39,40,41,50,51],episode_max_tim:28,episodic_hindsight_experience_replai:33,epoch:[7,50],epsilon:[7,29,33],epsilon_schedul:29,equal:2,equat:[8,12,15,16,19,23],error:[25,49],escap:51,especi:17,essenti:[19,25,32,38,40,43],estim:[3,5,7,11,13,18,22,29,50],estimate_state_value_using_ga:[5,7,11],eta:[7,11],etc:[0,3,25,28,30,36,37,46,50],evalu:[0,3,12,25,26,29,40,50],evaluate_onli:0,evaluation_epsilon:29,evaluation_noise_percentag:29,even:[17,25,28,38,39,40,49],everi:[0,5,6,8,10,12,13,14,15,16,18,19,20,22,23,24,51],exact:[21,29,45],exactli:25,exampl:[2,3,4,25,26,28,29,30,31,32,36,37,38,40,42,50,51],except:[19,26],execut:[26,39,40],exhibit:[3,37,50],exist:[21,25],exit:[3,50],expand_dim:26,expect:[0,3,29,45,50],experi:[0,6,8,11,12,24,28,33,34,39,40,41,43,48,49,51],experiment_path:[0,28],experiment_suit:28,experimentsuit:28,expert:[1,2,26,49],exploit:[29,40],explor:[3,4,5,6,7,8,9,11,13,18,20,21,37,40,48,49],exploration_polici:29,explorationparamet:[3,29,37],exponenti:[6,7,11,24,25],expor:3,export_onnx_graph:0,expos:[39,42,48],extend:[28,29,46],extens:[28,46],extent:51,extern:0,extra:[25,26,42],extract:[3,20,21,26,31,36,39,40,50],factor:[8,10,11,22,24,25,26,29,31],faithfulli:39,fake:36,fals:[0,3,8,25,26,28,29,32,33,36,38,50],far:[11,31,40,45],faster:[17,49],featur:[8,28,42,48,49],feature_minimap_maps_to_us:28,feature_screen_maps_to_us:28,fetch:[25,26],fetched_tensor:25,few:[10,13,14,15,16,18,22,23,24,29,38],field:[45,48],file:[0,3,37,40,50,51],fill:[26,38],filter:[0,3,48,50],find:[15,39,46,48],finish:[21,51],finit:32,first:[0,8,11,13,21,23,24,25,26,31,40,42],fit:36,flag:[0,3,25,26,28,50],flexibl:41,flicker:28,flow:[30,48],follow:[2,3,5,6,8,10,12,14,15,16,19,20,21,23,24,25,26,28,29,33,37,38,43,45,49,50],footprint:31,forc:[25,28,32,38],force_cpu:25,force_environment_reset:[28,38],force_int_bin:32,forced_attention_s:36,form:[4,19,36,49],format:37,formul:[5,6],forward:[25,29],found:[3,44,51],frac:[6,7,12,14,24],fraction:[7,11],frame:[0,28],frame_skip:28,framework:[0,3,25,37,48,50],framework_typ:0,free:[28,46],freeglut3:43,from:[0,1,2,3,4,5,6,7,8,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,28,29,30,31,32,34,36,37,38,39,40,41,42,43,45,46,48,50,51],full:[3,10,18,32,50],fulldiscreteactionspacemap:32,fulli:25,func:[3,50],futur:[0,3,10,26,49],future_measurements_weight:4,gae:[5,7,11],gae_lambda:[5,7,11],game:[3,26,28,46,48,50,51],game_ov:26,gamma:[5,6,8,12,13,14,15,16,18,19,20,21,22,24],gap:[22,49],gather:41,gaussian:[11,12,29],gener:[0,5,7,11,13,25,28,29,33,36,37,43,51],general_network:37,get:[3,4,7,8,9,10,11,13,15,16,18,20,22,25,26,28,29,36,40,42,43,45,50],get_act:29,get_action_from_us:28,get_available_kei:28,get_first_transit:26,get_goal:28,get_last_env_respons:28,get_last_transit:26,get_output_head:37,get_predict:[3,50],get_random_act:28,get_rendered_imag:[28,38],get_reward_for_goal_and_st:36,get_state_embed:[3,50],get_transit:26,get_transitions_attribut:26,get_variable_valu:25,get_weight:25,gfortran:43,gif:0,git:43,github:[38,43,45,48],given:[0,1,2,3,4,5,8,10,11,25,26,28,29,31,32,33,36,37,40,50],given_weight:25,global:[3,25,42,50],global_network:25,glx:43,goal:[1,2,3,4,6,25,26,28,33,40,42,49,50],goal_from_st:36,goal_nam:36,goal_spac:28,goal_vector:4,goals_spac:33,goalsspac:[33,36],goaltorewardconvers:36,going:30,good:[38,39],gpu:[0,25],gracefulli:51,gradient:[3,5,6,7,11,19,21,25,37,49,50],gradientclippingmethod:25,gradients_clipping_method:25,granular:33,graph:0,graphmanag:40,grayscal:[31,36],greedili:40,group:39,grow:24,guidelin:49,gym:[43,46],gym_environ:[28,51],gymenviron:28,gymenvironmentparamet:38,hac:49,had:45,hand:[17,31,40,49],handl:4,handle_episode_end:[3,28,50],handling_targets_after_episode_end:4,handlingtargetsafterepisodeend:4,hard:[39,49],harder:39,has:[0,3,17,21,22,26,29,31,40,42,45,49,50],has_glob:25,has_target:25,hat:[6,7,14,24],have:[0,3,4,6,25,28,29,31,32,33,40,42,45,50],head:[1,2,3,5,6,10,13,17,20,21,25,29,37,42,50],headparamet:25,heads_paramet:25,health_gath:28,heat:6,heatup:[29,40],help:[22,26,39,40,49],here:[38,40],heurist:[11,29],hide:42,hierarch:[36,40],hierarchi:[3,40,49,50],high:[8,11,31,32,36,39],high_i:36,high_kl_penalty_coeffici:11,high_x:36,higher:11,highest:[5,6,10,22,29,31,32,36],highli:[0,38,49],hindsight:[9,33,49],hindsight_goal_selection_method:33,hindsight_transitions_per_regular_transit:33,hindsightgoalselectionmethod:33,hold:[13,25,26,33,39,40,42],horizont:[43,48,51],host:43,hostnam:0,hot:36,how:[4,7,11,29,41,43,49,51],hrl:33,html:43,http:[19,33,43],hub:43,huber:23,huber_loss_interv:23,human:[0,28],human_control:28,hyper:[37,45],hyperparamet:37,ident:25,identifi:[25,36],ies:50,ignor:28,imag:[0,25,28,31,32,36,38,42,51],image1:43,imit:[3,26,44,49],impact:25,implement:[3,7,11,25,27,28,29,33,37,38,41,45,49,51],impli:51,implment:35,importance_weight:25,importance_weight_trunc:6,importantli:40,improv:[5,17,24,28,40,49],improve_reward_model:50,includ:[0,3,4,28,30,31,35,42,46,50,51],increas:[11,22,31,49],increment:[3,50],index:[0,2,26,28,31,32,33,36],indic:36,inf:[31,36],infer:[3,25,28,50],infinit:[0,49],info:[3,13,26,36,38,50],info_as_list:26,inform:[3,4,19,26,28,30,39,40,43,46,50],inherit:[3,37,38],init_environment_dependent_modul:[3,50],initi:[3,4,11,22,25,26,37,40,48,50],initial_feed_dict:25,initial_kl_coeffici:11,innov:49,input:[1,2,3,4,8,13,15,16,18,20,21,22,25,30,36,40,42,50],input_embedders_paramet:25,input_high:31,input_low:31,input_space_high:32,input_space_low:32,inputembedderparamet:25,inputfilt:40,insert:[21,26],inspect:0,instal:[43,51],instanc:[3,34,36,42],instanti:[3,28,40],instead:[0,3,7,19,22,25,31,32,40,49,50],instruct:51,intact:[13,45],integ:[0,31,32],integr:[38,40,41,48],intel:48,intend:[10,25,29,40],interact:[26,40,41,48,51],interest:[25,39],interfac:[28,39,41,46],intermedi:21,intern:[3,10,19,25,26,30,40,50,51],interpol:31,intersect:49,interv:23,intro:48,introduc:49,invers:[28,46],invok:40,involv:37,is_empti:26,is_valid_index:36,item:26,iter:[3,5,6,8,11,17,25,50],its:[0,3,14,24,25,26,29,36,40,43,49,50,51],itself:[25,36,51],job:0,job_typ:0,joint:28,json:0,jump:[4,32],jupyt:37,just:[3,11,22,24,38,40,42,50,51],kapa:23,keep:[16,26,31,51],kei:[2,21,25,26,28,33,37,39,43,49,51],key_error_threshold:33,key_width:33,keyboard:[28,51],keyword:25,kl_coeffici:25,kl_coefficient_ph:25,know:[3,49,50,51],knowledg:[3,40,50],known:[26,39,45,49],kubeconfig:35,kubernet:43,kubernetes_orchestr:35,kubernetesparamet:35,kwarg:[25,28],l2_norm_added_delta:21,l2_regular:25,lack:39,lamb:29,lambda:[5,7,11,29],lane:2,larg:[29,31,46],larger:25,last:[4,6,11,21,26,28,31],last_env_respons:28,lastli:40,later:[0,3,25,50,51],latest:[19,21,40,43],layer:[25,29,33,40,42],lazi:[26,31],lazystack:31,lbfg:25,ld_library_path:43,lead:29,learn:[0,3,4,5,6,8,9,10,12,13,14,15,16,17,20,23,24,25,26,28,29,31,39,40,42,44,45,46,49,50],learn_from_batch:[3,37,40,50],learner:25,learning_r:[25,33],learning_rate_decay_r:25,learning_rate_decay_step:25,least:[42,49],leav:[11,13],left:[2,6,12,49],length:[4,5,7,11,19,21,25,26],less:[17,49],level:[0,3,25,28,38,50,51],levelmanag:[3,40,50],levelselect:28,libatla:43,libav:43,libavformat:43,libbla:43,libboost:43,libbz2:43,libfluidsynth:43,libgl1:43,libglew:43,libgm:43,libgstream:43,libgtk2:43,libgtk:43,libjpeg:43,liblapack:43,libnotifi:43,libopen:43,libosmesa6:43,libportmidi:43,librari:[28,43,46],libsdl1:43,libsdl2:43,libsdl:43,libsm:43,libsmpeg:43,libswscal:43,libtiff:43,libwebkitgtk:43,libwildmidi:43,like:[12,28,36,40,42,43,49],likelihood:[7,11],line:[3,40,50,51],linear:32,linearboxtoboxmap:32,linearli:32,list:[0,3,4,25,26,28,29,31,32,36,37,50,51],load:[0,39,41,51],load_memory_from_file_path:51,local:[3,42,43,50],locat:[23,26,31,49],log:[0,3,5,6,10,12,50],log_to_screen:[3,50],logger:[0,3,50],look:[38,43],loop:40,loss:[1,2,3,6,7,10,11,14,15,16,23,24,25,29,37,42,50],lot:[29,39,45,49],low:[8,11,31,32,36],low_i:36,low_x:36,lower:[0,33,40],lowest:[31,32,36],lstm:42,lumin:31,lvert:[6,14,24],lvl:51,mai:[0,25,44,51],main:[3,37,40,42,44,50,51],mainli:41,major:29,make:[0,3,25,28,37,39,43,45,49,50],manag:[3,25,41,43,50],mandatori:[36,38,42],mani:[3,17,44,45],manner:[11,18,19,22,31,40],manual:43,map:[3,25,28,30,31,32,36,37,50],mark:26,markdown:50,mask:[13,32],masked_target_space_high:32,masked_target_space_low:32,master:[3,40,43,50],match:[2,21,25,36],mathbb:[5,6],mathop:5,max:[5,6,14,19,24,31],max_a:[13,16,21,22],max_action_valu:26,max_episodes_to_achieve_reward:0,max_fps_for_human_control:0,max_kl_diverg:6,max_over_num_fram:28,max_simultaneous_selected_act:36,max_siz:33,max_spe:28,maxim:[4,15],maximum:[0,12,14,16,21,22,26,28,29,31,33,49],mean:[0,2,7,8,9,10,11,12,20,25,29,31,32,36,39,49],meant:42,measur:[3,4,25,28,31,36,38,49,50],measurements_nam:36,mechan:[30,41,45,51],memor:49,memori:[3,24,26,31,37,40,41,43,48,49,50],memory_backend:43,memorygranular:33,memoryparamet:[3,37],merg:[25,28],mesa:43,method:[0,5,7,11,19,25,31,33],metric:[0,36,39],mid:6,middlewar:[21,25,42],middleware_paramet:25,middlewareparamet:25,midpoint:23,might:[3,10,28,37,42,50],min:[6,7,14,22,24],min_:12,min_reward_threshold:0,mind:51,minim:[2,4,14],minimap_s:28,minimum:[0,7,31],mix:[3,7,11,21,22,49],mixedmontecarloalgorithmparamet:18,mixer1:43,mixtur:[18,25],mjkei:43,mjpro150:43,mjpro150_linux:43,mkdir:43,mmc:[18,49],mmc_agent:18,mode:[22,25,27,34,35,40,41,43,51],model:[0,18,20,25,48,50,51],modif:49,modifi:6,modul:[3,37,40,41,50],modular:[37,40,42,48],monitor:41,mont:[3,22],monte_carlo_mixing_r:[18,22],more:[3,8,19,25,31,37,39,40,42,43,48,50,51],moreov:39,most:[3,10,21,25,26,29,42,45,49,50,51],mostli:[31,40],motiv:40,move:[6,7,11,31,39,45],mp4:0,mse:[2,6,15,16,23],much:[7,11,40,49],mujoco:[28,32,38,43,46],mujoco_kei:43,mujoco_pi:43,multi:[11,25,36,42],multidimension:36,multipl:[4,7,11,19,25,28,29,31,32,33,36,39,40,45,48,51],multipli:[4,10,25,31],multiselect:32,multitask:[28,46],must:[25,31,36,45],mxnet:51,n_step:[21,24,26,33],n_step_discounted_reward:26,n_step_q_ag:19,nabla:[6,8],nabla_:[8,12],nabla_a:8,naf:49,naf_ag:20,nafalgorithmparamet:20,name:[3,25,26,28,31,36,37,43,50,51],namespac:35,nasm:43,nativ:[0,28,38,46],native_rend:0,navig:3,ndarrai:[3,25,26,28,29,31,32,36,38,50],nearest:21,neat:39,nec:[0,49],nec_ag:21,necalgorithmparamet:21,necessari:[3,21,25,50],necessarili:31,need:[0,3,6,24,25,28,29,36,37,40,45,49,50,51],neg:[4,31],neighbor:21,neon_compon:37,nervanasystem:43,network:[0,3,25,29,37,40,45,48,49,50,51],network_input_tupl:25,network_nam:[3,50],network_param:29,network_paramet:25,network_wrapp:[3,25,50],networkparamet:[3,25,29,37],networkwrapp:[3,50],neural:[3,18,25,42,45],never:25,new_value_shift_coeffici:[21,33],new_weight:25,newli:[22,38,49],next:[0,3,8,15,16,20,22,23,26,28,40,50,51],next_stat:26,nfs_data_stor:27,nfsdatastoreparamet:27,nice:51,no_accumul:25,node:[25,42],nois:[8,9,20,29,40],noise_percentage_schedul:29,noisi:[10,24,29],non_episod:33,none:[0,3,7,8,11,25,26,28,29,31,32,36,38,50],norm:25,norm_unclipped_grad:25,norm_unclippsed_grad:25,normal:[3,4,10,29,30,31,36],note:[21,25,29,50],notebook:37,notic:[25,49],notori:[39,45,49],now:[7,38],nstepqalgorithmparamet:19,nth:24,num_act:[21,33,36],num_bins_per_dimens:32,num_class:33,num_consecutive_playing_step:[3,8,50],num_consecutive_training_step:[3,50],num_gpu:0,num_neighbor:33,num_predicted_steps_ahead:4,num_speedup_step:28,num_steps_between_copying_online_weights_to_target:[8,12,19],num_steps_between_gradient_upd:[5,6,10,19],num_task:0,num_training_task:0,num_transitions_to_start_replai:6,num_work:0,number:[0,2,4,5,6,8,10,12,13,14,19,21,23,24,25,26,28,29,31,32,33,39,46,50,51],number_of_knn:21,numpi:[3,25,26,28,29,31,32,36,38,50],nvidia:43,object:[0,3,24,25,28,29,31,33,40,50],observ:[0,3,4,11,25,26,28,30,38,40,50],observation_reduction_by_sub_parts_name_filt:31,observation_rescale_size_by_factor_filt:31,observation_rescale_to_size_filt:31,observation_space_s:25,observation_space_typ:28,observation_stat:31,observation_typ:28,observationspac:36,observationspacetyp:28,observationtyp:28,off:[3,6,12,41,49,50],offer:[28,46],often:[39,40,42],old:[7,11,25,49],old_weight:25,onc:[0,7,10,11,13,14,15,16,18,19,22,23,24,25,36,51],one:[0,3,6,17,21,22,25,26,28,29,30,33,36,38,39,42,49,50],ones:[38,49],onli:[0,3,4,5,6,7,10,11,13,14,16,17,19,21,23,24,25,26,28,29,31,32,38,40,49,50,51],onlin:[8,12,13,14,15,16,18,19,20,21,22,23,24,25,40,42],online_network:25,onnx:[0,25],onto:30,open:[0,28,46],openai:[43,46],opencv:43,oper:[22,25,31],optim:[3,4,6,25,44,49],optimization_epoch:7,optimizer_epsilon:25,optimizer_typ:25,option:[6,10,25,28,32,36,37,39,41,42,51],orchestr:[41,43,48],order:[0,3,5,6,7,8,10,11,12,15,16,17,19,20,21,22,23,25,26,30,31,32,39,40,42,45,49,50],org:[19,33],origin:[19,31,32,45],ornstein:[8,9,29],other:[0,2,10,17,22,25,28,30,31,33,39,40,49],otherwis:[11,13,25,28,29,36],ou_process:29,our:7,out:[2,15,16,29,30,32,39,43,48,49,51],outcom:[29,40],output:[0,4,6,8,13,14,20,21,25,29,30,31,36,37,42],output_0_0:25,output_observation_spac:31,outputfilt:40,outsid:[4,29],over:[3,7,10,11,19,21,24,25,26,29,31,32,39,40,49,50],overestim:8,overfit:11,overhead:0,overlai:39,overrid:[3,50],override_existing_kei:33,overriden:37,overview:40,overwhelm:40,overwritten:25,own:[25,37],p_j:[14,24],page:[3,45],pair:[0,36],pal:[22,49],pal_ag:22,pal_alpha:22,palalgorithmparamet:22,paper:[5,10,12,14,19,21,23,28,33,45],parallel:[6,25,39,42],parallel_predict:25,param:[3,25,26,27,28,29,34,35,37,38,50],paramet:[2,3,4,5,6,7,8,10,11,12,14,18,19,21,22,23,24,25,26,27,28,29,31,32,33,34,35,36,37,38,45,48,50,51],parameter_nois:29,parameters_server_host:0,parent:[3,25,50],parent_path_suffix:[3,25,50],parmet:3,pars:40,part:[0,13,25,26,29,31,32,41,42,45,49],part_nam:31,partial:32,partialdiscreteactionspacemap:32,particular:4,particularli:[28,29,36,45,49],pass:[0,4,8,9,20,21,25,28,29,30,38,39,40,42,51],patamet:21,patchelf:43,patchelf_0:43,path:[0,3,25,37,38,43,50,51],pattern:40,pdf:33,penal:[7,8,11],penalti:11,pendulum_hac:38,pendulum_with_go:38,pendulumwithgo:38,per:[0,3,4,36,37,40,50],percentag:29,percentil:29,perceptron:42,perform:[0,3,6,25,26,31,33,38,39,40,49,50],period:[42,51],persist:3,persistent_advantage_learn:22,perspect:14,phase:[3,6,7,8,9,11,12,25,28,29,40,50],phi:[14,24],physic:[28,46],pi_:[6,7,12],pick:[12,28],pickl:51,pickledreplaybuff:51,pip3:43,pip:43,pixel:28,place:[32,39,40],placehold:[25,29],plai:[0,3,10,13,15,16,19,29,37,39,50],plain:42,planarmap:28,planarmapsobservationspac:31,platform:[28,46],pleas:[19,45],plu:25,plugin:43,point:[31,36,40,41],polici:[1,3,4,5,6,9,12,13,19,20,21,27,37,40,41,42,43,44,48,49,50],policy_gradient_rescal:[5,7,10,11],policy_gradients_ag:10,policygradientalgorithmparamet:10,policygradientrescal:[5,7,10,11],policyoptimizationag:37,popul:40,popular:[28,46],port:0,posit:[4,31],possibl:[2,3,4,21,29,32,36,39,42,48,49,50,51],post:[30,48],post_training_command:[3,50],power:[28,46],ppo:[7,11,49],ppo_ag:11,ppoalgorithmparamet:11,pre:[8,29,30],predefin:[13,22,29,51],predict:[1,2,3,5,6,7,8,11,12,13,14,15,16,22,23,24,25,29,42,49,50],prediction_typ:[3,50],predictiontyp:[3,50],prefect:49,prefer:25,prefix:[3,50],prep:43,prepar:[3,50],prepare_batch_for_infer:[3,50],present:[17,21,25,28,31,49],preset:[0,5,37,38,40,41,43,51],press:[39,51],prevent:[8,11,40],previou:31,previous:[11,25],print:[0,3,51],print_networks_summari:0,priorit:[24,33],prioriti:[24,33],privat:36,probabilit:[5,6],probabl:[3,5,6,10,13,14,24,26,29,37,49,50],problem:49,procedur:6,process:[0,3,8,9,25,29,30,31,32,37,39,40,42,45,48,50],produc:25,progress:25,project:[14,24],propag:7,propagate_updates_to_dnd:21,properti:[25,33,37,38,43],proport:33,provid:[25,41],proxi:40,proxim:3,pub:[34,35,43],publish:45,purpos:[0,3,10],pursuit:2,pybullet:[28,46],pygam:[0,43],pytest:43,python3:43,python:[28,33,37,43,46,48],q_i:12,qr_dqn_agent:23,quad:6,qualiti:28,quantil:[3,49],quantileregressiondqnalgorithmparamet:23,queri:[21,25,40,49],question:49,quit:39,r_i:[5,19],r_t:[4,6,7,24],rainbow:[3,37,49],rainbow_ag:37,rainbow_dqn_ag:24,rainbowag:37,rainbowagentparamet:37,rainbowalgorithmparamet:37,rainbowdqnalgorithmparamet:24,rainbowexplorationparamet:37,rainbowmemoryparamet:37,rainbownetworkparamet:37,rais:[3,26,50],ramp:[37,40],random:[0,19,28,29,36,40,45],random_initialization_step:28,randomli:[26,40],rang:[4,7,8,11,14,24,28,29,31,32,36,49],rare:21,rate:[0,6,18,21,25,28,42],rate_for_copying_weights_to_target:[6,8,12],rather:[4,12,39],ratio:[6,7,11,18,31],ratio_of_replai:6,raw:[28,46],reach:[0,11,36],read:27,readabl:40,readm:43,real:3,reason:[31,45],rebuild_on_every_upd:33,receiv:[25,26],recent:[3,24,25,49,50],recommend:38,redi:[34,35,43],redispubsub:43,redispubsubmemorybackendparamet:34,reduc:[1,2,10,11,22,25,31,40,49],reduct:31,reduction_method:31,reductionmethod:31,redund:31,refer:[2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,41,43],referenc:3,regard:[3,50],region:[6,49],regist:[3,50],register_sign:[3,50],registri:43,regress:[2,3,49],regula:[6,7,11],regular:[5,7,10,11,19,21,25,29,32,33,49],regularli:25,reinforc:[3,5,8,9,10,12,14,15,16,17,19,22,23,24,28,29,39,40,42,44,45,46,49],rel:29,relat:[25,43],relationship:49,releas:[0,48,49],relev:[3,13,29,31,50],remov:[0,31],render:[0,3,28,38],reorder:31,repeat:[28,40],replac:[29,31,33,43],replace_mse_with_huber_loss:25,replai:[1,2,3,6,8,12,13,14,15,16,19,21,22,23,24,33,40,49,50,51],replay_buff:51,replicated_devic:25,repo:38,repositori:48,repres:[0,7,11,14,24,25,26,28,29,32,36,51],represent:42,reproduc:[40,45],request:[3,25,50],requir:[3,25,27,29,31,39,42,43,49,50],requires_action_valu:29,rescal:[4,5,7,10,11,25,30,31],rescale_factor:31,rescaleinterpolationtyp:31,rescaling_interpolation_typ:31,research:[28,45,46],reset:[3,21,25,28,29,38,50],reset_accumulated_gradi:25,reset_evaluation_st:[3,50],reset_gradi:25,reset_internal_st:[3,28,50],resourc:[41,43],respect:[8,26,28],respons:[3,26,28,40,50],rest:[25,26,32,43],restart:38,restor:[0,3,50],restore_checkpoint:[3,50],result:[3,4,14,15,16,17,23,24,25,31,32,45,49,50,51],ret:6,retrac:6,retriev:[21,33],return_additional_data:33,reus:40,reusabl:42,reward:[0,1,2,3,4,8,10,18,19,24,25,26,28,30,36,38,39,40,49,50],reward_test_level:0,reward_typ:36,rgb:[28,31,36],rho:[6,8],rho_t:6,right:[2,3,6,12,29,32,39,49,50],rl_coach:[0,1,2,3,4,5,6,7,8,10,11,12,14,16,18,19,20,21,22,23,24,25,26,27,28,29,31,32,33,34,35,36,38,43,50,51],rms_prop_optimizer_decai:25,rmsprop:25,roboschool:[28,46],robot:[28,36,46,48],roboti:43,robust:50,rollout:[27,34,35,41,43,51],root:[39,43],rule:[8,13],run:[0,3,4,8,10,11,12,13,15,16,21,22,25,28,29,31,50,51],run_off_policy_evalu:[3,50],run_pre_network_filter_for_infer:[3,50],runphas:[3,50],runtim:43,rvert:[14,24],rvert_2:6,s3_bucket_nam:43,s3_creds_fil:43,s3_data_stor:27,s3_end_point:43,s3datastoreparamet:27,s_t:[4,5,6,8,12,13,14,15,16,18,19,20,22,24],sac:49,sai:49,same:[3,4,7,10,18,19,22,25,28,32,33,39,42,45,49,50],sampl:[1,2,3,5,6,8,10,11,12,13,14,15,16,18,19,22,23,24,25,29,33,36,40,43,50],sample_with_info:36,satur:8,save:[0,3,24,25,29,43,50,51],save_checkpoint:[3,50],saver:[3,25,50],savercollect:[3,25,50],scale:[4,10,25,31,39,43,48,51],scale_down_gradients_by_number_of_workers_for_sync_train:25,scale_measurements_target:4,scaler:25,schedul:[7,29,33,40,41,43,51],scheme:[5,29,40,49],schulman:11,sci:43,scienc:45,scipi:[31,43],scope:25,scratch:49,scratchpad:0,screen:[3,28,38,51],screen_siz:28,script:40,second:[0,25,39,49,51],section:[43,44,46],see:[3,28,31,43,45,46,49,50,51],seed:[0,28,45],seen:[4,21,22,28,31,40,45,49],segment:[28,36],select:[5,13,21,25,26,29,31,32,36,38,39,40,48,51],self:[3,25,37,38,50],send:[38,42],separ:[0,3,17,31,32,42,44,49],separate_actions_for_throttle_and_brak:28,seper:10,sequenti:[4,26,33],serv:[7,10,42],server:0,server_height:28,server_width:28,sess:[3,25,50],session:[3,25,50],set:[0,2,3,4,5,6,7,8,11,14,15,16,18,21,22,24,25,26,28,29,31,32,36,37,41,45,46,48,49,50,51],set_environment_paramet:[3,50],set_goal:28,set_incoming_direct:[3,50],set_is_train:25,set_sess:[3,50],set_variable_valu:25,set_weight:25,setup:[3,43,50],setup_logg:[3,50],setuptool:43,sever:[0,3,7,10,11,13,25,28,29,31,37,38,39,40,42,46,49,50,51],shape:[25,31,36],share:[0,3,25,33,42,50],shared_memory_scratchpad:0,shared_optim:25,shift:[32,40],shine:39,should:[0,3,4,7,11,13,19,22,25,26,28,31,33,36,37,38,41,50,51],should_dump:0,shouldn:13,show:45,shown:45,shuffl:26,side:[3,50],sigma:29,signal:[3,40,50],signal_nam:[3,50],significantli:17,sim:[6,12],similar:[7,17,19,26,28,32,49],simpl:[10,33,37,38,42,48,49,51],simplest:49,simplif:49,simplifi:[7,39,42],simul:[28,38,46,51],simultan:7,sinc:[3,7,8,10,19,21,22,24,25,29,31,50],singl:[3,4,5,6,7,11,13,17,18,19,25,26,28,29,32,36,39,40,42,50],size:[25,26,29,31,32,33,36],skill:49,skip:[28,40],slave:[3,50],slice:26,slow:[25,51],slower:[0,17,25],slowli:8,small:[7,21,33],smaller:29,smooth:39,soft:[3,8,11,20,49],soft_actor_critic_ag:12,softactorcriticalgorithmparamet:12,softmax:[25,29],softmax_temperatur:25,softwar:43,solut:49,solv:[31,38,46,48],some:[0,3,11,25,26,29,31,37,38,39,42,45,49,50,51],sort:23,sourc:[0,1,2,3,4,5,6,7,8,10,11,12,14,16,18,19,20,21,22,23,24,25,26,27,28,29,31,32,33,34,35,36,38,43,46,50],space:[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,28,29,30,31,32,33,40,48,50],spacesdefinit:[3,25,50],spatial:49,spawn:[41,43],special:17,specif:[0,3,13,17,21,25,26,37,40,51],specifi:[0,25,28,29,31,38,41,51],speed:[25,31,49],speedup:51,spread:[31,32],squar:31,squeeze_list:25,squeeze_output:25,src:43,stabil:[6,19,25,49],stabl:[42,49],stack:[3,30,31,36,50],stack_siz:[25,31],stacking_axi:31,stage:42,stai:45,standard:[7,10,11,13,29,31,39],starcraft2_environ:28,starcraft2environ:28,starcraft:[36,46],starcraftobservationtyp:28,start:[3,6,8,11,12,17,22,26,31,32,38,43,50],state:[1,2,3,4,5,6,7,8,9,10,11,12,13,15,16,17,18,19,20,21,22,23,24,25,26,28,31,33,36,37,38,40,42,44,49,50],state_key_with_the_class_index:[2,33],state_spac:28,state_valu:26,statist:[3,10,31,48,50],std:12,stdev:29,steep:29,step:[0,3,4,5,6,7,8,10,11,12,13,14,15,16,18,20,21,22,23,24,25,26,28,29,31,37,38,39,40,49,50,51],stepmethod:[8,12,19],stochast:[12,40,49],stop:[0,28],store:[0,3,21,24,26,28,31,33,39,40,41,43,48,50,51],store_transitions_only_when_episodes_are_termin:24,str:[0,2,3,4,19,25,26,28,29,31,32,36,50],strategi:[28,46],stream:[17,41],strict:45,string:[0,25,28],structur:[0,3,26,33,37,40,50],stuff:25,style:29,sub:[32,33,34,35,36,37,40,43,51],sub_spac:36,subset:[39,45,49],subtract:22,succeed:28,success:[0,28,49],suffer:39,suffici:26,suffix:[3,25,50],suggest:37,suit:[0,46],suitabl:[41,51],sum:[4,7,10,18,25,26],sum_:[5,12,14,18,19,21,24],summari:[0,3,50],supervis:49,suppli:[3,50],support:[0,3,25,28,29,39,42,43,44,46,48,51],sure:[0,43,45],surrog:7,swig:43,swingup:28,symbol:25,sync:[3,25,40,41,50],synchron:[0,25,40,42],t_max:[10,19],tag:43,take:[0,3,10,11,17,21,22,25,28,29,30,38,39,40,50],taken:[1,2,4,5,6,7,8,11,12,14,17,21,22,23,24,25,26,28,29],tanh:8,tar:43,target:[0,1,2,3,4,5,6,7,8,11,12,13,14,15,16,18,19,20,21,22,23,24,25,28,31,32,36,37,40,42,50],target_act:32,target_kl_diverg:11,target_network:25,target_success_r:28,targets_horizon:19,task:[0,1,2,28,31,37,39,46],task_index:0,tau:12,techniqu:[7,11,48,49],technolog:41,teh:25,temperatur:[25,29],temperature_schedul:29,tensor:[3,25,50],tensorboard:0,tensorflow:[0,3,25,50,51],tensorflow_support:25,term:[6,7,11],termin:[3,8,26,40,50],test:[0,3,5,6,8,9,10,11,12,25,37,45,48,51],test_using_a_trace_test:0,text:6,textrm:40,than:[0,3,11,25,29,39,42,50],thei:[3,21,22,25,29,39,40,41,49,50,51],them:[4,5,10,19,25,26,28,31,36,38,39,42],therefor:[0,8,25,30,49],theta:[6,7,8,12,14,24,29],theta_:[6,7],thi:[0,3,4,5,6,7,8,10,11,13,17,19,21,24,25,26,28,29,30,31,32,33,34,36,37,38,39,40,41,42,43,45,49,50,51],thing:39,those:[0,3,8,13,15,16,17,21,26,29,32,40,42,44,49,50],thousand:[11,13,14,15,16,18,22,23,24],thread:25,three:[3,41,42,43,44],threshold:[11,21,31],through:[0,3,4,8,9,10,11,13,21,22,25,37,38,40,42,50],tild:[8,12],time:[0,4,22,25,29,32,33,39,42,49],time_limit:38,timestep:[4,10],timid:43,tmp:0,togeth:[3,19,26,40,50],toggl:39,too:11,tool:[39,43,49],top:[25,28,30,31,33,38,39,49],torqu:28,total:[0,3,10,11,18,21,22,26,33,37,39,49,50],total_loss:25,total_return:26,trace:0,trace_max_env_step:0,trace_test_level:0,tradeoff:29,train:[0,3,17,25,29,34,35,37,38,39,40,41,42,45,48,49,50],train_and_sync_network:25,train_on_batch:25,train_to_eval_ratio:33,trainer:[27,41],transfer:[28,34,46],transit:[1,2,3,4,5,6,8,10,11,12,14,15,16,19,21,22,23,24,33,37,40,41,50],transition_idx:26,tri:49,trick:45,tricki:39,trigger:[28,43],truncat:6,truncated_norm:29,trust:[6,49],ttf2:43,tune:29,tupl:[1,2,3,8,25,26,28,33,36,37],turn:[2,49],tutori:[37,38],tweak:[3,50],two:[8,10,19,25,28,29,30,31,32,36,38,41,42,51],txt:43,type:[0,3,10,17,25,28,31,36,37,40,42,48,49,50,51],typic:[7,11,25,49,51],ubuntu16:43,uhlenbeck:[8,9,29],uint8:31,unbound:36,uncertain:29,uncertainti:29,unchang:11,unclip:[3,37,50],uncorrel:19,undeploi:41,under:[3,25,37,51],underbrac:5,understand:51,unifi:7,uniformli:[28,29,32,36],union:[3,26,28,29,32,36,50],uniqu:25,unit:39,unlik:11,unmask:32,unnecessari:0,unshar:[3,50],unsign:31,unspecifi:25,unstabl:[39,45],until:[0,6,10,11,21,24,29],unus:25,unzip:43,updat:[3,6,7,8,10,11,12,13,14,15,16,17,19,20,21,22,23,24,25,26,29,37,38,39,40,42,43,49,50],update_discounted_reward:26,update_filter_internal_st:[3,50],update_log:[3,50],update_online_network:25,update_step_in_episode_log:[3,50],update_target_network:25,update_transition_before_adding_to_replay_buff:[3,50],upgrad:43,upon:[3,5,37,50],upper:[6,29],usag:[32,48],use:[0,1,2,3,4,5,6,8,9,10,12,13,15,16,20,25,26,27,28,29,31,32,33,36,37,38,40,42,43,48,49,50,51],use_accumulated_reward_as_measur:4,use_cpu:0,use_deterministic_for_evalu:12,use_full_action_spac:28,use_kl_regular:[7,11],use_non_zero_discount_for_terminal_st:8,use_separate_networks_per_head:25,use_target_network_for_evalu:8,use_trust_region_optim:6,used:[0,2,3,5,6,7,8,10,11,12,13,14,18,19,20,21,22,23,25,28,29,31,32,33,34,35,37,38,40,41,42,45,50,51],useful:[0,3,4,24,25,29,31,36,45,49,50,51],user:[25,28,29,39,40,43],userguid:43,uses:[0,1,7,11,17,26,27,29,35,40,41,43,45,49,51],using:[0,3,5,6,7,8,10,11,12,15,16,18,19,20,21,22,24,25,27,28,29,31,34,37,38,39,41,46,49,50,51],usr:43,usual:[31,40],util:[3,39,50],v_max:14,v_min:14,val:[3,36,50],valid:[0,36],valu:[0,2,3,4,5,6,7,8,11,12,13,14,15,16,17,19,20,21,22,24,25,26,28,29,31,32,33,36,37,40,42,43,44,49,50],valuabl:39,value_targets_mix_fract:[7,11],valueexcept:[3,50],valueoptimizationag:37,van:4,vari:42,variabl:[25,28,43],variable_scop:25,varianc:[10,29,39,49],variant:[29,33,49],variou:[3,26,33,48],vector:[3,4,8,9,11,13,25,28,31,36,38,42,49,50],vectorobservationspac:31,verbos:28,veri:[0,7,8,10,17,21,39,49,51],version:[7,11,26],versu:25,vert:12,vertic:25,via:[2,13],video:[0,3,28],video_dump_method:0,view:39,viewabl:[3,50],visit:45,visual:[0,3,28,46,48],visualization_paramet:28,visualizationparamet:[3,28],vizdoom:[43,46],vote:29,wai:[3,7,11,29,32,38,40,42,48,49,50,51],wait:[5,25,41],walk:38,want:[3,4,24,25,31,32,33,50],warn:[29,31,32],wasn:26,weather_id:28,websit:[28,48],weight:[4,5,6,7,8,11,12,13,14,15,16,18,19,20,21,22,23,24,25,29,40,42,49],well:[21,25,29,36,49],went:11,were:[4,14,15,16,17,21,23,24,25,26,32,45],west:43,wget:43,what:[11,49],whatev:[3,50],when:[0,3,4,5,6,7,8,9,10,11,12,21,25,26,27,28,29,31,34,35,37,38,39,50,51],whenev:41,where:[2,3,4,5,6,7,11,13,14,17,19,21,22,24,25,26,28,29,31,32,36,39,49,50],which:[0,1,2,3,5,6,7,8,10,11,12,13,17,19,20,21,22,23,25,26,27,28,29,31,33,34,35,36,37,38,39,40,41,42,44,45,46,48,49,50,51],who:40,why:[39,40],window:[31,32],wise:31,within:[0,7,11,20,29,36,39],without:[5,11,32,33,39,49,51],won:[4,25],wont:25,work:[3,19,25,29,31,32,39,40,49,50,51],workaround:0,workdir:43,worker:[0,19,25,27,31,33,34,35,39,41,42,43,49,51],worker_devic:25,worker_host:0,wors:49,would:[25,43,49],wrap:[28,31,40,46],wrapper:[3,25,26,28,36,42,50],write:[0,3,50],written:[3,24,27,50],www:43,xdist:43,y_t:[8,12,13,15,16,18,20,21,22],year:49,yet:[17,38],you:[4,31,33,37,38,43,48,51],your:[37,38,43,51],yuv:31,z_i:[14,24],z_j:[14,24],zero:[2,15,16],zip:43,zlib1g:43},titles:["Additional Parameters","Behavioral Cloning","Conditional Imitation Learning","Agents","Direct Future Prediction","Actor-Critic","ACER","Clipped Proximal Policy Optimization","Deep Deterministic Policy Gradient","Hierarchical Actor Critic","Policy Gradient","Proximal Policy Optimization","Soft Actor-Critic","Bootstrapped DQN","Categorical DQN","Double DQN","Deep Q Networks","Dueling DQN","Mixed Monte Carlo","N-Step Q Learning","Normalized Advantage Functions","Neural Episodic Control","Persistent Advantage Learning","Quantile Regression DQN","Rainbow","Architectures","Core Types","Data Stores","Environments","Exploration Policies","Filters","Input Filters","Output Filters","Memories","Memory Backends","Orchestrators","Spaces","Adding a New Agent","Adding a New Environment","Coach Dashboard","Control Flow","Distributed Coach - Horizontal Scale-Out","Network Design","Usage - Distributed Coach","Algorithms","Benchmarks","Environments","Features","Reinforcement Learning Coach","Selecting an Algorithm","test","Usage"],titleterms:{"final":21,"function":20,"new":[37,38],"switch":51,Adding:[37,38],Using:38,acer:6,across:49,action:[4,5,6,7,8,9,10,11,12,13,20,21,32,36,49],actioninfo:26,actor:[5,9,12],addit:[0,51],additivenois:29,advantag:[20,22],agent:[3,37,40,51],algorithm:[1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,18,19,20,21,22,23,24,44,49,51],api:38,architectur:25,attentionactionspac:36,backend:34,balancedexperiencereplai:33,batch:26,behavior:1,benchmark:45,between:51,blizzard:28,boltzmann:29,bootstrap:[13,29],boxactionspac:36,build:43,can:49,carla:28,carlo:18,categor:[14,29],choos:[4,5,6,7,8,9,10,11,12,13,20,21],clip:7,clone:[1,43],coach:[38,39,41,43,48],collect:49,compar:39,compoundactionspac:36,condit:2,config:43,contain:43,continu:[7,11,12,49],continuousentropi:29,control:[21,28,40],copi:42,core:26,creat:43,critic:[5,9,12],dashboard:39,data:27,deep:[8,16,51],deepmind:28,demonstr:49,descript:[1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24],design:42,determinist:8,direct:4,discret:[5,6,10,49],discreteactionspac:36,distribut:[41,43],distributedtaskparamet:0,doe:49,doubl:15,dqn:[13,14,15,17,23],duel:17,dump:51,egreedi:29,environ:[28,38,46,49,51],envrespons:26,episod:[21,26,33],episodicexperiencereplai:33,episodichindsightexperiencereplai:33,episodichrlhindsightexperiencereplai:33,evalu:51,experiencereplai:33,explor:29,explorationpolici:29,featur:47,file:43,filter:[30,31,32],flag:51,flow:40,framework:51,from:49,futur:4,gener:17,gif:51,goal:36,gradient:[8,10],graph:40,greedi:29,gym:[28,38],have:49,hierarch:9,horizont:41,human:[49,51],imag:43,imageobservationspac:36,imit:[2,51],implement:43,input:31,interfac:43,keep:42,kubernet:35,learn:[2,19,22,48,51],level:40,manag:40,memori:[33,34],mix:18,mont:18,more:49,multi:51,multipl:49,multiselectactionspac:36,network:[1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,42],networkwrapp:25,neural:21,nfsdatastor:27,node:[49,51],non:33,normal:20,observ:[31,36],observationclippingfilt:31,observationcropfilt:31,observationmoveaxisfilt:31,observationnormalizationfilt:31,observationreductionbysubpartsnamefilt:31,observationrescalesizebyfactorfilt:31,observationrescaletosizefilt:31,observationrgbtoyfilt:31,observationsqueezefilt:31,observationstackingfilt:31,observationtouint8filt:31,openai:[28,38],optim:[7,11],orchestr:35,ouprocess:29,out:41,output:32,pain:49,parallel:49,paramet:0,parameternois:29,persist:22,plai:51,planarmapsobservationspac:36,polici:[7,8,10,11,29],predict:4,prerequisit:43,presetvalidationparamet:0,prioritizedexperiencereplai:33,process:49,proxim:[7,11],push:43,qdnd:33,quantil:23,rainbow:24,redispubsubbackend:34,regress:23,reinforc:48,render:51,repositori:43,reward:31,rewardclippingfilt:31,rewardnormalizationfilt:31,rewardrescalefilt:31,run:[39,43],s3datastor:27,sampl:49,scale:41,select:49,signal:39,simul:49,singl:51,singleepisodebuff:33,soft:12,solv:49,space:[36,49],starcraft:28,statist:39,step:19,store:[13,27],structur:[1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24],suit:28,support:41,sync:42,synchron:41,task:49,taskparamet:0,test:50,thread:51,through:51,track:39,train:[1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,18,19,20,21,22,23,24,51],transit:[13,26],transitioncollect:33,truncatednorm:29,type:[26,41],ucb:29,usag:[43,51],vectorobservationspac:36,visual:[39,51],visualizationparamet:0,vizdoom:28,you:49,your:49}})