mirror of
https://github.com/gryf/coach.git
synced 2025-12-17 19:20:19 +01:00
* updating the documentation website * adding the built docs * update of api docstrings across coach and tutorials 0-2 * added some missing api documentation * New Sphinx based documentation
1 line
53 KiB
JavaScript
1 line
53 KiB
JavaScript
Search.setIndex({docnames:["components/additional_parameters","components/agents/imitation/bc","components/agents/imitation/cil","components/agents/index","components/agents/other/dfp","components/agents/policy_optimization/ac","components/agents/policy_optimization/cppo","components/agents/policy_optimization/ddpg","components/agents/policy_optimization/hac","components/agents/policy_optimization/pg","components/agents/policy_optimization/ppo","components/agents/value_optimization/bs_dqn","components/agents/value_optimization/categorical_dqn","components/agents/value_optimization/double_dqn","components/agents/value_optimization/dqn","components/agents/value_optimization/dueling_dqn","components/agents/value_optimization/mmc","components/agents/value_optimization/n_step","components/agents/value_optimization/naf","components/agents/value_optimization/nec","components/agents/value_optimization/pal","components/agents/value_optimization/qr_dqn","components/agents/value_optimization/rainbow","components/architectures/index","components/core_types","components/environments/index","components/exploration_policies/index","components/filters/index","components/filters/input_filters","components/filters/output_filters","components/memories/index","components/spaces","contributing/add_agent","contributing/add_env","dashboard","design/control_flow","design/horizontal_scaling","design/network","features/algorithms","features/benchmarks","features/environments","features/index","index","selecting_an_algorithm","test","usage"],envversion:{"sphinx.domains.c":1,"sphinx.domains.changeset":1,"sphinx.domains.cpp":1,"sphinx.domains.javascript":1,"sphinx.domains.math":2,"sphinx.domains.python":1,"sphinx.domains.rst":1,"sphinx.domains.std":1,"sphinx.ext.todo":1,"sphinx.ext.viewcode":1,sphinx:55},filenames:["components/additional_parameters.rst","components/agents/imitation/bc.rst","components/agents/imitation/cil.rst","components/agents/index.rst","components/agents/other/dfp.rst","components/agents/policy_optimization/ac.rst","components/agents/policy_optimization/cppo.rst","components/agents/policy_optimization/ddpg.rst","components/agents/policy_optimization/hac.rst","components/agents/policy_optimization/pg.rst","components/agents/policy_optimization/ppo.rst","components/agents/value_optimization/bs_dqn.rst","components/agents/value_optimization/categorical_dqn.rst","components/agents/value_optimization/double_dqn.rst","components/agents/value_optimization/dqn.rst","components/agents/value_optimization/dueling_dqn.rst","components/agents/value_optimization/mmc.rst","components/agents/value_optimization/n_step.rst","components/agents/value_optimization/naf.rst","components/agents/value_optimization/nec.rst","components/agents/value_optimization/pal.rst","components/agents/value_optimization/qr_dqn.rst","components/agents/value_optimization/rainbow.rst","components/architectures/index.rst","components/core_types.rst","components/environments/index.rst","components/exploration_policies/index.rst","components/filters/index.rst","components/filters/input_filters.rst","components/filters/output_filters.rst","components/memories/index.rst","components/spaces.rst","contributing/add_agent.rst","contributing/add_env.rst","dashboard.rst","design/control_flow.rst","design/horizontal_scaling.rst","design/network.rst","features/algorithms.rst","features/benchmarks.rst","features/environments.rst","features/index.rst","index.rst","selecting_an_algorithm.rst","test.rst","usage.rst"],objects:{"rl_coach.agents.actor_critic_agent":{ActorCriticAlgorithmParameters:[5,0,1,""]},"rl_coach.agents.agent":{Agent:[3,0,1,""]},"rl_coach.agents.agent.Agent":{act:[3,1,1,""],call_memory:[3,1,1,""],choose_action:[3,1,1,""],create_networks:[3,1,1,""],get_predictions:[3,1,1,""],get_state_embedding:[3,1,1,""],handle_episode_ended:[3,1,1,""],init_environment_dependent_modules:[3,1,1,""],learn_from_batch:[3,1,1,""],log_to_screen:[3,1,1,""],observe:[3,1,1,""],parent:[3,2,1,""],phase:[3,2,1,""],post_training_commands:[3,1,1,""],prepare_batch_for_inference:[3,1,1,""],register_signal:[3,1,1,""],reset_evaluation_state:[3,1,1,""],reset_internal_state:[3,1,1,""],run_pre_network_filter_for_inference:[3,1,1,""],save_checkpoint:[3,1,1,""],set_environment_parameters:[3,1,1,""],set_incoming_directive:[3,1,1,""],set_session:[3,1,1,""],setup_logger:[3,1,1,""],sync:[3,1,1,""],train:[3,1,1,""],update_log:[3,1,1,""],update_step_in_episode_log:[3,1,1,""],update_transition_before_adding_to_replay_buffer:[3,1,1,""]},"rl_coach.agents.bc_agent":{BCAlgorithmParameters:[1,0,1,""]},"rl_coach.agents.categorical_dqn_agent":{CategoricalDQNAlgorithmParameters:[12,0,1,""]},"rl_coach.agents.cil_agent":{CILAlgorithmParameters:[2,0,1,""]},"rl_coach.agents.clipped_ppo_agent":{ClippedPPOAlgorithmParameters:[6,0,1,""]},"rl_coach.agents.ddpg_agent":{DDPGAlgorithmParameters:[7,0,1,""]},"rl_coach.agents.dfp_agent":{DFPAlgorithmParameters:[4,0,1,""]},"rl_coach.agents.dqn_agent":{DQNAgent:[44,0,1,""],DQNAlgorithmParameters:[14,0,1,""]},"rl_coach.agents.dqn_agent.DQNAgent":{act:[44,1,1,""],call_memory:[44,1,1,""],choose_action:[44,1,1,""],create_networks:[44,1,1,""],get_predictions:[44,1,1,""],get_state_embedding:[44,1,1,""],handle_episode_ended:[44,1,1,""],init_environment_dependent_modules:[44,1,1,""],learn_from_batch:[44,1,1,""],log_to_screen:[44,1,1,""],observe:[44,1,1,""],parent:[44,2,1,""],phase:[44,2,1,""],post_training_commands:[44,1,1,""],prepare_batch_for_inference:[44,1,1,""],register_signal:[44,1,1,""],reset_evaluation_state:[44,1,1,""],reset_internal_state:[44,1,1,""],run_pre_network_filter_for_inference:[44,1,1,""],save_checkpoint:[44,1,1,""],set_environment_parameters:[44,1,1,""],set_incoming_directive:[44,1,1,""],set_session:[44,1,1,""],setup_logger:[44,1,1,""],sync:[44,1,1,""],train:[44,1,1,""],update_log:[44,1,1,""],update_step_in_episode_log:[44,1,1,""],update_transition_before_adding_to_replay_buffer:[44,1,1,""]},"rl_coach.agents.mmc_agent":{MixedMonteCarloAlgorithmParameters:[16,0,1,""]},"rl_coach.agents.n_step_q_agent":{NStepQAlgorithmParameters:[17,0,1,""]},"rl_coach.agents.naf_agent":{NAFAlgorithmParameters:[18,0,1,""]},"rl_coach.agents.nec_agent":{NECAlgorithmParameters:[19,0,1,""]},"rl_coach.agents.pal_agent":{PALAlgorithmParameters:[20,0,1,""]},"rl_coach.agents.policy_gradients_agent":{PolicyGradientAlgorithmParameters:[9,0,1,""]},"rl_coach.agents.ppo_agent":{PPOAlgorithmParameters:[10,0,1,""]},"rl_coach.agents.qr_dqn_agent":{QuantileRegressionDQNAlgorithmParameters:[21,0,1,""]},"rl_coach.agents.rainbow_dqn_agent":{RainbowDQNAlgorithmParameters:[22,0,1,""]},"rl_coach.architectures.architecture":{Architecture:[23,0,1,""]},"rl_coach.architectures.architecture.Architecture":{accumulate_gradients:[23,1,1,""],apply_and_reset_gradients:[23,1,1,""],apply_gradients:[23,1,1,""],get_variable_value:[23,1,1,""],get_weights:[23,1,1,""],parallel_predict:[23,3,1,""],predict:[23,1,1,""],reset_accumulated_gradients:[23,1,1,""],set_variable_value:[23,1,1,""],set_weights:[23,1,1,""],train_on_batch:[23,1,1,""]},"rl_coach.architectures.network_wrapper":{NetworkWrapper:[23,0,1,""]},"rl_coach.architectures.network_wrapper.NetworkWrapper":{apply_gradients_and_sync_networks:[23,1,1,""],apply_gradients_to_global_network:[23,1,1,""],apply_gradients_to_online_network:[23,1,1,""],get_global_variables:[23,1,1,""],get_local_variables:[23,1,1,""],parallel_prediction:[23,1,1,""],set_is_training:[23,1,1,""],sync:[23,1,1,""],train_and_sync_networks:[23,1,1,""],update_online_network:[23,1,1,""],update_target_network:[23,1,1,""]},"rl_coach.base_parameters":{AgentParameters:[3,0,1,""],DistributedTaskParameters:[0,0,1,""],NetworkParameters:[23,0,1,""],PresetValidationParameters:[0,0,1,""],TaskParameters:[0,0,1,""],VisualizationParameters:[0,0,1,""]},"rl_coach.core_types":{ActionInfo:[24,0,1,""],Batch:[24,0,1,""],EnvResponse:[24,0,1,""],Episode:[24,0,1,""],Transition:[24,0,1,""]},"rl_coach.core_types.Batch":{actions:[24,1,1,""],game_overs:[24,1,1,""],goals:[24,1,1,""],info:[24,1,1,""],info_as_list:[24,1,1,""],n_step_discounted_rewards:[24,1,1,""],next_states:[24,1,1,""],rewards:[24,1,1,""],shuffle:[24,1,1,""],size:[24,2,1,""],slice:[24,1,1,""],states:[24,1,1,""]},"rl_coach.core_types.Episode":{get_first_transition:[24,1,1,""],get_last_transition:[24,1,1,""],get_transition:[24,1,1,""],get_transitions_attribute:[24,1,1,""],insert:[24,1,1,""],is_empty:[24,1,1,""],length:[24,1,1,""],update_discounted_rewards:[24,1,1,""]},"rl_coach.environments.carla_environment":{CarlaEnvironment:[25,0,1,""]},"rl_coach.environments.control_suite_environment":{ControlSuiteEnvironment:[25,0,1,""]},"rl_coach.environments.doom_environment":{DoomEnvironment:[25,0,1,""]},"rl_coach.environments.environment":{Environment:[25,0,1,""]},"rl_coach.environments.environment.Environment":{action_space:[25,2,1,""],get_action_from_user:[25,1,1,""],get_available_keys:[25,1,1,""],get_goal:[25,1,1,""],get_random_action:[25,1,1,""],get_rendered_image:[25,1,1,""],goal_space:[25,2,1,""],handle_episode_ended:[25,1,1,""],last_env_response:[25,2,1,""],phase:[25,2,1,""],render:[25,1,1,""],reset_internal_state:[25,1,1,""],set_goal:[25,1,1,""],state_space:[25,2,1,""],step:[25,1,1,""]},"rl_coach.environments.gym_environment":{GymEnvironment:[25,0,1,""]},"rl_coach.environments.starcraft2_environment":{StarCraft2Environment:[25,0,1,""]},"rl_coach.exploration_policies":{AdditiveNoise:[26,0,1,""],Boltzmann:[26,0,1,""],Bootstrapped:[26,0,1,""],Categorical:[26,0,1,""],ContinuousEntropy:[26,0,1,""],EGreedy:[26,0,1,""],ExplorationPolicy:[26,0,1,""],Greedy:[26,0,1,""],OUProcess:[26,0,1,""],ParameterNoise:[26,0,1,""],TruncatedNormal:[26,0,1,""],UCB:[26,0,1,""]},"rl_coach.exploration_policies.ExplorationPolicy":{change_phase:[26,1,1,""],get_action:[26,1,1,""],requires_action_values:[26,1,1,""],reset:[26,1,1,""]},"rl_coach.filters.action":{AttentionDiscretization:[29,0,1,""],BoxDiscretization:[29,0,1,""],BoxMasking:[29,0,1,""],FullDiscreteActionSpaceMap:[29,0,1,""],LinearBoxToBoxMap:[29,0,1,""],PartialDiscreteActionSpaceMap:[29,0,1,""]},"rl_coach.filters.observation":{ObservationClippingFilter:[28,0,1,""],ObservationCropFilter:[28,0,1,""],ObservationMoveAxisFilter:[28,0,1,""],ObservationNormalizationFilter:[28,0,1,""],ObservationRGBToYFilter:[28,0,1,""],ObservationReductionBySubPartsNameFilter:[28,0,1,""],ObservationRescaleSizeByFactorFilter:[28,0,1,""],ObservationRescaleToSizeFilter:[28,0,1,""],ObservationSqueezeFilter:[28,0,1,""],ObservationStackingFilter:[28,0,1,""],ObservationToUInt8Filter:[28,0,1,""]},"rl_coach.filters.reward":{RewardClippingFilter:[28,0,1,""],RewardNormalizationFilter:[28,0,1,""],RewardRescaleFilter:[28,0,1,""]},"rl_coach.memories.episodic":{EpisodicExperienceReplay:[30,0,1,""],EpisodicHRLHindsightExperienceReplay:[30,0,1,""],EpisodicHindsightExperienceReplay:[30,0,1,""],SingleEpisodeBuffer:[30,0,1,""]},"rl_coach.memories.non_episodic":{BalancedExperienceReplay:[30,0,1,""],ExperienceReplay:[30,0,1,""],PrioritizedExperienceReplay:[30,0,1,""],QDND:[30,0,1,""],TransitionCollection:[30,0,1,""]},"rl_coach.spaces":{ActionSpace:[31,0,1,""],AttentionActionSpace:[31,0,1,""],BoxActionSpace:[31,0,1,""],CompoundActionSpace:[31,0,1,""],DiscreteActionSpace:[31,0,1,""],GoalsSpace:[31,0,1,""],ImageObservationSpace:[31,0,1,""],MultiSelectActionSpace:[31,0,1,""],ObservationSpace:[31,0,1,""],PlanarMapsObservationSpace:[31,0,1,""],Space:[31,0,1,""],VectorObservationSpace:[31,0,1,""]},"rl_coach.spaces.ActionSpace":{clip_action_to_space:[31,1,1,""],is_point_in_space_shape:[31,1,1,""],sample:[31,1,1,""],sample_with_info:[31,1,1,""],val_matches_space_definition:[31,1,1,""]},"rl_coach.spaces.GoalsSpace":{DistanceMetric:[31,0,1,""],clip_action_to_space:[31,1,1,""],distance_from_goal:[31,1,1,""],get_reward_for_goal_and_state:[31,1,1,""],goal_from_state:[31,1,1,""],is_point_in_space_shape:[31,1,1,""],sample:[31,1,1,""],sample_with_info:[31,1,1,""],val_matches_space_definition:[31,1,1,""]},"rl_coach.spaces.ObservationSpace":{is_point_in_space_shape:[31,1,1,""],sample:[31,1,1,""],val_matches_space_definition:[31,1,1,""]},"rl_coach.spaces.Space":{is_point_in_space_shape:[31,1,1,""],sample:[31,1,1,""],val_matches_space_definition:[31,1,1,""]}},objnames:{"0":["py","class","Python class"],"1":["py","method","Python method"],"2":["py","attribute","Python attribute"],"3":["py","staticmethod","Python static method"]},objtypes:{"0":"py:class","1":"py:method","2":"py:attribute","3":"py:staticmethod"},terms:{"0mq":36,"100x100":29,"160x160":28,"1_0":[12,22],"1st":26,"20x20":29,"210x160":28,"2nd":26,"50k":35,"\u03b3cdot":14,"abstract":32,"boolean":[3,24,31,44],"break":34,"case":[0,3,5,19,23,24,26,31,36,43,44,45],"class":[0,1,2,3,4,5,6,7,9,10,12,14,16,17,18,19,20,21,22,23,24,25,26,28,29,30,31,32,33,35,36,38,44],"default":[0,26,36,45],"enum":[23,25,31],"export":0,"final":[7,13,14,16,20,35],"float":[3,4,5,6,7,9,10,12,16,19,20,21,23,24,25,26,28,29,30,31,32,44],"function":[0,1,3,6,7,10,23,25,26,31,32,33,35,37,44],"import":[15,26,30,33,43,45],"int":[0,3,4,5,6,9,12,17,19,21,22,24,25,26,28,29,30,31,44],"long":37,"na\u00efv":36,"new":[0,3,6,7,10,19,20,23,24,29,35,36,42,43,44],"return":[0,3,7,9,10,11,16,19,20,22,23,24,25,26,28,30,31,32,33,35,43,44],"short":[0,35],"static":23,"super":[32,33],"switch":34,"true":[0,3,4,5,6,7,10,19,20,22,23,24,25,26,29,30,31,44],"try":[4,39,43],"while":[0,5,7,8,9,10,23,25,34,36,37,43,45],Adding:[15,42],And:[33,43],But:[34,43],Doing:43,For:[0,1,2,3,4,6,9,11,12,13,14,17,19,20,23,24,25,26,27,28,29,31,32,33,35,37,39,44,45],Has:23,Its:44,One:[21,45],That:34,The:[0,1,2,3,4,5,6,7,9,10,11,12,16,17,18,19,20,21,22,23,24,25,26,28,29,30,31,32,34,35,37,39,40,42,43,44,45],Then:[4,6,7,11,18,20],There:[6,10,23,26,27,32,33,37,45],These:[1,2,3,21,25,37],Use:[1,2,7,18,19],Used:26,Uses:43,Using:[7,11,13,14],Will:23,With:[26,42],__init__:[25,32,33],_index:[5,17],_render:33,_restart_environment_episod:33,_take_act:33,_update_st:33,a2c:43,a3c:[9,17,34,43],a_i:19,a_t:[4,5,7,11,12,13,14,16,17,18,20,22],a_valu:5,abl:[29,43],about:[3,24,35,36,44,45],abov:[7,23,35],abs:[17,30],absolut:26,acceler:18,accept:25,access:[23,32],accord:[0,3,4,5,7,11,17,23,24,26,31,34,35,37,44],accordingli:[19,31,35,45],account:[4,6,10,19,20,26],accumul:[3,4,5,9,17,19,22,23,28,43,44],accumulate_gradi:23,accumulated_gradi:23,accur:43,achiev:[0,4,6,25,28,30,31,39,43,45],across:[9,16,34],act:[3,4,7,11,21,31,32,35,44],action:[1,2,3,12,13,14,15,16,17,20,21,22,23,24,25,26,27,30,32,33,35,37,44],action_idx:33,action_intrinsic_reward:24,action_penalti:7,action_prob:24,action_spac:[25,26],action_space_s:23,action_valu:[24,26],actioninfo:[3,31,35,44],actionspac:[26,31],actiontyp:33,activ:[7,23],actor:[3,6,7,10,26,37,43],actor_critic_ag:5,actorcriticag:32,actorcriticalgorithmparamet:5,actual:[4,5,12,13,14,21,22,26,29,30],adam:[6,23],adam_optimizer_beta1:23,adam_optimizer_beta2:23,adapt:[6,10],add:[7,8,18,24,26,28,33,35,36],add_rendered_image_to_env_respons:0,added:[0,4,6,9,10,19,26,30,32],adding:[3,10,26,32,44],addit:[3,23,24,25,26,28,30,31,33,34,35,36,37,42,44],addition:[23,25,28,32,33,35,39,40,45],additional_fetch:23,additional_simulator_paramet:[25,33],additionali:34,additive_nois:26,additivenoiseparamet:26,advanc:[22,42],advantag:[3,5,6,10,15,26,36],affect:[0,11,23],aforement:[13,14,20],after:[0,3,7,9,10,17,18,20,22,23,24,25,28,31,44,45],again:26,agent:[0,1,2,4,5,6,7,9,10,12,14,16,17,18,19,20,21,22,23,24,25,26,27,28,29,31,33,34,37,38,39,42,43,44],agent_paramet:[3,23,44],agentparamet:[3,23,32],aggreg:35,ahead:[4,43],aim:26,algorithm:[3,24,26,32,34,35,36,39,41,42,44],algorithmparamet:[3,32],all:[0,3,9,11,19,20,23,24,25,26,28,29,31,32,33,34,35,36,37,40,44,45],allow:[0,3,4,15,23,24,25,26,27,28,29,30,34,35,36,37,42,43,44,45],allow_brak:25,allow_duplicates_in_batch_sampl:30,allow_no_action_to_be_select:31,along:[19,25,26,40],alpha:[16,20,30],alreadi:[19,24,33,36,43],also:[5,6,19,20,23,25,31,32,34,39,43,45],altern:[25,33,40],alwai:[23,26,29],amount:[7,9,16,20,26,35,43],analysi:34,analyz:34,ani:[3,23,24,25,29,30,32,35,36,37,44],anoth:[3,15,23,27,44],answer:43,api:[25,37,40,42],appear:[3,44],appli:[3,5,7,9,17,23,24,26,28,43,44],applic:43,apply_and_reset_gradi:23,apply_gradi:23,apply_gradients_and_sync_network:23,apply_gradients_every_x_episod:[5,9,17],apply_gradients_to_global_network:23,apply_gradients_to_online_network:23,approx:7,approxim:[37,43],arbitrari:28,architectur:[3,15,32,42,44],architecture_num_q_head:26,area:29,arg:[3,44],argmax_a:[13,16,20],argument:[3,12,22,25,31,35,44],around:[23,24,37],arrai:[3,23,24,25,28,31,33,44],art:[3,38],artifici:30,arxiv:[17,30],aspect:[26,28,34],assign:[0,2,5,23,26],assign_kl_coeffici:23,assign_op:23,assum:[24,26,28,30,43],async:23,async_train:23,asynchron:[5,17,23],atari:[14,25,28,45],ath:15,atom:[12,21,22],attach:25,attend:29,attent:29,attentionactionspac:29,attentiondiscret:29,attribut:24,attribute_nam:24,author:[25,39,40],auto_select_all_armi:25,automat:[23,45],autonom:[25,40,42],auxiliari:[25,40],avail:[4,23,24,25,34,36,42,43,45],averag:[6,10,23,34,35],aws:36,axes:[28,34],axi:[28,34],axis_origin:28,axis_target:28,back:6,backend:[23,45],background:45,backpropag:19,backward:23,balanc:2,band:34,base:[6,10,16,18,20,25,30,32,35,40,43],base_paramet:[0,3,23,25,26],baselin:43,basic:[9,24,45],batch:[1,2,3,4,5,7,9,10,11,12,13,14,15,17,20,21,22,23,30,32,35,44],batch_siz:23,bc_agent:1,bcalgorithmparamet:1,becaus:35,becom:7,been:[15,24,28,36,39,43],befor:[3,5,10,22,23,24,28,35,37,43,44],begin:[0,4,35],behav:31,behavior:[3,28,30,32,39,43,44,45],being:[3,32,42,43,44],bellman:[12,21,22],benchmark:[34,41,42,43],best:[43,45],beta1:23,beta2:23,beta:[7,9,30],beta_entropi:[5,6,9,10],better:[15,43],between:[0,1,2,3,6,7,9,10,12,16,17,19,21,22,23,24,25,26,29,30,31,32,34,35,36,37,42,43],bfg:[6,10],big:[10,12,22],bilinear:28,bin:29,binari:11,bind:23,binomi:11,bit:28,blizzard:40,blob:[25,28],block:42,blog:42,boilerpl:35,bolling:34,bool:[0,3,4,5,6,7,10,19,20,22,23,24,25,26,30,31,44],boost:43,bootstrap:[3,5,6,7,10,16,17,19,20,22,24,43],bootstrap_total_return_from_old_polici:[19,24],both:[3,6,23,25,26,29,43,44],bound:[6,10,12,22,26,31,43],box:[26,29,31],boxactionspac:29,boxdiscret:29,boxmask:29,breakout_a3c:45,breakout_dqn:45,breakoutdeterminist:[25,45],bring:10,buffer:[1,2,3,11,12,13,14,17,19,20,21,22,30,35,43,44,45],build:[27,42,43],built:[32,35,36],button:[34,45],c51:12,calcul:[3,4,5,6,7,9,10,11,12,13,14,16,17,19,20,21,22,23,24,26,30,32,44],call:[0,3,9,17,23,24,25,35,44],call_memori:[3,44],callabl:31,camera:[25,33],camera_height:25,camera_width:25,cameratyp:[25,33],can:[0,2,3,5,6,7,10,20,23,24,25,26,27,28,29,31,32,33,34,35,36,37,40,42,44,45],cannot:[3,44],capabl:36,carla:[28,40],carla_environ:25,carlaenviron:25,carlaenvironmentparamet:25,carlo:[3,20],cartpol:[25,33],cartpole_a3c:45,cartpole_dqn:45,categor:[3,5,43],categori:[27,28],categorical_dqn_ag:12,categoricaldqnalgorithmparamet:12,caus:[28,34],cdot:[5,6,7,9,11,12,13,14,16,18,20,22],central:[23,34],ceph:36,chain:7,challeng:35,chang:[0,3,6,7,10,11,15,17,20,26,35,36,44],change_phas:26,channel:28,channels_axi:31,characterist:36,check:[0,3,24,31,44],checkpoint:[0,3,44,45],checkpoint_id:[3,44],checkpoint_restore_dir:[0,45],checkpoint_save_dir:0,checkpoint_save_sec:0,child:23,choic:[32,36],choos:[3,15,20,26,27,29,31,32,35,37,43,44,45],choose_act:[3,32,35,44],chosen:[3,20,26,29,32,44],chunk:10,cil:43,cil_ag:2,cilalgorithmparamet:2,clean:32,clear:36,client:36,clip:[3,7,10,23,28,31,43],clip_action_to_spac:31,clip_critic_target:7,clip_gradi:23,clip_high:26,clip_likelihood_ratio_using_epsilon:[6,10],clip_low:26,clip_max:28,clip_min:28,clipbyglobalnorm:23,clipped_ppo_ag:6,clippedppoalgorithmparamet:6,clipping_high:28,clipping_low:28,clone:[3,43],coach:[0,3,23,25,26,27,32,35,36,38,39,40,43,45],code:[33,35,36,43],coeffici:[6,10,23,26,30],collect:[6,9,10,17,24,30,35,39,42,45],color:28,combin:[22,37,42,43],comma:0,command:[35,45],common:[32,34,45],commun:36,compar:[0,10,15,43],complet:[24,26,35,36],complex:[23,27,35,36,37,43,45],compon:[3,12,22,23,26,32,35,42,44,45],compos:36,composit:[3,44],compositeag:[3,44],comput:[23,26],con:36,concat:23,concentr:35,concept:36,concern:36,concurr:36,condit:3,confid:26,config:25,configur:[3,5,9,32,44],confus:35,connect:23,connectionist:9,consecut:[7,19],consequ:[17,26],consid:[5,29,34],consist:[7,25,28,29,31,35,40],constantli:45,constantschedul:30,constrain:29,constraint:36,construct:30,consumpt:28,contain:[0,1,2,3,11,23,24,25,33,35,36,44,45],context:36,continu:[1,2,5,7,8,9,18,26,27,29,31,39],continuous_exploration_policy_paramet:26,contribut:[4,42],control:[2,3,5,6,7,10,23,26,28,34,36,40,42,43,44],control_suite_environ:25,controlsuiteenviron:25,conveni:[34,45],converg:9,convers:27,convert:[3,24,26,28,31,35,36,37,44],convolut:[23,37],coordin:[29,36],copi:[7,11,12,13,14,16,17,18,20,21,22,23],core:[3,42,44],core_typ:[3,24,25,31,44],correct:[3,43],correctli:23,correl:26,correpond:24,correspond:[2,3,4,12,13,23,24,26,28,31,33,44],corrupt:36,could:[23,31,36],count:16,countabl:29,counter:[3,44],counterpart:37,cpu:[0,23],crd:45,creat:[3,17,23,28,33,44,45],create_network:[3,44],create_target_network:23,creation:[3,44],critic:[3,6,7,10,26,37,43],crop:[28,29],crop_high:28,crop_low:28,cross:[1,12,22],csv:0,ctrl:34,curr_stat:[3,32,44],current:[0,1,2,3,4,6,7,8,9,10,11,13,14,16,18,19,20,21,23,24,25,26,28,29,31,32,35,36,42,43,44],custom:[25,26,31,32,35,36],custom_reward_threshold:25,cycl:35,dai:45,dashboard:[0,3,42,44],dat:36,data:[0,9,17,23,30,35,39,43,45],databas:36,dataset:[6,10,43,45],date:[19,37,43,45],ddpg:43,ddpg_agent:7,ddpgalgorithmparamet:7,ddqn:[16,20,43],deal:43,debug:[0,34,36,42],decai:[5,6,10,23],decid:[0,3,4,25,32,44],decis:[3,44],decreas:36,dedic:23,deep:[0,3,5,11,13,15,17,18,22,44],deepmind:40,def:[32,33],default_act:31,default_input_filt:33,default_output_filt:33,defin:[0,3,5,6,9,10,17,19,20,23,24,25,26,28,29,30,31,32,33,35,37,39,40,44,45],definit:[3,23,25,31,33,35,44],delai:43,delta:[12,19,22],demonstr:[1,2,45],dens:26,densiti:16,depend:[0,3,23,28,30,31,33,39,43,44],depth:25,descend:43,describ:[3,12,21,28,30,32,35,44],descript:[3,29,31,38,45],design:[35,36,42],desir:[29,32,36],destabil:9,detail:[3,24,36,38,40,42],determin:[2,3,19,24,30,44],determinist:[3,43],develop:[35,39],deviat:[9,10,26,28,34],dfp:43,dfp_agent:4,dfpalgorithmparamet:4,dict:[3,4,23,24,25,26,31,44],dict_siz:30,dictat:4,dictionari:[2,3,23,24,25,30,31,32,44],did:25,differ:[0,1,2,3,4,5,6,9,10,11,15,23,25,26,28,31,32,33,34,36,37,42,43,44],differenti:15,difficult:[34,36,39],difficulti:45,dimens:[24,28,29],dimension:[10,29],dir:45,direct:[3,25,44],directli:[3,5,35,36,37,44],directori:[0,23,32,34,45],disabl:45,disable_fog:25,disappear:25,disassembl:43,discard:[24,28],discount:[7,9,10,16,19,20,22,23,24,43],discret:[1,2,4,6,10,11,12,13,14,15,16,17,19,20,21,22,26,27,28,29,31,35],disentangl:35,disk:[0,36],displai:[0,34],distanc:31,distance_from_go:31,distance_metr:31,distancemetr:31,distil:[3,44],distribut:[5,9,10,12,21,22,23,26,31,36,37,43],divereg:[6,10],diverg:[6,10,22],dnd:[0,19,43],dnd_key_error_threshold:19,dnd_size:19,do_action_hindsight:30,docker:36,document:[36,40],doe:[11,23,28,36],doing:[6,10,27],domain:37,don:[4,26,34,43],done:[0,3,6,9,10,25,28,33,44,45],doom:[25,33,40],doom_basic_bc:45,doom_basic_dqn:45,doom_environ:[25,33,45],doomenviron:[25,33],doomenvironmentparamet:[33,45],doominputfilt:33,doomlevel:25,doomoutputfilt:33,doubl:[3,16,22],down:[23,25],dqn:[3,16,17,22,25,26,28,29,35,37,43],dqn_agent:[14,44],dqnagent:44,dqnalgorithmparamet:14,drive:[2,25,40,42],driving_benchmark:25,due:28,duel:[3,22],dump:[0,3,44],dump_csv:0,dump_gif:0,dump_in_episode_sign:0,dump_mp4:0,dump_one_value_per_episod:[3,44],dump_one_value_per_step:[3,44],dump_parameters_document:0,dump_signals_to_csv_every_x_episod:0,durabl:36,dure:[3,6,9,10,11,19,26,34,35,44,45],dynam:[34,39,43],each:[0,1,2,3,4,5,6,9,10,11,13,14,15,17,19,20,21,23,24,25,26,27,28,29,30,31,32,34,35,36,37,39,43,44],eas:34,easi:[33,34,42],easier:37,easili:[26,36,45],effect:[0,3,6,17,28,35,44],effici:[35,36,43],either:[0,3,5,17,23,26,31,34,36,37,45],elasticach:36,element:[3,11,23,28,31],els:36,embbed:23,embed:[3,19,23,44],embedd:[23,37],embedding_merger_typ:23,embeddingmergertyp:23,empti:24,enabl:[23,37],encod:[28,31],encourag:[18,20,35],end:[2,3,9,22,24,25,28,44,45],enforc:29,engin:[25,40],enough:[4,19,36],ensembl:[26,43],ensur:23,enter:[3,44,45],entir:[10,16,19,22,26,29,35],entri:[19,35],entropi:[1,5,6,9,10,12,22,26],enumer:31,env:24,env_param:33,env_respons:[3,44],enviorn:25,environ:[0,3,4,15,23,24,26,27,28,29,31,32,35,39,41,42,44],environmentparamet:[25,33],environmnet:31,envrespons:[0,3,25,44],episod:[0,3,4,5,9,10,11,16,17,22,25,26,32,33,34,35,44,45],episode_max_tim:25,episodic_hindsight_experience_replai:30,epoch:6,epsilon:[6,26,30],epsilon_schedul:26,equal:2,equat:[7,13,14,17,21],error:[23,43],escap:45,especi:[15,36],essenti:[17,23,29,33,35],estim:[5,6,10,11,16,20,26],estimate_state_value_using_ga:[5,6,10],eta:[6,10],etc:[0,3,23,25,27,31,32,36,40,44],evalu:[0,3,23,24,26,35,36,44],evaluate_onli:0,evaluation_epsilon:26,evaluation_noise_percentag:26,even:[15,23,25,33,34,35,43],everi:[0,5,7,9,11,12,13,14,16,17,18,20,21,22,45],exact:[19,26,39],exactli:23,exampl:[2,3,4,23,24,25,26,27,28,29,31,32,33,35,36,37,44,45],except:[17,24],execut:[24,34,35,36],exhibit:[3,32,44],exist:[19,36],exit:[3,44],expand_dim:24,expect:[0,3,26,36,39,44],experi:[0,7,10,22,25,30,34,35,42,43,45],experiment_path:[0,25],experiment_suit:25,experimentsuit:25,expert:[1,2,24,43],exploit:[26,35],explor:[3,4,5,6,7,8,10,11,16,18,19,32,35,42,43],exploration_polici:26,explorationparamet:[3,26,32],exponenti:[6,10,22,23],expor:3,export_onnx_graph:0,expos:[34,37,42],extend:[25,26,40],extens:[25,40],extent:45,extern:0,extra:[23,24,37],extract:[3,18,19,24,28,31,34,35,44],factor:[7,9,10,20,22,23,24,26,28],failur:36,faithfulli:34,fake:31,fals:[0,3,7,23,24,25,26,29,30,31,33,44],far:[10,28,35,39],fast:36,faster:[15,43],featur:[7,25,37,42,43],feature_minimap_maps_to_us:25,feature_screen_maps_to_us:25,fetch:[23,24],fetched_tensor:23,few:[9,11,12,13,14,16,20,21,22,26,33,36],field:[39,42],file:[0,3,32,35,36,44,45],filesystem:36,fill:[24,33],filter:[0,3,42,44],find:[13,34,40,42],finish:[19,45],finit:29,first:[0,7,10,11,19,21,22,23,24,28,35,37],fit:[31,36],flag:[0,3,23,24,25,44],flicker:25,flow:[27,36,42],follow:[2,3,5,7,9,12,13,14,17,18,19,21,22,23,24,25,26,30,32,33,39,43,44],footprint:28,forc:[23,25,29,33],force_cpu:23,force_environment_reset:[25,33],force_int_bin:29,forced_attention_s:31,form:[4,17,31,43],format:32,formul:5,forward:[23,26],found:[3,38,45],frac:[6,12,22],fraction:[6,10,36],frame:[0,25],frame_skip:25,framework:[0,3,23,32,42,44],framework_typ:0,free:[25,40],from:[0,1,2,3,4,5,6,7,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,31,32,33,34,35,36,37,39,40,42,44,45],full:[3,9,16,29,44],fulldiscreteactionspacemap:29,fulli:23,func:[3,44],further:36,futur:[0,3,9,24,36,43],future_measurements_weight:4,gae:[5,6,10],gae_lambda:[5,6,10],game:[3,24,25,40,42,44,45],game_ov:24,gamma:[5,7,11,12,13,14,16,17,18,19,20,22],gap:[20,43],gaussian:[10,26],gce:36,gener:[0,5,6,10,11,23,25,26,30,31,32,36,45],general_network:32,get:[3,4,6,7,8,9,10,11,13,14,16,18,20,23,24,25,26,31,35,37,39,44],get_act:26,get_action_from_us:25,get_available_kei:25,get_first_transit:24,get_global_vari:23,get_goal:25,get_last_env_respons:25,get_last_transit:24,get_local_vari:23,get_output_head:32,get_predict:[3,44],get_random_act:25,get_rendered_imag:[25,33],get_reward_for_goal_and_st:31,get_state_embed:[3,44],get_transit:24,get_transitions_attribut:24,get_variable_valu:23,get_weight:23,gif:0,github:[33,39,42],given:[0,1,2,3,4,5,7,9,10,23,24,25,26,28,29,30,31,32,35,44],given_weight:23,global:[3,23,37,44],global_network:23,glue:36,goal:[1,2,3,4,23,24,25,30,35,37,43,44],goal_from_st:31,goal_nam:31,goal_spac:25,goal_vector:4,goals_spac:30,goalsspac:[30,31],goaltorewardconvers:31,going:[27,36],good:[33,34],googl:36,gpu:23,gracefulli:45,gradient:[3,5,6,10,17,19,23,32,43,44],gradientclippingmethod:23,gradients_clipping_method:23,granular:30,graph:0,graphmanag:35,grayscal:[28,31],greedili:35,group:34,grow:22,guidelin:43,gym:40,gym_environ:[25,45],gymenviron:25,gymenvironmentparamet:33,hac:43,had:39,hand:[15,28,35,43],handl:[4,36],handle_episode_end:[3,25,44],handling_targets_after_episode_end:4,handlingtargetsafterepisodeend:4,hard:[34,43],harder:34,has:[0,3,15,19,20,24,26,28,35,36,37,39,43,44],has_glob:23,has_target:23,hat:[6,12,22],have:[0,3,4,23,25,26,28,29,30,35,37,39,44],head:[1,2,3,5,9,11,15,18,19,23,26,32,37,44],headparamet:23,heads_paramet:23,health_gath:25,heatup:[26,35],help:[20,24,34,35,36,43],here:[33,35],heurist:[10,26],hide:37,hierarch:[31,35],hierarchi:[3,35,43,44],high:[7,10,28,29,31,34],high_i:31,high_kl_penalty_coeffici:10,high_x:31,higher:[10,36],highest:[5,9,20,26,28,29,31],highli:[0,33,43],hindsight:[8,30,43],hindsight_goal_selection_method:30,hindsight_transitions_per_regular_transit:30,hindsightgoalselectionmethod:30,hold:[11,24,30,34,35,37],horizont:36,hostnam:0,hot:31,how:[4,6,10,26,36,43],howev:36,hrl:30,http:[17,30],huber:21,huber_loss_interv:21,human:[0,25],human_control:25,hyper:[32,39],hyperparamet:32,ident:23,identifi:[23,31],ignor:25,imag:[0,23,25,28,29,31,33,37,45],imit:[3,24,38,43],impact:[23,36],implement:[3,6,10,23,25,26,30,32,33,36,39,43],impli:45,importance_weight:23,importantli:35,improv:[5,15,22,25,35,43],includ:[0,3,4,25,27,28,37,40,44,45],incomplet:36,increas:[10,20,28,36,43],increment:[3,44],incur:36,index:[0,2,24,25,28,29,30],indic:31,inf:[28,31],infer:[3,23,44],infinit:43,info:[3,11,24,31,33,44],info_as_list:24,inform:[3,4,17,24,25,27,34,35,40,44],infrastructur:36,inherit:[3,32,33],init_environment_dependent_modul:[3,44],initi:[3,4,10,20,23,24,32,35,42,44],initial_feed_dict:23,initial_kl_coeffici:10,innov:43,input:[1,2,3,4,7,11,13,14,16,18,19,20,23,27,31,35,37,44],input_embedders_paramet:23,input_high:28,input_low:28,input_space_high:29,input_space_low:29,inputembedderparamet:23,inputfilt:35,insert:[19,24],inspect:0,instal:45,instanc:[3,31,37],instanti:[3,25,35],instead:[0,3,6,17,20,23,28,29,35,36,43,44],intact:[11,39],integ:[0,28,29],integr:[33,35,42],intel:42,intend:[9,23,26,35],interact:[24,35,42,45],interchang:36,interest:[23,34],interfac:[25,34,36,40],interleav:36,intermedi:19,intern:[3,9,17,23,24,27,35,44,45],interpol:28,intersect:43,interv:21,intrins:24,intro:42,introduc:43,invers:[25,40],investig:36,invok:35,involv:32,is_empti:24,is_point_in_space_shap:31,item:24,iter:[3,5,7,10,15,23,36,44],its:[0,3,12,22,23,24,26,31,35,36,43,44,45],itself:[23,31,45],job:0,job_typ:0,joint:25,json:0,jump:[4,29],jupyt:32,just:[3,10,20,22,33,35,37,44,45],kafka:36,kapa:21,keep:[14,24,28,36,45],kei:[2,19,23,24,25,30,32,34,36,45],key_error_threshold:30,key_width:30,keyboard:[25,45],kinesi:36,kl_coeffici:23,kl_coefficient_ph:23,kml:36,know:[3,43,44,45],knowledg:[3,35,44],known:[24,34,39,43],kubernet:36,kwarg:25,l2_norm_added_delta:19,l2_regular:23,lab:36,lack:34,laid:36,lamb:26,lambda:[5,6,10,26],lane:2,larg:[26,28,36,40],larger:[23,36],last:[4,10,19,24,25,28],last_env_respons:25,lastli:35,latenc:36,later:[0,3,23,36,44,45],latest:[17,19,35],launch:36,layer:[23,26,30,35,37],lazi:[24,28],lazystack:28,lbfg:23,lead:26,learn:[0,3,4,5,7,8,9,11,12,13,14,15,18,21,22,23,24,25,26,28,34,35,37,38,39,40,43,44],learn_from_batch:[3,32,35,44],learner:23,learning_r:[23,30],learning_rate_decay_r:23,learning_rate_decay_step:23,least:[37,43],leav:[10,11],left:[2,43],len:36,length:[4,5,6,10,17,19,23,24],less:[15,36,43],level:[0,25,33,45],levelmanag:[3,35,44],levelselect:25,librari:[25,40],like:[25,31,35,36,37,43],likelihood:[6,10],line:[3,35,44,45],linear:29,linearboxtoboxmap:29,linearli:29,list:[0,3,4,23,24,25,26,28,29,31,32,36,44,45],listen:36,load:[0,34,36,45],load_memory_from_file_path:45,local:[3,23,37,44],locat:[21,24,28,43],lock:36,log:[0,3,5,9,44],log_to_screen:[3,44],logger:[0,3,44],longer:36,look:[33,36],loop:35,loss:[1,2,3,6,9,10,12,13,14,21,22,23,26,32,37,44],lot:[26,34,36,39,43],low:[7,10,28,29,31],low_i:31,low_x:31,lower:[0,30,35,36],lowest:[28,29,31],lstm:37,lumin:28,lvert:[12,22],lvl:45,machin:36,mai:[0,23,36,38,45],main:[3,32,35,37,38,44,45],major:26,make:[0,3,23,25,32,34,39,43,44],manag:23,mandatori:[31,33,37],mani:[3,15,36,38,39],manner:[10,16,17,20,28,35],map:[3,23,25,27,28,29,31,32,44],mark:24,markdown:44,mask:[11,29],masked_target_space_high:29,masked_target_space_low:29,master:[3,35,36,44],match:[2,19,23,31],mathbb:5,mathop:5,max:[5,12,17,22,28],max_a:[11,14,19,20],max_action_valu:24,max_episodes_to_achieve_reward:0,max_fps_for_human_control:0,max_over_num_fram:25,max_simultaneous_selected_act:31,max_siz:30,max_spe:25,maxim:[4,13],maximum:[0,12,14,19,20,24,25,26,28,30],mean:[0,2,6,7,8,9,10,18,23,26,28,29,31,34,43],meant:37,measur:[3,4,23,25,28,31,33,43,44],measurements_nam:31,mechan:[27,39,45],memor:43,memori:[3,22,24,28,32,35,36,42,43,44],memorygranular:30,memoryparamet:[3,32],memorystor:36,merg:[23,25],method:[0,5,6,10,17,23,28,30,36],metric:[0,31,34],middlewar:[19,23,37],middleware_paramet:23,middlewareparamet:23,midpoint:21,might:[3,9,25,32,36,37,44],min:[6,12,20,22],min_reward_threshold:0,mind:45,minim:[2,4,12,36],minimap_s:25,minimum:[0,6,28],mix:[3,6,10,19,20,43],mixedmontecarloalgorithmparamet:16,mixtur:[16,23],mmc:[16,43],mmc_agent:16,mode:[20,23,35,45],model:[0,16,18,23,42,45],modif:43,modifi:36,modul:[3,32,35,44],modular:[32,35,37,42],mont:[3,20],monte_carlo_mixing_r:[16,20],more:[3,7,17,23,28,32,34,35,36,37,42,44],moreov:34,most:[3,9,19,24,26,36,37,39,43,44,45],mostli:[28,35],motiv:35,move:[6,10,28,34,36,39],mp4:0,mpi:36,mse:[2,13,14,21],much:[6,10,35,36,43],mujoco:[25,29,33,40],multi:[10,23,31,37],multiarrai:[3,44],multidimension:31,multipl:[4,6,10,17,23,25,26,28,29,30,31,34,35,39,42,45],multipli:[4,9,23,28],multiselect:29,multitask:[25,40],must:[23,28,36,39],mxnet:45,n_step:[19,22,24,30],n_step_discounted_reward:24,n_step_q_ag:17,nabla:7,nabla_:7,nabla_a:7,naf:43,naf_ag:18,nafalgorithmparamet:18,name:[3,23,24,25,28,31,32,44,45],nativ:[0,25,33,40],native_rend:0,navig:3,ndarrai:[3,23,24,25,26,28,29,31,33,44],nearest:19,neat:34,nec:[0,43],nec_ag:19,necalgorithmparamet:19,necessari:[3,19,23,36,44],necessarili:28,need:[0,3,22,25,26,31,32,35,36,39,43,44,45],neg:[4,28],neighbor:19,neon_compon:32,network:[0,3,23,26,32,35,36,39,42,43,44,45],network_input_tupl:23,network_nam:[3,44],network_param:26,network_paramet:23,network_wrapp:[3,23,44],networkparamet:[3,23,26,32],networkwrapp:[3,44],neural:[3,16,23,37,39],new_value_shift_coeffici:[19,30],new_weight:23,newli:[20,33,36,43],next:[3,7,13,14,18,20,21,24,25,35,44,45],next_stat:24,nfs:36,nice:45,no_accumul:23,node:[23,36,37],nois:[7,8,18,26,35],noise_percentage_schedul:26,noisi:[9,22,26],non_episod:30,none:[0,3,6,7,10,23,24,25,26,28,29,31,33,44],nontrivi:36,norm:23,norm_unclipped_grad:23,norm_unclippsed_grad:23,normal:[3,4,9,26,27,28,31],note:[19,23,26,36,44],notebook:32,notic:[23,43],notori:[34,39,43],now:[6,33],nstepqalgorithmparamet:17,nth:22,num_act:[19,30,31],num_bins_per_dimens:29,num_class:30,num_consecutive_playing_step:[3,7,44],num_consecutive_training_step:[3,44],num_neighbor:30,num_predicted_steps_ahead:4,num_speedup_step:25,num_steps_between_copying_online_weights_to_target:[7,17],num_steps_between_gradient_upd:[5,9,17],num_task:0,num_training_task:0,num_work:0,number:[0,2,4,5,7,9,11,12,17,19,21,22,23,24,25,26,28,29,30,34,36,40,45],number_of_knn:19,numpi:[3,23,24,25,26,28,29,31,33,44],object:[0,3,22,23,25,26,28,30,35,44],objectstor:36,observ:[0,3,4,10,23,24,25,27,33,35,44],observation_reduction_by_sub_parts_name_filt:28,observation_rescale_size_by_factor_filt:28,observation_rescale_to_size_filt:28,observation_space_s:23,observation_stat:28,observation_typ:25,observationspac:31,observationtyp:25,occasion:36,off:[36,43],offer:[25,40],often:[34,35,37],old:[6,10,23,43],old_weight:23,onc:[0,6,9,10,11,12,13,14,16,17,20,21,22,23,31,45],one:[0,3,15,19,20,23,24,25,26,27,30,31,33,34,36,37,43,44],ones:[33,43],onli:[0,3,4,5,6,9,10,11,12,14,15,17,19,21,22,23,24,25,26,28,29,33,35,43,44,45],onlin:[7,11,12,13,14,16,17,18,19,20,21,22,23,35,36,37],online_network:23,onnx:0,onto:27,open:[0,25,40],openai:40,oper:[20,23,28],ops:36,optim:[3,4,23,36,38],optimization_epoch:6,optimizer_epsilon:23,optimizer_typ:23,option:[9,23,25,29,31,32,34,36,37,45],order:[0,3,5,6,7,9,10,13,14,15,17,18,19,20,21,23,24,27,28,29,34,35,37,39,43,44],org:[17,30],origin:[17,28,29,39],ornstein:[7,8,26],other:[0,2,9,15,20,23,25,27,28,30,34,35,43],otherwis:[10,11,23,26,31],our:[6,36],out:[2,13,14,26,27,29,34,36,43],outcom:[26,35],output:[0,4,7,11,12,18,19,23,26,27,28,31,32,37],output_0_0:23,output_observation_spac:28,outputfilt:35,outsid:[4,26],over:[3,6,9,10,17,19,22,23,24,26,28,29,34,35,43,44],overestim:7,overfit:10,overhead:0,overlai:34,override_existing_kei:30,overriden:32,overview:35,overwhelm:35,overwritten:23,own:[23,32,36],p2p:36,p_j:[12,22],page:[3,39],pair:[0,31,36],pal:[20,43],pal_ag:20,pal_alpha:20,palalgorithmparamet:20,paper:[5,9,12,17,19,21,25,30,39],parallel:[23,34,37],parallel_predict:23,param:[24,25,26,32,33],paramet:[2,3,4,5,6,7,9,10,12,16,17,19,20,21,22,23,24,25,26,28,29,30,31,32,33,39,42,44,45],parameters_server_host:0,parent:[3,44],parmet:3,pars:35,part:[0,11,23,24,26,28,29,37,39,43],part_nam:28,partial:[29,36],partialdiscreteactionspacemap:29,particular:[4,36],particularli:[25,26,31,36,39,43],pass:[0,4,7,8,18,19,23,25,26,27,33,34,35,37],patamet:19,path:[0,32,33,36,45],pattern:35,pdf:30,penal:[6,7,10],penalti:10,pendulum_hac:33,pendulum_with_go:33,pendulumwithgo:33,per:[0,3,4,31,32,35,44],percentag:26,percentil:26,perceptron:37,perform:[0,3,23,24,28,30,33,34,35,36,43,44],period:[36,37,45],persist:[3,36],persistent_advantage_learn:20,perspect:12,phase:[3,6,7,8,10,23,25,26,35,44],phi:[12,22],physic:[25,40],pi_:6,pick:25,pickl:45,pixel:25,place:[29,34,35],placehold:[23,26],plai:[0,3,9,11,13,14,17,26,32,34,44],plain:37,planarmapsobservationspac:28,platform:[25,40],pleas:[17,39],point:[28,31,35],polici:[1,3,4,5,8,11,17,18,19,32,35,36,37,38,42,43],policy_gradient_rescal:[5,6,9,10],policy_gradients_ag:9,policygradientalgorithmparamet:9,policygradientrescal:[5,6,9,10],policyoptimizationag:32,poll:36,popul:35,popular:[25,40],port:0,posit:[4,28],possibl:[2,3,4,19,26,29,31,34,36,37,42,43,44,45],post:[27,42],post_training_command:[3,44],potenti:36,power:[25,40],ppo:[6,10,43],ppo_ag:10,ppoalgorithmparamet:10,pre:[7,26,27],predefin:[11,20,26,45],predict:[1,2,3,5,6,7,10,11,12,13,14,20,21,22,23,26,37,43,44],prediction_typ:[3,44],predictiontyp:[3,44],prefect:43,prepar:[3,44],prepare_batch_for_infer:[3,44],present:[15,19,25,28,43],preset:[0,5,32,33,35,45],press:[34,45],prevent:[7,10,35,36],previou:28,previous:[10,23],print:[0,3,45],print_networks_summari:0,priorit:[22,30],prioriti:[22,30],privat:31,pro:36,probabilit:5,probabl:[3,5,9,11,12,22,24,26,32,43,44],process:[0,3,7,8,23,26,27,28,29,32,34,35,36,37,39,42,44],produc:23,progress:23,project:[12,22],proof:36,propag:6,propagate_updates_to_dnd:19,properli:36,properti:[23,30,32,33],proport:30,protocol:36,provid:[23,36],proxi:35,proxim:3,pub:36,publish:[36,39],purpos:[0,3,9],pursuit:2,push:36,pybullet:[25,40],pygam:0,python:[25,30,32,36,40,42],qr_dqn_agent:21,qualiti:25,quantil:[3,43],quantileregressiondqnalgorithmparamet:21,queri:[19,23,35,43],question:43,quit:34,r_i:[5,17],r_t:[4,6,22],rai:36,rainbow:[3,32,43],rainbow_ag:32,rainbow_dqn_ag:22,rainbowag:32,rainbowagentparamet:32,rainbowalgorithmparamet:32,rainbowdqnalgorithmparamet:22,rainbowexplorationparamet:32,rainbowmemoryparamet:32,rainbownetworkparamet:32,rais:[3,24,44],ramp:[32,35],randmemb:36,random:[0,17,25,26,31,35,39],random_initialization_step:25,randomli:[24,35],rang:[6,7,10,12,22,25,26,28,29,31,36,43],rare:19,rate:[16,19,23,25,37],rate_for_copying_weights_to_target:7,rather:[4,34],ratio:[6,10,16,28],raw:[25,40],reach:[10,31],read:36,readabl:35,readi:36,real:3,reason:[28,39],rebuild_on_every_upd:30,receiv:[23,24],recent:[3,22,36,43,44],recommend:33,redi:36,reduc:[1,2,9,10,20,23,28,35,36,43],reduct:28,reduction_method:28,reductionmethod:28,redund:28,refer:[2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22],referenc:3,regard:[3,44],regimen:36,regist:[3,44],register_sign:[3,44],regress:[2,3,43],regula:[6,10],regular:[5,6,9,10,17,19,23,26,29,30,43],regularli:23,reinforc:[3,5,7,8,9,12,13,14,15,17,20,21,22,25,26,34,35,37,38,39,40,43],rel:[26,36],relat:23,relationship:43,releas:[42,43],relev:[3,11,26,28,44],remov:28,renam:36,render:[0,3,25,33],reorder:28,repeat:[25,35],replac:[26,28,30,36],replace_mse_with_huber_loss:23,replai:[1,2,3,7,11,12,13,14,17,19,20,21,22,30,35,43,44,45],replay_buff:45,replicated_devic:23,repo:33,repositori:42,repres:[0,6,10,12,22,23,24,25,26,29,31,45],represent:37,reproduc:[35,39],request:[3,23,44],requir:[3,23,26,28,34,36,37,43,44],requires_action_valu:26,rescal:[4,5,6,9,10,23,27,28],rescale_factor:28,rescaleinterpolationtyp:28,rescaling_interpolation_typ:28,research:[25,39,40],reset:[3,19,23,25,26,33,44],reset_accumulated_gradi:23,reset_evaluation_st:[3,44],reset_gradi:23,reset_internal_st:[3,25,44],resili:36,resourc:36,respect:[7,24,25],respons:[3,24,25,35,44],rest:[23,24,29],restart:33,restor:0,result:[3,4,12,13,14,15,21,22,28,29,39,43,44,45],retriev:[19,30],return_additional_data:30,reus:[35,36],reusabl:37,reward:[0,1,2,3,4,7,9,16,17,22,23,24,25,27,31,33,34,35,43,44],reward_test_level:0,reward_typ:31,rgb:[25,28,31],rho:7,right:[2,3,26,29,34,43,44],ring:36,rl_coach:[0,1,2,3,4,5,6,7,9,10,12,14,16,17,18,19,20,21,22,23,24,25,26,28,29,30,31,33,44,45],rms_prop_optimizer_decai:23,rmsprop:23,roboschool:[25,40],robot:[25,31,40,42],robust:36,rollout:36,root:34,rule:[7,11],run:[0,3,4,7,9,10,11,13,14,19,20,23,25,26,28,36,44,45],run_pre_network_filter_for_infer:[3,44],runphas:[3,44],rvert:[12,22],s3f:36,s_t:[4,5,7,11,12,13,14,16,17,18,20,22],sai:43,same:[3,4,6,9,16,17,20,23,25,29,30,34,37,39,43,44],sampl:[1,2,3,5,7,9,10,11,12,13,14,16,17,20,21,22,23,26,30,31,35,36,44],sample_with_info:31,satur:7,save:[0,3,22,26,44,45],save_checkpoint:[3,44],scale:[4,9,23,28,34,36,45],scale_down_gradients_by_number_of_workers_for_sync_train:23,scale_measurements_target:4,scaler:23,schedul:[6,26,30,35,45],scheme:[5,26,35,43],schulman:10,scienc:39,scipi:28,scratch:43,scratchpad:0,screen:[3,25,33,45],screen_siz:25,script:35,second:[0,23,34,43,45],section:[38,40],see:[3,25,28,39,40,43,44],seed:[0,25,39],seen:[4,19,20,25,28,35,39,43],segment:[25,31],select:[5,11,19,23,24,26,28,29,31,33,34,35,42,45],self:[3,23,32,33,44],send:[33,36,37],sensit:36,separ:[0,3,15,28,29,37,38,43],separate_actions_for_throttle_and_brak:25,seper:9,sequenti:[4,24,30],serv:[6,9,37],server:0,server_height:25,server_width:25,sess:[3,23,44],session:[3,23,44],set:[0,2,3,4,5,6,7,10,12,13,14,16,19,20,22,23,24,25,26,28,29,31,32,36,39,40,42,43,44,45],set_environment_paramet:[3,44],set_goal:25,set_incoming_direct:[3,44],set_is_train:23,set_sess:[3,44],set_variable_valu:23,set_weight:23,setup:[3,44],setup_logg:[3,44],sever:[0,3,6,9,10,11,23,25,26,28,32,33,34,35,37,40,43,44,45],shape:[23,28,31],share:[0,3,23,30,36,37,44],shared_memory_scratchpad:0,shared_optim:23,shelf:36,shift:[29,35],shine:34,should:[0,3,4,6,10,11,17,20,23,24,25,28,30,31,32,33,44,45],should_dump:0,shouldn:11,show:39,shown:39,shuffl:24,side:[3,44],sigma:26,signal:[3,35,44],signal_nam:[3,44],signific:36,significantli:15,similar:[6,15,17,24,25,29,36,43],simpl:[9,30,32,33,36,37,42,43,45],simplest:43,simplif:43,simplifi:[6,34,37],simul:[25,33,40,45],simultan:6,sinc:[3,6,7,9,17,19,20,22,23,26,28,36,44],singl:[3,4,5,6,10,11,15,16,17,23,24,25,26,29,31,34,35,36,37,44],size:[23,24,26,28,29,30,31,36],skill:43,skip:[25,35],slave:[3,44],slice:24,slightli:36,slow:[23,45],slower:[0,15],slowli:7,small:[6,19,30,36],smaller:26,smooth:34,soft:[7,10,18],softmax:26,solut:43,solv:[28,33,40,42],some:[0,3,10,23,24,26,28,32,33,34,36,37,39,43,44,45],someth:36,sort:21,sourc:[0,1,2,3,4,5,6,7,9,10,12,14,16,17,18,19,20,21,22,23,24,25,26,28,29,30,31,33,40,44],space:[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,25,26,27,28,29,30,35,42,44],spacesdefinit:[3,23,44],spatial:43,special:15,specif:[0,3,11,15,19,23,24,32,35,36],specifi:[0,23,26,28,33,36,45],speed:[23,28,43],speedup:45,spent:36,spread:[28,29],sql:36,squar:28,squeeze_list:23,squeeze_output:23,stabil:[17,43],stabl:[37,43],stack:[3,27,28,31,44],stack_siz:[23,28],stacking_axi:28,stage:37,stai:39,standard:[6,9,10,11,26,28,34,36],starcraft2_environ:25,starcraft2environ:25,starcraft:[31,40],starcraftobservationtyp:25,start:[3,7,10,15,20,24,28,29,33,36,44],state:[1,2,3,4,5,6,7,8,9,10,11,13,14,15,16,17,18,19,20,21,22,23,24,25,28,30,31,32,33,35,37,38,43,44],state_key_with_the_class_index:[2,30],state_spac:25,state_valu:24,statist:[3,9,28,42,44],stdev:26,steep:26,step:[0,3,4,5,6,7,9,10,11,12,13,14,16,18,19,20,21,22,23,24,25,26,28,32,33,34,35,36,43,44,45],stepmethod:[7,17],stochast:35,stop:25,storag:36,store:[0,3,19,22,24,25,28,30,34,35,36,44,45],store_transitions_only_when_episodes_are_termin:22,str:[0,2,3,4,17,23,24,25,26,28,29,31,44],strategi:[25,40],stream:15,strict:39,strictli:36,string:[0,23,25],structur:[0,3,24,30,32,35,44],stuff:23,style:26,sub:[29,30,31,32,35,36,45],sub_spac:31,subscrib:36,subset:[34,39,43],subtract:20,succeed:25,success:[25,43],suffer:34,suffici:24,suggest:[32,36],suit:[0,40],suitabl:45,sum:[4,6,9,16,23,24],sum_:[5,12,16,17,19,22],summari:[0,3,44],supervis:43,suppli:[3,44],support:[0,3,23,25,26,34,37,38,40,42,45],sure:[0,39],surrog:6,swingup:25,symbol:23,sync:[3,23,35,36,44],synchron:[0,23,35,36,37],system:36,t_max:[9,17],take:[0,9,10,15,19,20,23,25,26,27,33,34,35,36],taken:[1,2,4,5,6,7,10,12,15,19,20,21,22,23,24,25,26],tanh:7,target:[1,2,3,4,5,6,7,10,11,12,13,14,16,17,18,19,20,21,22,23,25,28,29,31,32,35,36,37,44],target_act:29,target_kl_diverg:10,target_network:23,target_success_r:25,targets_horizon:17,task:[0,1,2,25,28,32,34,40],task_index:0,techniqu:[6,10,42,43],teh:23,temperatur:26,temperature_schedul:26,temporari:36,tensor:[3,23,44],tensorboard:0,tensorflow:[0,3,23,44,45],tensorflow_support:23,term:[6,10,31],termin:[3,7,24,35,44],test:[0,3,5,7,8,9,10,23,32,39,42,45],test_using_a_trace_test:0,textrm:35,than:[0,3,10,23,26,34,36,37,44],thei:[3,19,20,23,26,34,35,36,43,44,45],them:[4,5,9,17,23,24,25,28,31,33,34,36,37],therefor:[0,7,27,43],theta:[6,7,12,22,26],theta_:6,thi:[0,3,4,5,6,7,9,10,11,15,17,19,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,39,43,44,45],thing:34,those:[0,3,7,11,13,14,15,19,24,26,29,35,37,38,43,44],though:36,thousand:[10,11,12,13,14,16,20,21,22],thread:[23,36],three:[3,37,38],threshold:[10,19,28],through:[0,3,4,7,8,9,10,11,19,20,23,32,33,35,37,44],tild:7,time:[0,4,20,23,26,29,30,34,36,37,43],time_limit:33,timestep:[4,9],tmp:0,todo:36,togeth:[3,17,24,35,44],toggl:34,too:10,tool:[34,43],top:[23,25,27,28,30,33,34,43],torqu:25,total:[0,3,9,10,16,19,20,24,30,32,34,43,44],total_loss:23,total_return:24,toward:36,trace:0,trace_max_env_step:0,trace_test_level:0,tradeoff:26,train:[0,3,15,23,26,32,33,34,35,36,37,39,42,43,44],train_and_sync_network:23,train_on_batch:23,transfer:[25,40],transit:[1,2,3,4,5,7,9,10,12,13,14,17,19,20,21,22,30,32,35,44],transition_idx:24,tri:43,trick:39,tricki:34,trigger:25,tune:26,tupl:[1,2,3,7,23,24,25,30,31,32],turn:[2,43],tutori:[32,33],tweak:[3,44],two:[7,9,17,23,25,26,27,28,29,31,33,37,45],type:[0,3,9,15,23,25,28,31,32,35,36,37,42,43,44,45],typic:[6,10,23,43,45],typolog:36,uhlenbeck:[7,8,26],uint8:28,unbound:31,uncertain:26,uncertainti:26,unchang:10,unclip:[3,32,44],uncorrel:17,under:[3,23,32,36,45],underbrac:5,understand:45,unifi:6,uniformli:[25,26,29,31],union:[3,24,25,26,29,31,44],uniqu:23,unit:34,unlik:10,unmask:29,unnecessari:0,unshar:[3,44],unsign:28,unspecifi:23,unstabl:[34,39],until:[0,9,10,19,22,26],unus:23,updat:[3,6,7,9,10,11,12,13,14,15,17,18,19,20,21,22,23,24,26,32,33,34,35,36,37,43,44],update_discounted_reward:24,update_log:[3,44],update_online_network:23,update_step_in_episode_log:[3,44],update_target_network:23,update_transition_before_adding_to_replay_buff:[3,44],upon:[3,5,32,44],upper:26,usag:[29,42],use:[0,1,2,3,4,5,7,8,9,11,13,14,18,23,24,25,26,28,29,30,31,32,33,35,36,37,42,43,44,45],use_accumulated_reward_as_measur:4,use_cpu:0,use_full_action_spac:25,use_kl_regular:[6,10],use_non_zero_discount_for_terminal_st:7,use_separate_networks_per_head:23,use_target_network_for_evalu:7,used:[0,2,3,5,6,7,9,10,11,12,16,17,18,19,20,21,23,25,26,28,29,30,32,33,35,36,37,39,44,45],useful:[0,3,4,22,23,26,28,31,39,43,44,45],user:[23,25,26,34,35,36],uses:[0,1,6,10,15,24,26,35,39,43,45],using:[0,3,5,6,7,9,10,13,14,16,17,18,19,20,22,23,25,26,28,32,33,34,36,40,43,44,45],usual:[28,35],util:[3,34,36,44],v_max:12,v_min:12,val:[3,31,44],val_matches_space_definit:31,valid:[0,31],valu:[0,2,3,4,5,6,7,10,11,12,13,14,15,17,18,19,20,22,23,24,25,26,28,29,30,31,32,35,36,37,38,43,44],valuabl:34,value_targets_mix_fract:[6,10],valueexcept:[3,44],valueoptimizationag:32,van:4,vari:37,variabl:[23,25],varianc:[9,26,34],variant:[26,30,43],variou:[3,24,30,42],vector:[3,4,7,8,10,11,23,25,28,31,33,37,43,44],vectorobservationspac:28,verbos:25,veri:[0,6,7,9,15,19,34,36,43,45],version:[6,10,24,36],versu:23,via:[2,11,36],video:[0,3,25],video_dump_method:0,view:34,viewabl:[3,44],visit:39,visual:[0,3,25,40,42],visualization_paramet:25,visualizationparamet:[3,25],vizdoom:40,vote:26,wai:[3,6,10,26,29,33,35,37,42,44,45],wait:[5,23,36],walk:33,want:[3,4,22,23,24,28,29,30,44],warn:[26,28,29],wasn:24,weather_id:25,websit:[25,42],weight:[4,5,6,7,10,11,12,13,14,16,17,18,19,20,21,22,23,26,35,37,43],well:[19,23,26,31,43],went:10,were:[4,12,13,14,15,19,21,22,23,24,29,39],what:[10,43],when:[0,3,4,5,6,7,8,9,10,19,23,24,25,26,28,32,33,34,36,44,45],where:[2,3,4,5,6,10,11,12,15,17,19,20,22,23,24,25,26,28,29,31,34,36,43,44],which:[0,1,2,3,5,6,7,9,10,11,15,17,18,19,20,21,23,24,25,26,28,30,31,32,33,34,35,36,37,38,39,40,42,43,44,45],who:35,why:[34,35],window:[28,29],wise:28,within:[0,6,10,18,26,31,34],without:[5,10,29,30,34,43,45],won:[4,23],wont:23,work:[3,17,23,26,28,29,34,35,43,44,45],workaround:0,worker:[0,17,23,28,30,34,36,37,43,45],worker_devic:23,worker_host:0,wors:43,would:[23,36,43],wrap:[25,28,35,40],wrapper:[3,23,24,25,31,37,44],write:[0,3,36,44],written:[3,22,36,44],y_t:[7,11,13,14,16,18,19,20],yaml:36,year:43,yet:[15,33],you:[4,28,30,32,33,36,42,45],your:[32,33,45],yuv:28,z_i:[12,22],z_j:[12,22],zero:[2,13,14]},titles:["Additional Parameters","Behavioral Cloning","Conditional Imitation Learning","Agents","Direct Future Prediction","Actor-Critic","Clipped Proximal Policy Optimization","Deep Deterministic Policy Gradient","Hierarchical Actor Critic","Policy Gradient","Proximal Policy Optimization","Bootstrapped DQN","Categorical DQN","Double DQN","Deep Q Networks","Dueling DQN","Mixed Monte Carlo","N-Step Q Learning","Normalized Advantage Functions","Neural Episodic Control","Persistent Advantage Learning","Quantile Regression DQN","Rainbow","Architectures","Core Types","Environments","Exploration Policies","Filters","Input Filters","Output Filters","Memories","Spaces","Adding a New Agent","Adding a New Environment","Coach Dashboard","Control Flow","<no title>","Network Design","Algorithms","Benchmarks","Environments","Features","Reinforcement Learning Coach","Selecting an Algorithm","test","Usage"],titleterms:{"final":19,"function":18,"new":[32,33],"switch":45,Adding:[32,33],Using:33,across:43,action:[4,5,6,7,8,9,10,11,18,19,29,31,43],actioninfo:24,actor:[5,8],addit:[0,45],additivenois:26,advantag:[18,20],agent:[3,32,35,45],algorithm:[1,2,4,5,6,7,8,9,10,11,12,13,14,16,17,18,19,20,21,22,38,43,45],api:33,architectur:23,attentionactionspac:31,balancedexperiencereplai:30,batch:24,behavior:1,benchmark:39,between:45,blizzard:25,boltzmann:26,bootstrap:[11,26],boxactionspac:31,can:43,carla:25,carlo:16,categor:[12,26],choos:[4,5,6,7,8,9,10,11,18,19],clip:6,clone:1,coach:[33,34,42],collect:43,compar:34,compoundactionspac:31,condit:2,continu:[6,10,43],continuousentropi:26,control:[19,25,35],copi:37,core:24,critic:[5,8],dashboard:34,deep:[7,14,45],deepmind:25,demonstr:43,descript:[1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22],design:37,determinist:7,direct:4,discret:[5,9,43],discreteactionspac:31,distributedtaskparamet:0,doe:43,doubl:13,dqn:[11,12,13,15,21],duel:15,dump:45,egreedi:26,environ:[25,33,40,43,45],envrespons:24,episod:[19,24,30],episodicexperiencereplai:30,episodichindsightexperiencereplai:30,episodichrlhindsightexperiencereplai:30,evalu:45,experiencereplai:30,explor:26,explorationpolici:26,featur:41,filter:[27,28,29],flag:45,flow:35,framework:45,from:43,futur:4,gener:15,gif:45,goal:31,gradient:[7,9],graph:35,greedi:26,gym:[25,33],have:43,hierarch:8,human:[43,45],imageobservationspac:31,imit:[2,45],input:28,keep:37,learn:[2,17,20,42,45],level:35,manag:35,memori:30,mix:16,mont:16,more:43,multi:45,multipl:43,multiselectactionspac:31,network:[1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,37],networkwrapp:23,neural:19,node:43,non:30,normal:18,observ:[28,31],observationclippingfilt:28,observationcropfilt:28,observationmoveaxisfilt:28,observationnormalizationfilt:28,observationreductionbysubpartsnamefilt:28,observationrescalesizebyfactorfilt:28,observationrescaletosizefilt:28,observationrgbtoyfilt:28,observationsqueezefilt:28,observationstackingfilt:28,observationtouint8filt:28,openai:[25,33],optim:[6,10],ouprocess:26,output:29,pain:43,parallel:43,paramet:0,parameternois:26,persist:20,plai:45,planarmapsobservationspac:31,polici:[6,7,9,10,26],predict:4,presetvalidationparamet:0,prioritizedexperiencereplai:30,process:43,proxim:[6,10],qdnd:30,quantil:21,rainbow:22,regress:21,reinforc:42,render:45,reward:28,rewardclippingfilt:28,rewardnormalizationfilt:28,rewardrescalefilt:28,run:34,sampl:43,select:43,signal:34,simul:43,singl:45,singleepisodebuff:30,solv:43,space:[31,43],starcraft:25,statist:34,step:17,store:11,structur:[1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22],suit:25,sync:37,task:43,taskparamet:0,test:44,thread:45,through:45,track:34,train:[1,2,4,5,6,7,8,9,10,11,12,13,14,16,17,18,19,20,21,22,45],transit:[11,24],transitioncollect:30,truncatednorm:26,type:24,ucb:26,usag:45,vectorobservationspac:31,visual:[34,45],visualizationparamet:0,vizdoom:25,you:43,your:43}}) |