diff --git a/benchmarks/README.md b/benchmarks/README.md index 33f113b..603e882 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -34,6 +34,7 @@ The environments that were used for testing include: |**[Bootstrapped DQN](bootstrapped_dqn)**| ![#2E8B57](https://placehold.it/15/2E8B57/000000?text=+) |Atari | | |**[QR-DQN](qr_dqn)** | ![#2E8B57](https://placehold.it/15/2E8B57/000000?text=+) |Atari | | |**[A3C](a3c)** | ![#2E8B57](https://placehold.it/15/2E8B57/000000?text=+) |Atari, Mujoco | | +|**[ACER](acer)** | ![#2E8B57](https://placehold.it/15/2E8B57/000000?text=+) |Atari | | |**[Clipped PPO](clipped_ppo)** | ![#2E8B57](https://placehold.it/15/2E8B57/000000?text=+) |Mujoco | | |**[DDPG](ddpg)** | ![#2E8B57](https://placehold.it/15/2E8B57/000000?text=+) |Mujoco | | |**[NEC](nec)** | ![#2E8B57](https://placehold.it/15/2E8B57/000000?text=+) |Atari | | diff --git a/benchmarks/acer/README.md b/benchmarks/acer/README.md new file mode 100644 index 0000000..e9f9535 --- /dev/null +++ b/benchmarks/acer/README.md @@ -0,0 +1,28 @@ +# ACER + +Each experiment uses 3 seeds. +The parameters used for ACER are the same as described in the [original paper](https://arxiv.org/abs/1611.01224), except for the optimizer (changed to Adam) and the learning rate (1e-4). + +### Breakout ACER - 16 workers + +```bash +coach -p Atari_ACER -lvl breakout -n 16 +``` + +Breakout ACER + +### Space Invaders ACER - 16 workers + +```bash +coach -p Atari_ACER -lvl space_invaders -n 16 +``` + +Space Invaders ACER + +### Pong ACER - 16 workers + +```bash +coach -p Atari_ACER -lvl pong -n 16 +``` + +Pong ACER diff --git a/benchmarks/acer/breakout_acer_16_workers.png b/benchmarks/acer/breakout_acer_16_workers.png new file mode 100644 index 0000000..ed5c12f Binary files /dev/null and b/benchmarks/acer/breakout_acer_16_workers.png differ diff --git a/benchmarks/acer/pong_acer_16_workers.png b/benchmarks/acer/pong_acer_16_workers.png new file mode 100644 index 0000000..7102229 Binary files /dev/null and b/benchmarks/acer/pong_acer_16_workers.png differ diff --git a/benchmarks/acer/space_invaders_acer_16_workers.png b/benchmarks/acer/space_invaders_acer_16_workers.png new file mode 100644 index 0000000..afe5ef2 Binary files /dev/null and b/benchmarks/acer/space_invaders_acer_16_workers.png differ diff --git a/docs/_images/acer.png b/docs/_images/acer.png new file mode 100644 index 0000000..7bffc3d Binary files /dev/null and b/docs/_images/acer.png differ diff --git a/docs/_images/algorithms.png b/docs/_images/algorithms.png index ed6b475..983df67 100644 Binary files a/docs/_images/algorithms.png and b/docs/_images/algorithms.png differ diff --git a/docs/_modules/index.html b/docs/_modules/index.html index f5c0c48..20c6476 100644 --- a/docs/_modules/index.html +++ b/docs/_modules/index.html @@ -176,7 +176,8 @@

All modules for which code is available

-
-
[docs] def run_pre_network_filter_for_inference(self, state: StateType) -> StateType: +
[docs] def run_pre_network_filter_for_inference(self, state: StateType, update_filter_internal_state: bool=True)\ + -> StateType: """ Run filters which were defined for being applied right before using the state for inference. :param state: The state to run the filters on + :param update_filter_internal_state: Whether to update the filter's internal state - should be False when evaluating :return: The filtered state """ dummy_env_response = EnvResponse(next_state=state, reward=0, game_over=False) - return self.pre_network_filter.filter(dummy_env_response)[0].next_state
+ return self.pre_network_filter.filter(dummy_env_response, + update_internal_state=update_filter_internal_state)[0].next_state
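To illustrate the intended semantics of the new `update_filter_internal_state` flag (keep updating running statistics during training, freeze them during evaluation), here is a small self-contained toy sketch in plain NumPy. It is not the Coach filter API; the class and its update rule are made up for illustration only.

```python
import numpy as np

class ToyRunningNormalizationFilter:
    """Toy stand-in for a pre-network observation filter with running statistics."""
    def __init__(self):
        self.count = 0
        self.mean = 0.0
        self.m2 = 1.0  # small variance prior so the very first call is well behaved

    def filter(self, observation: np.ndarray, update_internal_state: bool = True) -> np.ndarray:
        x = float(observation.mean())
        if update_internal_state:          # training / heatup: keep updating the running stats
            self.count += 1
            delta = x - self.mean
            self.mean += delta / self.count
            self.m2 += (x - self.mean) * delta
        std = np.sqrt(self.m2 / max(self.count, 1)) + 1e-8
        return (observation - self.mean) / std

f = ToyRunningNormalizationFilter()
train_obs = f.filter(np.random.rand(4))                               # statistics updated
eval_obs = f.filter(np.random.rand(4), update_internal_state=False)   # statistics frozen
```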
[docs] def get_state_embedding(self, state: dict) -> np.ndarray: """ @@ -1153,32 +1157,25 @@ :return: """ - # if we are in the first step in the episode, then we don't have a a next state and a reward and thus no - # transition yet, and therefore we don't need to store anything in the memory. - # also we did not reach the goal yet. - if self.current_episode_steps_counter == 0: - # initialize the current state - return transition.game_over - else: - # sum up the total shaped reward - self.total_shaped_reward_in_current_episode += transition.reward - self.total_reward_in_current_episode += transition.reward - self.shaped_reward.add_sample(transition.reward) - self.reward.add_sample(transition.reward) + # sum up the total shaped reward + self.total_shaped_reward_in_current_episode += transition.reward + self.total_reward_in_current_episode += transition.reward + self.shaped_reward.add_sample(transition.reward) + self.reward.add_sample(transition.reward) + + # create and store the transition + if self.phase in [RunPhase.TRAIN, RunPhase.HEATUP]: + # for episodic memories we keep the transitions in a local buffer until the episode is ended. + # for regular memories we insert the transitions directly to the memory + self.current_episode_buffer.insert(transition) + if not isinstance(self.memory, EpisodicExperienceReplay) \ + and not self.ap.algorithm.store_transitions_only_when_episodes_are_terminated: + self.call_memory('store', transition) - # create and store the transition - if self.phase in [RunPhase.TRAIN, RunPhase.HEATUP]: - # for episodic memories we keep the transitions in a local buffer until the episode is ended. - # for regular memories we insert the transitions directly to the memory - self.current_episode_buffer.insert(transition) - if not isinstance(self.memory, EpisodicExperienceReplay) \ - and not self.ap.algorithm.store_transitions_only_when_episodes_are_terminated: - self.call_memory('store', transition) + if self.ap.visualization.dump_in_episode_signals: + self.update_step_in_episode_log() - if self.ap.visualization.dump_in_episode_signals: - self.update_step_in_episode_log() - - return transition.game_over
+ return transition.game_over # TODO-remove - this is a temporary flow, used by the trainer worker, duplicated from observe() - need to create # an external trainer flow reusing the existing flow and methods [e.g. observe(), step(), act()] @@ -1209,7 +1206,7 @@ """ Collect all of agent's network savers :param parent_path_suffix: path suffix of the parent of the agent - (could be name of level manager or composite agent) + (could be name of level manager or composite agent) :return: collection of all agent savers """ parent_path_suffix = "{}.{}".format(parent_path_suffix, self.name) @@ -1254,7 +1251,8 @@ - + + diff --git a/docs/_modules/rl_coach/agents/bc_agent.html b/docs/_modules/rl_coach/agents/bc_agent.html index 0b088f5..aab3d2c 100644 --- a/docs/_modules/rl_coach/agents/bc_agent.html +++ b/docs/_modules/rl_coach/agents/bc_agent.html @@ -296,7 +296,8 @@ - + + diff --git a/docs/_modules/rl_coach/agents/categorical_dqn_agent.html b/docs/_modules/rl_coach/agents/categorical_dqn_agent.html index 24a1692..667a484 100644 --- a/docs/_modules/rl_coach/agents/categorical_dqn_agent.html +++ b/docs/_modules/rl_coach/agents/categorical_dqn_agent.html @@ -370,7 +370,8 @@ - + + diff --git a/docs/_modules/rl_coach/agents/cil_agent.html b/docs/_modules/rl_coach/agents/cil_agent.html index 1bc34d2..b90f12e 100644 --- a/docs/_modules/rl_coach/agents/cil_agent.html +++ b/docs/_modules/rl_coach/agents/cil_agent.html @@ -302,7 +302,8 @@ - + + diff --git a/docs/_modules/rl_coach/agents/clipped_ppo_agent.html b/docs/_modules/rl_coach/agents/clipped_ppo_agent.html index 69c7030..ff0397c 100644 --- a/docs/_modules/rl_coach/agents/clipped_ppo_agent.html +++ b/docs/_modules/rl_coach/agents/clipped_ppo_agent.html @@ -505,7 +505,7 @@ self.update_log() return None - def run_pre_network_filter_for_inference(self, state: StateType): + def run_pre_network_filter_for_inference(self, state: StateType, update_internal_state: bool=False): dummy_env_response = EnvResponse(next_state=state, reward=0, game_over=False) return self.pre_network_filter.filter(dummy_env_response, update_internal_state=False)[0].next_state @@ -550,7 +550,8 @@ - + + diff --git a/docs/_modules/rl_coach/agents/ddpg_agent.html b/docs/_modules/rl_coach/agents/ddpg_agent.html index 89d9c14..1564e33 100644 --- a/docs/_modules/rl_coach/agents/ddpg_agent.html +++ b/docs/_modules/rl_coach/agents/ddpg_agent.html @@ -431,7 +431,8 @@ - + + diff --git a/docs/_modules/rl_coach/agents/dfp_agent.html b/docs/_modules/rl_coach/agents/dfp_agent.html index f0f5a86..36aca38 100644 --- a/docs/_modules/rl_coach/agents/dfp_agent.html +++ b/docs/_modules/rl_coach/agents/dfp_agent.html @@ -264,7 +264,8 @@ """ :param num_predicted_steps_ahead: (int) Number of future steps to predict measurements for. The future steps won't be sequential, but rather jump - in multiples of 2. For example, if num_predicted_steps_ahead = 3, then the steps will be: t+1, t+2, t+4 + in multiples of 2. For example, if num_predicted_steps_ahead = 3, then the steps will be: t+1, t+2, t+4. + The predicted steps will be [t + 2**i for i in range(num_predicted_steps_ahead)] :param goal_vector: (List[float]) The goal vector will weight each of the measurements to form an optimization goal. 
The vector should have @@ -463,7 +464,8 @@ - + + diff --git a/docs/_modules/rl_coach/agents/dqn_agent.html b/docs/_modules/rl_coach/agents/dqn_agent.html index b956527..5024209 100644 --- a/docs/_modules/rl_coach/agents/dqn_agent.html +++ b/docs/_modules/rl_coach/agents/dqn_agent.html @@ -315,7 +315,8 @@ - + + diff --git a/docs/_modules/rl_coach/agents/mmc_agent.html b/docs/_modules/rl_coach/agents/mmc_agent.html index 5cc71e7..ad10e45 100644 --- a/docs/_modules/rl_coach/agents/mmc_agent.html +++ b/docs/_modules/rl_coach/agents/mmc_agent.html @@ -294,7 +294,8 @@ - + + diff --git a/docs/_modules/rl_coach/agents/n_step_q_agent.html b/docs/_modules/rl_coach/agents/n_step_q_agent.html index b4fe490..d112464 100644 --- a/docs/_modules/rl_coach/agents/n_step_q_agent.html +++ b/docs/_modules/rl_coach/agents/n_step_q_agent.html @@ -361,7 +361,8 @@ - + + diff --git a/docs/_modules/rl_coach/agents/naf_agent.html b/docs/_modules/rl_coach/agents/naf_agent.html index 6d77f87..20346c2 100644 --- a/docs/_modules/rl_coach/agents/naf_agent.html +++ b/docs/_modules/rl_coach/agents/naf_agent.html @@ -342,7 +342,8 @@ - + + diff --git a/docs/_modules/rl_coach/agents/nec_agent.html b/docs/_modules/rl_coach/agents/nec_agent.html index 03eb0cd..5f1e0f4 100644 --- a/docs/_modules/rl_coach/agents/nec_agent.html +++ b/docs/_modules/rl_coach/agents/nec_agent.html @@ -424,7 +424,8 @@ - + + diff --git a/docs/_modules/rl_coach/agents/pal_agent.html b/docs/_modules/rl_coach/agents/pal_agent.html index 51322ce..35c8cd3 100644 --- a/docs/_modules/rl_coach/agents/pal_agent.html +++ b/docs/_modules/rl_coach/agents/pal_agent.html @@ -322,7 +322,8 @@ - + + diff --git a/docs/_modules/rl_coach/agents/policy_gradients_agent.html b/docs/_modules/rl_coach/agents/policy_gradients_agent.html index 0e4e9a6..821dab5 100644 --- a/docs/_modules/rl_coach/agents/policy_gradients_agent.html +++ b/docs/_modules/rl_coach/agents/policy_gradients_agent.html @@ -344,7 +344,8 @@ - + + diff --git a/docs/_modules/rl_coach/agents/ppo_agent.html b/docs/_modules/rl_coach/agents/ppo_agent.html index e7a06f3..e665611 100644 --- a/docs/_modules/rl_coach/agents/ppo_agent.html +++ b/docs/_modules/rl_coach/agents/ppo_agent.html @@ -607,7 +607,8 @@ - + + diff --git a/docs/_modules/rl_coach/agents/qr_dqn_agent.html b/docs/_modules/rl_coach/agents/qr_dqn_agent.html index 43daf0e..568ce17 100644 --- a/docs/_modules/rl_coach/agents/qr_dqn_agent.html +++ b/docs/_modules/rl_coach/agents/qr_dqn_agent.html @@ -335,7 +335,8 @@ - + + diff --git a/docs/_modules/rl_coach/agents/rainbow_dqn_agent.html b/docs/_modules/rl_coach/agents/rainbow_dqn_agent.html index 9addea1..542e9e3 100644 --- a/docs/_modules/rl_coach/agents/rainbow_dqn_agent.html +++ b/docs/_modules/rl_coach/agents/rainbow_dqn_agent.html @@ -347,7 +347,8 @@ - + + diff --git a/docs/_modules/rl_coach/agents/value_optimization_agent.html b/docs/_modules/rl_coach/agents/value_optimization_agent.html index 509f03a..02aacb4 100644 --- a/docs/_modules/rl_coach/agents/value_optimization_agent.html +++ b/docs/_modules/rl_coach/agents/value_optimization_agent.html @@ -313,7 +313,8 @@ - + + diff --git a/docs/_modules/rl_coach/architectures/architecture.html b/docs/_modules/rl_coach/architectures/architecture.html index 8bcc1e5..358b0f3 100644 --- a/docs/_modules/rl_coach/architectures/architecture.html +++ b/docs/_modules/rl_coach/architectures/architecture.html @@ -452,7 +452,8 @@ - + + diff --git a/docs/_modules/rl_coach/architectures/network_wrapper.html b/docs/_modules/rl_coach/architectures/network_wrapper.html 
index 31cf60b..257c987 100644 --- a/docs/_modules/rl_coach/architectures/network_wrapper.html +++ b/docs/_modules/rl_coach/architectures/network_wrapper.html @@ -201,17 +201,6 @@ from rl_coach.saver import SaverCollection from rl_coach.spaces import SpacesDefinition from rl_coach.utils import force_list -try: - import tensorflow as tf - from rl_coach.architectures.tensorflow_components.general_network import GeneralTensorFlowNetwork -except ImportError: - failed_imports.append("tensorflow") - -try: - import mxnet as mx - from rl_coach.architectures.mxnet_components.general_network import GeneralMxnetNetwork -except ImportError: - failed_imports.append("mxnet")
[docs]class NetworkWrapper(object): @@ -233,15 +222,19 @@ self.sess = None if self.network_parameters.framework == Frameworks.tensorflow: - if "tensorflow" not in failed_imports: - general_network = GeneralTensorFlowNetwork.construct - else: + try: + import tensorflow as tf + except ImportError: raise Exception('Install tensorflow before using it as framework') + from rl_coach.architectures.tensorflow_components.general_network import GeneralTensorFlowNetwork + general_network = GeneralTensorFlowNetwork.construct elif self.network_parameters.framework == Frameworks.mxnet: - if "mxnet" not in failed_imports: - general_network = GeneralMxnetNetwork.construct - else: + try: + import mxnet as mx + except ImportError: raise Exception('Install mxnet before using it as framework') + from rl_coach.architectures.mxnet_components.general_network import GeneralMxnetNetwork + general_network = GeneralMxnetNetwork.construct else: raise Exception("{} Framework is not supported" .format(Frameworks().to_string(self.network_parameters.framework))) @@ -475,7 +468,8 @@ - + + diff --git a/docs/_modules/rl_coach/base_parameters.html b/docs/_modules/rl_coach/base_parameters.html index 9a0297f..764fd8f 100644 --- a/docs/_modules/rl_coach/base_parameters.html +++ b/docs/_modules/rl_coach/base_parameters.html @@ -839,7 +839,8 @@ - + + diff --git a/docs/_modules/rl_coach/core_types.html b/docs/_modules/rl_coach/core_types.html index d645c64..ddab0b8 100644 --- a/docs/_modules/rl_coach/core_types.html +++ b/docs/_modules/rl_coach/core_types.html @@ -278,6 +278,10 @@ pass +class Measurements(PredictionType): + pass + + class InputEmbedding(Embedding): pass @@ -306,10 +310,6 @@ pass -class Measurements(PredictionType): - pass - - PlayingStepsType = Union[EnvironmentSteps, EnvironmentEpisodes, Frames] @@ -509,12 +509,12 @@ Action info is a class that holds an action and various additional information details about it """ - def __init__(self, action: ActionType, action_probability: float=0, + def __init__(self, action: ActionType, all_action_probabilities: float=0, action_value: float=0., state_value: float=0., max_action_value: float=None, action_intrinsic_reward: float=0): """ :param action: the action - :param action_probability: the probability that the action was given when selecting it + :param all_action_probabilities: the probability that the action was given when selecting it :param action_value: the state-action value (Q value) of the action :param state_value: the state value (V value) of the state where the action was taken :param max_action_value: in case this is an action that was selected randomly, this is the value of the action @@ -524,7 +524,7 @@ selection """ self.action = action - self.action_probability = action_probability + self.all_action_probabilities = all_action_probabilities self.action_value = action_value self.state_value = state_value if not max_action_value: @@ -1084,7 +1084,8 @@ - + + diff --git a/docs/_modules/rl_coach/data_stores/nfs_data_store.html b/docs/_modules/rl_coach/data_stores/nfs_data_store.html index d86c06a..895aa21 100644 --- a/docs/_modules/rl_coach/data_stores/nfs_data_store.html +++ b/docs/_modules/rl_coach/data_stores/nfs_data_store.html @@ -501,7 +501,8 @@ - + + diff --git a/docs/_modules/rl_coach/data_stores/s3_data_store.html b/docs/_modules/rl_coach/data_stores/s3_data_store.html index dc9fd7e..287df0f 100644 --- a/docs/_modules/rl_coach/data_stores/s3_data_store.html +++ b/docs/_modules/rl_coach/data_stores/s3_data_store.html @@ -384,7 +384,8 @@ - + + diff --git 
a/docs/_modules/rl_coach/environments/carla_environment.html b/docs/_modules/rl_coach/environments/carla_environment.html index 63e873b..6637742 100644 --- a/docs/_modules/rl_coach/environments/carla_environment.html +++ b/docs/_modules/rl_coach/environments/carla_environment.html @@ -683,7 +683,8 @@ - + + diff --git a/docs/_modules/rl_coach/environments/control_suite_environment.html b/docs/_modules/rl_coach/environments/control_suite_environment.html index ea6e543..c5852b0 100644 --- a/docs/_modules/rl_coach/environments/control_suite_environment.html +++ b/docs/_modules/rl_coach/environments/control_suite_environment.html @@ -414,7 +414,8 @@ - + + diff --git a/docs/_modules/rl_coach/environments/doom_environment.html b/docs/_modules/rl_coach/environments/doom_environment.html index f186058..1528cf5 100644 --- a/docs/_modules/rl_coach/environments/doom_environment.html +++ b/docs/_modules/rl_coach/environments/doom_environment.html @@ -483,7 +483,8 @@ - + + diff --git a/docs/_modules/rl_coach/environments/environment.html b/docs/_modules/rl_coach/environments/environment.html index 5b34525..99b7f35 100644 --- a/docs/_modules/rl_coach/environments/environment.html +++ b/docs/_modules/rl_coach/environments/environment.html @@ -240,10 +240,11 @@ logger.screen.error("No level has been selected. Please select a level using the -lvl command line flag, " "or change the level in the preset. \nThe available levels are: \n{}" .format(', '.join(sorted(self.levels.keys()))), crash=True) - if self.selected_level not in self.levels.keys(): + selected_level = self.selected_level.lower() + if selected_level not in self.levels.keys(): logger.screen.error("The selected level ({}) is not part of the available levels ({})" - .format(self.selected_level, ', '.join(self.levels.keys())), crash=True) - return self.levels[self.selected_level] + .format(selected_level, ', '.join(self.levels.keys())), crash=True) + return self.levels[selected_level] # class SingleLevelPerPhase(LevelSelection): @@ -717,7 +718,8 @@ - + + diff --git a/docs/_modules/rl_coach/environments/gym_environment.html b/docs/_modules/rl_coach/environments/gym_environment.html index 406dbd5..b8ce28c 100644 --- a/docs/_modules/rl_coach/environments/gym_environment.html +++ b/docs/_modules/rl_coach/environments/gym_environment.html @@ -559,6 +559,11 @@ num_actions=self.env.action_space.n, descriptions=actions_description ) + else: + raise screen.error(( + "Failed to instantiate gym environment class {} due to unsupported " + "action space {}. Expected BoxActionSpace or DiscreteActionSpace." 
+ ).format(env_class, self.env.action_space), crash=True) if self.human_control: # TODO: add this to the action space @@ -741,7 +746,8 @@ - + + diff --git a/docs/_modules/rl_coach/environments/starcraft2_environment.html b/docs/_modules/rl_coach/environments/starcraft2_environment.html index 5972b90..d0e7ac2 100644 --- a/docs/_modules/rl_coach/environments/starcraft2_environment.html +++ b/docs/_modules/rl_coach/environments/starcraft2_environment.html @@ -466,7 +466,8 @@ - + + diff --git a/docs/_modules/rl_coach/exploration_policies/additive_noise.html b/docs/_modules/rl_coach/exploration_policies/additive_noise.html index 1bd8dca..196539d 100644 --- a/docs/_modules/rl_coach/exploration_policies/additive_noise.html +++ b/docs/_modules/rl_coach/exploration_policies/additive_noise.html @@ -268,7 +268,7 @@ action_values_mean = action_values.squeeze() # step the noise schedule - if self.phase == RunPhase.TRAIN: + if self.phase is not RunPhase.TEST: self.noise_percentage_schedule.step() # the second element of the list is assumed to be the standard deviation if isinstance(action_values, list) and len(action_values) > 1: @@ -318,7 +318,8 @@ - + + diff --git a/docs/_modules/rl_coach/exploration_policies/boltzmann.html b/docs/_modules/rl_coach/exploration_policies/boltzmann.html index a71de9d..ccfb4e9 100644 --- a/docs/_modules/rl_coach/exploration_policies/boltzmann.html +++ b/docs/_modules/rl_coach/exploration_policies/boltzmann.html @@ -280,7 +280,8 @@ - + + diff --git a/docs/_modules/rl_coach/exploration_policies/bootstrapped.html b/docs/_modules/rl_coach/exploration_policies/bootstrapped.html index ea3ac97..306ffad 100644 --- a/docs/_modules/rl_coach/exploration_policies/bootstrapped.html +++ b/docs/_modules/rl_coach/exploration_policies/bootstrapped.html @@ -303,7 +303,8 @@ - + + diff --git a/docs/_modules/rl_coach/exploration_policies/categorical.html b/docs/_modules/rl_coach/exploration_policies/categorical.html index 18925ee..4b9b878 100644 --- a/docs/_modules/rl_coach/exploration_policies/categorical.html +++ b/docs/_modules/rl_coach/exploration_policies/categorical.html @@ -269,7 +269,8 @@ - + + diff --git a/docs/_modules/rl_coach/exploration_policies/continuous_entropy.html b/docs/_modules/rl_coach/exploration_policies/continuous_entropy.html index 39ac379..ec08eca 100644 --- a/docs/_modules/rl_coach/exploration_policies/continuous_entropy.html +++ b/docs/_modules/rl_coach/exploration_policies/continuous_entropy.html @@ -253,7 +253,8 @@ - + + diff --git a/docs/_modules/rl_coach/exploration_policies/e_greedy.html b/docs/_modules/rl_coach/exploration_policies/e_greedy.html index 2e88e22..c7dd0ce 100644 --- a/docs/_modules/rl_coach/exploration_policies/e_greedy.html +++ b/docs/_modules/rl_coach/exploration_policies/e_greedy.html @@ -330,7 +330,8 @@ - + + diff --git a/docs/_modules/rl_coach/exploration_policies/exploration_policy.html b/docs/_modules/rl_coach/exploration_policies/exploration_policy.html index bef11d5..6d03253 100644 --- a/docs/_modules/rl_coach/exploration_policies/exploration_policy.html +++ b/docs/_modules/rl_coach/exploration_policies/exploration_policy.html @@ -299,7 +299,8 @@ - + + diff --git a/docs/_modules/rl_coach/exploration_policies/greedy.html b/docs/_modules/rl_coach/exploration_policies/greedy.html index 8bcfca2..fe265a8 100644 --- a/docs/_modules/rl_coach/exploration_policies/greedy.html +++ b/docs/_modules/rl_coach/exploration_policies/greedy.html @@ -266,7 +266,8 @@ - + + diff --git a/docs/_modules/rl_coach/exploration_policies/ou_process.html 
b/docs/_modules/rl_coach/exploration_policies/ou_process.html index 15e0dcb..ec86d64 100644 --- a/docs/_modules/rl_coach/exploration_policies/ou_process.html +++ b/docs/_modules/rl_coach/exploration_policies/ou_process.html @@ -301,7 +301,8 @@ - + + diff --git a/docs/_modules/rl_coach/exploration_policies/parameter_noise.html b/docs/_modules/rl_coach/exploration_policies/parameter_noise.html index fd5b326..3a4c248 100644 --- a/docs/_modules/rl_coach/exploration_policies/parameter_noise.html +++ b/docs/_modules/rl_coach/exploration_policies/parameter_noise.html @@ -198,7 +198,6 @@ import numpy as np -from rl_coach.agents.dqn_agent import DQNAgentParameters from rl_coach.architectures.layers import NoisyNetDense from rl_coach.base_parameters import AgentParameters, NetworkParameters from rl_coach.spaces import ActionSpace, BoxActionSpace, DiscreteActionSpace @@ -303,7 +302,8 @@ - + + diff --git a/docs/_modules/rl_coach/exploration_policies/truncated_normal.html b/docs/_modules/rl_coach/exploration_policies/truncated_normal.html index 11b9bfc..7cc68f3 100644 --- a/docs/_modules/rl_coach/exploration_policies/truncated_normal.html +++ b/docs/_modules/rl_coach/exploration_policies/truncated_normal.html @@ -272,7 +272,7 @@ action_values_mean = action_values.squeeze() # step the noise schedule - if self.phase == RunPhase.TRAIN: + if self.phase is not RunPhase.TEST: self.noise_percentage_schedule.step() # the second element of the list is assumed to be the standard deviation if isinstance(action_values, list) and len(action_values) > 1: @@ -325,7 +325,8 @@ - + + diff --git a/docs/_modules/rl_coach/exploration_policies/ucb.html b/docs/_modules/rl_coach/exploration_policies/ucb.html index 562dcba..bc4ef54 100644 --- a/docs/_modules/rl_coach/exploration_policies/ucb.html +++ b/docs/_modules/rl_coach/exploration_policies/ucb.html @@ -307,7 +307,8 @@ - + + diff --git a/docs/_modules/rl_coach/filters/action/attention_discretization.html b/docs/_modules/rl_coach/filters/action/attention_discretization.html index a74f30f..78acae4 100644 --- a/docs/_modules/rl_coach/filters/action/attention_discretization.html +++ b/docs/_modules/rl_coach/filters/action/attention_discretization.html @@ -288,7 +288,8 @@ - + + diff --git a/docs/_modules/rl_coach/filters/action/box_discretization.html b/docs/_modules/rl_coach/filters/action/box_discretization.html index b7c42bb..31f8544 100644 --- a/docs/_modules/rl_coach/filters/action/box_discretization.html +++ b/docs/_modules/rl_coach/filters/action/box_discretization.html @@ -288,7 +288,8 @@ - + + diff --git a/docs/_modules/rl_coach/filters/action/box_masking.html b/docs/_modules/rl_coach/filters/action/box_masking.html index 33ad5f1..ed5ad83 100644 --- a/docs/_modules/rl_coach/filters/action/box_masking.html +++ b/docs/_modules/rl_coach/filters/action/box_masking.html @@ -296,7 +296,8 @@ - + + diff --git a/docs/_modules/rl_coach/filters/action/full_discrete_action_space_map.html b/docs/_modules/rl_coach/filters/action/full_discrete_action_space_map.html index 8a1ce8d..b995412 100644 --- a/docs/_modules/rl_coach/filters/action/full_discrete_action_space_map.html +++ b/docs/_modules/rl_coach/filters/action/full_discrete_action_space_map.html @@ -249,7 +249,8 @@ - + + diff --git a/docs/_modules/rl_coach/filters/action/linear_box_to_box_map.html b/docs/_modules/rl_coach/filters/action/linear_box_to_box_map.html index 9bb570a..0a12a71 100644 --- a/docs/_modules/rl_coach/filters/action/linear_box_to_box_map.html +++ 
b/docs/_modules/rl_coach/filters/action/linear_box_to_box_map.html @@ -277,7 +277,8 @@ - + + diff --git a/docs/_modules/rl_coach/filters/action/partial_discrete_action_space_map.html b/docs/_modules/rl_coach/filters/action/partial_discrete_action_space_map.html index bbc8bb0..f895e13 100644 --- a/docs/_modules/rl_coach/filters/action/partial_discrete_action_space_map.html +++ b/docs/_modules/rl_coach/filters/action/partial_discrete_action_space_map.html @@ -274,7 +274,8 @@ - + + diff --git a/docs/_modules/rl_coach/filters/observation/observation_clipping_filter.html b/docs/_modules/rl_coach/filters/observation/observation_clipping_filter.html index 6c98cb9..8132bcd 100644 --- a/docs/_modules/rl_coach/filters/observation/observation_clipping_filter.html +++ b/docs/_modules/rl_coach/filters/observation/observation_clipping_filter.html @@ -262,7 +262,8 @@ - + + diff --git a/docs/_modules/rl_coach/filters/observation/observation_crop_filter.html b/docs/_modules/rl_coach/filters/observation/observation_crop_filter.html index 59d7e86..6f63065 100644 --- a/docs/_modules/rl_coach/filters/observation/observation_crop_filter.html +++ b/docs/_modules/rl_coach/filters/observation/observation_crop_filter.html @@ -309,7 +309,8 @@ - + + diff --git a/docs/_modules/rl_coach/filters/observation/observation_move_axis_filter.html b/docs/_modules/rl_coach/filters/observation/observation_move_axis_filter.html index 0b0340a..65be842 100644 --- a/docs/_modules/rl_coach/filters/observation/observation_move_axis_filter.html +++ b/docs/_modules/rl_coach/filters/observation/observation_move_axis_filter.html @@ -282,7 +282,8 @@ - + + diff --git a/docs/_modules/rl_coach/filters/observation/observation_normalization_filter.html b/docs/_modules/rl_coach/filters/observation/observation_normalization_filter.html index d4a1b9b..cba55b3 100644 --- a/docs/_modules/rl_coach/filters/observation/observation_normalization_filter.html +++ b/docs/_modules/rl_coach/filters/observation/observation_normalization_filter.html @@ -267,6 +267,7 @@ def restore_state_from_checkpoint(self, checkpoint_dir: str, checkpoint_prefix: str): self.running_observation_stats.restore_state_from_checkpoint(checkpoint_dir, checkpoint_prefix)
+ @@ -304,7 +305,8 @@ - + + diff --git a/docs/_modules/rl_coach/filters/observation/observation_reduction_by_sub_parts_name_filter.html b/docs/_modules/rl_coach/filters/observation/observation_reduction_by_sub_parts_name_filter.html index 229b61e..ebabdfa 100644 --- a/docs/_modules/rl_coach/filters/observation/observation_reduction_by_sub_parts_name_filter.html +++ b/docs/_modules/rl_coach/filters/observation/observation_reduction_by_sub_parts_name_filter.html @@ -296,7 +296,8 @@ - + + diff --git a/docs/_modules/rl_coach/filters/observation/observation_rescale_size_by_factor_filter.html b/docs/_modules/rl_coach/filters/observation/observation_rescale_size_by_factor_filter.html index 14917d0..f017060 100644 --- a/docs/_modules/rl_coach/filters/observation/observation_rescale_size_by_factor_filter.html +++ b/docs/_modules/rl_coach/filters/observation/observation_rescale_size_by_factor_filter.html @@ -288,7 +288,8 @@ - + + diff --git a/docs/_modules/rl_coach/filters/observation/observation_rescale_to_size_filter.html b/docs/_modules/rl_coach/filters/observation/observation_rescale_to_size_filter.html index 75bb7cd..b08bf25 100644 --- a/docs/_modules/rl_coach/filters/observation/observation_rescale_to_size_filter.html +++ b/docs/_modules/rl_coach/filters/observation/observation_rescale_to_size_filter.html @@ -314,7 +314,8 @@ - + + diff --git a/docs/_modules/rl_coach/filters/observation/observation_rgb_to_y_filter.html b/docs/_modules/rl_coach/filters/observation/observation_rgb_to_y_filter.html index 6142f0a..b93c916 100644 --- a/docs/_modules/rl_coach/filters/observation/observation_rgb_to_y_filter.html +++ b/docs/_modules/rl_coach/filters/observation/observation_rgb_to_y_filter.html @@ -266,7 +266,8 @@ - + + diff --git a/docs/_modules/rl_coach/filters/observation/observation_squeeze_filter.html b/docs/_modules/rl_coach/filters/observation/observation_squeeze_filter.html index 242e694..12f26b5 100644 --- a/docs/_modules/rl_coach/filters/observation/observation_squeeze_filter.html +++ b/docs/_modules/rl_coach/filters/observation/observation_squeeze_filter.html @@ -264,7 +264,8 @@ - + + diff --git a/docs/_modules/rl_coach/filters/observation/observation_stacking_filter.html b/docs/_modules/rl_coach/filters/observation/observation_stacking_filter.html index cf19730..9d91406 100644 --- a/docs/_modules/rl_coach/filters/observation/observation_stacking_filter.html +++ b/docs/_modules/rl_coach/filters/observation/observation_stacking_filter.html @@ -323,7 +323,8 @@ - + + diff --git a/docs/_modules/rl_coach/filters/observation/observation_to_uint8_filter.html b/docs/_modules/rl_coach/filters/observation/observation_to_uint8_filter.html index 6c8eb0a..fd5c53f 100644 --- a/docs/_modules/rl_coach/filters/observation/observation_to_uint8_filter.html +++ b/docs/_modules/rl_coach/filters/observation/observation_to_uint8_filter.html @@ -280,7 +280,8 @@ - + + diff --git a/docs/_modules/rl_coach/filters/reward/reward_clipping_filter.html b/docs/_modules/rl_coach/filters/reward/reward_clipping_filter.html index 00f79d3..ed198f8 100644 --- a/docs/_modules/rl_coach/filters/reward/reward_clipping_filter.html +++ b/docs/_modules/rl_coach/filters/reward/reward_clipping_filter.html @@ -269,7 +269,8 @@ - + + diff --git a/docs/_modules/rl_coach/filters/reward/reward_normalization_filter.html b/docs/_modules/rl_coach/filters/reward/reward_normalization_filter.html index 24c33aa..1f45f5d 100644 --- a/docs/_modules/rl_coach/filters/reward/reward_normalization_filter.html +++ 
b/docs/_modules/rl_coach/filters/reward/reward_normalization_filter.html @@ -297,7 +297,8 @@ - + + diff --git a/docs/_modules/rl_coach/filters/reward/reward_rescale_filter.html b/docs/_modules/rl_coach/filters/reward/reward_rescale_filter.html index d781780..10f8d0e 100644 --- a/docs/_modules/rl_coach/filters/reward/reward_rescale_filter.html +++ b/docs/_modules/rl_coach/filters/reward/reward_rescale_filter.html @@ -259,7 +259,8 @@ - + + diff --git a/docs/_modules/rl_coach/memories/backend/redis.html b/docs/_modules/rl_coach/memories/backend/redis.html index b00fab0..5a50f90 100644 --- a/docs/_modules/rl_coach/memories/backend/redis.html +++ b/docs/_modules/rl_coach/memories/backend/redis.html @@ -440,7 +440,8 @@ - + + diff --git a/docs/_modules/rl_coach/memories/episodic/episodic_experience_replay.html b/docs/_modules/rl_coach/memories/episodic/episodic_experience_replay.html index 9cdf43b..8fc2325 100644 --- a/docs/_modules/rl_coach/memories/episodic/episodic_experience_replay.html +++ b/docs/_modules/rl_coach/memories/episodic/episodic_experience_replay.html @@ -255,18 +255,27 @@ def num_transitions_in_complete_episodes(self): return self._num_transitions_in_complete_episodes - def sample(self, size: int) -> List[Transition]: + def sample(self, size: int, is_consecutive_transitions=False) -> List[Transition]: """ - Sample a batch of transitions form the replay buffer. If the requested size is larger than the number + Sample a batch of transitions from the replay buffer. If the requested size is larger than the number of samples available in the replay buffer then the batch will return empty. :param size: the size of the batch to sample + :param is_consecutive_transitions: if set True, samples a batch of consecutive transitions. :return: a batch (list) of selected transitions from the replay buffer """ self.reader_writer_lock.lock_writing() if self.num_complete_episodes() >= 1: - transitions_idx = np.random.randint(self.num_transitions_in_complete_episodes(), size=size) - batch = [self.transitions[i] for i in transitions_idx] + if is_consecutive_transitions: + episode_idx = np.random.randint(0, self.num_complete_episodes()) + if self._buffer[episode_idx].length() <= size: + batch = self._buffer[episode_idx].transitions + else: + transition_idx = np.random.randint(size, self._buffer[episode_idx].length()) + batch = self._buffer[episode_idx].transitions[transition_idx-size:transition_idx] + else: + transitions_idx = np.random.randint(self.num_transitions_in_complete_episodes(), size=size) + batch = [self.transitions[i] for i in transitions_idx] else: raise ValueError("The episodic replay buffer cannot be sampled since there are no complete episodes yet. 
" @@ -523,7 +532,8 @@ - + + diff --git a/docs/_modules/rl_coach/memories/episodic/episodic_hindsight_experience_replay.html b/docs/_modules/rl_coach/memories/episodic/episodic_hindsight_experience_replay.html index 2f2a86f..ae82aee 100644 --- a/docs/_modules/rl_coach/memories/episodic/episodic_hindsight_experience_replay.html +++ b/docs/_modules/rl_coach/memories/episodic/episodic_hindsight_experience_replay.html @@ -363,7 +363,8 @@ - + + diff --git a/docs/_modules/rl_coach/memories/episodic/episodic_hrl_hindsight_experience_replay.html b/docs/_modules/rl_coach/memories/episodic/episodic_hrl_hindsight_experience_replay.html index 30b26a1..e5c9589 100644 --- a/docs/_modules/rl_coach/memories/episodic/episodic_hrl_hindsight_experience_replay.html +++ b/docs/_modules/rl_coach/memories/episodic/episodic_hrl_hindsight_experience_replay.html @@ -288,7 +288,8 @@ - + + diff --git a/docs/_modules/rl_coach/memories/episodic/single_episode_buffer.html b/docs/_modules/rl_coach/memories/episodic/single_episode_buffer.html index e455191..7c79772 100644 --- a/docs/_modules/rl_coach/memories/episodic/single_episode_buffer.html +++ b/docs/_modules/rl_coach/memories/episodic/single_episode_buffer.html @@ -248,7 +248,8 @@ - + + diff --git a/docs/_modules/rl_coach/memories/non_episodic/balanced_experience_replay.html b/docs/_modules/rl_coach/memories/non_episodic/balanced_experience_replay.html index a2ca727..c82c656 100644 --- a/docs/_modules/rl_coach/memories/non_episodic/balanced_experience_replay.html +++ b/docs/_modules/rl_coach/memories/non_episodic/balanced_experience_replay.html @@ -388,7 +388,8 @@ - + + diff --git a/docs/_modules/rl_coach/memories/non_episodic/differentiable_neural_dictionary.html b/docs/_modules/rl_coach/memories/non_episodic/differentiable_neural_dictionary.html index bdbed53..7e23007 100644 --- a/docs/_modules/rl_coach/memories/non_episodic/differentiable_neural_dictionary.html +++ b/docs/_modules/rl_coach/memories/non_episodic/differentiable_neural_dictionary.html @@ -511,7 +511,8 @@ - + + diff --git a/docs/_modules/rl_coach/memories/non_episodic/experience_replay.html b/docs/_modules/rl_coach/memories/non_episodic/experience_replay.html index 9b848bc..735d943 100644 --- a/docs/_modules/rl_coach/memories/non_episodic/experience_replay.html +++ b/docs/_modules/rl_coach/memories/non_episodic/experience_replay.html @@ -455,7 +455,8 @@ - + + diff --git a/docs/_modules/rl_coach/memories/non_episodic/prioritized_experience_replay.html b/docs/_modules/rl_coach/memories/non_episodic/prioritized_experience_replay.html index 37d70db..6e84615 100644 --- a/docs/_modules/rl_coach/memories/non_episodic/prioritized_experience_replay.html +++ b/docs/_modules/rl_coach/memories/non_episodic/prioritized_experience_replay.html @@ -514,7 +514,8 @@ - + + diff --git a/docs/_modules/rl_coach/memories/non_episodic/transition_collection.html b/docs/_modules/rl_coach/memories/non_episodic/transition_collection.html index 0fcd72b..cc279c6 100644 --- a/docs/_modules/rl_coach/memories/non_episodic/transition_collection.html +++ b/docs/_modules/rl_coach/memories/non_episodic/transition_collection.html @@ -268,7 +268,8 @@ - + + diff --git a/docs/_modules/rl_coach/orchestrators/kubernetes_orchestrator.html b/docs/_modules/rl_coach/orchestrators/kubernetes_orchestrator.html index 9932e81..47babe0 100644 --- a/docs/_modules/rl_coach/orchestrators/kubernetes_orchestrator.html +++ b/docs/_modules/rl_coach/orchestrators/kubernetes_orchestrator.html @@ -627,7 +627,8 @@ - + + diff --git 
a/docs/_modules/rl_coach/spaces.html b/docs/_modules/rl_coach/spaces.html index eb375d5..b38d15b 100644 --- a/docs/_modules/rl_coach/spaces.html +++ b/docs/_modules/rl_coach/spaces.html @@ -299,7 +299,8 @@
[docs] def contains(self, val: Union[int, float, np.ndarray]) -> bool: """ - Checks if the given value matches the space definition in terms of shape and values + Checks if value is contained by this space. The shape must match and + all of the values must be within the low and high bounds. :param val: a value to check :return: True / False depending on if the val matches the space definition @@ -314,16 +315,16 @@ return False return True
-
[docs] def is_valid_index(self, point: np.ndarray) -> bool: +
[docs] def is_valid_index(self, index: np.ndarray) -> bool: """ - Checks if a given multidimensional point is within the bounds of the shape of the space + Checks if a given multidimensional index is within the bounds of the shape of the space - :param point: a multidimensional point - :return: True if the point is within the shape of the space. False otherwise + :param index: a multidimensional index + :return: True if the index is within the shape of the space. False otherwise """ - if len(point) != self.num_dimensions: + if len(index) != self.num_dimensions: return False - if np.any(point < np.zeros(self.num_dimensions)) or np.any(point >= self.shape): + if np.any(index < np.zeros(self.num_dimensions)) or np.any(index >= self.shape): return False return True
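As a side note on the two checks documented above, here is a self-contained toy illustration in plain NumPy of what `contains` and `is_valid_index` are meant to verify: `contains` validates a value against the space's shape and low/high bounds, while `is_valid_index` validates a multidimensional index against the shape. The standalone helper functions below are illustrative stand-ins, not the Coach `Space` methods themselves.

```python
import numpy as np

def contains(val, low, high, shape):
    """Value check: the shape must match and every entry must lie in [low, high]."""
    val = np.asarray(val)
    return val.shape == shape and np.all(val >= low) and np.all(val <= high)

def is_valid_index(index, shape):
    """Index check: one coordinate per dimension, each within [0, dim)."""
    index = np.asarray(index)
    return len(index) == len(shape) and np.all(index >= 0) and np.all(index < shape)

shape = (84, 84)
print(contains(np.zeros(shape), low=0.0, high=255.0, shape=shape))  # True
print(is_valid_index((10, 20), shape))   # True
print(is_valid_index((84, 0), shape))    # False: first coordinate is out of range
```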
@@ -338,7 +339,21 @@ if np.any(self.low == -np.inf) or np.any(self.high == np.inf): return np.random.normal(0, 1, self.shape) else: - return np.random.uniform(self.low, self.high, self.shape)
+ return np.random.uniform(self.low, self.high, self.shape) + + def val_matches_space_definition(self, val: Union[int, float, np.ndarray]) -> bool: + screen.warning( + "Space.val_matches_space_definition will be deprecated soon. Use " + "contains instead." + ) + return self.contains(val) + + def is_point_in_space_shape(self, point: np.ndarray) -> bool: + screen.warning( + "Space.is_point_in_space_shape will be deprecated soon. Use " + "is_valid_index instead." + ) + return self.is_valid_index(point) class RewardSpace(Space): @@ -568,7 +583,8 @@ return np.random.choice(self.actions) def sample_with_info(self) -> ActionInfo: - return ActionInfo(self.sample(), action_probability=1. / (self.high[0] - self.low[0] + 1)) + return ActionInfo(self.sample(), + all_action_probabilities=np.full(len(self.actions), 1. / (self.high[0] - self.low[0] + 1))) def get_description(self, action: int) -> str: if type(self.descriptions) == list and 0 <= action < len(self.descriptions): @@ -615,7 +631,7 @@ return random.choice(self.actions) def sample_with_info(self) -> ActionInfo: - return ActionInfo(self.sample(), action_probability=1. / len(self.actions)) + return ActionInfo(self.sample(), all_action_probabilities=np.full(len(self.actions), 1. / len(self.actions))) def get_description(self, action: np.ndarray) -> str: if np.sum(len(np.where(action == 0)[0])) + np.sum(len(np.where(action == 1)[0])) != self.shape or \ @@ -856,7 +872,8 @@ - + + diff --git a/docs/_sources/components/agents/index.rst.txt b/docs/_sources/components/agents/index.rst.txt index 1a5cd42..62aaf0e 100644 --- a/docs/_sources/components/agents/index.rst.txt +++ b/docs/_sources/components/agents/index.rst.txt @@ -14,6 +14,7 @@ A detailed description of those algorithms can be found by navigating to each of :caption: Agents policy_optimization/ac + policy_optimization/acer imitation/bc value_optimization/bs_dqn value_optimization/categorical_dqn diff --git a/docs/_sources/components/agents/other/dfp.rst.txt b/docs/_sources/components/agents/other/dfp.rst.txt index 6640f56..86ea7aa 100644 --- a/docs/_sources/components/agents/other/dfp.rst.txt +++ b/docs/_sources/components/agents/other/dfp.rst.txt @@ -32,8 +32,8 @@ Training the network Given a batch of transitions, run them through the network to get the current predictions of the future measurements per action, and set them as the initial targets for training the network. For each transition :math:`(s_t,a_t,r_t,s_{t+1} )` in the batch, the target of the network for the action that was taken, is the actual - measurements that were seen in time-steps :math:`t+1,t+2,t+4,t+8,t+16` and :math:`t+32`. - For the actions that were not taken, the targets are the current values. +measurements that were seen in time-steps :math:`t+1,t+2,t+4,t+8,t+16` and :math:`t+32`. +For the actions that were not taken, the targets are the current values. .. autoclass:: rl_coach.agents.dfp_agent.DFPAlgorithmParameters diff --git a/docs/_sources/components/agents/policy_optimization/acer.rst.txt b/docs/_sources/components/agents/policy_optimization/acer.rst.txt new file mode 100644 index 0000000..7808443 --- /dev/null +++ b/docs/_sources/components/agents/policy_optimization/acer.rst.txt @@ -0,0 +1,60 @@ +ACER +============ + +**Actions space:** Discrete + +**References:** `Sample Efficient Actor-Critic with Experience Replay `_ + +Network Structure +----------------- + +.. 
image:: /_static/img/design_imgs/acer.png + :width: 500px + :align: center + +Algorithm Description +--------------------- + +Choosing an action - Discrete actions ++++++++++++++++++++++++++++++++++++++ + +The policy network is used in order to predict action probabilities. While training, a sample is taken from a categorical +distribution assigned with these probabilities. When testing, the action with the highest probability is used. + +Training the network +++++++++++++++++++++ +Each iteration performs one on-policy update with a batch of the last :math:`T_{max}` transitions, +and :math:`n` (replay ratio) off-policy updates from batches of :math:`T_{max}` transitions sampled from the replay buffer. + +Each update performs the following procedure: + +1. **Calculate state values:** + + .. math:: V(s_t) = \mathbb{E}_{a \sim \pi} [Q(s_t,a)] + +2. **Calculate Q retrace:** + + .. math:: Q^{ret}(s_t,a_t) = r_t +\gamma \bar{\rho}_{t+1}[Q^{ret}(s_{t+1},a_{t+1}) - Q(s_{t+1},a_{t+1})] + \gamma V(s_{t+1}) + .. math:: \text{where} \quad \bar{\rho}_{t} = \min{\left\{c,\rho_t\right\}},\quad \rho_t=\frac{\pi (a_t \mid s_t)}{\mu (a_t \mid s_t)} + +3. **Accumulate gradients:** + :math:`\bullet` **Policy gradients (with bias correction):** + + .. math:: \hat{g}_t^{policy} & = & \bar{\rho}_{t} \nabla \log \pi (a_t \mid s_t) [Q^{ret}(s_t,a_t) - V(s_t)] \\ + & & + \mathbb{E}_{a \sim \pi} \left(\left[\frac{\rho_t(a)-c}{\rho_t(a)}\right] \nabla \log \pi (a \mid s_t) [Q(s_t,a) - V(s_t)] \right) + + :math:`\bullet` **Q-Head gradients (MSE):** + + .. math:: \hat{g}_t^{Q} = (Q^{ret}(s_t,a_t) - Q(s_t,a_t)) \nabla Q(s_t,a_t)\\ + +4. **(Optional) Trust region update:** change the policy loss gradient w.r.t. the network output: + + .. math:: \hat{g}_t^{trust-region} = \hat{g}_t^{policy} - \max \left\{0, \frac{k^T \hat{g}_t^{policy} - \delta}{\lVert k \rVert_2^2}\right\} k + .. math:: \text{where} \quad k = \nabla D_{KL}[\pi_{avg} \parallel \pi] + + The average policy network is an exponential moving average of the parameters of the network (:math:`\theta_{avg}=\alpha\theta_{avg}+(1-\alpha)\theta`). + The goal of the trust region update is to limit the difference between the updated policy and the average policy, to ensure stability. + + + +.. autoclass:: rl_coach.agents.acer_agent.ACERAlgorithmParameters \ No newline at end of file diff --git a/docs/_sources/components/agents/value_optimization/double_dqn.rst.txt b/docs/_sources/components/agents/value_optimization/double_dqn.rst.txt index cb29797..a041c4f 100644 --- a/docs/_sources/components/agents/value_optimization/double_dqn.rst.txt +++ b/docs/_sources/components/agents/value_optimization/double_dqn.rst.txt @@ -19,7 +19,7 @@ Training the network 1. Sample a batch of transitions from the replay buffer. -2. Using the next states from the sampled batch, run the online network in order to find the $Q$ maximizing +2. Using the next states from the sampled batch, run the online network in order to find the :math:`Q` maximizing action :math:`argmax_a Q(s_{t+1},a)`. For these actions, use the corresponding next states and run the target network to calculate :math:`Q(s_{t+1},argmax_a Q(s_{t+1},a))`.
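To make step 2 of the ACER training procedure above (the Q-retrace targets) more concrete, here is a minimal NumPy sketch of the backward recursion. Everything in it is illustrative: the function name, the argument layout, and the single bootstrap value used past the end of the trajectory are assumptions made for this sketch, not Coach's actual implementation (see rl_coach.agents.acer_agent for that).

```python
import numpy as np

def retrace_targets(rewards, actions, q_values, pi_probs, mu_probs,
                    gamma=0.99, c=1.0, bootstrap_value=0.0):
    """Q^ret targets for one sampled trajectory of length T (backward recursion)."""
    T = len(rewards)
    idx = np.arange(T)
    v = (pi_probs * q_values).sum(axis=1)          # V(s_t) = E_{a~pi}[Q(s_t, a)]
    rho_bar = np.minimum(c, pi_probs[idx, actions] / mu_probs[idx, actions])

    q_ret = np.zeros(T)
    next_ret = next_q = next_v = bootstrap_value   # stand-ins past the last step of the trajectory
    next_rho_bar = 1.0
    for t in reversed(range(T)):
        # Q^ret(s_t,a_t) = r_t + gamma * rho_bar_{t+1} * (Q^ret_{t+1} - Q(s_{t+1},a_{t+1})) + gamma * V(s_{t+1})
        q_ret[t] = rewards[t] + gamma * next_rho_bar * (next_ret - next_q) + gamma * next_v
        next_ret, next_q, next_v = q_ret[t], q_values[t, actions[t]], v[t]
        next_rho_bar = rho_bar[t]
    return q_ret

# Toy usage: 3 steps, 2 actions, random current (pi) and behavior (mu) policies.
T, A = 3, 2
rng = np.random.default_rng(0)
pi = rng.dirichlet(np.ones(A), size=T)
mu = rng.dirichlet(np.ones(A), size=T)
targets = retrace_targets(rewards=np.ones(T), actions=np.zeros(T, dtype=int),
                          q_values=rng.normal(size=(T, A)), pi_probs=pi, mu_probs=mu)
print(targets)
```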
diff --git a/docs/_sources/components/agents/value_optimization/dqn.rst.txt b/docs/_sources/components/agents/value_optimization/dqn.rst.txt index 4882e38..7c267c6 100644 --- a/docs/_sources/components/agents/value_optimization/dqn.rst.txt +++ b/docs/_sources/components/agents/value_optimization/dqn.rst.txt @@ -26,7 +26,7 @@ Training the network use the current states from the sampled batch, and run the online network to get the current Q values predictions. Set those values as the targets for the actions that were not actually played. -4. For each action that was played, use the following equation for calculating the targets of the network: $$ y_t=r(s_t,a_t)+γ\cdot max_a {Q(s_{t+1},a)} $$ +4. For each action that was played, use the following equation for calculating the targets of the network: :math:`y_t=r(s_t,a_t)+\gamma \cdot \max_a Q(s_{t+1},a)` 5. Finally, train the online network using the current states as inputs, and with the aforementioned targets. diff --git a/docs/_sources/selecting_an_algorithm.rst.txt b/docs/_sources/selecting_an_algorithm.rst.txt index c867191..e56f802 100644 --- a/docs/_sources/selecting_an_algorithm.rst.txt +++ b/docs/_sources/selecting_an_algorithm.rst.txt @@ -190,6 +190,14 @@ The algorithms are ordered by their release date in descending order. learning stability and speed, both for discrete and continuous action spaces. +
+ + ACER +
+ Similar to A3C with the addition of experience replay and off-policy training. To reduce variance and + improve stability, it also employs bias correction and trust region optimization techniques. +
+
DDPG diff --git a/docs/_static/basic.css b/docs/_static/basic.css index 104f076..0807176 100644 --- a/docs/_static/basic.css +++ b/docs/_static/basic.css @@ -4,7 +4,7 @@ * * Sphinx stylesheet -- basic theme. * - * :copyright: Copyright 2007-2018 by the Sphinx team, see AUTHORS. + * :copyright: Copyright 2007-2019 by the Sphinx team, see AUTHORS. * :license: BSD, see LICENSE for details. * */ diff --git a/docs/_static/doctools.js b/docs/_static/doctools.js index ffadbec..344db17 100644 --- a/docs/_static/doctools.js +++ b/docs/_static/doctools.js @@ -4,7 +4,7 @@ * * Sphinx JavaScript utilities for all documentation. * - * :copyright: Copyright 2007-2018 by the Sphinx team, see AUTHORS. + * :copyright: Copyright 2007-2019 by the Sphinx team, see AUTHORS. * :license: BSD, see LICENSE for details. * */ diff --git a/docs/_static/documentation_options.js b/docs/_static/documentation_options.js index 4e5aeec..11a90c3 100644 --- a/docs/_static/documentation_options.js +++ b/docs/_static/documentation_options.js @@ -7,290 +7,4 @@ var DOCUMENTATION_OPTIONS = { HAS_SOURCE: true, SOURCELINK_SUFFIX: '.txt', NAVIGATION_WITH_KEYS: false, - SEARCH_LANGUAGE_STOP_WORDS: ["a","and","are","as","at","be","but","by","for","if","in","into","is","it","near","no","not","of","on","or","such","that","the","their","then","there","these","they","this","to","was","will","with"] -}; - - - -/* Non-minified version JS is _stemmer.js if file is provided */ -/** - * Porter Stemmer - */ -var Stemmer = function() { - - var step2list = { - ational: 'ate', - tional: 'tion', - enci: 'ence', - anci: 'ance', - izer: 'ize', - bli: 'ble', - alli: 'al', - entli: 'ent', - eli: 'e', - ousli: 'ous', - ization: 'ize', - ation: 'ate', - ator: 'ate', - alism: 'al', - iveness: 'ive', - fulness: 'ful', - ousness: 'ous', - aliti: 'al', - iviti: 'ive', - biliti: 'ble', - logi: 'log' - }; - - var step3list = { - icate: 'ic', - ative: '', - alize: 'al', - iciti: 'ic', - ical: 'ic', - ful: '', - ness: '' - }; - - var c = "[^aeiou]"; // consonant - var v = "[aeiouy]"; // vowel - var C = c + "[^aeiouy]*"; // consonant sequence - var V = v + "[aeiou]*"; // vowel sequence - - var mgr0 = "^(" + C + ")?" + V + C; // [C]VC... is m>0 - var meq1 = "^(" + C + ")?" + V + C + "(" + V + ")?$"; // [C]VC[V] is m=1 - var mgr1 = "^(" + C + ")?" + V + C + V + C; // [C]VCVC... is m>1 - var s_v = "^(" + C + ")?" 
+ v; // vowel in stem - - this.stemWord = function (w) { - var stem; - var suffix; - var firstch; - var origword = w; - - if (w.length < 3) - return w; - - var re; - var re2; - var re3; - var re4; - - firstch = w.substr(0,1); - if (firstch == "y") - w = firstch.toUpperCase() + w.substr(1); - - // Step 1a - re = /^(.+?)(ss|i)es$/; - re2 = /^(.+?)([^s])s$/; - - if (re.test(w)) - w = w.replace(re,"$1$2"); - else if (re2.test(w)) - w = w.replace(re2,"$1$2"); - - // Step 1b - re = /^(.+?)eed$/; - re2 = /^(.+?)(ed|ing)$/; - if (re.test(w)) { - var fp = re.exec(w); - re = new RegExp(mgr0); - if (re.test(fp[1])) { - re = /.$/; - w = w.replace(re,""); - } - } - else if (re2.test(w)) { - var fp = re2.exec(w); - stem = fp[1]; - re2 = new RegExp(s_v); - if (re2.test(stem)) { - w = stem; - re2 = /(at|bl|iz)$/; - re3 = new RegExp("([^aeiouylsz])\\1$"); - re4 = new RegExp("^" + C + v + "[^aeiouwxy]$"); - if (re2.test(w)) - w = w + "e"; - else if (re3.test(w)) { - re = /.$/; - w = w.replace(re,""); - } - else if (re4.test(w)) - w = w + "e"; - } - } - - // Step 1c - re = /^(.+?)y$/; - if (re.test(w)) { - var fp = re.exec(w); - stem = fp[1]; - re = new RegExp(s_v); - if (re.test(stem)) - w = stem + "i"; - } - - // Step 2 - re = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/; - if (re.test(w)) { - var fp = re.exec(w); - stem = fp[1]; - suffix = fp[2]; - re = new RegExp(mgr0); - if (re.test(stem)) - w = stem + step2list[suffix]; - } - - // Step 3 - re = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/; - if (re.test(w)) { - var fp = re.exec(w); - stem = fp[1]; - suffix = fp[2]; - re = new RegExp(mgr0); - if (re.test(stem)) - w = stem + step3list[suffix]; - } - - // Step 4 - re = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/; - re2 = /^(.+?)(s|t)(ion)$/; - if (re.test(w)) { - var fp = re.exec(w); - stem = fp[1]; - re = new RegExp(mgr1); - if (re.test(stem)) - w = stem; - } - else if (re2.test(w)) { - var fp = re2.exec(w); - stem = fp[1] + fp[2]; - re2 = new RegExp(mgr1); - if (re2.test(stem)) - w = stem; - } - - // Step 5 - re = /^(.+?)e$/; - if (re.test(w)) { - var fp = re.exec(w); - stem = fp[1]; - re = new RegExp(mgr1); - re2 = new RegExp(meq1); - re3 = new RegExp("^" + C + v + "[^aeiouwxy]$"); - if (re.test(stem) || (re2.test(stem) && !(re3.test(stem)))) - w = stem; - } - re = /ll$/; - re2 = new RegExp(mgr1); - if (re.test(w) && re2.test(w)) { - re = /.$/; - w = w.replace(re,""); - } - - // and turn initial Y back to y - if (firstch == "y") - w = firstch.toLowerCase() + w.substr(1); - return w; - } -} - - - - - -var splitChars = (function() { - var result = {}; - var singles = [96, 180, 187, 191, 215, 247, 749, 885, 903, 907, 909, 930, 1014, 1648, - 1748, 1809, 2416, 2473, 2481, 2526, 2601, 2609, 2612, 2615, 2653, 2702, - 2706, 2729, 2737, 2740, 2857, 2865, 2868, 2910, 2928, 2948, 2961, 2971, - 2973, 3085, 3089, 3113, 3124, 3213, 3217, 3241, 3252, 3295, 3341, 3345, - 3369, 3506, 3516, 3633, 3715, 3721, 3736, 3744, 3748, 3750, 3756, 3761, - 3781, 3912, 4239, 4347, 4681, 4695, 4697, 4745, 4785, 4799, 4801, 4823, - 4881, 5760, 5901, 5997, 6313, 7405, 8024, 8026, 8028, 8030, 8117, 8125, - 8133, 8181, 8468, 8485, 8487, 8489, 8494, 8527, 11311, 11359, 11687, 11695, - 11703, 11711, 11719, 11727, 11735, 12448, 12539, 43010, 43014, 43019, 43587, - 43696, 43713, 64286, 64297, 64311, 64317, 64319, 64322, 64325, 65141]; - var i, j, start, end; - for (i = 0; i < singles.length; i++) { - 
result[singles[i]] = true; - } - var ranges = [[0, 47], [58, 64], [91, 94], [123, 169], [171, 177], [182, 184], [706, 709], - [722, 735], [741, 747], [751, 879], [888, 889], [894, 901], [1154, 1161], - [1318, 1328], [1367, 1368], [1370, 1376], [1416, 1487], [1515, 1519], [1523, 1568], - [1611, 1631], [1642, 1645], [1750, 1764], [1767, 1773], [1789, 1790], [1792, 1807], - [1840, 1868], [1958, 1968], [1970, 1983], [2027, 2035], [2038, 2041], [2043, 2047], - [2070, 2073], [2075, 2083], [2085, 2087], [2089, 2307], [2362, 2364], [2366, 2383], - [2385, 2391], [2402, 2405], [2419, 2424], [2432, 2436], [2445, 2446], [2449, 2450], - [2483, 2485], [2490, 2492], [2494, 2509], [2511, 2523], [2530, 2533], [2546, 2547], - [2554, 2564], [2571, 2574], [2577, 2578], [2618, 2648], [2655, 2661], [2672, 2673], - [2677, 2692], [2746, 2748], [2750, 2767], [2769, 2783], [2786, 2789], [2800, 2820], - [2829, 2830], [2833, 2834], [2874, 2876], [2878, 2907], [2914, 2917], [2930, 2946], - [2955, 2957], [2966, 2968], [2976, 2978], [2981, 2983], [2987, 2989], [3002, 3023], - [3025, 3045], [3059, 3076], [3130, 3132], [3134, 3159], [3162, 3167], [3170, 3173], - [3184, 3191], [3199, 3204], [3258, 3260], [3262, 3293], [3298, 3301], [3312, 3332], - [3386, 3388], [3390, 3423], [3426, 3429], [3446, 3449], [3456, 3460], [3479, 3481], - [3518, 3519], [3527, 3584], [3636, 3647], [3655, 3663], [3674, 3712], [3717, 3718], - [3723, 3724], [3726, 3731], [3752, 3753], [3764, 3772], [3774, 3775], [3783, 3791], - [3802, 3803], [3806, 3839], [3841, 3871], [3892, 3903], [3949, 3975], [3980, 4095], - [4139, 4158], [4170, 4175], [4182, 4185], [4190, 4192], [4194, 4196], [4199, 4205], - [4209, 4212], [4226, 4237], [4250, 4255], [4294, 4303], [4349, 4351], [4686, 4687], - [4702, 4703], [4750, 4751], [4790, 4791], [4806, 4807], [4886, 4887], [4955, 4968], - [4989, 4991], [5008, 5023], [5109, 5120], [5741, 5742], [5787, 5791], [5867, 5869], - [5873, 5887], [5906, 5919], [5938, 5951], [5970, 5983], [6001, 6015], [6068, 6102], - [6104, 6107], [6109, 6111], [6122, 6127], [6138, 6159], [6170, 6175], [6264, 6271], - [6315, 6319], [6390, 6399], [6429, 6469], [6510, 6511], [6517, 6527], [6572, 6592], - [6600, 6607], [6619, 6655], [6679, 6687], [6741, 6783], [6794, 6799], [6810, 6822], - [6824, 6916], [6964, 6980], [6988, 6991], [7002, 7042], [7073, 7085], [7098, 7167], - [7204, 7231], [7242, 7244], [7294, 7400], [7410, 7423], [7616, 7679], [7958, 7959], - [7966, 7967], [8006, 8007], [8014, 8015], [8062, 8063], [8127, 8129], [8141, 8143], - [8148, 8149], [8156, 8159], [8173, 8177], [8189, 8303], [8306, 8307], [8314, 8318], - [8330, 8335], [8341, 8449], [8451, 8454], [8456, 8457], [8470, 8472], [8478, 8483], - [8506, 8507], [8512, 8516], [8522, 8525], [8586, 9311], [9372, 9449], [9472, 10101], - [10132, 11263], [11493, 11498], [11503, 11516], [11518, 11519], [11558, 11567], - [11622, 11630], [11632, 11647], [11671, 11679], [11743, 11822], [11824, 12292], - [12296, 12320], [12330, 12336], [12342, 12343], [12349, 12352], [12439, 12444], - [12544, 12548], [12590, 12592], [12687, 12689], [12694, 12703], [12728, 12783], - [12800, 12831], [12842, 12880], [12896, 12927], [12938, 12976], [12992, 13311], - [19894, 19967], [40908, 40959], [42125, 42191], [42238, 42239], [42509, 42511], - [42540, 42559], [42592, 42593], [42607, 42622], [42648, 42655], [42736, 42774], - [42784, 42785], [42889, 42890], [42893, 43002], [43043, 43055], [43062, 43071], - [43124, 43137], [43188, 43215], [43226, 43249], [43256, 43258], [43260, 43263], - [43302, 43311], [43335, 43359], 
[43389, 43395], [43443, 43470], [43482, 43519], - [43561, 43583], [43596, 43599], [43610, 43615], [43639, 43641], [43643, 43647], - [43698, 43700], [43703, 43704], [43710, 43711], [43715, 43738], [43742, 43967], - [44003, 44015], [44026, 44031], [55204, 55215], [55239, 55242], [55292, 55295], - [57344, 63743], [64046, 64047], [64110, 64111], [64218, 64255], [64263, 64274], - [64280, 64284], [64434, 64466], [64830, 64847], [64912, 64913], [64968, 65007], - [65020, 65135], [65277, 65295], [65306, 65312], [65339, 65344], [65371, 65381], - [65471, 65473], [65480, 65481], [65488, 65489], [65496, 65497]]; - for (i = 0; i < ranges.length; i++) { - start = ranges[i][0]; - end = ranges[i][1]; - for (j = start; j <= end; j++) { - result[j] = true; - } - } - return result; -})(); - -function splitQuery(query) { - var result = []; - var start = -1; - for (var i = 0; i < query.length; i++) { - if (splitChars[query.charCodeAt(i)]) { - if (start !== -1) { - result.push(query.slice(start, i)); - start = -1; - } - } else if (start === -1) { - start = i; - } - } - if (start !== -1) { - result.push(query.slice(start)); - } - return result; -} - - +}; \ No newline at end of file diff --git a/docs/_static/language_data.js b/docs/_static/language_data.js new file mode 100644 index 0000000..5266fb1 --- /dev/null +++ b/docs/_static/language_data.js @@ -0,0 +1,297 @@ +/* + * language_data.js + * ~~~~~~~~~~~~~~~~ + * + * This script contains the language-specific data used by searchtools.js, + * namely the list of stopwords, stemmer, scorer and splitter. + * + * :copyright: Copyright 2007-2019 by the Sphinx team, see AUTHORS. + * :license: BSD, see LICENSE for details. + * + */ + +var stopwords = ["a","and","are","as","at","be","but","by","for","if","in","into","is","it","near","no","not","of","on","or","such","that","the","their","then","there","these","they","this","to","was","will","with"]; + + +/* Non-minified version JS is _stemmer.js if file is provided */ +/** + * Porter Stemmer + */ +var Stemmer = function() { + + var step2list = { + ational: 'ate', + tional: 'tion', + enci: 'ence', + anci: 'ance', + izer: 'ize', + bli: 'ble', + alli: 'al', + entli: 'ent', + eli: 'e', + ousli: 'ous', + ization: 'ize', + ation: 'ate', + ator: 'ate', + alism: 'al', + iveness: 'ive', + fulness: 'ful', + ousness: 'ous', + aliti: 'al', + iviti: 'ive', + biliti: 'ble', + logi: 'log' + }; + + var step3list = { + icate: 'ic', + ative: '', + alize: 'al', + iciti: 'ic', + ical: 'ic', + ful: '', + ness: '' + }; + + var c = "[^aeiou]"; // consonant + var v = "[aeiouy]"; // vowel + var C = c + "[^aeiouy]*"; // consonant sequence + var V = v + "[aeiou]*"; // vowel sequence + + var mgr0 = "^(" + C + ")?" + V + C; // [C]VC... is m>0 + var meq1 = "^(" + C + ")?" + V + C + "(" + V + ")?$"; // [C]VC[V] is m=1 + var mgr1 = "^(" + C + ")?" + V + C + V + C; // [C]VCVC... is m>1 + var s_v = "^(" + C + ")?" 
+ v; // vowel in stem + + this.stemWord = function (w) { + var stem; + var suffix; + var firstch; + var origword = w; + + if (w.length < 3) + return w; + + var re; + var re2; + var re3; + var re4; + + firstch = w.substr(0,1); + if (firstch == "y") + w = firstch.toUpperCase() + w.substr(1); + + // Step 1a + re = /^(.+?)(ss|i)es$/; + re2 = /^(.+?)([^s])s$/; + + if (re.test(w)) + w = w.replace(re,"$1$2"); + else if (re2.test(w)) + w = w.replace(re2,"$1$2"); + + // Step 1b + re = /^(.+?)eed$/; + re2 = /^(.+?)(ed|ing)$/; + if (re.test(w)) { + var fp = re.exec(w); + re = new RegExp(mgr0); + if (re.test(fp[1])) { + re = /.$/; + w = w.replace(re,""); + } + } + else if (re2.test(w)) { + var fp = re2.exec(w); + stem = fp[1]; + re2 = new RegExp(s_v); + if (re2.test(stem)) { + w = stem; + re2 = /(at|bl|iz)$/; + re3 = new RegExp("([^aeiouylsz])\\1$"); + re4 = new RegExp("^" + C + v + "[^aeiouwxy]$"); + if (re2.test(w)) + w = w + "e"; + else if (re3.test(w)) { + re = /.$/; + w = w.replace(re,""); + } + else if (re4.test(w)) + w = w + "e"; + } + } + + // Step 1c + re = /^(.+?)y$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + re = new RegExp(s_v); + if (re.test(stem)) + w = stem + "i"; + } + + // Step 2 + re = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + suffix = fp[2]; + re = new RegExp(mgr0); + if (re.test(stem)) + w = stem + step2list[suffix]; + } + + // Step 3 + re = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + suffix = fp[2]; + re = new RegExp(mgr0); + if (re.test(stem)) + w = stem + step3list[suffix]; + } + + // Step 4 + re = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/; + re2 = /^(.+?)(s|t)(ion)$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + re = new RegExp(mgr1); + if (re.test(stem)) + w = stem; + } + else if (re2.test(w)) { + var fp = re2.exec(w); + stem = fp[1] + fp[2]; + re2 = new RegExp(mgr1); + if (re2.test(stem)) + w = stem; + } + + // Step 5 + re = /^(.+?)e$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + re = new RegExp(mgr1); + re2 = new RegExp(meq1); + re3 = new RegExp("^" + C + v + "[^aeiouwxy]$"); + if (re.test(stem) || (re2.test(stem) && !(re3.test(stem)))) + w = stem; + } + re = /ll$/; + re2 = new RegExp(mgr1); + if (re.test(w) && re2.test(w)) { + re = /.$/; + w = w.replace(re,""); + } + + // and turn initial Y back to y + if (firstch == "y") + w = firstch.toLowerCase() + w.substr(1); + return w; + } +} + + + + + +var splitChars = (function() { + var result = {}; + var singles = [96, 180, 187, 191, 215, 247, 749, 885, 903, 907, 909, 930, 1014, 1648, + 1748, 1809, 2416, 2473, 2481, 2526, 2601, 2609, 2612, 2615, 2653, 2702, + 2706, 2729, 2737, 2740, 2857, 2865, 2868, 2910, 2928, 2948, 2961, 2971, + 2973, 3085, 3089, 3113, 3124, 3213, 3217, 3241, 3252, 3295, 3341, 3345, + 3369, 3506, 3516, 3633, 3715, 3721, 3736, 3744, 3748, 3750, 3756, 3761, + 3781, 3912, 4239, 4347, 4681, 4695, 4697, 4745, 4785, 4799, 4801, 4823, + 4881, 5760, 5901, 5997, 6313, 7405, 8024, 8026, 8028, 8030, 8117, 8125, + 8133, 8181, 8468, 8485, 8487, 8489, 8494, 8527, 11311, 11359, 11687, 11695, + 11703, 11711, 11719, 11727, 11735, 12448, 12539, 43010, 43014, 43019, 43587, + 43696, 43713, 64286, 64297, 64311, 64317, 64319, 64322, 64325, 65141]; + var i, j, start, end; + for (i = 0; i < singles.length; i++) { + 
result[singles[i]] = true; + } + var ranges = [[0, 47], [58, 64], [91, 94], [123, 169], [171, 177], [182, 184], [706, 709], + [722, 735], [741, 747], [751, 879], [888, 889], [894, 901], [1154, 1161], + [1318, 1328], [1367, 1368], [1370, 1376], [1416, 1487], [1515, 1519], [1523, 1568], + [1611, 1631], [1642, 1645], [1750, 1764], [1767, 1773], [1789, 1790], [1792, 1807], + [1840, 1868], [1958, 1968], [1970, 1983], [2027, 2035], [2038, 2041], [2043, 2047], + [2070, 2073], [2075, 2083], [2085, 2087], [2089, 2307], [2362, 2364], [2366, 2383], + [2385, 2391], [2402, 2405], [2419, 2424], [2432, 2436], [2445, 2446], [2449, 2450], + [2483, 2485], [2490, 2492], [2494, 2509], [2511, 2523], [2530, 2533], [2546, 2547], + [2554, 2564], [2571, 2574], [2577, 2578], [2618, 2648], [2655, 2661], [2672, 2673], + [2677, 2692], [2746, 2748], [2750, 2767], [2769, 2783], [2786, 2789], [2800, 2820], + [2829, 2830], [2833, 2834], [2874, 2876], [2878, 2907], [2914, 2917], [2930, 2946], + [2955, 2957], [2966, 2968], [2976, 2978], [2981, 2983], [2987, 2989], [3002, 3023], + [3025, 3045], [3059, 3076], [3130, 3132], [3134, 3159], [3162, 3167], [3170, 3173], + [3184, 3191], [3199, 3204], [3258, 3260], [3262, 3293], [3298, 3301], [3312, 3332], + [3386, 3388], [3390, 3423], [3426, 3429], [3446, 3449], [3456, 3460], [3479, 3481], + [3518, 3519], [3527, 3584], [3636, 3647], [3655, 3663], [3674, 3712], [3717, 3718], + [3723, 3724], [3726, 3731], [3752, 3753], [3764, 3772], [3774, 3775], [3783, 3791], + [3802, 3803], [3806, 3839], [3841, 3871], [3892, 3903], [3949, 3975], [3980, 4095], + [4139, 4158], [4170, 4175], [4182, 4185], [4190, 4192], [4194, 4196], [4199, 4205], + [4209, 4212], [4226, 4237], [4250, 4255], [4294, 4303], [4349, 4351], [4686, 4687], + [4702, 4703], [4750, 4751], [4790, 4791], [4806, 4807], [4886, 4887], [4955, 4968], + [4989, 4991], [5008, 5023], [5109, 5120], [5741, 5742], [5787, 5791], [5867, 5869], + [5873, 5887], [5906, 5919], [5938, 5951], [5970, 5983], [6001, 6015], [6068, 6102], + [6104, 6107], [6109, 6111], [6122, 6127], [6138, 6159], [6170, 6175], [6264, 6271], + [6315, 6319], [6390, 6399], [6429, 6469], [6510, 6511], [6517, 6527], [6572, 6592], + [6600, 6607], [6619, 6655], [6679, 6687], [6741, 6783], [6794, 6799], [6810, 6822], + [6824, 6916], [6964, 6980], [6988, 6991], [7002, 7042], [7073, 7085], [7098, 7167], + [7204, 7231], [7242, 7244], [7294, 7400], [7410, 7423], [7616, 7679], [7958, 7959], + [7966, 7967], [8006, 8007], [8014, 8015], [8062, 8063], [8127, 8129], [8141, 8143], + [8148, 8149], [8156, 8159], [8173, 8177], [8189, 8303], [8306, 8307], [8314, 8318], + [8330, 8335], [8341, 8449], [8451, 8454], [8456, 8457], [8470, 8472], [8478, 8483], + [8506, 8507], [8512, 8516], [8522, 8525], [8586, 9311], [9372, 9449], [9472, 10101], + [10132, 11263], [11493, 11498], [11503, 11516], [11518, 11519], [11558, 11567], + [11622, 11630], [11632, 11647], [11671, 11679], [11743, 11822], [11824, 12292], + [12296, 12320], [12330, 12336], [12342, 12343], [12349, 12352], [12439, 12444], + [12544, 12548], [12590, 12592], [12687, 12689], [12694, 12703], [12728, 12783], + [12800, 12831], [12842, 12880], [12896, 12927], [12938, 12976], [12992, 13311], + [19894, 19967], [40908, 40959], [42125, 42191], [42238, 42239], [42509, 42511], + [42540, 42559], [42592, 42593], [42607, 42622], [42648, 42655], [42736, 42774], + [42784, 42785], [42889, 42890], [42893, 43002], [43043, 43055], [43062, 43071], + [43124, 43137], [43188, 43215], [43226, 43249], [43256, 43258], [43260, 43263], + [43302, 43311], [43335, 43359], 
[43389, 43395], [43443, 43470], [43482, 43519], + [43561, 43583], [43596, 43599], [43610, 43615], [43639, 43641], [43643, 43647], + [43698, 43700], [43703, 43704], [43710, 43711], [43715, 43738], [43742, 43967], + [44003, 44015], [44026, 44031], [55204, 55215], [55239, 55242], [55292, 55295], + [57344, 63743], [64046, 64047], [64110, 64111], [64218, 64255], [64263, 64274], + [64280, 64284], [64434, 64466], [64830, 64847], [64912, 64913], [64968, 65007], + [65020, 65135], [65277, 65295], [65306, 65312], [65339, 65344], [65371, 65381], + [65471, 65473], [65480, 65481], [65488, 65489], [65496, 65497]]; + for (i = 0; i < ranges.length; i++) { + start = ranges[i][0]; + end = ranges[i][1]; + for (j = start; j <= end; j++) { + result[j] = true; + } + } + return result; +})(); + +function splitQuery(query) { + var result = []; + var start = -1; + for (var i = 0; i < query.length; i++) { + if (splitChars[query.charCodeAt(i)]) { + if (start !== -1) { + result.push(query.slice(start, i)); + start = -1; + } + } else if (start === -1) { + start = i; + } + } + if (start !== -1) { + result.push(query.slice(start)); + } + return result; +} + + diff --git a/docs/_static/searchtools.js b/docs/_static/searchtools.js index 7473859..5ff3180 100644 --- a/docs/_static/searchtools.js +++ b/docs/_static/searchtools.js @@ -4,7 +4,7 @@ * * Sphinx JavaScript utilities for the full-text search. * - * :copyright: Copyright 2007-2018 by the Sphinx team, see AUTHORS. + * :copyright: Copyright 2007-2019 by the Sphinx team, see AUTHORS. * :license: BSD, see LICENSE for details. * */ @@ -138,7 +138,6 @@ var Search = { */ query : function(query) { var i; - var stopwords = DOCUMENTATION_OPTIONS.SEARCH_LANGUAGE_STOP_WORDS; // stem the searchterms and add them to the correct list var stemmer = new Stemmer(); diff --git a/docs/_static/websupport.js b/docs/_static/websupport.js index 78e14bb..3b4999e 100644 --- a/docs/_static/websupport.js +++ b/docs/_static/websupport.js @@ -4,7 +4,7 @@ * * sphinx.websupport utilities for all documentation. * - * :copyright: Copyright 2007-2018 by the Sphinx team, see AUTHORS. + * :copyright: Copyright 2007-2019 by the Sphinx team, see AUTHORS. * :license: BSD, see LICENSE for details. * */ diff --git a/docs/components/additional_parameters.html b/docs/components/additional_parameters.html index 1a48d3d..a5979ab 100644 --- a/docs/components/additional_parameters.html +++ b/docs/components/additional_parameters.html @@ -382,7 +382,8 @@ assigned - + + diff --git a/docs/components/agents/imitation/bc.html b/docs/components/agents/imitation/bc.html index 69e3f68..eb42fe9 100644 --- a/docs/components/agents/imitation/bc.html +++ b/docs/components/agents/imitation/bc.html @@ -30,7 +30,7 @@ - + @@ -107,6 +107,7 @@
@@ -286,7 +287,8 @@ the expert for each state.
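As an aside on the regenerated search assets above: the following is a minimal, hypothetical sketch (not part of this commit) of how the globals added in `docs/_static/language_data.js` (`stopwords`, `Stemmer`, and `splitQuery`) fit together once `searchtools.js` no longer reads `DOCUMENTATION_OPTIONS.SEARCH_LANGUAGE_STOP_WORDS`. The `prepareSearchTerms` helper below is illustrative only and does not appear in the commit.

```javascript
// Illustrative sketch only -- not part of this commit.
// Assumes the globals defined in docs/_static/language_data.js above:
//   stopwords  -- array of English stopwords
//   Stemmer    -- Porter stemmer constructor exposing stemWord(word)
//   splitQuery -- splits a raw query string on non-word characters
function prepareSearchTerms(query) {
  var stemmer = new Stemmer();
  var terms = [];
  var parts = splitQuery(query);
  for (var i = 0; i < parts.length; i++) {
    var word = parts[i].toLowerCase();
    // drop stopwords and single-character tokens before stemming
    if (word.length < 2 || stopwords.indexOf(word) !== -1)
      continue;
    terms.push(stemmer.stemWord(word));
  }
  return terms;
}

// Example: prepareSearchTerms("Filtering the observations") -> ["filter", "observ"]
```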

diff --git a/docs/components/agents/imitation/cil.html b/docs/components/agents/imitation/cil.html index c33c2a5..98119eb 100644 --- a/docs/components/agents/imitation/cil.html +++ b/docs/components/agents/imitation/cil.html @@ -107,6 +107,7 @@