diff --git a/.gitignore b/.gitignore
index 39eaaf1..2f46d8b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -30,3 +30,5 @@ trace_test*
 *.swo
 .cache/
 *.pyc
+coachenv
+
diff --git a/docs/404.html b/docs/404.html
deleted file mode 100644
index 0779c3a..0000000
--- a/docs/404.html
+++ /dev/null
@@ -1,244 +0,0 @@
[244 deleted lines: Sphinx theme markup for the "Reinforcement Learning Coach" docs 404 page — head, navigation sidebar, breadcrumbs, and a "404 / Page not found" body]
diff --git a/docs/algorithms/design_imgs/ac.png b/docs/_images/ac.png similarity index 100% rename from docs/algorithms/design_imgs/ac.png rename to docs/_images/ac.png
diff --git a/docs/img/act.png b/docs/_images/act.png similarity index 100% rename from docs/img/act.png rename to docs/_images/act.png
diff --git a/docs/_images/algorithms.png b/docs/_images/algorithms.png new file mode 100644 index 0000000..ed6b475 Binary files /dev/null and b/docs/_images/algorithms.png differ
diff --git a/docs/_images/attention_discretization.png b/docs/_images/attention_discretization.png new file mode 100644 index 0000000..4e6fec3 Binary files /dev/null and b/docs/_images/attention_discretization.png differ
diff --git a/docs/img/bollinger_bands.png b/docs/_images/bollinger_bands.png similarity index 100% rename from docs/img/bollinger_bands.png rename to docs/_images/bollinger_bands.png
diff --git a/docs/_images/box_discretization.png b/docs/_images/box_discretization.png new file mode 100644 index 0000000..c997139 Binary files /dev/null and b/docs/_images/box_discretization.png differ
diff --git a/docs/_images/box_masking.png b/docs/_images/box_masking.png new file mode 100644 index 0000000..60a39b3 Binary files /dev/null and b/docs/_images/box_masking.png differ
diff --git a/docs/algorithms/design_imgs/bs_dqn.png b/docs/_images/bs_dqn.png similarity index 100% rename from docs/algorithms/design_imgs/bs_dqn.png rename to docs/_images/bs_dqn.png
diff --git a/docs/_images/cil.png b/docs/_images/cil.png new file mode 100644 index 0000000..9113805 Binary files /dev/null and b/docs/_images/cil.png differ
diff --git a/docs/img/compare_by_num_episodes.png b/docs/_images/compare_by_num_episodes.png similarity index 100% rename from docs/img/compare_by_num_episodes.png rename to docs/_images/compare_by_num_episodes.png
diff --git a/docs/img/compare_by_time.png b/docs/_images/compare_by_time.png similarity index 100% rename from docs/img/compare_by_time.png rename to docs/_images/compare_by_time.png
diff --git a/docs/algorithms/design_imgs/ddpg.png b/docs/_images/ddpg.png similarity index 100% rename from docs/algorithms/design_imgs/ddpg.png rename to docs/_images/ddpg.png
diff --git a/docs/_images/design.png b/docs/_images/design.png new file mode 100644 index 0000000..e092984 Binary files /dev/null and b/docs/_images/design.png differ
diff --git a/docs/algorithms/design_imgs/dfp.png b/docs/_images/dfp.png similarity index 100% rename from docs/algorithms/design_imgs/dfp.png rename to docs/_images/dfp.png
diff --git a/docs/img/distributed.png b/docs/_images/distributed.png similarity index 100% rename from docs/img/distributed.png rename to docs/_images/distributed.png
diff --git a/docs/algorithms/design_imgs/distributional_dqn.png b/docs/_images/distributional_dqn.png similarity index 100% rename from docs/algorithms/design_imgs/distributional_dqn.png rename to docs/_images/distributional_dqn.png
diff --git a/docs/algorithms/design_imgs/dqn.png b/docs/_images/dqn.png similarity index 100% rename from docs/algorithms/design_imgs/dqn.png rename to docs/_images/dqn.png
diff --git a/docs/algorithms/design_imgs/dueling_dqn.png b/docs/_images/dueling_dqn.png similarity index 100% rename from docs/algorithms/design_imgs/dueling_dqn.png rename to docs/_images/dueling_dqn.png
diff --git a/docs/img/filters.png b/docs/_images/filters.png similarity index 100% rename from docs/img/filters.png rename to docs/_images/filters.png
diff --git a/docs/_images/full_discrete_action_space_map.png b/docs/_images/full_discrete_action_space_map.png new file mode 100644 index 0000000..efe2d7d Binary files /dev/null and b/docs/_images/full_discrete_action_space_map.png differ
diff --git a/docs/img/improve.png b/docs/_images/improve.png similarity index 100% rename from docs/img/improve.png rename to docs/_images/improve.png
diff --git a/docs/_images/linear_box_to_box_map.png b/docs/_images/linear_box_to_box_map.png new file mode 100644 index 0000000..f30cbf6 Binary files /dev/null and b/docs/_images/linear_box_to_box_map.png differ
diff --git a/docs/algorithms/design_imgs/naf.png b/docs/_images/naf.png similarity index 100% rename from docs/algorithms/design_imgs/naf.png rename to docs/_images/naf.png
diff --git a/docs/algorithms/design_imgs/nec.png b/docs/_images/nec.png similarity index 100% rename from docs/algorithms/design_imgs/nec.png rename to docs/_images/nec.png
diff --git a/docs/img/network.png b/docs/_images/network.png similarity index 100% rename from docs/img/network.png rename to docs/_images/network.png
diff --git a/docs/img/observe.png b/docs/_images/observe.png similarity index 100% rename from docs/img/observe.png rename to docs/_images/observe.png
diff --git a/docs/_images/partial_discrete_action_space_map.png b/docs/_images/partial_discrete_action_space_map.png new file mode 100644 index 0000000..6cad5bb Binary files /dev/null and b/docs/_images/partial_discrete_action_space_map.png differ
diff --git a/docs/algorithms/design_imgs/pg.png b/docs/_images/pg.png similarity index 100% rename from docs/algorithms/design_imgs/pg.png rename to docs/_images/pg.png
diff --git a/docs/algorithms/design_imgs/ppo.png b/docs/_images/ppo.png similarity index 100% rename from docs/algorithms/design_imgs/ppo.png rename to docs/_images/ppo.png
diff --git a/docs/_images/qr_dqn.png b/docs/_images/qr_dqn.png new file mode 100644 index 0000000..a74d72f Binary files /dev/null and b/docs/_images/qr_dqn.png differ
diff --git a/docs/_images/rainbow.png b/docs/_images/rainbow.png new file mode 100644 index 0000000..b3e266d Binary files /dev/null and b/docs/_images/rainbow.png differ
diff --git a/docs/img/separate_signals.png b/docs/_images/separate_signals.png similarity index 100% rename from docs/img/separate_signals.png rename to docs/_images/separate_signals.png
diff --git a/docs/img/train.png b/docs/_images/train.png similarity index 100% rename from docs/img/train.png rename to docs/_images/train.png
diff --git a/docs/img/updating_dynamically.gif b/docs/_images/updating_dynamically.gif similarity index 100% rename from docs/img/updating_dynamically.gif rename to docs/_images/updating_dynamically.gif
diff --git a/docs/_modules/index.html b/docs/_modules/index.html new file mode 100644 index 0000000..c1cdbb1 --- /dev/null +++ b/docs/_modules/index.html @@ -0,0 +1,296 @@
Overview: module code — Reinforcement Learning Coach 0.11.0 documentation
All modules for which code is available
\ No newline at end of file
diff --git a/docs/_modules/rl_coach/agents/actor_critic_agent.html b/docs/_modules/rl_coach/agents/actor_critic_agent.html new file mode 100644 index 0000000..a897b1a --- /dev/null +++ b/docs/_modules/rl_coach/agents/actor_critic_agent.html @@ -0,0 +1,413 @@
rl_coach.agents.actor_critic_agent — Reinforcement Learning Coach 0.11.0 documentation
Source code for rl_coach.agents.actor_critic_agent

+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from typing import Union
+
+import numpy as np
+import scipy.signal
+
+from rl_coach.agents.policy_optimization_agent import PolicyOptimizationAgent, PolicyGradientRescaler
+from rl_coach.architectures.embedder_parameters import InputEmbedderParameters
+from rl_coach.architectures.head_parameters import PolicyHeadParameters, VHeadParameters
+from rl_coach.architectures.middleware_parameters import FCMiddlewareParameters
+from rl_coach.base_parameters import AlgorithmParameters, NetworkParameters, \
+    AgentParameters
+from rl_coach.exploration_policies.categorical import CategoricalParameters
+from rl_coach.exploration_policies.continuous_entropy import ContinuousEntropyParameters
+from rl_coach.logger import screen
+from rl_coach.memories.episodic.single_episode_buffer import SingleEpisodeBufferParameters
+from rl_coach.spaces import DiscreteActionSpace, BoxActionSpace
+from rl_coach.utils import last_sample
+
+
+
[docs]class ActorCriticAlgorithmParameters(AlgorithmParameters): + """ + :param policy_gradient_rescaler: (PolicyGradientRescaler) + The value that will be used to rescale the policy gradient + + :param apply_gradients_every_x_episodes: (int) + The number of episodes to wait before applying the accumulated gradients to the network. + The training iterations only accumulate gradients without actually applying them. + + :param beta_entropy: (float) + The weight that will be given to the entropy regularization which is used in order to improve exploration. + + :param num_steps_between_gradient_updates: (int) + Every num_steps_between_gradient_updates transitions will be considered as a single batch and use for + accumulating gradients. This is also the number of steps used for bootstrapping according to the n-step formulation. + + :param gae_lambda: (float) + If the policy gradient rescaler was defined as PolicyGradientRescaler.GAE, the generalized advantage estimation + scheme will be used, in which case the lambda value controls the decay for the different n-step lengths. + + :param estimate_state_value_using_gae: (bool) + If set to True, the state value targets for the V head will be estimated using the GAE scheme. + """ + def __init__(self): + super().__init__() + self.policy_gradient_rescaler = PolicyGradientRescaler.A_VALUE + self.apply_gradients_every_x_episodes = 5 + self.beta_entropy = 0 + self.num_steps_between_gradient_updates = 5000 # this is called t_max in all the papers + self.gae_lambda = 0.96 + self.estimate_state_value_using_gae = False
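The algorithm parameters documented above are plain attributes, so a preset typically overrides them after construction. A minimal, illustrative sketch (the numeric values are arbitrary examples, not recommended settings); PolicyGradientRescaler is imported at the top of this module:

# Illustrative only: override the algorithm parameters documented above.
from rl_coach.agents.actor_critic_agent import ActorCriticAlgorithmParameters
from rl_coach.agents.policy_optimization_agent import PolicyGradientRescaler

algorithm = ActorCriticAlgorithmParameters()
algorithm.policy_gradient_rescaler = PolicyGradientRescaler.GAE  # use GAE instead of the default A_VALUE
algorithm.gae_lambda = 0.96            # decay across the different n-step lengths
algorithm.beta_entropy = 0.01          # weight of the entropy regularization term
algorithm.num_steps_between_gradient_updates = 20   # t_max in the A3C formulation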
+ + +class ActorCriticNetworkParameters(NetworkParameters): + def __init__(self): + super().__init__() + self.input_embedders_parameters = {'observation': InputEmbedderParameters()} + self.middleware_parameters = FCMiddlewareParameters() + self.heads_parameters = [VHeadParameters(loss_weight=0.5), PolicyHeadParameters(loss_weight=1.0)] + self.optimizer_type = 'Adam' + self.clip_gradients = 40.0 + self.async_training = True + + +class ActorCriticAgentParameters(AgentParameters): + def __init__(self): + super().__init__(algorithm=ActorCriticAlgorithmParameters(), + exploration={DiscreteActionSpace: CategoricalParameters(), + BoxActionSpace: ContinuousEntropyParameters()}, + memory=SingleEpisodeBufferParameters(), + networks={"main": ActorCriticNetworkParameters()}) + + @property + def path(self): + return 'rl_coach.agents.actor_critic_agent:ActorCriticAgent' + + +# Actor Critic - https://arxiv.org/abs/1602.01783 +class ActorCriticAgent(PolicyOptimizationAgent): + def __init__(self, agent_parameters, parent: Union['LevelManager', 'CompositeAgent']=None): + super().__init__(agent_parameters, parent) + self.last_gradient_update_step_idx = 0 + self.action_advantages = self.register_signal('Advantages') + self.state_values = self.register_signal('Values') + self.value_loss = self.register_signal('Value Loss') + self.policy_loss = self.register_signal('Policy Loss') + + # Discounting function used to calculate discounted returns. + def discount(self, x, gamma): + return scipy.signal.lfilter([1], [1, -gamma], x[::-1], axis=0)[::-1] + + def get_general_advantage_estimation_values(self, rewards, values): + # values contain n+1 elements (t ... t+n+1), rewards contain n elements (t ... t + n) + bootstrap_extended_rewards = np.array(rewards.tolist() + [values[-1]]) + + # Approximation based calculation of GAE (mathematically correct only when Tmax = inf, + # although in practice works even in much smaller Tmax values, e.g. 
20) + deltas = rewards + self.ap.algorithm.discount * values[1:] - values[:-1] + gae = self.discount(deltas, self.ap.algorithm.discount * self.ap.algorithm.gae_lambda) + + if self.ap.algorithm.estimate_state_value_using_gae: + discounted_returns = np.expand_dims(gae + values[:-1], -1) + else: + discounted_returns = np.expand_dims(np.array(self.discount(bootstrap_extended_rewards, + self.ap.algorithm.discount)), 1)[:-1] + return gae, discounted_returns + + def learn_from_batch(self, batch): + # batch contains a list of episodes to learn from + network_keys = self.ap.network_wrappers['main'].input_embedders_parameters.keys() + + # get the values for the current states + + result = self.networks['main'].online_network.predict(batch.states(network_keys)) + current_state_values = result[0] + + self.state_values.add_sample(current_state_values) + + # the targets for the state value estimator + num_transitions = batch.size + state_value_head_targets = np.zeros((num_transitions, 1)) + + # estimate the advantage function + action_advantages = np.zeros((num_transitions, 1)) + + if self.policy_gradient_rescaler == PolicyGradientRescaler.A_VALUE: + if batch.game_overs()[-1]: + R = 0 + else: + R = self.networks['main'].online_network.predict(last_sample(batch.next_states(network_keys)))[0] + + for i in reversed(range(num_transitions)): + R = batch.rewards()[i] + self.ap.algorithm.discount * R + state_value_head_targets[i] = R + action_advantages[i] = R - current_state_values[i] + + elif self.policy_gradient_rescaler == PolicyGradientRescaler.GAE: + # get bootstraps + bootstrapped_value = self.networks['main'].online_network.predict(last_sample(batch.next_states(network_keys)))[0] + values = np.append(current_state_values, bootstrapped_value) + if batch.game_overs()[-1]: + values[-1] = 0 + + # get general discounted returns table + gae_values, state_value_head_targets = self.get_general_advantage_estimation_values(batch.rewards(), values) + action_advantages = np.vstack(gae_values) + else: + screen.warning("WARNING: The requested policy gradient rescaler is not available") + + action_advantages = action_advantages.squeeze(axis=-1) + actions = batch.actions() + if not isinstance(self.spaces.action, DiscreteActionSpace) and len(actions.shape) < 2: + actions = np.expand_dims(actions, -1) + + # train + result = self.networks['main'].online_network.accumulate_gradients({**batch.states(network_keys), + 'output_1_0': actions}, + [state_value_head_targets, action_advantages]) + + # logging + total_loss, losses, unclipped_grads = result[:3] + self.action_advantages.add_sample(action_advantages) + self.unclipped_grads.add_sample(unclipped_grads) + self.value_loss.add_sample(losses[0]) + self.policy_loss.add_sample(losses[1]) + + return total_loss, losses, unclipped_grads + + def get_prediction(self, states): + tf_input_state = self.prepare_batch_for_inference(states, "main") + return self.networks['main'].online_network.predict(tf_input_state)[1:] # index 0 is the state value +
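For readers following the GAE code above, here is a small standalone sketch of the lfilter-based discounting it relies on. The rewards and values below are made up; `discount` mirrors ActorCriticAgent.discount:

# Standalone sketch of the discounting used by ActorCriticAgent.discount():
# lfilter([1], [1, -gamma]) over the reversed sequence computes
#   y[t] = x[t] + gamma * x[t+1] + gamma**2 * x[t+2] + ...
# GAE applies the same filter to the TD residuals
#   delta_t = r_t + gamma * V(s_{t+1}) - V(s_t), with gamma replaced by gamma * lambda.
import numpy as np
import scipy.signal

def discount(x, gamma):
    return scipy.signal.lfilter([1], [1, -gamma], x[::-1], axis=0)[::-1]

rewards = np.array([1.0, 0.0, 0.0, 1.0])        # made-up rewards r_t ... r_{t+3}
values = np.array([0.5, 0.4, 0.3, 0.2, 0.1])    # V(s_t) ... V(s_{t+4}); the last value bootstraps
gamma, lam = 0.99, 0.96

deltas = rewards + gamma * values[1:] - values[:-1]             # TD residuals
advantages = discount(deltas, gamma * lam)                      # GAE advantages
returns = discount(np.append(rewards, values[-1]), gamma)[:-1]  # n-step discounted returns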
\ No newline at end of file
diff --git a/docs/_modules/rl_coach/agents/agent.html b/docs/_modules/rl_coach/agents/agent.html new file mode 100644 index 0000000..59234c3 --- /dev/null +++ b/docs/_modules/rl_coach/agents/agent.html @@ -0,0 +1,1153 @@
rl_coach.agents.agent — Reinforcement Learning Coach 0.11.0 documentation
Source code for rl_coach.agents.agent

+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import copy
+import random
+from collections import OrderedDict
+from typing import Dict, List, Union, Tuple
+
+import numpy as np
+from pandas import read_pickle
+from six.moves import range
+
+from rl_coach.agents.agent_interface import AgentInterface
+from rl_coach.architectures.network_wrapper import NetworkWrapper
+from rl_coach.base_parameters import AgentParameters, DistributedTaskParameters
+from rl_coach.core_types import RunPhase, PredictionType, EnvironmentEpisodes, ActionType, Batch, Episode, StateType
+from rl_coach.core_types import Transition, ActionInfo, TrainingSteps, EnvironmentSteps, EnvResponse
+from rl_coach.logger import screen, Logger, EpisodeLogger
+from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplay
+from rl_coach.spaces import SpacesDefinition, VectorObservationSpace, GoalsSpace, AttentionActionSpace
+from rl_coach.utils import Signal, force_list
+from rl_coach.utils import dynamic_import_and_instantiate_module_from_params
+from rl_coach.memories.backend.memory_impl import get_memory_backend
+
+
+
[docs]class Agent(AgentInterface): + def __init__(self, agent_parameters: AgentParameters, parent: Union['LevelManager', 'CompositeAgent']=None): + """ + :param agent_parameters: A AgentParameters class instance with all the agent parameters + """ + super().__init__() + self.ap = agent_parameters + self.task_id = self.ap.task_parameters.task_index + self.is_chief = self.task_id == 0 + self.shared_memory = type(agent_parameters.task_parameters) == DistributedTaskParameters \ + and self.ap.memory.shared_memory + if self.shared_memory: + self.shared_memory_scratchpad = self.ap.task_parameters.shared_memory_scratchpad + self.name = agent_parameters.name + self.parent = parent + self.parent_level_manager = None + self.full_name_id = agent_parameters.full_name_id = self.name + + if type(agent_parameters.task_parameters) == DistributedTaskParameters: + screen.log_title("Creating agent - name: {} task id: {} (may take up to 30 seconds due to " + "tensorflow wake up time)".format(self.full_name_id, self.task_id)) + else: + screen.log_title("Creating agent - name: {}".format(self.full_name_id)) + self.imitation = False + self.agent_logger = Logger() + self.agent_episode_logger = EpisodeLogger() + + # get the memory + # - distributed training + shared memory: + # * is chief? -> create the memory and add it to the scratchpad + # * not chief? -> wait for the chief to create the memory and then fetch it + # - non distributed training / not shared memory: + # * create memory + memory_name = self.ap.memory.path.split(':')[1] + self.memory_lookup_name = self.full_name_id + '.' + memory_name + if self.shared_memory and not self.is_chief: + self.memory = self.shared_memory_scratchpad.get(self.memory_lookup_name) + else: + # modules + self.memory = dynamic_import_and_instantiate_module_from_params(self.ap.memory) + + if hasattr(self.ap.memory, 'memory_backend_params'): + self.memory_backend = get_memory_backend(self.ap.memory.memory_backend_params) + + if self.ap.memory.memory_backend_params.run_type == 'trainer': + self.memory_backend.subscribe(self) + else: + self.memory.set_memory_backend(self.memory_backend) + + if agent_parameters.memory.load_memory_from_file_path: + screen.log_title("Loading replay buffer from pickle. 
Pickle path: {}" + .format(agent_parameters.memory.load_memory_from_file_path)) + self.memory.load(agent_parameters.memory.load_memory_from_file_path) + + if self.shared_memory and self.is_chief: + self.shared_memory_scratchpad.add(self.memory_lookup_name, self.memory) + + # set devices + if type(agent_parameters.task_parameters) == DistributedTaskParameters: + self.has_global = True + self.replicated_device = agent_parameters.task_parameters.device + self.worker_device = "/job:worker/task:{}".format(self.task_id) + else: + self.has_global = False + self.replicated_device = None + self.worker_device = "" + if agent_parameters.task_parameters.use_cpu: + self.worker_device += "/cpu:0" + else: + self.worker_device += "/device:GPU:0" + + # filters + self.input_filter = self.ap.input_filter + self.output_filter = self.ap.output_filter + self.pre_network_filter = self.ap.pre_network_filter + device = self.replicated_device if self.replicated_device else self.worker_device + if hasattr(self.ap.memory, 'memory_backend_params') and self.ap.algorithm.distributed_coach_synchronization_type: + self.input_filter.set_device(device, memory_backend_params=self.ap.memory.memory_backend_params) + self.output_filter.set_device(device, memory_backend_params=self.ap.memory.memory_backend_params) + self.pre_network_filter.set_device(device, memory_backend_params=self.ap.memory.memory_backend_params) + else: + self.input_filter.set_device(device) + self.output_filter.set_device(device) + self.pre_network_filter.set_device(device) + + # initialize all internal variables + self._phase = RunPhase.HEATUP + self.total_shaped_reward_in_current_episode = 0 + self.total_reward_in_current_episode = 0 + self.total_steps_counter = 0 + self.running_reward = None + self.training_iteration = 0 + self.last_target_network_update_step = 0 + self.last_training_phase_step = 0 + self.current_episode = self.ap.current_episode = 0 + self.curr_state = {} + self.current_hrl_goal = None + self.current_episode_steps_counter = 0 + self.episode_running_info = {} + self.last_episode_evaluation_ran = 0 + self.running_observations = [] + self.agent_logger.set_current_time(self.current_episode) + self.exploration_policy = None + self.networks = {} + self.last_action_info = None + self.running_observation_stats = None + self.running_reward_stats = None + self.accumulated_rewards_across_evaluation_episodes = 0 + self.accumulated_shaped_rewards_across_evaluation_episodes = 0 + self.num_successes_across_evaluation_episodes = 0 + self.num_evaluation_episodes_completed = 0 + self.current_episode_buffer = Episode(discount=self.ap.algorithm.discount, n_step=self.ap.algorithm.n_step) + # TODO: add agents observation rendering for debugging purposes (not the same as the environment rendering) + + # environment parameters + self.spaces = None + self.in_action_space = self.ap.algorithm.in_action_space + + # signals + self.episode_signals = [] + self.step_signals = [] + self.loss = self.register_signal('Loss') + self.curr_learning_rate = self.register_signal('Learning Rate') + self.unclipped_grads = self.register_signal('Grads (unclipped)') + self.reward = self.register_signal('Reward', dump_one_value_per_episode=False, dump_one_value_per_step=True) + self.shaped_reward = self.register_signal('Shaped Reward', dump_one_value_per_episode=False, dump_one_value_per_step=True) + self.discounted_return = self.register_signal('Discounted Return') + if isinstance(self.in_action_space, GoalsSpace): + self.distance_from_goal = self.register_signal('Distance From 
Goal', dump_one_value_per_step=True) + # use seed + if self.ap.task_parameters.seed is not None: + random.seed(self.ap.task_parameters.seed) + np.random.seed(self.ap.task_parameters.seed) + else: + # we need to seed the RNG since the different processes are initialized with the same parent seed + random.seed() + np.random.seed() + + @property + def parent(self) -> 'LevelManager': + """ + Get the parent class of the agent + + :return: the current phase + """ + return self._parent + + @parent.setter + def parent(self, val) -> None: + """ + Change the parent class of the agent. + Additionally, updates the full name of the agent + + :param val: the new parent + :return: None + """ + self._parent = val + if self._parent is not None: + if not hasattr(self._parent, 'name'): + raise ValueError("The parent of an agent must have a name") + self.full_name_id = self.ap.full_name_id = "{}/{}".format(self._parent.name, self.name) + +
[docs] def setup_logger(self) -> None: + """ + Setup the logger for the agent + + :return: None + """ + # dump documentation + logger_prefix = "{graph_name}.{level_name}.{agent_full_id}".\ + format(graph_name=self.parent_level_manager.parent_graph_manager.name, + level_name=self.parent_level_manager.name, + agent_full_id='.'.join(self.full_name_id.split('/'))) + self.agent_logger.set_logger_filenames(self.ap.task_parameters.experiment_path, logger_prefix=logger_prefix, + add_timestamp=True, task_id=self.task_id) + if self.ap.visualization.dump_in_episode_signals: + self.agent_episode_logger.set_logger_filenames(self.ap.task_parameters.experiment_path, + logger_prefix=logger_prefix, + add_timestamp=True, task_id=self.task_id)
+ +
[docs] def set_session(self, sess) -> None: + """ + Set the deep learning framework session for all the agents in the composite agent + + :return: None + """ + self.input_filter.set_session(sess) + self.output_filter.set_session(sess) + self.pre_network_filter.set_session(sess) + [network.set_session(sess) for network in self.networks.values()]
+ +
[docs] def register_signal(self, signal_name: str, dump_one_value_per_episode: bool=True, + dump_one_value_per_step: bool=False) -> Signal: + """ + Register a signal such that its statistics will be dumped and be viewable through dashboard + + :param signal_name: the name of the signal as it will appear in dashboard + :param dump_one_value_per_episode: should the signal value be written for each episode? + :param dump_one_value_per_step: should the signal value be written for each step? + :return: the created signal + """ + signal = Signal(signal_name) + if dump_one_value_per_episode: + self.episode_signals.append(signal) + if dump_one_value_per_step: + self.step_signals.append(signal) + return signal
+ +
[docs] def set_environment_parameters(self, spaces: SpacesDefinition): + """ + Sets the parameters that are environment dependent. As a side effect, initializes all the components that are + dependent on those values, by calling init_environment_dependent_modules + + :param spaces: the environment spaces definition + :return: None + """ + self.spaces = copy.deepcopy(spaces) + + if self.ap.algorithm.use_accumulated_reward_as_measurement: + if 'measurements' in self.spaces.state.sub_spaces: + self.spaces.state['measurements'].shape += 1 + self.spaces.state['measurements'].measurements_names += ['accumulated_reward'] + else: + self.spaces.state['measurements'] = VectorObservationSpace(1, measurements_names=['accumulated_reward']) + + for observation_name in self.spaces.state.sub_spaces.keys(): + self.spaces.state[observation_name] = \ + self.pre_network_filter.get_filtered_observation_space(observation_name, + self.input_filter.get_filtered_observation_space(observation_name, + self.spaces.state[observation_name])) + + self.spaces.reward = self.pre_network_filter.get_filtered_reward_space( + self.input_filter.get_filtered_reward_space(self.spaces.reward)) + + self.spaces.action = self.output_filter.get_unfiltered_action_space(self.spaces.action) + + if isinstance(self.in_action_space, GoalsSpace): + # TODO: what if the goal type is an embedding / embedding change? + self.spaces.goal = self.in_action_space + self.spaces.goal.set_target_space(self.spaces.state[self.spaces.goal.goal_name]) + + self.init_environment_dependent_modules()
+ +
[docs] def create_networks(self) -> Dict[str, NetworkWrapper]: + """ + Create all the networks of the agent. + The network creation will be done after setting the environment parameters for the agent, since they are needed + for creating the network. + + :return: A list containing all the networks + """ + networks = {} + for network_name in sorted(self.ap.network_wrappers.keys()): + networks[network_name] = NetworkWrapper(name=network_name, + agent_parameters=self.ap, + has_target=self.ap.network_wrappers[network_name].create_target_network, + has_global=self.has_global, + spaces=self.spaces, + replicated_device=self.replicated_device, + worker_device=self.worker_device) + + if self.ap.visualization.print_networks_summary: + print(networks[network_name]) + + return networks
+ +
[docs] def init_environment_dependent_modules(self) -> None: + """ + Initialize any modules that depend on knowing information about the environment such as the action space or + the observation space + + :return: None + """ + # initialize exploration policy + if isinstance(self.ap.exploration, dict): + if self.spaces.action.__class__ in self.ap.exploration.keys(): + self.ap.exploration = self.ap.exploration[self.spaces.action.__class__] + else: + raise ValueError("The exploration parameters were defined as a mapping between action space types and " + "exploration types, but the action space used by the environment ({}) was not part of " + "the exploration parameters dictionary keys ({})" + .format(self.spaces.action.__class__, list(self.ap.exploration.keys()))) + self.ap.exploration.action_space = self.spaces.action + self.exploration_policy = dynamic_import_and_instantiate_module_from_params(self.ap.exploration) + + # create all the networks of the agent + self.networks = self.create_networks()
+ + @property + def phase(self) -> RunPhase: + """ + The current running phase of the agent + + :return: RunPhase + """ + return self._phase + + @phase.setter + def phase(self, val: RunPhase) -> None: + """ + Change the phase of the run for the agent and all the sub components + + :param val: the new run phase (TRAIN, TEST, etc.) + :return: None + """ + self.reset_evaluation_state(val) + self._phase = val + self.exploration_policy.change_phase(val) + +
[docs] def reset_evaluation_state(self, val: RunPhase) -> None: + """ + Perform accumulators initialization when entering an evaluation phase, and signal dumping when exiting an + evaluation phase. Entering or exiting the evaluation phase is determined according to the new phase given + by val, and by the current phase set in self.phase. + + :param val: The new phase to change to + :return: None + """ + starting_evaluation = (val == RunPhase.TEST) + ending_evaluation = (self.phase == RunPhase.TEST) + + if starting_evaluation: + self.accumulated_rewards_across_evaluation_episodes = 0 + self.accumulated_shaped_rewards_across_evaluation_episodes = 0 + self.num_successes_across_evaluation_episodes = 0 + self.num_evaluation_episodes_completed = 0 + if self.ap.is_a_highest_level_agent or self.ap.task_parameters.verbosity == "high": + screen.log_title("{}: Starting evaluation phase".format(self.name)) + + elif ending_evaluation: + # we write to the next episode, because it could be that the current episode was already written + # to disk and then we won't write it again + self.agent_logger.set_current_time(self.current_episode + 1) + self.agent_logger.create_signal_value( + 'Evaluation Reward', + self.accumulated_rewards_across_evaluation_episodes / self.num_evaluation_episodes_completed) + self.agent_logger.create_signal_value( + 'Shaped Evaluation Reward', + self.accumulated_shaped_rewards_across_evaluation_episodes / self.num_evaluation_episodes_completed) + success_rate = self.num_successes_across_evaluation_episodes / self.num_evaluation_episodes_completed + self.agent_logger.create_signal_value( + "Success Rate", + success_rate + ) + if self.ap.is_a_highest_level_agent or self.ap.task_parameters.verbosity == "high": + screen.log_title("{}: Finished evaluation phase. Success rate = {}" + .format(self.name, np.round(success_rate, 2)))
+ +
[docs] def call_memory(self, func, args=()): + """ + This function is a wrapper to allow having the same calls for shared or unshared memories. + It should be used instead of calling the memory directly in order to allow different algorithms to work + both with a shared and a local memory. + + :param func: the name of the memory function to call + :param args: the arguments to supply to the function + :return: the return value of the function + """ + if self.shared_memory: + result = self.shared_memory_scratchpad.internal_call(self.memory_lookup_name, func, args) + else: + if type(args) != tuple: + args = (args,) + result = getattr(self.memory, func)(*args) + return result
+ +
[docs] def log_to_screen(self) -> None: + """ + Write an episode summary line to the terminal + + :return: None + """ + # log to screen + log = OrderedDict() + log["Name"] = self.full_name_id + if self.task_id is not None: + log["Worker"] = self.task_id + log["Episode"] = self.current_episode + log["Total reward"] = np.round(self.total_reward_in_current_episode, 2) + log["Exploration"] = np.round(self.exploration_policy.get_control_param(), 2) + log["Steps"] = self.total_steps_counter + log["Training iteration"] = self.training_iteration + screen.log_dict(log, prefix=self.phase.value)
+ +
[docs] def update_step_in_episode_log(self) -> None: + """ + Updates the in-episode log file with all the signal values from the most recent step. + + :return: None + """ + # log all the signals to file + self.agent_episode_logger.set_current_time(self.current_episode_steps_counter) + self.agent_episode_logger.create_signal_value('Training Iter', self.training_iteration) + self.agent_episode_logger.create_signal_value('In Heatup', int(self._phase == RunPhase.HEATUP)) + self.agent_episode_logger.create_signal_value('ER #Transitions', self.call_memory('num_transitions')) + self.agent_episode_logger.create_signal_value('ER #Episodes', self.call_memory('length')) + self.agent_episode_logger.create_signal_value('Total steps', self.total_steps_counter) + self.agent_episode_logger.create_signal_value("Epsilon", self.exploration_policy.get_control_param()) + self.agent_episode_logger.create_signal_value("Shaped Accumulated Reward", self.total_shaped_reward_in_current_episode) + self.agent_episode_logger.create_signal_value('Update Target Network', 0, overwrite=False) + self.agent_episode_logger.update_wall_clock_time(self.current_episode_steps_counter) + + for signal in self.step_signals: + self.agent_episode_logger.create_signal_value(signal.name, signal.get_last_value()) + + # dump + self.agent_episode_logger.dump_output_csv()
+ +
[docs] def update_log(self) -> None: + """ + Updates the episodic log file with all the signal values from the most recent episode. + Additional signals for logging can be set by the creating a new signal using self.register_signal, + and then updating it with some internal agent values. + + :return: None + """ + # log all the signals to file + self.agent_logger.set_current_time(self.current_episode) + self.agent_logger.create_signal_value('Training Iter', self.training_iteration) + self.agent_logger.create_signal_value('In Heatup', int(self._phase == RunPhase.HEATUP)) + self.agent_logger.create_signal_value('ER #Transitions', self.call_memory('num_transitions')) + self.agent_logger.create_signal_value('ER #Episodes', self.call_memory('length')) + self.agent_logger.create_signal_value('Episode Length', self.current_episode_steps_counter) + self.agent_logger.create_signal_value('Total steps', self.total_steps_counter) + self.agent_logger.create_signal_value("Epsilon", np.mean(self.exploration_policy.get_control_param())) + self.agent_logger.create_signal_value("Shaped Training Reward", self.total_shaped_reward_in_current_episode + if self._phase == RunPhase.TRAIN else np.nan) + self.agent_logger.create_signal_value("Training Reward", self.total_reward_in_current_episode + if self._phase == RunPhase.TRAIN else np.nan) + + self.agent_logger.create_signal_value('Update Target Network', 0, overwrite=False) + self.agent_logger.update_wall_clock_time(self.current_episode) + + if self._phase != RunPhase.TEST: + self.agent_logger.create_signal_value('Evaluation Reward', np.nan, overwrite=False) + self.agent_logger.create_signal_value('Shaped Evaluation Reward', np.nan, overwrite=False) + self.agent_logger.create_signal_value('Success Rate', np.nan, overwrite=False) + + for signal in self.episode_signals: + self.agent_logger.create_signal_value("{}/Mean".format(signal.name), signal.get_mean()) + self.agent_logger.create_signal_value("{}/Stdev".format(signal.name), signal.get_stdev()) + self.agent_logger.create_signal_value("{}/Max".format(signal.name), signal.get_max()) + self.agent_logger.create_signal_value("{}/Min".format(signal.name), signal.get_min()) + + # dump + if self.current_episode % self.ap.visualization.dump_signals_to_csv_every_x_episodes == 0 \ + and self.current_episode > 0: + self.agent_logger.dump_output_csv()
+ +
[docs] def handle_episode_ended(self) -> None: + """ + Make any changes needed when each episode is ended. + This includes incrementing counters, updating full episode dependent values, updating logs, etc. + This function is called right after each episode is ended. + + :return: None + """ + self.current_episode_buffer.is_complete = True + self.current_episode_buffer.update_transitions_rewards_and_bootstrap_data() + + for transition in self.current_episode_buffer.transitions: + self.discounted_return.add_sample(transition.n_step_discounted_rewards) + + if self.phase != RunPhase.TEST or self.ap.task_parameters.evaluate_only: + self.current_episode += 1 + + if self.phase != RunPhase.TEST: + if isinstance(self.memory, EpisodicExperienceReplay): + self.call_memory('store_episode', self.current_episode_buffer) + elif self.ap.algorithm.store_transitions_only_when_episodes_are_terminated: + for transition in self.current_episode_buffer.transitions: + self.call_memory('store', transition) + + if self.phase == RunPhase.TEST: + self.accumulated_rewards_across_evaluation_episodes += self.total_reward_in_current_episode + self.accumulated_shaped_rewards_across_evaluation_episodes += self.total_shaped_reward_in_current_episode + self.num_evaluation_episodes_completed += 1 + + if self.spaces.reward.reward_success_threshold and \ + self.total_reward_in_current_episode >= self.spaces.reward.reward_success_threshold: + self.num_successes_across_evaluation_episodes += 1 + + if self.ap.visualization.dump_csv: + self.update_log() + + if self.ap.is_a_highest_level_agent or self.ap.task_parameters.verbosity == "high": + self.log_to_screen()
+ +
[docs] def reset_internal_state(self) -> None: + """ + Reset all the episodic parameters. This function is called right before each episode starts. + + :return: None + """ + for signal in self.episode_signals: + signal.reset() + for signal in self.step_signals: + signal.reset() + self.agent_episode_logger.set_episode_idx(self.current_episode) + self.total_shaped_reward_in_current_episode = 0 + self.total_reward_in_current_episode = 0 + self.curr_state = {} + self.current_episode_steps_counter = 0 + self.episode_running_info = {} + self.current_episode_buffer = Episode(discount=self.ap.algorithm.discount, n_step=self.ap.algorithm.n_step) + if self.exploration_policy: + self.exploration_policy.reset() + self.input_filter.reset() + self.output_filter.reset() + self.pre_network_filter.reset() + if isinstance(self.memory, EpisodicExperienceReplay): + self.call_memory('verify_last_episode_is_closed') + + for network in self.networks.values(): + network.online_network.reset_internal_memory()
+ +
[docs] def learn_from_batch(self, batch) -> Tuple[float, List, List]: + """ + Given a batch of transitions, calculates their target values and updates the network. + + :param batch: A list of transitions + :return: The total loss of the training, the loss per head and the unclipped gradients + """ + return 0, [], []
+ + def _should_update_online_weights_to_target(self): + """ + Determine if online weights should be copied to the target. + + :return: boolean: True if the online weights should be copied to the target. + """ + + # update the target network of every network that has a target network + step_method = self.ap.algorithm.num_steps_between_copying_online_weights_to_target + if step_method.__class__ == TrainingSteps: + should_update = (self.training_iteration - self.last_target_network_update_step) >= step_method.num_steps + if should_update: + self.last_target_network_update_step = self.training_iteration + elif step_method.__class__ == EnvironmentSteps: + should_update = (self.total_steps_counter - self.last_target_network_update_step) >= step_method.num_steps + if should_update: + self.last_target_network_update_step = self.total_steps_counter + else: + raise ValueError("The num_steps_between_copying_online_weights_to_target parameter should be either " + "EnvironmentSteps or TrainingSteps. Instead it is {}".format(step_method.__class__)) + return should_update + + def _should_train(self, wait_for_full_episode=False) -> bool: + """ + Determine if we should start a training phase according to the number of steps passed since the last training + + :return: boolean: True if we should start a training phase + """ + + should_update = self._should_train_helper(wait_for_full_episode=wait_for_full_episode) + + step_method = self.ap.algorithm.num_consecutive_playing_steps + + if should_update: + if step_method.__class__ == EnvironmentEpisodes: + self.last_training_phase_step = self.current_episode + if step_method.__class__ == EnvironmentSteps: + self.last_training_phase_step = self.total_steps_counter + + return should_update + + def _should_train_helper(self, wait_for_full_episode=False): + + step_method = self.ap.algorithm.num_consecutive_playing_steps + + if step_method.__class__ == EnvironmentEpisodes: + should_update = (self.current_episode - self.last_training_phase_step) >= step_method.num_steps + should_update = should_update and self.call_memory('length') > 0 + + elif step_method.__class__ == EnvironmentSteps: + should_update = (self.total_steps_counter - self.last_training_phase_step) >= step_method.num_steps + should_update = should_update and self.call_memory('num_transitions') > 0 + + if wait_for_full_episode: + should_update = should_update and self.current_episode_buffer.is_complete + else: + raise ValueError("The num_consecutive_playing_steps parameter should be either " + "EnvironmentSteps or Episodes. Instead it is {}".format(step_method.__class__)) + + return should_update + +
[docs] def train(self) -> float: + """ + Check if a training phase should be done as configured by num_consecutive_playing_steps. + If it should, then do several training steps as configured by num_consecutive_training_steps. + A single training iteration: Sample a batch, train on it and update target networks. + + :return: The total training loss during the training iterations. + """ + loss = 0 + if self._should_train(): + for network in self.networks.values(): + network.set_is_training(True) + + for training_step in range(self.ap.algorithm.num_consecutive_training_steps): + # TODO: this should be network dependent + network_parameters = list(self.ap.network_wrappers.values())[0] + + # update counters + self.training_iteration += 1 + + # sample a batch and train on it + batch = self.call_memory('sample', network_parameters.batch_size) + if self.pre_network_filter is not None: + batch = self.pre_network_filter.filter(batch, update_internal_state=False, deep_copy=False) + + # if the batch returned empty then there are not enough samples in the replay buffer -> skip + # training step + if len(batch) > 0: + # train + batch = Batch(batch) + total_loss, losses, unclipped_grads = self.learn_from_batch(batch) + loss += total_loss + self.unclipped_grads.add_sample(unclipped_grads) + + # TODO: the learning rate decay should be done through the network instead of here + # decay learning rate + if network_parameters.learning_rate_decay_rate != 0: + self.curr_learning_rate.add_sample(self.networks['main'].sess.run( + self.networks['main'].online_network.current_learning_rate)) + else: + self.curr_learning_rate.add_sample(network_parameters.learning_rate) + + if any([network.has_target for network in self.networks.values()]) \ + and self._should_update_online_weights_to_target(): + for network in self.networks.values(): + network.update_target_network(self.ap.algorithm.rate_for_copying_weights_to_target) + + self.agent_logger.create_signal_value('Update Target Network', 1) + else: + self.agent_logger.create_signal_value('Update Target Network', 0, overwrite=False) + + self.loss.add_sample(loss) + + if self.imitation: + self.log_to_screen() + + for network in self.networks.values(): + network.set_is_training(False) + + # run additional commands after the training is done + self.post_training_commands() + + return loss
+ +
[docs] def choose_action(self, curr_state): + """ + choose an action to act with in the current episode being played. Different behavior might be exhibited when + training or testing. + + :param curr_state: the current state to act upon. + :return: chosen action, some action value describing the action (q-value, probability, etc) + """ + pass
+ +
[docs] def prepare_batch_for_inference(self, states: Union[Dict[str, np.ndarray], List[Dict[str, np.ndarray]]], + network_name: str) -> Dict[str, np.array]: + """ + Convert curr_state into input tensors tensorflow is expecting. i.e. if we have several inputs states, stack all + observations together, measurements together, etc. + + :param states: A list of environment states, where each one is a dict mapping from an observation name to its + corresponding observation + :param network_name: The agent network name to prepare the batch for. this is needed in order to extract only + the observation relevant for the network from the states. + :return: A dictionary containing a list of values from all the given states for each of the observations + """ + # convert to batch so we can run it through the network + states = force_list(states) + batches_dict = {} + for key in self.ap.network_wrappers[network_name].input_embedders_parameters.keys(): + # there are cases (e.g. ddpg) where the state does not contain all the information needed for running + # through the network and this has to be added externally (e.g. ddpg where the action needs to be given in + # addition to the current_state, so that all the inputs of the network will be filled) + if key in states[0].keys(): + batches_dict[key] = np.array([np.array(state[key]) for state in states]) + + return batches_dict
+ +
[docs] def act(self) -> ActionInfo: + """ + Given the agents current knowledge, decide on the next action to apply to the environment + + :return: An ActionInfo object, which contains the action and any additional info from the action decision process + """ + if self.phase == RunPhase.TRAIN and self.ap.algorithm.num_consecutive_playing_steps.num_steps == 0: + # This agent never plays while training (e.g. behavioral cloning) + return None + + # count steps (only when training or if we are in the evaluation worker) + if self.phase != RunPhase.TEST or self.ap.task_parameters.evaluate_only: + self.total_steps_counter += 1 + self.current_episode_steps_counter += 1 + + # decide on the action + if self.phase == RunPhase.HEATUP and not self.ap.algorithm.heatup_using_network_decisions: + # random action + self.last_action_info = self.spaces.action.sample_with_info() + else: + # informed action + if self.pre_network_filter is not None: + # before choosing an action, first use the pre_network_filter to filter out the current state + curr_state = self.run_pre_network_filter_for_inference(self.curr_state) + + else: + curr_state = self.curr_state + self.last_action_info = self.choose_action(curr_state) + + filtered_action_info = self.output_filter.filter(self.last_action_info) + + return filtered_action_info
+ +
[docs] def run_pre_network_filter_for_inference(self, state: StateType) -> StateType: + """ + Run filters which where defined for being applied right before using the state for inference. + + :param state: The state to run the filters on + :return: The filtered state + """ + dummy_env_response = EnvResponse(next_state=state, reward=0, game_over=False) + return self.pre_network_filter.filter(dummy_env_response)[0].next_state
+ +
[docs] def get_state_embedding(self, state: dict) -> np.ndarray: + """ + Given a state, get the corresponding state embedding from the main network + + :param state: a state dict + :return: a numpy embedding vector + """ + # TODO: this won't work anymore + # TODO: instead of the state embedding (which contains the goal) we should use the observation embedding + embedding = self.networks['main'].online_network.predict( + self.prepare_batch_for_inference(state, "main"), + outputs=self.networks['main'].online_network.state_embedding) + return embedding
+ +
[docs] def update_transition_before_adding_to_replay_buffer(self, transition: Transition) -> Transition: + """ + Allows agents to update the transition just before adding it to the replay buffer. + Can be useful for agents that want to tweak the reward, termination signal, etc. + + :param transition: the transition to update + :return: the updated transition + """ + return transition
+ +
[docs] def observe(self, env_response: EnvResponse) -> bool: + """ + Given a response from the environment, distill the observation from it and store it for later use. + The response should be a dictionary containing the performed action, the new observation and measurements, + the reward, a game over flag and any additional information necessary. + + :param env_response: result of call from environment.step(action) + :return: a boolean value which determines if the agent has decided to terminate the episode after seeing the + given observation + """ + + # filter the env_response + filtered_env_response = self.input_filter.filter(env_response)[0] + + # inject agent collected statistics, if required + if self.ap.algorithm.use_accumulated_reward_as_measurement: + if 'measurements' in filtered_env_response.next_state: + filtered_env_response.next_state['measurements'] = np.append(filtered_env_response.next_state['measurements'], + self.total_shaped_reward_in_current_episode) + else: + filtered_env_response.next_state['measurements'] = np.array([self.total_shaped_reward_in_current_episode]) + + # if we are in the first step in the episode, then we don't have a a next state and a reward and thus no + # transition yet, and therefore we don't need to store anything in the memory. + # also we did not reach the goal yet. + if self.current_episode_steps_counter == 0: + # initialize the current state + self.curr_state = filtered_env_response.next_state + return env_response.game_over + else: + transition = Transition(state=copy.copy(self.curr_state), action=self.last_action_info.action, + reward=filtered_env_response.reward, next_state=filtered_env_response.next_state, + game_over=filtered_env_response.game_over, info=filtered_env_response.info) + + # now that we have formed a basic transition - the next state progresses to be the current state + self.curr_state = filtered_env_response.next_state + + # make agent specific changes to the transition if needed + transition = self.update_transition_before_adding_to_replay_buffer(transition) + + # merge the intrinsic reward in + if self.ap.algorithm.scale_external_reward_by_intrinsic_reward_value: + transition.reward = transition.reward * (1 + self.last_action_info.action_intrinsic_reward) + else: + transition.reward = transition.reward + self.last_action_info.action_intrinsic_reward + + # sum up the total shaped reward + self.total_shaped_reward_in_current_episode += transition.reward + self.total_reward_in_current_episode += env_response.reward + self.shaped_reward.add_sample(transition.reward) + self.reward.add_sample(env_response.reward) + + # add action info to transition + if type(self.parent).__name__ == 'CompositeAgent': + transition.add_info(self.parent.last_action_info.__dict__) + else: + transition.add_info(self.last_action_info.__dict__) + + # create and store the transition + if self.phase in [RunPhase.TRAIN, RunPhase.HEATUP]: + # for episodic memories we keep the transitions in a local buffer until the episode is ended. + # for regular memories we insert the transitions directly to the memory + self.current_episode_buffer.insert(transition) + if not isinstance(self.memory, EpisodicExperienceReplay) \ + and not self.ap.algorithm.store_transitions_only_when_episodes_are_terminated: + self.call_memory('store', transition) + + if self.ap.visualization.dump_in_episode_signals: + self.update_step_in_episode_log() + + return transition.game_over
+ +
[docs] def post_training_commands(self) -> None: + """ + A function which allows adding any functionality that is required to run right after the training phase ends. + + :return: None + """ + pass
+ +
[docs] def get_predictions(self, states: List[Dict[str, np.ndarray]], prediction_type: PredictionType): + """ + Get a prediction from the agent with regard to the requested prediction_type. + If the agent cannot predict this type of prediction_type, or if there is more than possible way to do so, + raise a ValueException. + + :param states: The states to get a prediction for + :param prediction_type: The type of prediction to get for the states. For example, the state-value prediction. + :return: the predicted values + """ + + predictions = self.networks['main'].online_network.predict_with_prediction_type( + # states=self.dict_state_to_batches_dict(states, 'main'), prediction_type=prediction_type) + states=states, prediction_type=prediction_type) + + if len(predictions.keys()) != 1: + raise ValueError("The network has more than one component {} matching the requested prediction_type {}. ". + format(list(predictions.keys()), prediction_type)) + return list(predictions.values())[0]
+ +
[docs] def set_incoming_directive(self, action: ActionType) -> None: + """ + Allows setting a directive for the agent to follow. This is useful in hierarchy structures, where the agent + has another master agent that is controlling it. In such cases, the master agent can define the goals for the + slave agent, define it's observation, possible actions, etc. The directive type is defined by the agent + in-action-space. + + :param action: The action that should be set as the directive + :return: + """ + if isinstance(self.in_action_space, GoalsSpace): + self.current_hrl_goal = action + elif isinstance(self.in_action_space, AttentionActionSpace): + self.input_filter.observation_filters['attention'].crop_low = action[0] + self.input_filter.observation_filters['attention'].crop_high = action[1] + self.output_filter.action_filters['masking'].set_masking(action[0], action[1])
+ +
[docs] def save_checkpoint(self, checkpoint_id: int) -> None: + """ + Allows agents to store additional information when saving checkpoints. + + :param checkpoint_id: the id of the checkpoint + :return: None + """ + pass
+ +
[docs] def sync(self) -> None: + """ + Sync the global network parameters to local networks + + :return: None + """ + for network in self.networks.values(): + network.sync()
+ + def get_success_rate(self) -> float: + return self.num_successes_across_evaluation_episodes / self.num_evaluation_episodes_completed
+
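As a quick illustration of what prepare_batch_for_inference (documented above) produces: a list of per-step state dicts is stacked into one array per observation name, keeping only the keys the network's input embedders expect. The observation names and shapes below are made-up examples, not part of rl_coach:

# Standalone sketch; 'observation' / 'measurements' and the shapes are illustrative.
import numpy as np

states = [
    {'observation': np.zeros(4), 'measurements': np.array([0.1])},
    {'observation': np.ones(4),  'measurements': np.array([0.2])},
]
embedder_keys = ['observation']   # keys defined by the network's input_embedders_parameters

batch = {key: np.array([np.array(state[key]) for state in states])
         for key in embedder_keys if key in states[0]}
# batch['observation'].shape == (2, 4), ready to feed to online_network.predict()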
\ No newline at end of file
diff --git a/docs/_modules/rl_coach/agents/bc_agent.html b/docs/_modules/rl_coach/agents/bc_agent.html new file mode 100644 index 0000000..7b6529d --- /dev/null +++ b/docs/_modules/rl_coach/agents/bc_agent.html @@ -0,0 +1,308 @@
rl_coach.agents.bc_agent — Reinforcement Learning Coach 0.11.0 documentation
Source code for rl_coach.agents.bc_agent

+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from typing import Union
+
+import numpy as np
+
+from rl_coach.agents.imitation_agent import ImitationAgent
+from rl_coach.architectures.head_parameters import PolicyHeadParameters
+from rl_coach.architectures.middleware_parameters import FCMiddlewareParameters
+from rl_coach.architectures.embedder_parameters import InputEmbedderParameters
+from rl_coach.base_parameters import AgentParameters, AlgorithmParameters, NetworkParameters, \
+    MiddlewareScheme
+from rl_coach.exploration_policies.e_greedy import EGreedyParameters
+from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters
+from rl_coach.memories.non_episodic.experience_replay import ExperienceReplayParameters
+
+
+
[docs]class BCAlgorithmParameters(AlgorithmParameters): + def __init__(self): + super().__init__()
+ + +class BCNetworkParameters(NetworkParameters): + def __init__(self): + super().__init__() + self.input_embedders_parameters = {'observation': InputEmbedderParameters()} + self.middleware_parameters = FCMiddlewareParameters(scheme=MiddlewareScheme.Medium) + self.heads_parameters = [PolicyHeadParameters()] + self.optimizer_type = 'Adam' + self.batch_size = 32 + self.replace_mse_with_huber_loss = False + self.create_target_network = False + + +class BCAgentParameters(AgentParameters): + def __init__(self): + super().__init__(algorithm=BCAlgorithmParameters(), + exploration=EGreedyParameters(), + memory=ExperienceReplayParameters(), + networks={"main": BCNetworkParameters()}) + + @property + def path(self): + return 'rl_coach.agents.bc_agent:BCAgent' + + +# Behavioral Cloning Agent +class BCAgent(ImitationAgent): + def __init__(self, agent_parameters, parent: Union['LevelManager', 'CompositeAgent']=None): + super().__init__(agent_parameters, parent) + + def learn_from_batch(self, batch): + network_keys = self.ap.network_wrappers['main'].input_embedders_parameters.keys() + + # When using a policy head, the targets refer to the advantages that we are normally feeding the head with. + # In this case, we need the policy head to just predict probabilities, so while we usually train the network + # with log(Pi)*Advantages, in this specific case we will train it to log(Pi), which after the softmax will + # predict Pi (=probabilities) + targets = np.ones(batch.actions().shape[0]) + + result = self.networks['main'].train_and_sync_networks({**batch.states(network_keys), + 'output_0_0': batch.actions()}, + targets) + total_loss, losses, unclipped_grads = result[:3] + + return total_loss, losses, unclipped_grads + +
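The targets of ones passed to train_and_sync_networks() above turn the policy head's usual log(pi) * advantage loss into a plain negative log-likelihood of the demonstrated actions. Below is a NumPy sketch of that reduced objective for a discrete action space; the helper and its variable names are illustrative and not part of Coach.

# A NumPy sketch of the behavioral-cloning objective that learn_from_batch() sets up above:
# with all "advantages" fixed to 1, the policy-gradient loss reduces to the cross entropy
# between the network's action distribution and the demonstrated actions.
import numpy as np

def bc_loss(action_logits, demonstrated_actions):
    # action_logits: (batch, num_actions), demonstrated_actions: (batch,) of int indices
    logits = action_logits - action_logits.max(axis=1, keepdims=True)   # numerical stability
    probs = np.exp(logits) / np.exp(logits).sum(axis=1, keepdims=True)  # softmax
    picked = probs[np.arange(len(demonstrated_actions)), demonstrated_actions]
    return -np.mean(np.log(picked))                                     # mean negative log-likelihood

# example: 3 demonstration transitions, 4 discrete actions
logits = np.array([[2.0, 0.1, -1.0, 0.3], [0.0, 1.5, 0.2, -0.5], [0.1, 0.1, 3.0, 0.0]])
actions = np.array([0, 1, 2])
print(bc_loss(logits, actions))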
\ No newline at end of file
diff --git a/docs/_modules/rl_coach/agents/categorical_dqn_agent.html b/docs/_modules/rl_coach/agents/categorical_dqn_agent.html new file mode 100644 index 0000000..12e04e0 --- /dev/null +++ b/docs/_modules/rl_coach/agents/categorical_dqn_agent.html @@ -0,0 +1,382 @@
rl_coach.agents.categorical_dqn_agent — Reinforcement Learning Coach 0.11.0 documentation
Source code for rl_coach.agents.categorical_dqn_agent

+#
+# Copyright (c) 2017 Intel Corporation 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from typing import Union
+
+import numpy as np
+from rl_coach.agents.dqn_agent import DQNNetworkParameters, DQNAlgorithmParameters, DQNAgentParameters
+from rl_coach.agents.value_optimization_agent import ValueOptimizationAgent
+from rl_coach.architectures.head_parameters import CategoricalQHeadParameters
+from rl_coach.core_types import StateType
+from rl_coach.exploration_policies.e_greedy import EGreedyParameters
+from rl_coach.memories.non_episodic.prioritized_experience_replay import PrioritizedExperienceReplay
+from rl_coach.schedules import LinearSchedule
+
+
+class CategoricalDQNNetworkParameters(DQNNetworkParameters):
+    def __init__(self):
+        super().__init__()
+        self.heads_parameters = [CategoricalQHeadParameters()]
+
+
+
[docs]class CategoricalDQNAlgorithmParameters(DQNAlgorithmParameters): + """ + :param v_min: (float) + The minimal value that will be represented in the network output for predicting the Q value. + Corresponds to :math:`v_{min}` in the paper. + + :param v_max: (float) + The maximum value that will be represented in the network output for predicting the Q value. + Corresponds to :math:`v_{max}` in the paper. + + :param atoms: (int) + The number of atoms that will be used to discretize the range between v_min and v_max. + For the C51 algorithm described in the paper, the number of atoms is 51. + """ + def __init__(self): + super().__init__() + self.v_min = -10.0 + self.v_max = 10.0 + self.atoms = 51
+ + +class CategoricalDQNExplorationParameters(EGreedyParameters): + def __init__(self): + super().__init__() + self.epsilon_schedule = LinearSchedule(1, 0.01, 1000000) + self.evaluation_epsilon = 0.001 + + +class CategoricalDQNAgentParameters(DQNAgentParameters): + def __init__(self): + super().__init__() + self.algorithm = CategoricalDQNAlgorithmParameters() + self.exploration = CategoricalDQNExplorationParameters() + self.network_wrappers = {"main": CategoricalDQNNetworkParameters()} + + @property + def path(self): + return 'rl_coach.agents.categorical_dqn_agent:CategoricalDQNAgent' + + +# Categorical Deep Q Network - https://arxiv.org/pdf/1707.06887.pdf +class CategoricalDQNAgent(ValueOptimizationAgent): + def __init__(self, agent_parameters, parent: Union['LevelManager', 'CompositeAgent']=None): + super().__init__(agent_parameters, parent) + self.z_values = np.linspace(self.ap.algorithm.v_min, self.ap.algorithm.v_max, self.ap.algorithm.atoms) + + def distribution_prediction_to_q_values(self, prediction): + return np.dot(prediction, self.z_values) + + # prediction's format is (batch,actions,atoms) + def get_all_q_values_for_states(self, states: StateType): + if self.exploration_policy.requires_action_values(): + prediction = self.get_prediction(states) + q_values = self.distribution_prediction_to_q_values(prediction) + else: + q_values = None + return q_values + + def learn_from_batch(self, batch): + network_keys = self.ap.network_wrappers['main'].input_embedders_parameters.keys() + + # for the action we actually took, the error is calculated by the atoms distribution + # for all other actions, the error is 0 + distributional_q_st_plus_1, TD_targets = self.networks['main'].parallel_prediction([ + (self.networks['main'].target_network, batch.next_states(network_keys)), + (self.networks['main'].online_network, batch.states(network_keys)) + ]) + + # select the optimal actions for the next state + target_actions = np.argmax(self.distribution_prediction_to_q_values(distributional_q_st_plus_1), axis=1) + m = np.zeros((self.ap.network_wrappers['main'].batch_size, self.z_values.size)) + + batches = np.arange(self.ap.network_wrappers['main'].batch_size) + + # an alternative to the for loop. 3.7x perf improvement vs. the same code done with for looping. + # only 10% speedup overall - leaving commented out as the code is not as clear. 
+ + # tzj_ = np.fmax(np.fmin(batch.rewards() + (1.0 - batch.game_overs()) * self.ap.algorithm.discount * + # np.transpose(np.repeat(self.z_values[np.newaxis, :], batch.size, axis=0), (1, 0)), + # self.z_values[-1]), + # self.z_values[0]) + # + # bj_ = (tzj_ - self.z_values[0]) / (self.z_values[1] - self.z_values[0]) + # u_ = (np.ceil(bj_)).astype(int) + # l_ = (np.floor(bj_)).astype(int) + # m_ = np.zeros((self.ap.network_wrappers['main'].batch_size, self.z_values.size)) + # np.add.at(m_, [batches, l_], + # np.transpose(distributional_q_st_plus_1[batches, target_actions], (1, 0)) * (u_ - bj_)) + # np.add.at(m_, [batches, u_], + # np.transpose(distributional_q_st_plus_1[batches, target_actions], (1, 0)) * (bj_ - l_)) + + for j in range(self.z_values.size): + tzj = np.fmax(np.fmin(batch.rewards() + + (1.0 - batch.game_overs()) * self.ap.algorithm.discount * self.z_values[j], + self.z_values[-1]), + self.z_values[0]) + bj = (tzj - self.z_values[0])/(self.z_values[1] - self.z_values[0]) + u = (np.ceil(bj)).astype(int) + l = (np.floor(bj)).astype(int) + m[batches, l] += (distributional_q_st_plus_1[batches, target_actions, j] * (u - bj)) + m[batches, u] += (distributional_q_st_plus_1[batches, target_actions, j] * (bj - l)) + + # total_loss = cross entropy between actual result above and predicted result for the given action + # only update the action that we have actually done in this transition + TD_targets[batches, batch.actions()] = m + + # update errors in prioritized replay buffer + importance_weights = batch.info('weight') if isinstance(self.memory, PrioritizedExperienceReplay) else None + + result = self.networks['main'].train_and_sync_networks(batch.states(network_keys), TD_targets, + importance_weights=importance_weights) + + total_loss, losses, unclipped_grads = result[:3] + + # TODO: fix this spaghetti code + if isinstance(self.memory, PrioritizedExperienceReplay): + errors = losses[0][np.arange(batch.size), batch.actions()] + self.call_memory('update_priorities', (batch.info('idx'), errors)) + + return total_loss, losses, unclipped_grads + +
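The loop above is the C51 projection step: the next-state distribution is shifted by the reward, scaled by the discount, clipped to [v_min, v_max] and then redistributed onto the fixed atoms. The following standalone NumPy sketch reproduces it for a single transition to make the redistribution explicit; the function and variable names are mine, not the module's.

# Standalone NumPy sketch of the C51 projection from learn_from_batch() above, written for a
# single transition. Like the loop above, it assumes bj does not land exactly on an atom.
import numpy as np

def project_distribution(next_probs, reward, game_over, z_values, discount=0.99):
    # next_probs: (atoms,) distribution of the selected next action; z_values: (atoms,) fixed support
    m = np.zeros_like(z_values)
    for j, zj in enumerate(z_values):
        tzj = np.clip(reward + (1.0 - game_over) * discount * zj, z_values[0], z_values[-1])
        bj = (tzj - z_values[0]) / (z_values[1] - z_values[0])   # fractional index on the support
        l, u = int(np.floor(bj)), int(np.ceil(bj))
        m[l] += next_probs[j] * (u - bj)                         # split the atom's mass between
        m[u] += next_probs[j] * (bj - l)                         # its two neighbouring atoms
    return m

z = np.linspace(-10.0, 10.0, 51)                  # v_min, v_max, atoms as in the defaults above
p_next = np.full(51, 1.0 / 51)                    # e.g. a uniform next-state distribution
print(project_distribution(p_next, reward=1.0, game_over=0.0, z_values=z).sum())  # ~1.0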
\ No newline at end of file
diff --git a/docs/_modules/rl_coach/agents/cil_agent.html b/docs/_modules/rl_coach/agents/cil_agent.html new file mode 100644 index 0000000..3fc44b0 --- /dev/null +++ b/docs/_modules/rl_coach/agents/cil_agent.html @@ -0,0 +1,314 @@
rl_coach.agents.cil_agent — Reinforcement Learning Coach 0.11.0 documentation
Source code for rl_coach.agents.cil_agent

+#
+# Copyright (c) 2017 Intel Corporation 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from typing import Union
+
+from rl_coach.agents.imitation_agent import ImitationAgent
+from rl_coach.architectures.embedder_parameters import InputEmbedderParameters
+from rl_coach.architectures.head_parameters import RegressionHeadParameters
+from rl_coach.architectures.middleware_parameters import FCMiddlewareParameters
+from rl_coach.base_parameters import AgentParameters, MiddlewareScheme, NetworkParameters, AlgorithmParameters
+from rl_coach.exploration_policies.e_greedy import EGreedyParameters
+from rl_coach.memories.non_episodic.balanced_experience_replay import BalancedExperienceReplayParameters
+
+
+
[docs]class CILAlgorithmParameters(AlgorithmParameters): + """ + :param state_key_with_the_class_index: (str) + The key of the state dictionary which corresponds to the value that will be used to control the class index. + """ + def __init__(self): + super().__init__() + self.state_key_with_the_class_index = 'high_level_command'
+ + +class CILNetworkParameters(NetworkParameters): + def __init__(self): + super().__init__() + self.input_embedders_parameters = {'observation': InputEmbedderParameters()} + self.middleware_parameters = FCMiddlewareParameters(scheme=MiddlewareScheme.Medium) + self.heads_parameters = [RegressionHeadParameters()] + self.optimizer_type = 'Adam' + self.batch_size = 32 + self.replace_mse_with_huber_loss = False + self.create_target_network = False + + +class CILAgentParameters(AgentParameters): + def __init__(self): + super().__init__(algorithm=CILAlgorithmParameters(), + exploration=EGreedyParameters(), + memory=BalancedExperienceReplayParameters(), + networks={"main": CILNetworkParameters()}) + + @property + def path(self): + return 'rl_coach.agents.cil_agent:CILAgent' + + +# Conditional Imitation Learning Agent: https://arxiv.org/abs/1710.02410 +class CILAgent(ImitationAgent): + def __init__(self, agent_parameters, parent: Union['LevelManager', 'CompositeAgent']=None): + super().__init__(agent_parameters, parent) + self.current_high_level_control = 0 + + def choose_action(self, curr_state): + self.current_high_level_control = curr_state[self.ap.algorithm.state_key_with_the_class_index] + return super().choose_action(curr_state) + + def extract_action_values(self, prediction): + return prediction[self.current_high_level_control].squeeze() + + def learn_from_batch(self, batch): + network_keys = self.ap.network_wrappers['main'].input_embedders_parameters.keys() + + target_values = self.networks['main'].online_network.predict({**batch.states(network_keys)}) + + branch_to_update = batch.states([self.ap.algorithm.state_key_with_the_class_index])[self.ap.algorithm.state_key_with_the_class_index] + for idx, branch in enumerate(branch_to_update): + target_values[branch][idx] = batch.actions()[idx] + + result = self.networks['main'].train_and_sync_networks({**batch.states(network_keys)}, target_values) + total_loss, losses, unclipped_grads = result[:3] + + return total_loss, losses, unclipped_grads +
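CILAgent keeps one regression output per high-level command and, as learn_from_batch() shows, only overwrites the targets of the branch selected by each sample's command, so the other branches contribute no gradient. Here is a small NumPy sketch of that target construction; the shapes and names are illustrative, not Coach's.

# A small NumPy sketch of the branched-target construction in CILAgent.learn_from_batch() above:
# current predictions are used as targets everywhere, and only the branch selected by the
# high-level command is overwritten with the demonstrated action, so other branches get zero loss.
import numpy as np

num_branches, batch_size, action_dim = 4, 3, 2
predictions = [np.zeros((batch_size, action_dim)) for _ in range(num_branches)]  # stand-in for network output
demonstrated_actions = np.array([[0.1, 0.9], [0.5, 0.0], [-0.3, 0.2]])
high_level_command = np.array([2, 0, 2])              # which branch each sample belongs to

targets = [p.copy() for p in predictions]
for idx, branch in enumerate(high_level_command):
    targets[branch][idx] = demonstrated_actions[idx]  # only the commanded branch is regressed

print(targets[2])   # rows 0 and 2 carry targets, row 1 stays equal to the prediction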
\ No newline at end of file
diff --git a/docs/_modules/rl_coach/agents/clipped_ppo_agent.html b/docs/_modules/rl_coach/agents/clipped_ppo_agent.html new file mode 100644 index 0000000..c24e2ef --- /dev/null +++ b/docs/_modules/rl_coach/agents/clipped_ppo_agent.html @@ -0,0 +1,563 @@
rl_coach.agents.clipped_ppo_agent — Reinforcement Learning Coach 0.11.0 documentation
Source code for rl_coach.agents.clipped_ppo_agent

+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import copy
+from collections import OrderedDict
+from random import shuffle
+from typing import Union
+
+import numpy as np
+
+from rl_coach.agents.actor_critic_agent import ActorCriticAgent
+from rl_coach.agents.policy_optimization_agent import PolicyGradientRescaler
+from rl_coach.architectures.embedder_parameters import InputEmbedderParameters
+from rl_coach.architectures.head_parameters import PPOHeadParameters, VHeadParameters
+from rl_coach.architectures.middleware_parameters import FCMiddlewareParameters
+from rl_coach.base_parameters import AlgorithmParameters, NetworkParameters, \
+    AgentParameters
+from rl_coach.core_types import EnvironmentSteps, Batch, EnvResponse, StateType
+from rl_coach.exploration_policies.additive_noise import AdditiveNoiseParameters
+from rl_coach.exploration_policies.categorical import CategoricalParameters
+from rl_coach.logger import screen
+from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters
+from rl_coach.schedules import ConstantSchedule
+from rl_coach.spaces import DiscreteActionSpace, BoxActionSpace
+
+
+class ClippedPPONetworkParameters(NetworkParameters):
+    def __init__(self):
+        super().__init__()
+        self.input_embedders_parameters = {'observation': InputEmbedderParameters(activation_function='tanh')}
+        self.middleware_parameters = FCMiddlewareParameters(activation_function='tanh')
+        self.heads_parameters = [VHeadParameters(), PPOHeadParameters()]
+        self.batch_size = 64
+        self.optimizer_type = 'Adam'
+        self.clip_gradients = None
+        self.use_separate_networks_per_head = True
+        self.async_training = False
+        self.l2_regularization = 0
+
+        # The target network is used in order to freeze the old policy, while making updates to the new one
+        # in train_network()
+        self.create_target_network = True
+        self.shared_optimizer = True
+        self.scale_down_gradients_by_number_of_workers_for_sync_training = True
+
+
+
[docs]class ClippedPPOAlgorithmParameters(AlgorithmParameters): + """ + :param policy_gradient_rescaler: (PolicyGradientRescaler) + This represents how the critic will be used to update the actor. The critic value function is typically used + to rescale the gradients calculated by the actor. There are several ways for doing this, such as using the + advantage of the action, or the generalized advantage estimation (GAE) value. + + :param gae_lambda: (float) + The :math:`\lambda` value is used within the GAE function in order to weight different bootstrap length + estimations. Typical values are in the range 0.9-1, and define an exponential decay over the different + n-step estimations. + + :param clip_likelihood_ratio_using_epsilon: (float) + If not None, the likelihood ratio between the current and new policy in the PPO loss function will be + clipped to the range [1-clip_likelihood_ratio_using_epsilon, 1+clip_likelihood_ratio_using_epsilon]. + This is typically used in the Clipped PPO version of PPO, and should be set to None in regular PPO + implementations. + + :param value_targets_mix_fraction: (float) + The targets for the value network are an exponentially weighted moving average which uses this mix fraction to + define how much of the new targets will be taken into account when calculating the loss. + This value should be set to the range (0,1], where 1 means that only the new targets will be taken into account. + + :param estimate_state_value_using_gae: (bool) + If set to True, the state value will be estimated using the GAE technique. + + :param use_kl_regularization: (bool) + If set to True, the loss function will be regularized using the KL divergence between the current and new + policy, to bound the change of the policy during the network update. + + :param beta_entropy: (float) + An entropy regularization term can be added to the loss function in order to control exploration. This term + is weighted using the :math:`\beta` value defined by beta_entropy. + + :param optimization_epochs: (int) + For each training phase, the collected dataset will be used for multiple epochs, which are defined by the + optimization_epochs value. + + :param clipping_decay_schedule: (Schedule) + Can be used to define a schedule over the clipping of the likelihood ratio. + + """ + def __init__(self): + super().__init__() + self.num_episodes_in_experience_replay = 1000000 + self.policy_gradient_rescaler = PolicyGradientRescaler.GAE + self.gae_lambda = 0.95 + self.use_kl_regularization = False + self.clip_likelihood_ratio_using_epsilon = 0.2 + self.estimate_state_value_using_gae = True + self.beta_entropy = 0.01 # should be 0 for mujoco + self.num_consecutive_playing_steps = EnvironmentSteps(2048) + self.optimization_epochs = 10 + self.normalization_stats = None + self.clipping_decay_schedule = ConstantSchedule(1)
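The clip_likelihood_ratio_using_epsilon parameter above controls the clipped surrogate objective used during training. A NumPy sketch of that loss for a batch of likelihood ratios and advantages follows; the helper name and its inputs are illustrative and do not reproduce the Coach head implementation.

# NumPy sketch of the clipped surrogate objective that clip_likelihood_ratio_using_epsilon controls:
# L = -E[ min(r * A, clip(r, 1 - eps, 1 + eps) * A) ], where r is the ratio between the new and old
# policy probabilities of the taken action and A is the (GAE) advantage.
import numpy as np

def clipped_surrogate_loss(new_log_probs, old_log_probs, advantages, epsilon=0.2):
    ratio = np.exp(new_log_probs - old_log_probs)                # likelihood ratio r
    clipped_ratio = np.clip(ratio, 1.0 - epsilon, 1.0 + epsilon)
    return -np.mean(np.minimum(ratio * advantages, clipped_ratio * advantages))

# example: three transitions
print(clipped_surrogate_loss(np.array([-0.1, -2.0, -0.5]),
                             np.array([-0.3, -1.0, -0.5]),
                             np.array([1.0, -0.5, 2.0])))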
+ + +class ClippedPPOAgentParameters(AgentParameters): + def __init__(self): + super().__init__(algorithm=ClippedPPOAlgorithmParameters(), + exploration={DiscreteActionSpace: CategoricalParameters(), + BoxActionSpace: AdditiveNoiseParameters()}, + memory=EpisodicExperienceReplayParameters(), + networks={"main": ClippedPPONetworkParameters()}) + + @property + def path(self): + return 'rl_coach.agents.clipped_ppo_agent:ClippedPPOAgent' + + +# Clipped Proximal Policy Optimization - https://arxiv.org/abs/1707.06347 +class ClippedPPOAgent(ActorCriticAgent): + def __init__(self, agent_parameters, parent: Union['LevelManager', 'CompositeAgent']=None): + super().__init__(agent_parameters, parent) + # signals definition + self.value_loss = self.register_signal('Value Loss') + self.policy_loss = self.register_signal('Policy Loss') + self.total_kl_divergence_during_training_process = 0.0 + self.unclipped_grads = self.register_signal('Grads (unclipped)') + self.value_targets = self.register_signal('Value Targets') + self.kl_divergence = self.register_signal('KL Divergence') + self.likelihood_ratio = self.register_signal('Likelihood Ratio') + self.clipped_likelihood_ratio = self.register_signal('Clipped Likelihood Ratio') + + def set_session(self, sess): + super().set_session(sess) + if self.ap.algorithm.normalization_stats is not None: + self.ap.algorithm.normalization_stats.set_session(sess) + + def fill_advantages(self, batch): + network_keys = self.ap.network_wrappers['main'].input_embedders_parameters.keys() + + current_state_values = self.networks['main'].online_network.predict(batch.states(network_keys))[0] + current_state_values = current_state_values.squeeze() + self.state_values.add_sample(current_state_values) + + # calculate advantages + advantages = [] + value_targets = [] + total_returns = batch.n_step_discounted_rewards() + + if self.policy_gradient_rescaler == PolicyGradientRescaler.A_VALUE: + advantages = total_returns - current_state_values + elif self.policy_gradient_rescaler == PolicyGradientRescaler.GAE: + # get bootstraps + episode_start_idx = 0 + advantages = np.array([]) + value_targets = np.array([]) + for idx, game_over in enumerate(batch.game_overs()): + if game_over: + # get advantages for the rollout + value_bootstrapping = np.zeros((1,)) + rollout_state_values = np.append(current_state_values[episode_start_idx:idx+1], value_bootstrapping) + + rollout_advantages, gae_based_value_targets = \ + self.get_general_advantage_estimation_values(batch.rewards()[episode_start_idx:idx+1], + rollout_state_values) + episode_start_idx = idx + 1 + advantages = np.append(advantages, rollout_advantages) + value_targets = np.append(value_targets, gae_based_value_targets) + else: + screen.warning("WARNING: The requested policy gradient rescaler is not available") + + # standardize + advantages = (advantages - np.mean(advantages)) / np.std(advantages) + + for transition, advantage, value_target in zip(batch.transitions, advantages, value_targets): + transition.info['advantage'] = advantage + transition.info['gae_based_value_target'] = value_target + + self.action_advantages.add_sample(advantages) + + def train_network(self, batch, epochs): + batch_results = [] + for j in range(epochs): + batch.shuffle() + batch_results = { + 'total_loss': [], + 'losses': [], + 'unclipped_grads': [], + 'kl_divergence': [], + 'entropy': [] + } + + fetches = [self.networks['main'].online_network.output_heads[1].kl_divergence, + self.networks['main'].online_network.output_heads[1].entropy, + 
self.networks['main'].online_network.output_heads[1].likelihood_ratio, + self.networks['main'].online_network.output_heads[1].clipped_likelihood_ratio] + + for i in range(int(batch.size / self.ap.network_wrappers['main'].batch_size)): + start = i * self.ap.network_wrappers['main'].batch_size + end = (i + 1) * self.ap.network_wrappers['main'].batch_size + + network_keys = self.ap.network_wrappers['main'].input_embedders_parameters.keys() + actions = batch.actions()[start:end] + gae_based_value_targets = batch.info('gae_based_value_target')[start:end] + if not isinstance(self.spaces.action, DiscreteActionSpace) and len(actions.shape) == 1: + actions = np.expand_dims(actions, -1) + + # get old policy probabilities and distribution + + # TODO-perf - the target network ("old_policy") is not changing. this can be calculated once for all epochs. + # the shuffling being done, should only be performed on the indices. + result = self.networks['main'].target_network.predict({k: v[start:end] for k, v in batch.states(network_keys).items()}) + old_policy_distribution = result[1:] + + total_returns = batch.n_step_discounted_rewards(expand_dims=True) + + # calculate gradients and apply on both the local policy network and on the global policy network + if self.ap.algorithm.estimate_state_value_using_gae: + value_targets = np.expand_dims(gae_based_value_targets, -1) + else: + value_targets = total_returns[start:end] + + inputs = copy.copy({k: v[start:end] for k, v in batch.states(network_keys).items()}) + inputs['output_1_0'] = actions + + # The old_policy_distribution needs to be represented as a list, because in the event of + # discrete controls, it has just a mean. otherwise, it has both a mean and standard deviation + for input_index, input in enumerate(old_policy_distribution): + inputs['output_1_{}'.format(input_index + 1)] = input + + # update the clipping decay schedule value + inputs['output_1_{}'.format(len(old_policy_distribution)+1)] = \ + self.ap.algorithm.clipping_decay_schedule.current_value + + total_loss, losses, unclipped_grads, fetch_result = \ + self.networks['main'].train_and_sync_networks( + inputs, [value_targets, batch.info('advantage')[start:end]], additional_fetches=fetches + ) + + batch_results['total_loss'].append(total_loss) + batch_results['losses'].append(losses) + batch_results['unclipped_grads'].append(unclipped_grads) + batch_results['kl_divergence'].append(fetch_result[0]) + batch_results['entropy'].append(fetch_result[1]) + + self.unclipped_grads.add_sample(unclipped_grads) + self.value_targets.add_sample(value_targets) + self.likelihood_ratio.add_sample(fetch_result[2]) + self.clipped_likelihood_ratio.add_sample(fetch_result[3]) + + for key in batch_results.keys(): + batch_results[key] = np.mean(batch_results[key], 0) + + self.value_loss.add_sample(batch_results['losses'][0]) + self.policy_loss.add_sample(batch_results['losses'][1]) + self.loss.add_sample(batch_results['total_loss']) + + if self.ap.network_wrappers['main'].learning_rate_decay_rate != 0: + curr_learning_rate = self.networks['main'].online_network.get_variable_value( + self.networks['main'].online_network.adaptive_learning_rate_scheme) + self.curr_learning_rate.add_sample(curr_learning_rate) + else: + curr_learning_rate = self.ap.network_wrappers['main'].learning_rate + + # log training parameters + screen.log_dict( + OrderedDict([ + ("Surrogate loss", batch_results['losses'][1]), + ("KL divergence", batch_results['kl_divergence']), + ("Entropy", batch_results['entropy']), + ("training epoch", j), + 
("learning_rate", curr_learning_rate) + ]), + prefix="Policy training" + ) + + self.total_kl_divergence_during_training_process = batch_results['kl_divergence'] + self.entropy.add_sample(batch_results['entropy']) + self.kl_divergence.add_sample(batch_results['kl_divergence']) + return batch_results['losses'] + + def post_training_commands(self): + # clean memory + self.call_memory('clean') + + def _should_train_helper(self, wait_for_full_episode=True): + return super()._should_train_helper(True) + + def train(self): + if self._should_train(wait_for_full_episode=True): + for network in self.networks.values(): + network.set_is_training(True) + + dataset = self.memory.transitions + dataset = self.pre_network_filter.filter(dataset, deep_copy=False) + batch = Batch(dataset) + + for training_step in range(self.ap.algorithm.num_consecutive_training_steps): + self.networks['main'].sync() + self.fill_advantages(batch) + + # take only the requested number of steps + if isinstance(self.ap.algorithm.num_consecutive_playing_steps, EnvironmentSteps): + dataset = dataset[:self.ap.algorithm.num_consecutive_playing_steps.num_steps] + shuffle(dataset) + batch = Batch(dataset) + + self.train_network(batch, self.ap.algorithm.optimization_epochs) + + for network in self.networks.values(): + network.set_is_training(False) + + self.post_training_commands() + self.training_iteration += 1 + # should be done in order to update the data that has been accumulated * while not playing * + self.update_log() + return None + + def run_pre_network_filter_for_inference(self, state: StateType): + dummy_env_response = EnvResponse(next_state=state, reward=0, game_over=False) + return self.pre_network_filter.filter(dummy_env_response, update_internal_state=False)[0].next_state + + def choose_action(self, curr_state): + self.ap.algorithm.clipping_decay_schedule.step() + return super().choose_action(curr_state) +
\ No newline at end of file
diff --git a/docs/_modules/rl_coach/agents/ddpg_agent.html b/docs/_modules/rl_coach/agents/ddpg_agent.html new file mode 100644 index 0000000..14c7c30 --- /dev/null +++ b/docs/_modules/rl_coach/agents/ddpg_agent.html @@ -0,0 +1,443 @@
rl_coach.agents.ddpg_agent — Reinforcement Learning Coach 0.11.0 documentation
Source code for rl_coach.agents.ddpg_agent

+#
+# Copyright (c) 2017 Intel Corporation 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import copy
+from typing import Union
+from collections import OrderedDict
+
+import numpy as np
+
+from rl_coach.agents.actor_critic_agent import ActorCriticAgent
+from rl_coach.agents.agent import Agent
+from rl_coach.architectures.embedder_parameters import InputEmbedderParameters
+from rl_coach.architectures.head_parameters import DDPGActorHeadParameters, VHeadParameters
+from rl_coach.architectures.middleware_parameters import FCMiddlewareParameters
+from rl_coach.base_parameters import NetworkParameters, AlgorithmParameters, \
+    AgentParameters, EmbedderScheme
+from rl_coach.core_types import ActionInfo, EnvironmentSteps
+from rl_coach.exploration_policies.ou_process import OUProcessParameters
+from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters
+from rl_coach.spaces import BoxActionSpace, GoalsSpace
+
+
+class DDPGCriticNetworkParameters(NetworkParameters):
+    def __init__(self):
+        super().__init__()
+        self.input_embedders_parameters = {'observation': InputEmbedderParameters(batchnorm=True),
+                                            'action': InputEmbedderParameters(scheme=EmbedderScheme.Shallow)}
+        self.middleware_parameters = FCMiddlewareParameters()
+        self.heads_parameters = [VHeadParameters()]
+        self.optimizer_type = 'Adam'
+        self.batch_size = 64
+        self.async_training = False
+        self.learning_rate = 0.001
+        self.create_target_network = True
+        self.shared_optimizer = True
+        self.scale_down_gradients_by_number_of_workers_for_sync_training = False
+
+
+class DDPGActorNetworkParameters(NetworkParameters):
+    def __init__(self):
+        super().__init__()
+        self.input_embedders_parameters = {'observation': InputEmbedderParameters(batchnorm=True)}
+        self.middleware_parameters = FCMiddlewareParameters(batchnorm=True)
+        self.heads_parameters = [DDPGActorHeadParameters()]
+        self.optimizer_type = 'Adam'
+        self.batch_size = 64
+        self.async_training = False
+        self.learning_rate = 0.0001
+        self.create_target_network = True
+        self.shared_optimizer = True
+        self.scale_down_gradients_by_number_of_workers_for_sync_training = False
+
+
+
[docs]class DDPGAlgorithmParameters(AlgorithmParameters): + """ + :param num_steps_between_copying_online_weights_to_target: (StepMethod) + The number of steps between copying the online network weights to the target network weights. + + :param rate_for_copying_weights_to_target: (float) + When copying the online network weights to the target network weights, a soft update will be used, which + weights the new online network weights by rate_for_copying_weights_to_target + + :param num_consecutive_playing_steps: (StepMethod) + The number of consecutive steps to act between every two training iterations + + :param use_target_network_for_evaluation: (bool) + If set to True, the target network will be used for predicting the actions when choosing actions to act. + Since the target network weights change more slowly, the predicted actions will be more consistent. + + :param action_penalty: (float) + The amount by which to penalize the network on high action feature (pre-activation) values. + This can prevent the action features from saturating the TanH activation function, and therefore prevent the + gradients from becoming very low. + + :param clip_critic_targets: (Tuple[float, float] or None) + The range to clip the critic target to in order to prevent overestimation of the action values. + + :param use_non_zero_discount_for_terminal_states: (bool) + If set to True, the discount factor will be used for terminal states to bootstrap the next predicted state + values. If set to False, the terminal state's reward will be taken as the target return for the network. + """ + def __init__(self): + super().__init__() + self.num_steps_between_copying_online_weights_to_target = EnvironmentSteps(1) + self.rate_for_copying_weights_to_target = 0.001 + self.num_consecutive_playing_steps = EnvironmentSteps(1) + self.use_target_network_for_evaluation = False + self.action_penalty = 0 + self.clip_critic_targets = None # expected to be a tuple of the form (min_clip_value, max_clip_value) or None + self.use_non_zero_discount_for_terminal_states = False
+ + +class DDPGAgentParameters(AgentParameters): + def __init__(self): + super().__init__(algorithm=DDPGAlgorithmParameters(), + exploration=OUProcessParameters(), + memory=EpisodicExperienceReplayParameters(), + networks=OrderedDict([("actor", DDPGActorNetworkParameters()), + ("critic", DDPGCriticNetworkParameters())])) + + @property + def path(self): + return 'rl_coach.agents.ddpg_agent:DDPGAgent' + + +# Deep Deterministic Policy Gradients Network - https://arxiv.org/pdf/1509.02971.pdf +class DDPGAgent(ActorCriticAgent): + def __init__(self, agent_parameters, parent: Union['LevelManager', 'CompositeAgent']=None): + super().__init__(agent_parameters, parent) + + self.q_values = self.register_signal("Q") + self.TD_targets_signal = self.register_signal("TD targets") + self.action_signal = self.register_signal("actions") + + def learn_from_batch(self, batch): + actor = self.networks['actor'] + critic = self.networks['critic'] + + actor_keys = self.ap.network_wrappers['actor'].input_embedders_parameters.keys() + critic_keys = self.ap.network_wrappers['critic'].input_embedders_parameters.keys() + + # TD error = r + discount*max(q_st_plus_1) - q_st + next_actions, actions_mean = actor.parallel_prediction([ + (actor.target_network, batch.next_states(actor_keys)), + (actor.online_network, batch.states(actor_keys)) + ]) + + critic_inputs = copy.copy(batch.next_states(critic_keys)) + critic_inputs['action'] = next_actions + q_st_plus_1 = critic.target_network.predict(critic_inputs) + + # calculate the bootstrapped TD targets while discounting terminal states according to + # use_non_zero_discount_for_terminal_states + if self.ap.algorithm.use_non_zero_discount_for_terminal_states: + TD_targets = batch.rewards(expand_dims=True) + self.ap.algorithm.discount * q_st_plus_1 + else: + TD_targets = batch.rewards(expand_dims=True) + \ + (1.0 - batch.game_overs(expand_dims=True)) * self.ap.algorithm.discount * q_st_plus_1 + + # clip the TD targets to prevent overestimation errors + if self.ap.algorithm.clip_critic_targets: + TD_targets = np.clip(TD_targets, *self.ap.algorithm.clip_critic_targets) + + self.TD_targets_signal.add_sample(TD_targets) + + # get the gradients of the critic output with respect to the action + critic_inputs = copy.copy(batch.states(critic_keys)) + critic_inputs['action'] = actions_mean + action_gradients = critic.online_network.predict(critic_inputs, + outputs=critic.online_network.gradients_wrt_inputs[0]['action']) + + # train the critic + critic_inputs = copy.copy(batch.states(critic_keys)) + critic_inputs['action'] = batch.actions(len(batch.actions().shape) == 1) + result = critic.train_and_sync_networks(critic_inputs, TD_targets) + total_loss, losses, unclipped_grads = result[:3] + + # apply the gradients from the critic to the actor + initial_feed_dict = {actor.online_network.gradients_weights_ph[0]: -action_gradients} + gradients = actor.online_network.predict(batch.states(actor_keys), + outputs=actor.online_network.weighted_gradients[0], + initial_feed_dict=initial_feed_dict) + + if actor.has_global: + actor.apply_gradients_to_global_network(gradients) + actor.update_online_network() + else: + actor.apply_gradients_to_online_network(gradients) + + return total_loss, losses, unclipped_grads + + def train(self): + return Agent.train(self) + + def choose_action(self, curr_state): + if not (isinstance(self.spaces.action, BoxActionSpace) or isinstance(self.spaces.action, GoalsSpace)): + raise ValueError("DDPG works only for continuous control problems") + # convert to batch so we 
can run it through the network + tf_input_state = self.prepare_batch_for_inference(curr_state, 'actor') + if self.ap.algorithm.use_target_network_for_evaluation: + actor_network = self.networks['actor'].target_network + else: + actor_network = self.networks['actor'].online_network + + action_values = actor_network.predict(tf_input_state).squeeze() + + action = self.exploration_policy.get_action(action_values) + + self.action_signal.add_sample(action) + + # get q value + tf_input_state = self.prepare_batch_for_inference(curr_state, 'critic') + action_batch = np.expand_dims(action, 0) + if type(action) != np.ndarray: + action_batch = np.array([[action]]) + tf_input_state['action'] = action_batch + q_value = self.networks['critic'].online_network.predict(tf_input_state)[0] + self.q_values.add_sample(q_value) + + action_info = ActionInfo(action=action, + action_value=q_value) + + return action_info +
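Two update rules drive the code above: the bootstrapped critic target computed in learn_from_batch(), and the soft target-network update governed by rate_for_copying_weights_to_target. The following plain NumPy sketch illustrates both; the helper names are illustrative and do not use the Coach network wrappers.

# Sketch of the two update rules behind the DDPG code above: the bootstrapped critic target
# (as in learn_from_batch) and the soft target-network update controlled by
# rate_for_copying_weights_to_target.
import numpy as np

def critic_targets(rewards, game_overs, q_next, discount=0.99, clip_range=None):
    targets = rewards + (1.0 - game_overs) * discount * q_next
    if clip_range is not None:                 # corresponds to clip_critic_targets
        targets = np.clip(targets, *clip_range)
    return targets

def soft_update(online_weights, target_weights, tau=0.001):
    # tau corresponds to rate_for_copying_weights_to_target
    return [tau * w + (1.0 - tau) * t for w, t in zip(online_weights, target_weights)]

print(critic_targets(np.array([1.0, 0.0]), np.array([0.0, 1.0]), np.array([5.0, 5.0])))
# -> [5.95, 0.0]: terminal transitions bootstrap nothing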
\ No newline at end of file
diff --git a/docs/_modules/rl_coach/agents/dfp_agent.html b/docs/_modules/rl_coach/agents/dfp_agent.html new file mode 100644 index 0000000..2734312 --- /dev/null +++ b/docs/_modules/rl_coach/agents/dfp_agent.html @@ -0,0 +1,475 @@
rl_coach.agents.dfp_agent — Reinforcement Learning Coach 0.11.0 documentation
Source code for rl_coach.agents.dfp_agent

+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import copy
+from enum import Enum
+from typing import Union
+
+import numpy as np
+
+from rl_coach.agents.agent import Agent
+from rl_coach.architectures.head_parameters import MeasurementsPredictionHeadParameters
+from rl_coach.architectures.embedder_parameters import InputEmbedderParameters
+from rl_coach.architectures.middleware_parameters import FCMiddlewareParameters
+from rl_coach.architectures.tensorflow_components.layers import Conv2d, Dense
+from rl_coach.base_parameters import AlgorithmParameters, AgentParameters, NetworkParameters, \
+     MiddlewareScheme
+from rl_coach.core_types import ActionInfo, EnvironmentSteps, RunPhase
+from rl_coach.exploration_policies.e_greedy import EGreedyParameters
+from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters
+from rl_coach.memories.memory import MemoryGranularity
+from rl_coach.spaces import SpacesDefinition, VectorObservationSpace
+
+
+class HandlingTargetsAfterEpisodeEnd(Enum):
+    LastStep = 0
+    NAN = 1
+
+
+class DFPNetworkParameters(NetworkParameters):
+    def __init__(self):
+        super().__init__()
+        self.input_embedders_parameters = {'observation': InputEmbedderParameters(activation_function='leaky_relu'),
+                                            'measurements': InputEmbedderParameters(activation_function='leaky_relu'),
+                                            'goal': InputEmbedderParameters(activation_function='leaky_relu')}
+
+        self.input_embedders_parameters['observation'].scheme = [
+            Conv2d(32, 8, 4),
+            Conv2d(64, 4, 2),
+            Conv2d(64, 3, 1),
+            Dense(512),
+        ]
+
+        self.input_embedders_parameters['measurements'].scheme = [
+            Dense(128),
+            Dense(128),
+            Dense(128),
+        ]
+
+        self.input_embedders_parameters['goal'].scheme = [
+            Dense(128),
+            Dense(128),
+            Dense(128),
+        ]
+
+        self.middleware_parameters = FCMiddlewareParameters(activation_function='leaky_relu',
+                                                            scheme=MiddlewareScheme.Empty)
+        self.heads_parameters = [MeasurementsPredictionHeadParameters(activation_function='leaky_relu')]
+        self.async_training = False
+        self.batch_size = 64
+        self.adam_optimizer_beta1 = 0.95
+
+
+class DFPMemoryParameters(EpisodicExperienceReplayParameters):
+    def __init__(self):
+        self.max_size = (MemoryGranularity.Transitions, 20000)
+        self.shared_memory = True
+        super().__init__()
+
+
+
[docs]class DFPAlgorithmParameters(AlgorithmParameters): + """ + :param num_predicted_steps_ahead: (int) + Number of future steps to predict measurements for. The future steps won't be sequential, but rather jump + in powers of 2. For example, if num_predicted_steps_ahead = 3, then the steps will be: t+1, t+2, t+4 + + :param goal_vector: (List[float]) + The goal vector will weight each of the measurements to form an optimization goal. The vector should have + the same length as the number of measurements, and it will be vector multiplied by the measurements. + Positive values correspond to trying to maximize the particular measurement, and negative values + correspond to trying to minimize the particular measurement. + + :param future_measurements_weights: (List[float]) + The future_measurements_weights weight the contribution of each of the predicted timesteps to the optimization + goal. For example, if there are 6 steps predicted ahead, and a future_measurements_weights vector with 3 values, + then only the 3 last timesteps will be taken into account, according to the weights in the + future_measurements_weights vector. + + :param use_accumulated_reward_as_measurement: (bool) + If set to True, the accumulated reward from the beginning of the episode will be added as a measurement to + the measurements vector in the state. This can be useful in environments where the given measurements don't + include enough information for the particular goal the agent should achieve. + + :param handling_targets_after_episode_end: (HandlingTargetsAfterEpisodeEnd) + Dictates how to handle measurements that are outside the episode length. + + :param scale_measurements_targets: (Dict[str, float]) + Allows rescaling the values of each of the measurements available. This can be useful when the measurements + have a different scale and you want to normalize them to the same scale. + """ + def __init__(self): + super().__init__() + self.num_predicted_steps_ahead = 6 + self.goal_vector = [1.0, 1.0] + self.future_measurements_weights = [0.5, 0.5, 1.0] + self.use_accumulated_reward_as_measurement = False + self.handling_targets_after_episode_end = HandlingTargetsAfterEpisodeEnd.NAN + self.scale_measurements_targets = {} + self.num_consecutive_playing_steps = EnvironmentSteps(8)
+ + +class DFPAgentParameters(AgentParameters): + def __init__(self): + super().__init__(algorithm=DFPAlgorithmParameters(), + exploration=EGreedyParameters(), + memory=DFPMemoryParameters(), + networks={"main": DFPNetworkParameters()}) + + @property + def path(self): + return 'rl_coach.agents.dfp_agent:DFPAgent' + + +# Direct Future Prediction Agent - http://vladlen.info/papers/learning-to-act.pdf +class DFPAgent(Agent): + def __init__(self, agent_parameters, parent: Union['LevelManager', 'CompositeAgent']=None): + super().__init__(agent_parameters, parent) + self.current_goal = self.ap.algorithm.goal_vector + self.target_measurements_scale_factors = None + + def learn_from_batch(self, batch): + network_keys = self.ap.network_wrappers['main'].input_embedders_parameters.keys() + + network_inputs = batch.states(network_keys) + network_inputs['goal'] = np.repeat(np.expand_dims(self.current_goal, 0), + self.ap.network_wrappers['main'].batch_size, axis=0) + + # get the current outputs of the network + targets = self.networks['main'].online_network.predict(network_inputs) + + # change the targets for the taken actions + for i in range(self.ap.network_wrappers['main'].batch_size): + targets[i, batch.actions()[i]] = batch[i].info['future_measurements'].flatten() + + result = self.networks['main'].train_and_sync_networks(network_inputs, targets) + total_loss, losses, unclipped_grads = result[:3] + + return total_loss, losses, unclipped_grads + + def choose_action(self, curr_state): + if self.exploration_policy.requires_action_values(): + # predict the future measurements + tf_input_state = self.prepare_batch_for_inference(curr_state, 'main') + tf_input_state['goal'] = np.expand_dims(self.current_goal, 0) + measurements_future_prediction = self.networks['main'].online_network.predict(tf_input_state)[0] + action_values = np.zeros(len(self.spaces.action.actions)) + num_steps_used_for_objective = len(self.ap.algorithm.future_measurements_weights) + + # calculate the score of each action by multiplying it's future measurements with the goal vector + for action_idx in range(len(self.spaces.action.actions)): + action_measurements = measurements_future_prediction[action_idx] + action_measurements = np.reshape(action_measurements, + (self.ap.algorithm.num_predicted_steps_ahead, + self.spaces.state['measurements'].shape[0])) + future_steps_values = np.dot(action_measurements, self.current_goal) + action_values[action_idx] = np.dot(future_steps_values[-num_steps_used_for_objective:], + self.ap.algorithm.future_measurements_weights) + else: + action_values = None + + # choose action according to the exploration policy and the current phase (evaluating or training the agent) + action = self.exploration_policy.get_action(action_values) + + if action_values is not None: + action_values = action_values.squeeze() + action_info = ActionInfo(action=action, action_value=action_values[action]) + else: + action_info = ActionInfo(action=action) + + return action_info + + def set_environment_parameters(self, spaces: SpacesDefinition): + self.spaces = copy.deepcopy(spaces) + self.spaces.goal = VectorObservationSpace(shape=self.spaces.state['measurements'].shape, + measurements_names= + self.spaces.state['measurements'].measurements_names) + + # if the user has filled some scale values, check that he got the names right + if set(self.spaces.state['measurements'].measurements_names).intersection( + self.ap.algorithm.scale_measurements_targets.keys()) !=\ + set(self.ap.algorithm.scale_measurements_targets.keys()): + raise 
ValueError("Some of the keys in parameter scale_measurements_targets ({}) are not defined in " + "the measurements space {}".format(self.ap.algorithm.scale_measurements_targets.keys(), + self.spaces.state['measurements'].measurements_names)) + + super().set_environment_parameters(self.spaces) + + # the below is done after calling the base class method, as it might add accumulated reward as a measurement + + # fill out the missing measurements scale factors + for measurement_name in self.spaces.state['measurements'].measurements_names: + if measurement_name not in self.ap.algorithm.scale_measurements_targets: + self.ap.algorithm.scale_measurements_targets[measurement_name] = 1 + + self.target_measurements_scale_factors = \ + np.array([self.ap.algorithm.scale_measurements_targets[measurement_name] for measurement_name in + self.spaces.state['measurements'].measurements_names]) + + def handle_episode_ended(self): + last_episode = self.current_episode_buffer + if self.phase in [RunPhase.TRAIN, RunPhase.HEATUP] and last_episode: + self._update_measurements_targets(last_episode, + self.ap.algorithm.num_predicted_steps_ahead) + super().handle_episode_ended() + + def _update_measurements_targets(self, episode, num_steps): + if 'measurements' not in episode.transitions[0].state or episode.transitions[0].state['measurements'] == []: + raise ValueError("Measurements are not present in the transitions of the last episode played. ") + measurements_size = self.spaces.state['measurements'].shape[0] + for transition_idx, transition in enumerate(episode.transitions): + transition.info['future_measurements'] = np.zeros((num_steps, measurements_size)) + for step in range(num_steps): + offset_idx = transition_idx + 2 ** step + + if offset_idx >= episode.length(): + if self.ap.algorithm.handling_targets_after_episode_end == HandlingTargetsAfterEpisodeEnd.NAN: + # the special MSE loss will ignore those entries so that the gradient will be 0 for these + transition.info['future_measurements'][step] = np.nan + continue + + elif self.ap.algorithm.handling_targets_after_episode_end == HandlingTargetsAfterEpisodeEnd.LastStep: + offset_idx = - 1 + + transition.info['future_measurements'][step] = \ + self.target_measurements_scale_factors * \ + (episode.transitions[offset_idx].state['measurements'] - transition.state['measurements']) +
\ No newline at end of file
diff --git a/docs/_modules/rl_coach/agents/dqn_agent.html b/docs/_modules/rl_coach/agents/dqn_agent.html new file mode 100644 index 0000000..c60551f --- /dev/null +++ b/docs/_modules/rl_coach/agents/dqn_agent.html @@ -0,0 +1,326 @@
rl_coach.agents.dqn_agent — Reinforcement Learning Coach 0.11.0 documentation
Source code for rl_coach.agents.dqn_agent

+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from typing import Union
+
+import numpy as np
+
+from rl_coach.agents.value_optimization_agent import ValueOptimizationAgent
+from rl_coach.architectures.embedder_parameters import InputEmbedderParameters
+from rl_coach.architectures.head_parameters import QHeadParameters
+from rl_coach.architectures.middleware_parameters import FCMiddlewareParameters
+from rl_coach.base_parameters import AlgorithmParameters, NetworkParameters, AgentParameters, \
+    MiddlewareScheme
+from rl_coach.core_types import EnvironmentSteps
+from rl_coach.exploration_policies.e_greedy import EGreedyParameters
+from rl_coach.memories.non_episodic.experience_replay import ExperienceReplayParameters
+from rl_coach.schedules import LinearSchedule
+
+
+
[docs]class DQNAlgorithmParameters(AlgorithmParameters): + def __init__(self): + super().__init__() + self.num_steps_between_copying_online_weights_to_target = EnvironmentSteps(10000) + self.num_consecutive_playing_steps = EnvironmentSteps(4) + self.discount = 0.99
+ + +class DQNNetworkParameters(NetworkParameters): + def __init__(self): + super().__init__() + self.input_embedders_parameters = {'observation': InputEmbedderParameters()} + self.middleware_parameters = FCMiddlewareParameters(scheme=MiddlewareScheme.Medium) + self.heads_parameters = [QHeadParameters()] + self.optimizer_type = 'Adam' + self.batch_size = 32 + self.replace_mse_with_huber_loss = True + self.create_target_network = True + + +class DQNAgentParameters(AgentParameters): + def __init__(self): + super().__init__(algorithm=DQNAlgorithmParameters(), + exploration=EGreedyParameters(), + memory=ExperienceReplayParameters(), + networks={"main": DQNNetworkParameters()}) + self.exploration.epsilon_schedule = LinearSchedule(1, 0.1, 1000000) + self.exploration.evaluation_epsilon = 0.05 + + @property + def path(self): + return 'rl_coach.agents.dqn_agent:DQNAgent' + + +# Deep Q Network - https://www.cs.toronto.edu/~vmnih/docs/dqn.pdf +
[docs]class DQNAgent(ValueOptimizationAgent): + def __init__(self, agent_parameters, parent: Union['LevelManager', 'CompositeAgent']=None): + super().__init__(agent_parameters, parent) + +
[docs] def learn_from_batch(self, batch): + network_keys = self.ap.network_wrappers['main'].input_embedders_parameters.keys() + + # for the action we actually took, the error is: + # TD error = r + discount*max(q_st_plus_1) - q_st + # # for all other actions, the error is 0 + q_st_plus_1, TD_targets = self.networks['main'].parallel_prediction([ + (self.networks['main'].target_network, batch.next_states(network_keys)), + (self.networks['main'].online_network, batch.states(network_keys)) + ]) + + # only update the action that we have actually done in this transition + TD_errors = [] + for i in range(self.ap.network_wrappers['main'].batch_size): + new_target = batch.rewards()[i] +\ + (1.0 - batch.game_overs()[i]) * self.ap.algorithm.discount * np.max(q_st_plus_1[i], 0) + TD_errors.append(np.abs(new_target - TD_targets[i, batch.actions()[i]])) + TD_targets[i, batch.actions()[i]] = new_target + + # update errors in prioritized replay buffer + importance_weights = self.update_transition_priorities_and_get_weights(TD_errors, batch) + + result = self.networks['main'].train_and_sync_networks(batch.states(network_keys), TD_targets, + importance_weights=importance_weights) + + total_loss, losses, unclipped_grads = result[:3] + + return total_loss, losses, unclipped_grads
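The per-sample loop in learn_from_batch() above builds the standard DQN target, r + (1 - done) * discount * max_a Q_target(s', a), written only into the slot of the action that was actually taken. The same computation in vectorized form, as an illustrative sketch:

# A vectorized sketch of the TD-target construction in DQNAgent.learn_from_batch() above:
# the bootstrapped target replaces only the entry of the action actually taken, so every
# other action keeps a zero error.
import numpy as np

def dqn_targets(q_next_target, q_current, rewards, game_overs, actions, discount=0.99):
    targets = q_current.copy()
    bootstrapped = rewards + (1.0 - game_overs) * discount * q_next_target.max(axis=1)
    targets[np.arange(len(actions)), actions] = bootstrapped
    return targets

q_next = np.array([[1.0, 3.0], [2.0, 0.0]])
q_curr = np.zeros((2, 2))
print(dqn_targets(q_next, q_curr, rewards=np.array([0.0, 1.0]),
                  game_overs=np.array([0.0, 1.0]), actions=np.array([1, 0])))
# -> [[0.0, 2.97], [1.0, 0.0]]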
\ No newline at end of file
diff --git a/docs/_modules/rl_coach/agents/mmc_agent.html b/docs/_modules/rl_coach/agents/mmc_agent.html
new file mode 100644
index 0000000..d27a727
--- /dev/null
+++ b/docs/_modules/rl_coach/agents/mmc_agent.html
@@ -0,0 +1,306 @@
+rl_coach.agents.mmc_agent — Reinforcement Learning Coach 0.11.0 documentation
Source code for rl_coach.agents.mmc_agent

+#
+# Copyright (c) 2017 Intel Corporation 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from typing import Union
+
+import numpy as np
+
+from rl_coach.agents.dqn_agent import DQNAgentParameters, DQNAlgorithmParameters
+from rl_coach.agents.value_optimization_agent import ValueOptimizationAgent
+from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters
+
+
+
[docs]class MixedMonteCarloAlgorithmParameters(DQNAlgorithmParameters):
+    """
+    :param monte_carlo_mixing_rate: (float)
+        The mixing rate is used for setting the amount of the monte carlo estimate (full return) that will be mixed
+        into the single-step bootstrapped targets.
+    """
+    def __init__(self):
+        super().__init__()
+        self.monte_carlo_mixing_rate = 0.1
+ + +class MixedMonteCarloAgentParameters(DQNAgentParameters): + def __init__(self): + super().__init__() + self.algorithm = MixedMonteCarloAlgorithmParameters() + self.memory = EpisodicExperienceReplayParameters() + + @property + def path(self): + return 'rl_coach.agents.mmc_agent:MixedMonteCarloAgent' + + +class MixedMonteCarloAgent(ValueOptimizationAgent): + def __init__(self, agent_parameters, parent: Union['LevelManager', 'CompositeAgent']=None): + super().__init__(agent_parameters, parent) + self.mixing_rate = agent_parameters.algorithm.monte_carlo_mixing_rate + + def learn_from_batch(self, batch): + network_keys = self.ap.network_wrappers['main'].input_embedders_parameters.keys() + + # for the 1-step, we use the double-dqn target. hence actions are taken greedily according to the online network + selected_actions = np.argmax(self.networks['main'].online_network.predict(batch.next_states(network_keys)), 1) + + # TD_targets are initialized with the current prediction so that we will + # only update the action that we have actually done in this transition + q_st_plus_1, TD_targets = self.networks['main'].parallel_prediction([ + (self.networks['main'].target_network, batch.next_states(network_keys)), + (self.networks['main'].online_network, batch.states(network_keys)) + ]) + + total_returns = batch.n_step_discounted_rewards() + + for i in range(self.ap.network_wrappers['main'].batch_size): + one_step_target = batch.rewards()[i] + \ + (1.0 - batch.game_overs()[i]) * self.ap.algorithm.discount * \ + q_st_plus_1[i][selected_actions[i]] + monte_carlo_target = total_returns[i] + TD_targets[i, batch.actions()[i]] = (1 - self.mixing_rate) * one_step_target + \ + self.mixing_rate * monte_carlo_target + + result = self.networks['main'].train_and_sync_networks(batch.states(network_keys), TD_targets) + total_loss, losses, unclipped_grads = result[:3] + + return total_loss, losses, unclipped_grads +
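The mixing step above reduces to a convex combination of the one-step (double DQN style) target and the full discounted return. A minimal sketch, assuming toy per-transition arrays in place of Coach's batch accessors:

import numpy as np

# illustrative sketch with assumed toy values
mixing_rate = 0.1                                  # monte_carlo_mixing_rate
discount = 0.99
rewards = np.array([1.0, 0.0])
game_overs = np.array([0.0, 1.0])
total_returns = np.array([3.5, 0.0])               # full discounted episode returns
q_next_target = np.array([[0.5, 1.2], [0.3, 0.1]]) # target network on s'
selected_actions = np.array([1, 0])                # greedy actions of the online network on s'

one_step_targets = rewards + (1.0 - game_overs) * discount * q_next_target[np.arange(2), selected_actions]
mixed_targets = (1 - mixing_rate) * one_step_targets + mixing_rate * total_returns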
\ No newline at end of file
diff --git a/docs/_modules/rl_coach/agents/n_step_q_agent.html b/docs/_modules/rl_coach/agents/n_step_q_agent.html
new file mode 100644
index 0000000..bb1f371
--- /dev/null
+++ b/docs/_modules/rl_coach/agents/n_step_q_agent.html
@@ -0,0 +1,373 @@
+rl_coach.agents.n_step_q_agent — Reinforcement Learning Coach 0.11.0 documentation
Source code for rl_coach.agents.n_step_q_agent

+#
+# Copyright (c) 2017 Intel Corporation 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from typing import Union
+
+import numpy as np
+
+from rl_coach.agents.policy_optimization_agent import PolicyOptimizationAgent
+from rl_coach.agents.value_optimization_agent import ValueOptimizationAgent
+from rl_coach.architectures.embedder_parameters import InputEmbedderParameters
+from rl_coach.architectures.head_parameters import QHeadParameters
+from rl_coach.architectures.middleware_parameters import FCMiddlewareParameters
+from rl_coach.base_parameters import AlgorithmParameters, AgentParameters, NetworkParameters
+
+from rl_coach.core_types import EnvironmentSteps
+from rl_coach.exploration_policies.e_greedy import EGreedyParameters
+from rl_coach.memories.episodic.single_episode_buffer import SingleEpisodeBufferParameters
+from rl_coach.utils import last_sample
+
+
+class NStepQNetworkParameters(NetworkParameters):
+    def __init__(self):
+        super().__init__()
+        self.input_embedders_parameters = {'observation': InputEmbedderParameters()}
+        self.middleware_parameters = FCMiddlewareParameters()
+        self.heads_parameters = [QHeadParameters()]
+        self.optimizer_type = 'Adam'
+        self.async_training = True
+        self.shared_optimizer = True
+        self.create_target_network = True
+
+
+
[docs]class NStepQAlgorithmParameters(AlgorithmParameters): + """ + :param num_steps_between_copying_online_weights_to_target: (StepMethod) + The number of steps between copying the online network weights to the target network weights. + + :param apply_gradients_every_x_episodes: (int) + The number of episodes between applying the accumulated gradients to the network. After every + num_steps_between_gradient_updates steps, the agent will calculate the gradients for the collected data, + it will then accumulate it in internal accumulators, and will only apply them to the network once in every + apply_gradients_every_x_episodes episodes. + + :param num_steps_between_gradient_updates: (int) + The number of steps between calculating gradients for the collected data. In the A3C paper, this parameter is + called t_max. Since this algorithm is on-policy, only the steps collected between each two gradient calculations + are used in the batch. + + :param targets_horizon: (str) + Should be either 'N-Step' or '1-Step', and defines the length for which to bootstrap the network values over. + Essentially, 1-Step follows the regular 1 step bootstrapping Q learning update. For more information, + please refer to the original paper (https://arxiv.org/abs/1602.01783) + """ + def __init__(self): + super().__init__() + self.num_steps_between_copying_online_weights_to_target = EnvironmentSteps(10000) + self.apply_gradients_every_x_episodes = 1 + self.num_steps_between_gradient_updates = 5 # this is called t_max in all the papers + self.targets_horizon = 'N-Step'
+
+
+class NStepQAgentParameters(AgentParameters):
+    def __init__(self):
+        super().__init__(algorithm=NStepQAlgorithmParameters(),
+                         exploration=EGreedyParameters(),
+                         memory=SingleEpisodeBufferParameters(),
+                         networks={"main": NStepQNetworkParameters()})
+
+    @property
+    def path(self):
+        return 'rl_coach.agents.n_step_q_agent:NStepQAgent'
+
+
+# N Step Q Learning Agent - https://arxiv.org/abs/1602.01783
+class NStepQAgent(ValueOptimizationAgent, PolicyOptimizationAgent):
+    def __init__(self, agent_parameters, parent: Union['LevelManager', 'CompositeAgent']=None):
+        super().__init__(agent_parameters, parent)
+        self.last_gradient_update_step_idx = 0
+        self.q_values = self.register_signal('Q Values')
+        self.value_loss = self.register_signal('Value Loss')
+
+    def learn_from_batch(self, batch):
+        # batch contains a list of episodes to learn from
+        network_keys = self.ap.network_wrappers['main'].input_embedders_parameters.keys()
+
+        # get the values for the current states
+        state_value_head_targets = self.networks['main'].online_network.predict(batch.states(network_keys))
+
+        # the targets for the state value estimator
+        if self.ap.algorithm.targets_horizon == '1-Step':
+            # 1-Step Q learning
+            q_st_plus_1 = self.networks['main'].target_network.predict(batch.next_states(network_keys))
+
+            for i in reversed(range(batch.size)):
+                state_value_head_targets[i][batch.actions()[i]] = \
+                    batch.rewards()[i] \
+                    + (1.0 - batch.game_overs()[i]) * self.ap.algorithm.discount * np.max(q_st_plus_1[i], 0)
+
+        elif self.ap.algorithm.targets_horizon == 'N-Step':
+            # N-Step Q learning
+            if batch.game_overs()[-1]:
+                R = 0
+            else:
+                R = np.max(self.networks['main'].target_network.predict(last_sample(batch.next_states(network_keys))))
+
+            for i in reversed(range(batch.size)):
+                R = batch.rewards()[i] + self.ap.algorithm.discount * R
+                state_value_head_targets[i][batch.actions()[i]] = R
+
+        else:
+            raise ValueError('The available values for targets_horizon are: 1-Step, N-Step')
+
+        # train
+        result = self.networks['main'].online_network.accumulate_gradients(batch.states(network_keys), [state_value_head_targets])
+
+        # logging
+        total_loss, losses, unclipped_grads = result[:3]
+        self.value_loss.add_sample(losses[0])
+
+        return total_loss, losses, unclipped_grads
+
+    def train(self):
+        # update the target network of every network that has a target network
+        if any([network.has_target for network in self.networks.values()]) \
+                and self._should_update_online_weights_to_target():
+            for network in self.networks.values():
+                network.update_target_network(self.ap.algorithm.rate_for_copying_weights_to_target)
+
+            self.agent_logger.create_signal_value('Update Target Network', 1)
+        else:
+            self.agent_logger.create_signal_value('Update Target Network', 0, overwrite=False)
+
+        return PolicyOptimizationAgent.train(self)
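The 'N-Step' branch above is a backward accumulation of rewards with a bootstrapped tail value. A standalone sketch, assuming a toy rollout and a hand-picked bootstrap value:

import numpy as np

# illustrative sketch with assumed toy values
discount = 0.99
rewards = np.array([0.0, 0.0, 1.0, 0.0, 2.0])
terminal_rollout = False
bootstrap_value = 1.7                              # max_a Q_target(s_last, a), ignored if terminal

R = 0.0 if terminal_rollout else bootstrap_value
n_step_targets = np.zeros_like(rewards)
for i in reversed(range(len(rewards))):
    R = rewards[i] + discount * R                  # R_t = r_t + gamma * R_{t+1}
    n_step_targets[i] = R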
\ No newline at end of file
diff --git a/docs/_modules/rl_coach/agents/naf_agent.html b/docs/_modules/rl_coach/agents/naf_agent.html
new file mode 100644
index 0000000..72d71b3
--- /dev/null
+++ b/docs/_modules/rl_coach/agents/naf_agent.html
@@ -0,0 +1,354 @@
+rl_coach.agents.naf_agent — Reinforcement Learning Coach 0.11.0 documentation
Source code for rl_coach.agents.naf_agent

+#
+# Copyright (c) 2017 Intel Corporation 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from typing import Union
+
+import numpy as np
+
+from rl_coach.agents.value_optimization_agent import ValueOptimizationAgent
+from rl_coach.architectures.embedder_parameters import InputEmbedderParameters
+from rl_coach.architectures.head_parameters import NAFHeadParameters
+from rl_coach.architectures.middleware_parameters import FCMiddlewareParameters
+from rl_coach.base_parameters import AlgorithmParameters, AgentParameters, \
+    NetworkParameters
+
+from rl_coach.core_types import ActionInfo, EnvironmentSteps
+from rl_coach.exploration_policies.ou_process import OUProcessParameters
+from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters
+from rl_coach.spaces import BoxActionSpace
+
+
+class NAFNetworkParameters(NetworkParameters):
+    def __init__(self):
+        super().__init__()
+        self.input_embedders_parameters = {'observation': InputEmbedderParameters()}
+        self.middleware_parameters = FCMiddlewareParameters()
+        self.heads_parameters = [NAFHeadParameters()]
+        self.optimizer_type = 'Adam'
+        self.learning_rate = 0.001
+        self.async_training = True
+        self.create_target_network = True
+
+
+
[docs]class NAFAlgorithmParameters(AlgorithmParameters): + def __init__(self): + super().__init__() + self.num_consecutive_training_steps = 5 + self.num_steps_between_copying_online_weights_to_target = EnvironmentSteps(1) + self.rate_for_copying_weights_to_target = 0.001
+ + +class NAFAgentParameters(AgentParameters): + def __init__(self): + super().__init__(algorithm=NAFAlgorithmParameters(), + exploration=OUProcessParameters(), + memory=EpisodicExperienceReplayParameters(), + networks={"main": NAFNetworkParameters()}) + + @property + def path(self): + return 'rl_coach.agents.naf_agent:NAFAgent' + + +# Normalized Advantage Functions - https://arxiv.org/pdf/1603.00748.pdf +class NAFAgent(ValueOptimizationAgent): + def __init__(self, agent_parameters, parent: Union['LevelManager', 'CompositeAgent']=None): + super().__init__(agent_parameters, parent) + self.l_values = self.register_signal("L") + self.a_values = self.register_signal("Advantage") + self.mu_values = self.register_signal("Action") + self.v_values = self.register_signal("V") + self.TD_targets = self.register_signal("TD targets") + + def learn_from_batch(self, batch): + network_keys = self.ap.network_wrappers['main'].input_embedders_parameters.keys() + + # TD error = r + discount*v_st_plus_1 - q_st + v_st_plus_1 = self.networks['main'].target_network.predict( + batch.next_states(network_keys), + self.networks['main'].target_network.output_heads[0].V, + squeeze_output=False, + ) + TD_targets = np.expand_dims(batch.rewards(), -1) + \ + (1.0 - np.expand_dims(batch.game_overs(), -1)) * self.ap.algorithm.discount * v_st_plus_1 + + self.TD_targets.add_sample(TD_targets) + + result = self.networks['main'].train_and_sync_networks({**batch.states(network_keys), + 'output_0_0': batch.actions(len(batch.actions().shape) == 1) + }, TD_targets) + total_loss, losses, unclipped_grads = result[:3] + + return total_loss, losses, unclipped_grads + + def choose_action(self, curr_state): + if type(self.spaces.action) != BoxActionSpace: + raise ValueError('NAF works only for continuous control problems') + + # convert to batch so we can run it through the network + tf_input_state = self.prepare_batch_for_inference(curr_state, 'main') + naf_head = self.networks['main'].online_network.output_heads[0] + action_values = self.networks['main'].online_network.predict(tf_input_state, outputs=naf_head.mu, + squeeze_output=False) + + # get the actual action to use + action = self.exploration_policy.get_action(action_values) + + # get the internal values for logging + outputs = [naf_head.mu, naf_head.Q, naf_head.L, naf_head.A, naf_head.V] + result = self.networks['main'].online_network.predict( + {**tf_input_state, 'output_0_0': action_values}, + outputs=outputs + ) + mu, Q, L, A, V = result + + # store the q values statistics for logging + self.q_values.add_sample(Q) + self.l_values.add_sample(L) + self.a_values.add_sample(A) + self.mu_values.add_sample(mu) + self.v_values.add_sample(V) + + action_info = ActionInfo(action=action, action_value=Q) + + return action_info +
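NAF makes Q(s, a) = V(s) + A(s, a) tractable by restricting the advantage to a quadratic form, A(s, a) = -0.5 (a - mu)^T P (a - mu) with P = L L^T built from the learned lower-triangular L head, so the greedy action is simply mu. A minimal sketch of that decomposition with assumed values; it is not the head's TensorFlow implementation:

import numpy as np

# illustrative sketch with assumed toy network outputs
mu = np.array([0.1, -0.3])                         # greedy action predicted by the head
V = 1.5                                            # state value predicted by the head
L = np.array([[0.8, 0.0],                          # learned lower-triangular matrix
              [0.2, 0.5]])
P = L @ L.T                                        # positive semi-definite

def q_value(action):
    diff = action - mu
    advantage = -0.5 * diff @ P @ diff             # quadratic advantage, always <= 0
    return V + advantage

assert q_value(mu) >= q_value(np.array([1.0, 1.0]))  # Q peaks at a = mu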
\ No newline at end of file
diff --git a/docs/_modules/rl_coach/agents/nec_agent.html b/docs/_modules/rl_coach/agents/nec_agent.html
new file mode 100644
index 0000000..8b63939
--- /dev/null
+++ b/docs/_modules/rl_coach/agents/nec_agent.html
@@ -0,0 +1,435 @@
+rl_coach.agents.nec_agent — Reinforcement Learning Coach 0.11.0 documentation
Source code for rl_coach.agents.nec_agent

+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+import pickle
+from typing import Union
+
+import numpy as np
+
+from rl_coach.agents.value_optimization_agent import ValueOptimizationAgent
+from rl_coach.architectures.embedder_parameters import InputEmbedderParameters
+from rl_coach.architectures.head_parameters import DNDQHeadParameters
+from rl_coach.architectures.middleware_parameters import FCMiddlewareParameters
+from rl_coach.base_parameters import AlgorithmParameters, NetworkParameters, AgentParameters
+
+from rl_coach.core_types import RunPhase, EnvironmentSteps, Episode, StateType
+from rl_coach.exploration_policies.e_greedy import EGreedyParameters
+from rl_coach.logger import screen
+from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters, MemoryGranularity
+from rl_coach.schedules import ConstantSchedule
+
+
+class NECNetworkParameters(NetworkParameters):
+    def __init__(self):
+        super().__init__()
+        self.input_embedders_parameters = {'observation': InputEmbedderParameters()}
+        self.middleware_parameters = FCMiddlewareParameters()
+        self.heads_parameters = [DNDQHeadParameters()]
+        self.optimizer_type = 'Adam'
+
+
+
[docs]class NECAlgorithmParameters(AlgorithmParameters):
+    """
+    :param dnd_size: (int)
+        Defines the number of transitions that will be stored in each one of the DNDs. Note that the total number
+        of transitions that will be stored is dnd_size x num_actions.
+
+    :param l2_norm_added_delta: (float)
+        A small value that will be added when calculating the weight of each of the DND entries. This follows the
+        :math:`\delta` parameter defined in the paper.
+
+    :param new_value_shift_coefficient: (float)
+        In the case where a new embedding that is added to the DND is already present, the value that will be stored
+        in the DND is a mix between the existing value and the new value. The mix rate is defined by
+        new_value_shift_coefficient.
+
+    :param number_of_knn: (int)
+        The number of neighbors that will be retrieved for each DND query.
+
+    :param DND_key_error_threshold: (float)
+        When the DND is queried for a specific embedding, this threshold will be used to determine if the embedding
+        exists in the DND, since exact matches of embeddings are very rare.
+
+    :param propagate_updates_to_DND: (bool)
+        If set to True, when the gradients of the network are calculated, they will also be
+        backpropagated through the keys of the DND. The keys will then be updated as well, as if they were regular
+        network weights.
+
+    :param n_step: (int)
+        The bootstrap length that will be used when calculating the state values to store in the DND.
+
+    :param bootstrap_total_return_from_old_policy: (bool)
+        If set to True, the bootstrap that will be used to calculate each state-action value is the network value
+        when the state was first seen, and not the latest, most up-to-date network value.
+    """
+    def __init__(self):
+        super().__init__()
+        self.dnd_size = 500000
+        self.l2_norm_added_delta = 0.001
+        self.new_value_shift_coefficient = 0.1
+        self.number_of_knn = 50
+        self.DND_key_error_threshold = 0
+        self.num_consecutive_playing_steps = EnvironmentSteps(4)
+        self.propagate_updates_to_DND = False
+        self.n_step = 100
+        self.bootstrap_total_return_from_old_policy = True
+ + +class NECMemoryParameters(EpisodicExperienceReplayParameters): + def __init__(self): + super().__init__() + self.max_size = (MemoryGranularity.Transitions, 100000) + + +class NECAgentParameters(AgentParameters): + def __init__(self): + super().__init__(algorithm=NECAlgorithmParameters(), + exploration=EGreedyParameters(), + memory=NECMemoryParameters(), + networks={"main": NECNetworkParameters()}) + self.exploration.epsilon_schedule = ConstantSchedule(0.1) + self.exploration.evaluation_epsilon = 0.01 + + @property + def path(self): + return 'rl_coach.agents.nec_agent:NECAgent' + + +# Neural Episodic Control - https://arxiv.org/pdf/1703.01988.pdf +class NECAgent(ValueOptimizationAgent): + def __init__(self, agent_parameters, parent: Union['LevelManager', 'CompositeAgent']=None): + super().__init__(agent_parameters, parent) + self.current_episode_state_embeddings = [] + self.training_started = False + self.current_episode_buffer = \ + Episode(discount=self.ap.algorithm.discount, + n_step=self.ap.algorithm.n_step, + bootstrap_total_return_from_old_policy=self.ap.algorithm.bootstrap_total_return_from_old_policy) + + def learn_from_batch(self, batch): + if not self.networks['main'].online_network.output_heads[0].DND.has_enough_entries(self.ap.algorithm.number_of_knn): + return 0, [], 0 + else: + if not self.training_started: + self.training_started = True + screen.log_title("Finished collecting initial entries in DND. Starting to train network...") + + network_keys = self.ap.network_wrappers['main'].input_embedders_parameters.keys() + + TD_targets = self.networks['main'].online_network.predict(batch.states(network_keys)) + bootstrapped_return_from_old_policy = batch.n_step_discounted_rewards() + # only update the action that we have actually done in this transition + for i in range(self.ap.network_wrappers['main'].batch_size): + TD_targets[i, batch.actions()[i]] = bootstrapped_return_from_old_policy[i] + + # set the gradients to fetch for the DND update + fetches = [] + head = self.networks['main'].online_network.output_heads[0] + if self.ap.algorithm.propagate_updates_to_DND: + fetches = [head.dnd_embeddings_grad, head.dnd_values_grad, head.dnd_indices] + + # train the neural network + result = self.networks['main'].train_and_sync_networks(batch.states(network_keys), TD_targets, fetches) + + total_loss, losses, unclipped_grads = result[:3] + + # update the DND keys and values using the extracted gradients + if self.ap.algorithm.propagate_updates_to_DND: + embedding_gradients = np.swapaxes(result[-1][0], 0, 1) + value_gradients = np.swapaxes(result[-1][1], 0, 1) + indices = np.swapaxes(result[-1][2], 0, 1) + head.DND.update_keys_and_values(batch.actions(), embedding_gradients, value_gradients, indices) + + return total_loss, losses, unclipped_grads + + def act(self): + if self.phase == RunPhase.HEATUP: + # get embedding in heatup (otherwise we get it through get_prediction) + embedding = self.networks['main'].online_network.predict( + self.prepare_batch_for_inference(self.curr_state, 'main'), + outputs=self.networks['main'].online_network.state_embedding) + self.current_episode_state_embeddings.append(embedding) + + return super().act() + + def get_all_q_values_for_states(self, states: StateType): + # we need to store the state embeddings regardless if the action is random or not + return self.get_prediction(states) + + def get_prediction(self, states): + # get the actions q values and the state embedding + embedding, actions_q_values = self.networks['main'].online_network.predict( + 
self.prepare_batch_for_inference(states, 'main'), + outputs=[self.networks['main'].online_network.state_embedding, + self.networks['main'].online_network.output_heads[0].output] + ) + if self.phase != RunPhase.TEST: + # store the state embedding for inserting it to the DND later + self.current_episode_state_embeddings.append(embedding.squeeze()) + actions_q_values = actions_q_values[0][0] + return actions_q_values + + def reset_internal_state(self): + super().reset_internal_state() + self.current_episode_state_embeddings = [] + self.current_episode_buffer = \ + Episode(discount=self.ap.algorithm.discount, + n_step=self.ap.algorithm.n_step, + bootstrap_total_return_from_old_policy=self.ap.algorithm.bootstrap_total_return_from_old_policy) + + def handle_episode_ended(self): + super().handle_episode_ended() + + # get the last full episode that we have collected + episode = self.call_memory('get_last_complete_episode') + if episode is not None and self.phase != RunPhase.TEST: + assert len(self.current_episode_state_embeddings) == episode.length() + discounted_rewards = episode.get_transitions_attribute('n_step_discounted_rewards') + actions = episode.get_transitions_attribute('action') + self.networks['main'].online_network.output_heads[0].DND.add(self.current_episode_state_embeddings, + actions, discounted_rewards) + + def save_checkpoint(self, checkpoint_id): + with open(os.path.join(self.ap.task_parameters.checkpoint_save_dir, str(checkpoint_id) + '.dnd'), 'wb') as f: + pickle.dump(self.networks['main'].online_network.output_heads[0].DND, f, pickle.HIGHEST_PROTOCOL) +
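The DND lookup that backs this agent retrieves number_of_knn nearest keys and weights their stored values by an inverse-distance kernel that uses l2_norm_added_delta. A minimal sketch of that estimate, assuming toy keys and values and the kernel form from the paper rather than the DND class itself:

import numpy as np

# illustrative sketch with assumed toy DND contents
delta = 0.001                                      # l2_norm_added_delta
number_of_knn = 2

dnd_keys = np.random.rand(10, 4)                   # stored embeddings for one action
dnd_values = np.random.rand(10)                    # stored value estimates
query = np.random.rand(4)                          # current state embedding

distances = np.sum((dnd_keys - query) ** 2, axis=1)
nearest = np.argsort(distances)[:number_of_knn]
kernel = 1.0 / (distances[nearest] + delta)        # k(h, h_i) = 1 / (||h - h_i||^2 + delta)
weights = kernel / kernel.sum()
q_estimate = np.dot(weights, dnd_values[nearest])  # weighted average of the neighbors' values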
\ No newline at end of file
diff --git a/docs/_modules/rl_coach/agents/pal_agent.html b/docs/_modules/rl_coach/agents/pal_agent.html
new file mode 100644
index 0000000..0344d4c
--- /dev/null
+++ b/docs/_modules/rl_coach/agents/pal_agent.html
@@ -0,0 +1,334 @@
+rl_coach.agents.pal_agent — Reinforcement Learning Coach 0.11.0 documentation
Source code for rl_coach.agents.pal_agent

+#
+# Copyright (c) 2017 Intel Corporation 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from typing import Union
+
+import numpy as np
+
+from rl_coach.agents.dqn_agent import DQNAgentParameters, DQNAlgorithmParameters
+from rl_coach.agents.value_optimization_agent import ValueOptimizationAgent
+from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters
+
+
+
[docs]class PALAlgorithmParameters(DQNAlgorithmParameters):
+    """
+    :param pal_alpha: (float)
+        A factor that weights the amount by which the advantage learning update will be taken into account.
+
+    :param persistent_advantage_learning: (bool)
+        If set to True, the persistent mode of advantage learning will be used, which encourages the agent to take
+        the same actions one after the other instead of changing actions.
+
+    :param monte_carlo_mixing_rate: (float)
+        The amount of monte carlo values to mix into the targets of the network. The monte carlo values are just the
+        total discounted returns, and they can help reduce the time it takes for the network to update to the newly
+        seen values, since they are not based on bootstrapping the current network values.
+    """
+    def __init__(self):
+        super().__init__()
+        self.pal_alpha = 0.9
+        self.persistent_advantage_learning = False
+        self.monte_carlo_mixing_rate = 0.1
+ + +class PALAgentParameters(DQNAgentParameters): + def __init__(self): + super().__init__() + self.algorithm = PALAlgorithmParameters() + self.memory = EpisodicExperienceReplayParameters() + + @property + def path(self): + return 'rl_coach.agents.pal_agent:PALAgent' + + +# Persistent Advantage Learning - https://arxiv.org/pdf/1512.04860.pdf +class PALAgent(ValueOptimizationAgent): + def __init__(self, agent_parameters, parent: Union['LevelManager', 'CompositeAgent']=None): + super().__init__(agent_parameters, parent) + self.alpha = agent_parameters.algorithm.pal_alpha + self.persistent = agent_parameters.algorithm.persistent_advantage_learning + self.monte_carlo_mixing_rate = agent_parameters.algorithm.monte_carlo_mixing_rate + + def learn_from_batch(self, batch): + network_keys = self.ap.network_wrappers['main'].input_embedders_parameters.keys() + + # next state values + q_st_plus_1_target, q_st_plus_1_online = self.networks['main'].parallel_prediction([ + (self.networks['main'].target_network, batch.next_states(network_keys)), + (self.networks['main'].online_network, batch.next_states(network_keys)) + ]) + selected_actions = np.argmax(q_st_plus_1_online, 1) + v_st_plus_1_target = np.max(q_st_plus_1_target, 1) + + # current state values + q_st_target, q_st_online = self.networks['main'].parallel_prediction([ + (self.networks['main'].target_network, batch.states(network_keys)), + (self.networks['main'].online_network, batch.states(network_keys)) + ]) + v_st_target = np.max(q_st_target, 1) + + # calculate TD error + TD_targets = np.copy(q_st_online) + total_returns = batch.n_step_discounted_rewards() + for i in range(self.ap.network_wrappers['main'].batch_size): + TD_targets[i, batch.actions()[i]] = batch.rewards()[i] + \ + (1.0 - batch.game_overs()[i]) * self.ap.algorithm.discount * \ + q_st_plus_1_target[i][selected_actions[i]] + advantage_learning_update = v_st_target[i] - q_st_target[i, batch.actions()[i]] + next_advantage_learning_update = v_st_plus_1_target[i] - q_st_plus_1_target[i, selected_actions[i]] + # Persistent Advantage Learning or Regular Advantage Learning + if self.persistent: + TD_targets[i, batch.actions()[i]] -= self.alpha * min(advantage_learning_update, next_advantage_learning_update) + else: + TD_targets[i, batch.actions()[i]] -= self.alpha * advantage_learning_update + + # mixing monte carlo updates + monte_carlo_target = total_returns[i] + TD_targets[i, batch.actions()[i]] = (1 - self.monte_carlo_mixing_rate) * TD_targets[i, batch.actions()[i]] \ + + self.monte_carlo_mixing_rate * monte_carlo_target + + result = self.networks['main'].train_and_sync_networks(batch.states(network_keys), TD_targets) + total_loss, losses, unclipped_grads = result[:3] + + return total_loss, losses, unclipped_grads +
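The advantage learning correction above subtracts pal_alpha times an advantage gap from a DQN-style target, and the persistent variant takes the smaller of the current and next-state gaps. A per-transition sketch with assumed values:

import numpy as np

# illustrative sketch with assumed toy values for a single transition
alpha = 0.9                                        # pal_alpha
discount = 0.99
persistent = False
reward, game_over, action = 1.0, 0.0, 1

q_st_target = np.array([0.4, 0.9, 0.2])            # target network on s
q_st_plus_1_target = np.array([0.5, 1.1, 0.3])     # target network on s'
selected_action = int(np.argmax(np.array([0.6, 1.0, 0.2])))  # greedy w.r.t. the online network on s'

dqn_target = reward + (1.0 - game_over) * discount * q_st_plus_1_target[selected_action]
advantage_gap = np.max(q_st_target) - q_st_target[action]
next_advantage_gap = np.max(q_st_plus_1_target) - q_st_plus_1_target[selected_action]

if persistent:
    pal_target = dqn_target - alpha * min(advantage_gap, next_advantage_gap)
else:
    pal_target = dqn_target - alpha * advantage_gap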
\ No newline at end of file
diff --git a/docs/_modules/rl_coach/agents/policy_gradients_agent.html b/docs/_modules/rl_coach/agents/policy_gradients_agent.html
new file mode 100644
index 0000000..adf3150
--- /dev/null
+++ b/docs/_modules/rl_coach/agents/policy_gradients_agent.html
@@ -0,0 +1,356 @@
+rl_coach.agents.policy_gradients_agent — Reinforcement Learning Coach 0.11.0 documentation
Source code for rl_coach.agents.policy_gradients_agent

+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from typing import Union
+
+import numpy as np
+
+from rl_coach.agents.policy_optimization_agent import PolicyOptimizationAgent, PolicyGradientRescaler
+from rl_coach.architectures.embedder_parameters import InputEmbedderParameters
+from rl_coach.architectures.head_parameters import PolicyHeadParameters
+from rl_coach.architectures.middleware_parameters import FCMiddlewareParameters
+from rl_coach.base_parameters import NetworkParameters, AlgorithmParameters, \
+    AgentParameters
+
+from rl_coach.exploration_policies.additive_noise import AdditiveNoiseParameters
+from rl_coach.exploration_policies.categorical import CategoricalParameters
+from rl_coach.logger import screen
+from rl_coach.memories.episodic.single_episode_buffer import SingleEpisodeBufferParameters
+from rl_coach.spaces import DiscreteActionSpace, BoxActionSpace
+
+
+class PolicyGradientNetworkParameters(NetworkParameters):
+    def __init__(self):
+        super().__init__()
+        self.input_embedders_parameters = {'observation': InputEmbedderParameters()}
+        self.middleware_parameters = FCMiddlewareParameters()
+        self.heads_parameters = [PolicyHeadParameters()]
+        self.async_training = True
+
+
+
[docs]class PolicyGradientAlgorithmParameters(AlgorithmParameters):
+    """
+    :param policy_gradient_rescaler: (PolicyGradientRescaler)
+        The rescaler type to use for the policy gradient loss. For policy gradients, we calculate the log probability
+        of the action and then multiply it by the policy gradient rescaler. The most basic rescaler is the discounted
+        return, but there are other rescalers that are intended for reducing the variance of the updates.
+
+    :param apply_gradients_every_x_episodes: (int)
+        The number of episodes between applying the accumulated gradients to the network. After every
+        num_steps_between_gradient_updates steps, the agent will calculate the gradients for the collected data,
+        it will then accumulate them in internal accumulators, and will only apply them to the network once in every
+        apply_gradients_every_x_episodes episodes.
+
+    :param beta_entropy: (float)
+        A factor which defines the amount of entropy regularization to apply to the network. The entropy of the actions
+        will be added to the loss and scaled by the given beta factor.
+
+    :param num_steps_between_gradient_updates: (int)
+        The number of steps between calculating gradients for the collected data. In the A3C paper, this parameter is
+        called t_max. Since this algorithm is on-policy, only the steps collected between each two gradient calculations
+        are used in the batch.
+    """
+    def __init__(self):
+        super().__init__()
+        self.policy_gradient_rescaler = PolicyGradientRescaler.FUTURE_RETURN_NORMALIZED_BY_TIMESTEP
+        self.apply_gradients_every_x_episodes = 5
+        self.beta_entropy = 0
+        self.num_steps_between_gradient_updates = 20000  # this is called t_max in all the papers
+ + +class PolicyGradientsAgentParameters(AgentParameters): + def __init__(self): + super().__init__(algorithm=PolicyGradientAlgorithmParameters(), + exploration={DiscreteActionSpace: CategoricalParameters(), + BoxActionSpace: AdditiveNoiseParameters()}, + memory=SingleEpisodeBufferParameters(), + networks={"main": PolicyGradientNetworkParameters()}) + + @property + def path(self): + return 'rl_coach.agents.policy_gradients_agent:PolicyGradientsAgent' + + +class PolicyGradientsAgent(PolicyOptimizationAgent): + def __init__(self, agent_parameters, parent: Union['LevelManager', 'CompositeAgent']=None): + super().__init__(agent_parameters, parent) + self.returns_mean = self.register_signal('Returns Mean') + self.returns_variance = self.register_signal('Returns Variance') + self.last_gradient_update_step_idx = 0 + + def learn_from_batch(self, batch): + # batch contains a list of episodes to learn from + network_keys = self.ap.network_wrappers['main'].input_embedders_parameters.keys() + + total_returns = batch.n_step_discounted_rewards() + for i in reversed(range(batch.size)): + if self.policy_gradient_rescaler == PolicyGradientRescaler.TOTAL_RETURN: + total_returns[i] = total_returns[0] + elif self.policy_gradient_rescaler == PolicyGradientRescaler.FUTURE_RETURN: + # just take the total return as it is + pass + elif self.policy_gradient_rescaler == PolicyGradientRescaler.FUTURE_RETURN_NORMALIZED_BY_EPISODE: + # we can get a single transition episode while playing Doom Basic, causing the std to be 0 + if self.std_discounted_return != 0: + total_returns[i] = (total_returns[i] - self.mean_discounted_return) / self.std_discounted_return + else: + total_returns[i] = 0 + elif self.policy_gradient_rescaler == PolicyGradientRescaler.FUTURE_RETURN_NORMALIZED_BY_TIMESTEP: + total_returns[i] -= self.mean_return_over_multiple_episodes[i] + else: + screen.warning("WARNING: The requested policy gradient rescaler is not available") + + targets = total_returns + actions = batch.actions() + if type(self.spaces.action) != DiscreteActionSpace and len(actions.shape) < 2: + actions = np.expand_dims(actions, -1) + + self.returns_mean.add_sample(np.mean(total_returns)) + self.returns_variance.add_sample(np.std(total_returns)) + + result = self.networks['main'].online_network.accumulate_gradients( + {**batch.states(network_keys), 'output_0_0': actions}, targets + ) + total_loss, losses, unclipped_grads = result[:3] + + return total_loss, losses, unclipped_grads +
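The FUTURE_RETURN family of rescalers comes down to the discounted return from each timestep onwards, optionally standardized. A short sketch with an assumed reward sequence:

import numpy as np

# illustrative sketch with an assumed reward sequence
discount = 0.99
rewards = np.array([0.0, 1.0, 0.0, 2.0])

# discounted future return from every timestep (FUTURE_RETURN)
future_returns = np.zeros_like(rewards)
running = 0.0
for t in reversed(range(len(rewards))):
    running = rewards[t] + discount * running
    future_returns[t] = running

# FUTURE_RETURN_NORMALIZED_BY_EPISODE: standardize within the episode
std = future_returns.std()
normalized = (future_returns - future_returns.mean()) / std if std > 0 else np.zeros_like(future_returns)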
\ No newline at end of file
diff --git a/docs/_modules/rl_coach/agents/ppo_agent.html b/docs/_modules/rl_coach/agents/ppo_agent.html
new file mode 100644
index 0000000..619ec6d
--- /dev/null
+++ b/docs/_modules/rl_coach/agents/ppo_agent.html
@@ -0,0 +1,620 @@
+rl_coach.agents.ppo_agent — Reinforcement Learning Coach 0.11.0 documentation
Source code for rl_coach.agents.ppo_agent

+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import copy
+from collections import OrderedDict
+from typing import Union
+
+import numpy as np
+
+from rl_coach.agents.actor_critic_agent import ActorCriticAgent
+from rl_coach.agents.policy_optimization_agent import PolicyGradientRescaler
+from rl_coach.architectures.embedder_parameters import InputEmbedderParameters
+from rl_coach.architectures.head_parameters import PPOHeadParameters, VHeadParameters
+from rl_coach.architectures.middleware_parameters import FCMiddlewareParameters
+from rl_coach.base_parameters import AlgorithmParameters, NetworkParameters, \
+    AgentParameters, DistributedTaskParameters
+
+from rl_coach.core_types import EnvironmentSteps, Batch
+from rl_coach.exploration_policies.additive_noise import AdditiveNoiseParameters
+from rl_coach.exploration_policies.categorical import CategoricalParameters
+from rl_coach.logger import screen
+from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters
+from rl_coach.spaces import DiscreteActionSpace, BoxActionSpace
+from rl_coach.utils import force_list
+
+
+class PPOCriticNetworkParameters(NetworkParameters):
+    def __init__(self):
+        super().__init__()
+        self.input_embedders_parameters = {'observation': InputEmbedderParameters(activation_function='tanh')}
+        self.middleware_parameters = FCMiddlewareParameters(activation_function='tanh')
+        self.heads_parameters = [VHeadParameters()]
+        self.async_training = True
+        self.l2_regularization = 0
+        self.create_target_network = True
+        self.batch_size = 128
+
+
+class PPOActorNetworkParameters(NetworkParameters):
+    def __init__(self):
+        super().__init__()
+        self.input_embedders_parameters = {'observation': InputEmbedderParameters(activation_function='tanh')}
+        self.middleware_parameters = FCMiddlewareParameters(activation_function='tanh')
+        self.heads_parameters = [PPOHeadParameters()]
+        self.optimizer_type = 'Adam'
+        self.async_training = True
+        self.l2_regularization = 0
+        self.create_target_network = True
+        self.batch_size = 128
+
+
+
[docs]class PPOAlgorithmParameters(AlgorithmParameters):
+    """
+    :param policy_gradient_rescaler: (PolicyGradientRescaler)
+        This represents how the critic will be used to update the actor. The critic value function is typically used
+        to rescale the gradients calculated by the actor. There are several ways for doing this, such as using the
+        advantage of the action, or the generalized advantage estimation (GAE) value.
+
+    :param gae_lambda: (float)
+        The :math:`\lambda` value is used within the GAE function in order to weight different bootstrap length
+        estimations. Typical values are in the range 0.9-1, and define an exponential decay over the different
+        n-step estimations.
+
+    :param target_kl_divergence: (float)
+        The target kl divergence between the current policy distribution and the new policy. PPO uses a heuristic to
+        bring the KL divergence to this value, by adding a penalty if the kl divergence is higher.
+
+    :param initial_kl_coefficient: (float)
+        The initial weight that will be given to the KL divergence between the current and the new policy in the
+        regularization factor.
+
+    :param high_kl_penalty_coefficient: (float)
+        The penalty that will be given for KL divergence values which are higher than what was defined as the target.
+
+    :param clip_likelihood_ratio_using_epsilon: (float)
+        If not None, the likelihood ratio between the current and new policy in the PPO loss function will be
+        clipped to the range [1-clip_likelihood_ratio_using_epsilon, 1+clip_likelihood_ratio_using_epsilon].
+        This is typically used in the Clipped PPO version of PPO, and should be set to None in regular PPO
+        implementations.
+
+    :param value_targets_mix_fraction: (float)
+        The targets for the value network are an exponential weighted moving average which uses this mix fraction to
+        define how much of the new targets will be taken into account when calculating the loss.
+        This value should be set to the range (0,1], where 1 means that only the new targets will be taken into account.
+
+    :param estimate_state_value_using_gae: (bool)
+        If set to True, the state value will be estimated using the GAE technique.
+
+    :param use_kl_regularization: (bool)
+        If set to True, the loss function will be regularized using the KL divergence between the current and new
+        policy, to bound the change of the policy during the network update.
+
+    :param beta_entropy: (float)
+        An entropy regularization term can be added to the loss function in order to control exploration. This term
+        is weighted using the :math:`\beta` value defined by beta_entropy.
+
+    """
+    def __init__(self):
+        super().__init__()
+        self.policy_gradient_rescaler = PolicyGradientRescaler.GAE
+        self.gae_lambda = 0.96
+        self.target_kl_divergence = 0.01
+        self.initial_kl_coefficient = 1.0
+        self.high_kl_penalty_coefficient = 1000
+        self.clip_likelihood_ratio_using_epsilon = None
+        self.value_targets_mix_fraction = 0.1
+        self.estimate_state_value_using_gae = True
+        self.use_kl_regularization = True
+        self.beta_entropy = 0.01
+        self.num_consecutive_playing_steps = EnvironmentSteps(5000)
+ + +class PPOAgentParameters(AgentParameters): + def __init__(self): + super().__init__(algorithm=PPOAlgorithmParameters(), + exploration={DiscreteActionSpace: CategoricalParameters(), + BoxActionSpace: AdditiveNoiseParameters()}, + memory=EpisodicExperienceReplayParameters(), + networks={"critic": PPOCriticNetworkParameters(), "actor": PPOActorNetworkParameters()}) + + @property + def path(self): + return 'rl_coach.agents.ppo_agent:PPOAgent' + + +# Proximal Policy Optimization - https://arxiv.org/pdf/1707.06347.pdf +class PPOAgent(ActorCriticAgent): + def __init__(self, agent_parameters, parent: Union['LevelManager', 'CompositeAgent']=None): + super().__init__(agent_parameters, parent) + + # signals definition + self.value_loss = self.register_signal('Value Loss') + self.policy_loss = self.register_signal('Policy Loss') + self.kl_divergence = self.register_signal('KL Divergence') + self.total_kl_divergence_during_training_process = 0.0 + self.unclipped_grads = self.register_signal('Grads (unclipped)') + + def fill_advantages(self, batch): + batch = Batch(batch) + network_keys = self.ap.network_wrappers['critic'].input_embedders_parameters.keys() + + # * Found not to have any impact * + # current_states_with_timestep = self.concat_state_and_timestep(batch) + + current_state_values = self.networks['critic'].online_network.predict(batch.states(network_keys)).squeeze() + total_returns = batch.n_step_discounted_rewards() + # calculate advantages + advantages = [] + if self.policy_gradient_rescaler == PolicyGradientRescaler.A_VALUE: + advantages = total_returns - current_state_values + elif self.policy_gradient_rescaler == PolicyGradientRescaler.GAE: + # get bootstraps + episode_start_idx = 0 + advantages = np.array([]) + # current_state_values[batch.game_overs()] = 0 + for idx, game_over in enumerate(batch.game_overs()): + if game_over: + # get advantages for the rollout + value_bootstrapping = np.zeros((1,)) + rollout_state_values = np.append(current_state_values[episode_start_idx:idx+1], value_bootstrapping) + + rollout_advantages, _ = \ + self.get_general_advantage_estimation_values(batch.rewards()[episode_start_idx:idx+1], + rollout_state_values) + episode_start_idx = idx + 1 + advantages = np.append(advantages, rollout_advantages) + else: + screen.warning("WARNING: The requested policy gradient rescaler is not available") + + # standardize + advantages = (advantages - np.mean(advantages)) / np.std(advantages) + + # TODO: this will be problematic with a shared memory + for transition, advantage in zip(self.memory.transitions, advantages): + transition.info['advantage'] = advantage + + self.action_advantages.add_sample(advantages) + + def train_value_network(self, dataset, epochs): + loss = [] + batch = Batch(dataset) + network_keys = self.ap.network_wrappers['critic'].input_embedders_parameters.keys() + + # * Found not to have any impact * + # add a timestep to the observation + # current_states_with_timestep = self.concat_state_and_timestep(dataset) + + mix_fraction = self.ap.algorithm.value_targets_mix_fraction + total_returns = batch.n_step_discounted_rewards(True) + for j in range(epochs): + curr_batch_size = batch.size + if self.networks['critic'].online_network.optimizer_type != 'LBFGS': + curr_batch_size = self.ap.network_wrappers['critic'].batch_size + for i in range(batch.size // curr_batch_size): + # split to batches for first order optimization techniques + current_states_batch = { + k: v[i * curr_batch_size:(i + 1) * curr_batch_size] + for k, v in 
batch.states(network_keys).items() + } + total_return_batch = total_returns[i * curr_batch_size:(i + 1) * curr_batch_size] + old_policy_values = force_list(self.networks['critic'].target_network.predict( + current_states_batch).squeeze()) + if self.networks['critic'].online_network.optimizer_type != 'LBFGS': + targets = total_return_batch + else: + current_values = self.networks['critic'].online_network.predict(current_states_batch) + targets = current_values * (1 - mix_fraction) + total_return_batch * mix_fraction + + inputs = copy.copy(current_states_batch) + for input_index, input in enumerate(old_policy_values): + name = 'output_0_{}'.format(input_index) + if name in self.networks['critic'].online_network.inputs: + inputs[name] = input + + value_loss = self.networks['critic'].online_network.accumulate_gradients(inputs, targets) + + self.networks['critic'].apply_gradients_to_online_network() + if isinstance(self.ap.task_parameters, DistributedTaskParameters): + self.networks['critic'].apply_gradients_to_global_network() + self.networks['critic'].online_network.reset_accumulated_gradients() + + loss.append([value_loss[0]]) + loss = np.mean(loss, 0) + return loss + + def concat_state_and_timestep(self, dataset): + current_states_with_timestep = [np.append(transition.state['observation'], transition.info['timestep']) + for transition in dataset] + current_states_with_timestep = np.expand_dims(current_states_with_timestep, -1) + return current_states_with_timestep + + def train_policy_network(self, dataset, epochs): + loss = [] + for j in range(epochs): + loss = { + 'total_loss': [], + 'policy_losses': [], + 'unclipped_grads': [], + 'fetch_result': [] + } + #shuffle(dataset) + for i in range(len(dataset) // self.ap.network_wrappers['actor'].batch_size): + batch = Batch(dataset[i * self.ap.network_wrappers['actor'].batch_size: + (i + 1) * self.ap.network_wrappers['actor'].batch_size]) + + network_keys = self.ap.network_wrappers['actor'].input_embedders_parameters.keys() + + advantages = batch.info('advantage') + actions = batch.actions() + if not isinstance(self.spaces.action, DiscreteActionSpace) and len(actions.shape) == 1: + actions = np.expand_dims(actions, -1) + + # get old policy probabilities and distribution + old_policy = force_list(self.networks['actor'].target_network.predict(batch.states(network_keys))) + + # calculate gradients and apply on both the local policy network and on the global policy network + fetches = [self.networks['actor'].online_network.output_heads[0].kl_divergence, + self.networks['actor'].online_network.output_heads[0].entropy] + + inputs = copy.copy(batch.states(network_keys)) + inputs['output_0_0'] = actions + + # old_policy_distribution needs to be represented as a list, because in the event of discrete controls, + # it has just a mean. 
otherwise, it has both a mean and standard deviation + for input_index, input in enumerate(old_policy): + inputs['output_0_{}'.format(input_index + 1)] = input + + total_loss, policy_losses, unclipped_grads, fetch_result =\ + self.networks['actor'].online_network.accumulate_gradients( + inputs, [advantages], additional_fetches=fetches) + + self.networks['actor'].apply_gradients_to_online_network() + if isinstance(self.ap.task_parameters, DistributedTaskParameters): + self.networks['actor'].apply_gradients_to_global_network() + + self.networks['actor'].online_network.reset_accumulated_gradients() + + loss['total_loss'].append(total_loss) + loss['policy_losses'].append(policy_losses) + loss['unclipped_grads'].append(unclipped_grads) + loss['fetch_result'].append(fetch_result) + + self.unclipped_grads.add_sample(unclipped_grads) + + for key in loss.keys(): + loss[key] = np.mean(loss[key], 0) + + if self.ap.network_wrappers['critic'].learning_rate_decay_rate != 0: + curr_learning_rate = self.networks['critic'].online_network.get_variable_value(self.ap.learning_rate) + self.curr_learning_rate.add_sample(curr_learning_rate) + else: + curr_learning_rate = self.ap.network_wrappers['critic'].learning_rate + + # log training parameters + screen.log_dict( + OrderedDict([ + ("Surrogate loss", loss['policy_losses'][0]), + ("KL divergence", loss['fetch_result'][0]), + ("Entropy", loss['fetch_result'][1]), + ("training epoch", j), + ("learning_rate", curr_learning_rate) + ]), + prefix="Policy training" + ) + + self.total_kl_divergence_during_training_process = loss['fetch_result'][0] + self.entropy.add_sample(loss['fetch_result'][1]) + self.kl_divergence.add_sample(loss['fetch_result'][0]) + return loss['total_loss'] + + def update_kl_coefficient(self): + # John Schulman takes the mean kl divergence only over the last epoch which is strange but we will follow + # his implementation for now because we know it works well + screen.log_title("KL = {}".format(self.total_kl_divergence_during_training_process)) + + # update kl coefficient + kl_target = self.ap.algorithm.target_kl_divergence + kl_coefficient = self.networks['actor'].online_network.get_variable_value( + self.networks['actor'].online_network.output_heads[0].kl_coefficient) + new_kl_coefficient = kl_coefficient + if self.total_kl_divergence_during_training_process > 1.3 * kl_target: + # kl too high => increase regularization + new_kl_coefficient *= 1.5 + elif self.total_kl_divergence_during_training_process < 0.7 * kl_target: + # kl too low => decrease regularization + new_kl_coefficient /= 1.5 + + # update the kl coefficient variable + if kl_coefficient != new_kl_coefficient: + self.networks['actor'].online_network.set_variable_value( + self.networks['actor'].online_network.output_heads[0].assign_kl_coefficient, + new_kl_coefficient, + self.networks['actor'].online_network.output_heads[0].kl_coefficient_ph) + + screen.log_title("KL penalty coefficient change = {} -> {}".format(kl_coefficient, new_kl_coefficient)) + + def post_training_commands(self): + if self.ap.algorithm.use_kl_regularization: + self.update_kl_coefficient() + + # clean memory + self.call_memory('clean') + + def _should_train_helper(self, wait_for_full_episode=True): + return super()._should_train_helper(True) + + def train(self): + loss = 0 + if self._should_train(wait_for_full_episode=True): + for network in self.networks.values(): + network.set_is_training(True) + + for training_step in range(self.ap.algorithm.num_consecutive_training_steps): + self.networks['actor'].sync() + 
self.networks['critic'].sync() + + dataset = self.memory.transitions + + self.fill_advantages(dataset) + + # take only the requested number of steps + dataset = dataset[:self.ap.algorithm.num_consecutive_playing_steps.num_steps] + + value_loss = self.train_value_network(dataset, 1) + policy_loss = self.train_policy_network(dataset, 10) + + self.value_loss.add_sample(value_loss) + self.policy_loss.add_sample(policy_loss) + + for network in self.networks.values(): + network.set_is_training(False) + + self.post_training_commands() + self.training_iteration += 1 + self.update_log() # should be done in order to update the data that has been accumulated * while not playing * + return np.append(value_loss, policy_loss) + + def get_prediction(self, states): + tf_input_state = self.prepare_batch_for_inference(states, "actor") + return self.networks['actor'].online_network.predict(tf_input_state) +
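The GAE rescaler used in fill_advantages follows delta_t = r_t + gamma * V(s_{t+1}) - V(s_t) and A_t = sum_l (gamma * lambda)^l * delta_{t+l}. A compact sketch for a single rollout with assumed rewards and values; get_general_advantage_estimation_values is Coach's own implementation, this only spells out the underlying recursion:

import numpy as np

# illustrative sketch with assumed rewards and value predictions
gamma, lam = 0.99, 0.96                            # discount and gae_lambda
rewards = np.array([0.0, 1.0, 0.0, 2.0])
values = np.array([0.5, 0.6, 0.4, 0.7, 0.0])       # V(s_0)..V(s_T), terminal bootstrap = 0

deltas = rewards + gamma * values[1:] - values[:-1]

advantages = np.zeros_like(rewards)
gae = 0.0
for t in reversed(range(len(rewards))):
    gae = deltas[t] + gamma * lam * gae            # A_t = delta_t + gamma * lambda * A_{t+1}
    advantages[t] = gae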
\ No newline at end of file
diff --git a/docs/_modules/rl_coach/agents/qr_dqn_agent.html b/docs/_modules/rl_coach/agents/qr_dqn_agent.html
new file mode 100644
index 0000000..e3bb992
--- /dev/null
+++ b/docs/_modules/rl_coach/agents/qr_dqn_agent.html
@@ -0,0 +1,347 @@
+rl_coach.agents.qr_dqn_agent — Reinforcement Learning Coach 0.11.0 documentation
Source code for rl_coach.agents.qr_dqn_agent

+#
+# Copyright (c) 2017 Intel Corporation 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from typing import Union
+
+import numpy as np
+
+from rl_coach.agents.dqn_agent import DQNAgentParameters, DQNNetworkParameters, DQNAlgorithmParameters
+from rl_coach.agents.value_optimization_agent import ValueOptimizationAgent
+from rl_coach.architectures.head_parameters import QuantileRegressionQHeadParameters
+from rl_coach.core_types import StateType
+from rl_coach.schedules import LinearSchedule
+
+
+class QuantileRegressionDQNNetworkParameters(DQNNetworkParameters):
+    def __init__(self):
+        super().__init__()
+        self.heads_parameters = [QuantileRegressionQHeadParameters()]
+        self.learning_rate = 0.00005
+        self.optimizer_epsilon = 0.01 / 32
+
+
+
[docs]class QuantileRegressionDQNAlgorithmParameters(DQNAlgorithmParameters): + """ + :param atoms: (int) + the number of atoms to predict for each action + + :param huber_loss_interval: (float) + One of the Huber loss parameters, referred to as :math:`\kappa` in the paper. + It describes the interval [-k, k] in which the Huber loss acts as an MSE loss. + """ + def __init__(self): + super().__init__() + self.atoms = 200 + self.huber_loss_interval = 1 # called k in the paper
+ + +class QuantileRegressionDQNAgentParameters(DQNAgentParameters): + def __init__(self): + super().__init__() + self.algorithm = QuantileRegressionDQNAlgorithmParameters() + self.network_wrappers = {"main": QuantileRegressionDQNNetworkParameters()} + self.exploration.epsilon_schedule = LinearSchedule(1, 0.01, 1000000) + self.exploration.evaluation_epsilon = 0.001 + + @property + def path(self): + return 'rl_coach.agents.qr_dqn_agent:QuantileRegressionDQNAgent' + + +# Quantile Regression Deep Q Network - https://arxiv.org/pdf/1710.10044v1.pdf +class QuantileRegressionDQNAgent(ValueOptimizationAgent): + def __init__(self, agent_parameters, parent: Union['LevelManager', 'CompositeAgent']=None): + super().__init__(agent_parameters, parent) + self.quantile_probabilities = np.ones(self.ap.algorithm.atoms) / float(self.ap.algorithm.atoms) + + def get_q_values(self, quantile_values): + return np.dot(quantile_values, self.quantile_probabilities) + + # prediction's format is (batch,actions,atoms) + def get_all_q_values_for_states(self, states: StateType): + if self.exploration_policy.requires_action_values(): + quantile_values = self.get_prediction(states) + actions_q_values = self.get_q_values(quantile_values) + else: + actions_q_values = None + return actions_q_values + + def learn_from_batch(self, batch): + network_keys = self.ap.network_wrappers['main'].input_embedders_parameters.keys() + + # get the quantiles of the next states and current states + next_state_quantiles, current_quantiles = self.networks['main'].parallel_prediction([ + (self.networks['main'].target_network, batch.next_states(network_keys)), + (self.networks['main'].online_network, batch.states(network_keys)) + ]) + + # get the optimal actions to take for the next states + target_actions = np.argmax(self.get_q_values(next_state_quantiles), axis=1) + + # calculate the Bellman update + batch_idx = list(range(self.ap.network_wrappers['main'].batch_size)) + + TD_targets = batch.rewards(True) + (1.0 - batch.game_overs(True)) * self.ap.algorithm.discount \ + * next_state_quantiles[batch_idx, target_actions] + + # get the locations of the selected actions within the batch for indexing purposes + actions_locations = [[b, a] for b, a in zip(batch_idx, batch.actions())] + + # calculate the cumulative quantile probabilities and reorder them to fit the sorted quantiles order + cumulative_probabilities = np.array(range(self.ap.algorithm.atoms + 1)) / float(self.ap.algorithm.atoms) # tau_i + quantile_midpoints = 0.5*(cumulative_probabilities[1:] + cumulative_probabilities[:-1]) # tau^hat_i + quantile_midpoints = np.tile(quantile_midpoints, (self.ap.network_wrappers['main'].batch_size, 1)) + sorted_quantiles = np.argsort(current_quantiles[batch_idx, batch.actions()]) + for idx in range(self.ap.network_wrappers['main'].batch_size): + quantile_midpoints[idx, :] = quantile_midpoints[idx, sorted_quantiles[idx]] + + # train + result = self.networks['main'].train_and_sync_networks({ + **batch.states(network_keys), + 'output_0_0': actions_locations, + 'output_0_1': quantile_midpoints, + }, TD_targets) + total_loss, losses, unclipped_grads = result[:3] + + return total_loss, losses, unclipped_grads + +
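To make the quantile bookkeeping in learn_from_batch above concrete, here is a small standalone NumPy sketch of the tau_i / tau^hat_i computation, using a hypothetical atoms=4 instead of the agent's default of 200:

import numpy as np

atoms = 4  # hypothetical; QuantileRegressionDQNAlgorithmParameters defaults to 200
# tau_i: cumulative quantile probabilities [0, 0.25, 0.5, 0.75, 1]
cumulative_probabilities = np.arange(atoms + 1) / float(atoms)
# tau^hat_i: the quantile midpoints the network outputs are regressed against
quantile_midpoints = 0.5 * (cumulative_probabilities[1:] + cumulative_probabilities[:-1])
print(quantile_midpoints)  # [0.125 0.375 0.625 0.875]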
\ No newline at end of file
diff --git a/docs/_modules/rl_coach/agents/rainbow_dqn_agent.html b/docs/_modules/rl_coach/agents/rainbow_dqn_agent.html
new file mode 100644
index 0000000..f71cbf9
--- /dev/null
+++ b/docs/_modules/rl_coach/agents/rainbow_dqn_agent.html
@@ -0,0 +1,359 @@
+ rl_coach.agents.rainbow_dqn_agent — Reinforcement Learning Coach 0.11.0 documentation

Source code for rl_coach.agents.rainbow_dqn_agent

+#
+# Copyright (c) 2017 Intel Corporation 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from typing import Union
+
+import numpy as np
+
+from rl_coach.agents.categorical_dqn_agent import CategoricalDQNAlgorithmParameters, \
+    CategoricalDQNAgent, CategoricalDQNAgentParameters
+from rl_coach.agents.dqn_agent import DQNNetworkParameters
+from rl_coach.architectures.head_parameters import RainbowQHeadParameters
+from rl_coach.architectures.middleware_parameters import FCMiddlewareParameters
+from rl_coach.base_parameters import MiddlewareScheme
+from rl_coach.exploration_policies.parameter_noise import ParameterNoiseParameters
+from rl_coach.memories.non_episodic.prioritized_experience_replay import PrioritizedExperienceReplayParameters, \
+    PrioritizedExperienceReplay
+
+
+class RainbowDQNNetworkParameters(DQNNetworkParameters):
+    def __init__(self):
+        super().__init__()
+        self.heads_parameters = [RainbowQHeadParameters()]
+        self.middleware_parameters = FCMiddlewareParameters(scheme=MiddlewareScheme.Empty)
+
+
+
[docs]class RainbowDQNAlgorithmParameters(CategoricalDQNAlgorithmParameters): + """ + :param n_step: (int) + The number of steps to bootstrap the network over. The actual rewards of the first N-1 steps will be accumulated + using an exponentially growing discount factor, and the Nth step will be bootstrapped from the network + prediction. + + :param store_transitions_only_when_episodes_are_terminated: (bool) + If set to True, the transitions will be stored in an Episode object until the episode ends, and only then + written to the memory. This is useful since we want to calculate the N-step discounted rewards before saving the + transitions into the memory, and to do so we need the entire episode first. + """ + def __init__(self): + super().__init__() + self.n_step = 3 + + # needed for n-step updates to work, i.e. waiting for a full episode to be closed before storing each transition + self.store_transitions_only_when_episodes_are_terminated = True
+ + +class RainbowDQNAgentParameters(CategoricalDQNAgentParameters): + def __init__(self): + super().__init__() + self.algorithm = RainbowDQNAlgorithmParameters() + self.exploration = ParameterNoiseParameters(self) + self.memory = PrioritizedExperienceReplayParameters() + self.network_wrappers = {"main": RainbowDQNNetworkParameters()} + + @property + def path(self): + return 'rl_coach.agents.rainbow_dqn_agent:RainbowDQNAgent' + + +# Rainbow Deep Q Network - https://arxiv.org/abs/1710.02298 +# Agent implementation is composed of: +# 1. NoisyNets +# 2. C51 +# 3. Prioritized ER +# 4. DDQN +# 5. Dueling DQN +# 6. N-step returns + +class RainbowDQNAgent(CategoricalDQNAgent): + def __init__(self, agent_parameters, parent: Union['LevelManager', 'CompositeAgent']=None): + super().__init__(agent_parameters, parent) + + def learn_from_batch(self, batch): + network_keys = self.ap.network_wrappers['main'].input_embedders_parameters.keys() + + ddqn_selected_actions = np.argmax(self.distribution_prediction_to_q_values( + self.networks['main'].online_network.predict(batch.next_states(network_keys))), axis=1) + + # for the action we actually took, the error is calculated by the atoms distribution + # for all other actions, the error is 0 + distributional_q_st_plus_n, TD_targets = self.networks['main'].parallel_prediction([ + (self.networks['main'].target_network, batch.next_states(network_keys)), + (self.networks['main'].online_network, batch.states(network_keys)) + ]) + + # only update the action that we have actually done in this transition (using the Double-DQN selected actions) + target_actions = ddqn_selected_actions + m = np.zeros((self.ap.network_wrappers['main'].batch_size, self.z_values.size)) + + batches = np.arange(self.ap.network_wrappers['main'].batch_size) + for j in range(self.z_values.size): + # we use batch.info('should_bootstrap_next_state') instead of (1 - batch.game_overs()) since with n-step, + # we will not bootstrap for the last n-step transitions in the episode + tzj = np.fmax(np.fmin(batch.n_step_discounted_rewards() + batch.info('should_bootstrap_next_state') * + (self.ap.algorithm.discount ** self.ap.algorithm.n_step) * self.z_values[j], + self.z_values[-1]), self.z_values[0]) + bj = (tzj - self.z_values[0])/(self.z_values[1] - self.z_values[0]) + u = (np.ceil(bj)).astype(int) + l = (np.floor(bj)).astype(int) + m[batches, l] += (distributional_q_st_plus_n[batches, target_actions, j] * (u - bj)) + m[batches, u] += (distributional_q_st_plus_n[batches, target_actions, j] * (bj - l)) + + # total_loss = cross entropy between actual result above and predicted result for the given action + TD_targets[batches, batch.actions()] = m + + # update errors in prioritized replay buffer + importance_weights = batch.info('weight') if isinstance(self.memory, PrioritizedExperienceReplay) else None + + result = self.networks['main'].train_and_sync_networks(batch.states(network_keys), TD_targets, + importance_weights=importance_weights) + + total_loss, losses, unclipped_grads = result[:3] + + # TODO: fix this spaghetti code + if isinstance(self.memory, PrioritizedExperienceReplay): + errors = losses[0][np.arange(batch.size), batch.actions()] + self.call_memory('update_priorities', (batch.info('idx'), errors)) + + return total_loss, losses, unclipped_grads + +
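The projection loop in learn_from_batch above spreads each target atom's probability mass over the two nearest atoms of the fixed support. Below is a hedged, single-transition NumPy sketch of that projection; the 5-atom support and the probabilities are made-up values, and a guard for the case where an atom lands exactly on a support point is added for completeness (the agent code above relies on the floor/ceil split alone):

import numpy as np

z_values = np.linspace(-10.0, 10.0, 5)                # hypothetical support atoms
target_probs = np.array([0.1, 0.2, 0.4, 0.2, 0.1])    # hypothetical next-state distribution for the target action
reward, discount_n, bootstrap = 0.5, 0.9, 1.0         # n-step reward, discount**n, should_bootstrap flag

m = np.zeros_like(z_values)
for j in range(z_values.size):
    # Bellman-update atom j and clip it back into the support range
    tzj = np.clip(reward + bootstrap * discount_n * z_values[j], z_values[0], z_values[-1])
    bj = (tzj - z_values[0]) / (z_values[1] - z_values[0])
    l, u = int(np.floor(bj)), int(np.ceil(bj))
    if l == u:                        # atom falls exactly on a support point
        m[l] += target_probs[j]
    else:                             # split the mass between the two neighbouring atoms
        m[l] += target_probs[j] * (u - bj)
        m[u] += target_probs[j] * (bj - l)
# m now sums to 1 and serves as the cross-entropy target for the taken action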
\ No newline at end of file
diff --git a/docs/_modules/rl_coach/agents/value_optimization_agent.html b/docs/_modules/rl_coach/agents/value_optimization_agent.html
new file mode 100644
index 0000000..88a4267
--- /dev/null
+++ b/docs/_modules/rl_coach/agents/value_optimization_agent.html
@@ -0,0 +1,325 @@
+ rl_coach.agents.value_optimization_agent — Reinforcement Learning Coach 0.11.0 documentation

Source code for rl_coach.agents.value_optimization_agent

+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from typing import Union
+
+import numpy as np
+
+from rl_coach.agents.agent import Agent
+from rl_coach.core_types import ActionInfo, StateType
+from rl_coach.memories.non_episodic.prioritized_experience_replay import PrioritizedExperienceReplay
+from rl_coach.spaces import DiscreteActionSpace
+
+
+## This is an abstract agent - there is no learn_from_batch method ##
+
+
+class ValueOptimizationAgent(Agent):
+    def __init__(self, agent_parameters, parent: Union['LevelManager', 'CompositeAgent']=None):
+        super().__init__(agent_parameters, parent)
+        self.q_values = self.register_signal("Q")
+        self.q_value_for_action = {}
+
+    def init_environment_dependent_modules(self):
+        super().init_environment_dependent_modules()
+        if isinstance(self.spaces.action, DiscreteActionSpace):
+            for i in range(len(self.spaces.action.actions)):
+                self.q_value_for_action[i] = self.register_signal("Q for action {}".format(i),
+                                                                  dump_one_value_per_episode=False,
+                                                                  dump_one_value_per_step=True)
+
+    # Algorithms for which q_values are calculated from predictions will override this function
+    def get_all_q_values_for_states(self, states: StateType):
+        if self.exploration_policy.requires_action_values():
+            actions_q_values = self.get_prediction(states)
+        else:
+            actions_q_values = None
+        return actions_q_values
+
+    def get_prediction(self, states):
+        return self.networks['main'].online_network.predict(self.prepare_batch_for_inference(states, 'main'))
+
+    def update_transition_priorities_and_get_weights(self, TD_errors, batch):
+        # update errors in prioritized replay buffer
+        importance_weights = None
+        if isinstance(self.memory, PrioritizedExperienceReplay):
+            self.call_memory('update_priorities', (batch.info('idx'), TD_errors))
+            importance_weights = batch.info('weight')
+        return importance_weights
+
+    def _validate_action(self, policy, action):
+        if np.array(action).shape != ():
+            raise ValueError((
+                'The exploration_policy {} returned a vector of actions '
+                'instead of a single action. ValueOptimizationAgents '
+                'require exploration policies which return a single action.'
+            ).format(policy.__class__.__name__))
+
+    def choose_action(self, curr_state):
+        actions_q_values = self.get_all_q_values_for_states(curr_state)
+
+        # choose action according to the exploration policy and the current phase (evaluating or training the agent)
+        action = self.exploration_policy.get_action(actions_q_values)
+        self._validate_action(self.exploration_policy, action)
+
+        if actions_q_values is not None:
+            # this is for bootstrapped dqn
+            if type(actions_q_values) == list and len(actions_q_values) > 0:
+                actions_q_values = self.exploration_policy.last_action_values
+            actions_q_values = actions_q_values.squeeze()
+
+            # store the q values statistics for logging
+            self.q_values.add_sample(actions_q_values)
+            for i, q_value in enumerate(actions_q_values):
+                self.q_value_for_action[i].add_sample(q_value)
+
+            action_info = ActionInfo(action=action,
+                                     action_value=actions_q_values[action],
+                                     max_action_value=np.max(actions_q_values))
+        else:
+            action_info = ActionInfo(action=action)
+
+        return action_info
+
+    def learn_from_batch(self, batch):
+        raise NotImplementedError("ValueOptimizationAgent is an abstract agent. Not to be used directly.")
+
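Since ValueOptimizationAgent leaves learn_from_batch abstract, a concrete subclass mainly needs to turn network predictions into TD targets and train on them. A hypothetical skeleton (not part of Coach; the API usage mirrors the agents shown above):

class MyValueAgent(ValueOptimizationAgent):
    def learn_from_batch(self, batch):
        network_keys = self.ap.network_wrappers['main'].input_embedders_parameters.keys()
        # predict Q-values for the current states; a real agent would overwrite the entries
        # of the taken actions with bootstrapped TD targets before training
        TD_targets = self.networks['main'].online_network.predict(batch.states(network_keys))
        result = self.networks['main'].train_and_sync_networks(batch.states(network_keys), TD_targets)
        total_loss, losses, unclipped_grads = result[:3]
        return total_loss, losses, unclipped_grads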
\ No newline at end of file
diff --git a/docs/_modules/rl_coach/architectures/architecture.html b/docs/_modules/rl_coach/architectures/architecture.html
new file mode 100644
index 0000000..940f357
--- /dev/null
+++ b/docs/_modules/rl_coach/architectures/architecture.html
@@ -0,0 +1,442 @@
+ rl_coach.architectures.architecture — Reinforcement Learning Coach 0.11.0 documentation

Source code for rl_coach.architectures.architecture

+#
+# Copyright (c) 2017 Intel Corporation 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from typing import Any, Dict, List, Tuple
+
+import numpy as np
+
+from rl_coach.base_parameters import AgentParameters
+from rl_coach.spaces import SpacesDefinition
+
+
+
[docs]class Architecture(object): + def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, name: str= ""): + """ + Creates a neural network 'architecture' that can be trained and used for inference. + + :param agent_parameters: the agent parameters + :param spaces: the spaces (observation, action, etc.) definition of the agent + :param name: the name of the network + """ + self.spaces = spaces + self.name = name + self.network_wrapper_name = self.name.split('/')[0] # e.g. 'main/online' --> 'main' + self.full_name = "{}/{}".format(agent_parameters.full_name_id, name) + self.network_parameters = agent_parameters.network_wrappers[self.network_wrapper_name] + self.batch_size = self.network_parameters.batch_size + self.learning_rate = self.network_parameters.learning_rate + self.optimizer = None + self.ap = agent_parameters +
[docs] def predict(self, + inputs: Dict[str, np.ndarray], + outputs: List[Any] = None, + squeeze_output: bool = True, + initial_feed_dict: Dict[Any, np.ndarray] = None) -> Tuple[np.ndarray, ...]: + """ + Given input observations, use the model to make predictions (e.g. action or value). + + :param inputs: current state (i.e. observations, measurements, goals, etc.) + (e.g. `{'observation': numpy.ndarray}` of shape (batch_size, observation_space_size)) + :param outputs: list of outputs to return. Return all outputs if unspecified. Type of the list elements + depends on the framework backend. + :param squeeze_output: call squeeze_list on output before returning if True + :param initial_feed_dict: a dictionary of extra inputs for forward pass. + :return: predictions of action or value, e.g. of shape (batch_size, action_space_size) for action predictions + """ + raise NotImplementedError
+ +
[docs] @staticmethod + def parallel_predict(sess: Any, + network_input_tuples: List[Tuple['Architecture', Dict[str, np.ndarray]]]) -> \ + Tuple[np.ndarray, ...]: + """ + :param sess: active session to use for prediction + :param network_input_tuples: tuple of network and corresponding input + :return: list or tuple of outputs from all networks + """ + raise NotImplementedError
+ +
[docs] def train_on_batch(self, + inputs: Dict[str, np.ndarray], + targets: List[np.ndarray], + scaler: float=1., + additional_fetches: list=None, + importance_weights: np.ndarray=None) -> Tuple[float, List[float], float, list]: + """ + Given a batch of inputs (e.g. states) and targets (e.g. discounted rewards), takes a training step: i.e. runs a + forward pass and backward pass of the network, accumulates the gradients and applies an optimization step to + update the weights. + Calls `accumulate_gradients` followed by `apply_and_reset_gradients`. + Note: Currently an unused method. + + :param inputs: typically the environment states (but can also contain other data necessary for loss). + (e.g. `{'observation': numpy.ndarray}` with `observation` of shape (batch_size, observation_space_size) or + (batch_size, observation_space_size, stack_size) or + `{'observation': numpy.ndarray, 'output_0_0': numpy.ndarray}` with `output_0_0` of shape (batch_size,)) + :param targets: target values of shape (batch_size, ). For example discounted rewards for value network + for calculating the value-network loss would be a target. Length of list and order of arrays in + the list matches that of network losses which are defined by network parameters + :param scaler: value to scale gradients by before optimizing network weights + :param additional_fetches: list of additional values to fetch and return. The type of each list + element is framework dependent. + :param importance_weights: ndarray of shape (batch_size,) to multiply with batch loss. + :return: tuple of total_loss, losses, norm_unclipped_grads, fetched_tensors + total_loss (float): sum of all head losses + losses (list of float): list of all losses. The order is list of target losses followed by list + of regularization losses. The specifics of the losses are dependent on the network parameters + (number of heads, etc.) + norm_unclipped_grads (float): global norm of all gradients before any gradient clipping is applied + fetched_tensors: all values for additional_fetches + """ + raise NotImplementedError
+ +
[docs] def get_weights(self) -> List[np.ndarray]: + """ + Gets model weights as a list of ndarrays. It is used for synchronizing weights between two identical networks. + + :return: the model weights as a list of ndarrays + """ + raise NotImplementedError
+ +
[docs] def set_weights(self, weights: List[np.ndarray], rate: float=1.0) -> None: + """ + Sets model weights for provided layer parameters. + + :param weights: list of model weights in the same order as received in get_weights + :param rate: controls the mixture of given weight values versus old weight values. + i.e. new_weight = rate * given_weight + (1 - rate) * old_weight + :return: None + """ + raise NotImplementedError
+ +
[docs] def reset_accumulated_gradients(self) -> None: + """ + Sets gradient of all parameters to 0. + + Once gradients are reset, they must be accessible by `accumulated_gradients` property of this class, + which must return a list of numpy ndarrays. Child class must ensure that `accumulated_gradients` is set. + """ + raise NotImplementedError
+ +
[docs] def accumulate_gradients(self, + inputs: Dict[str, np.ndarray], + targets: List[np.ndarray], + additional_fetches: list=None, + importance_weights: np.ndarray=None, + no_accumulation: bool=False) -> Tuple[float, List[float], float, list]: + """ + Given a batch of inputs (i.e. states) and targets (e.g. discounted rewards), computes and accumulates the + gradients for model parameters. Will run forward and backward pass to compute gradients, clip the gradient + values if required and then accumulate gradients from all learners. It does not update the model weights, + that is performed by the `apply_and_reset_gradients` method. + + Once gradients are accumulated, they are accessible through the `accumulated_gradients` property of this class. + + :param inputs: typically the environment states (but can also contain other data for loss) + (e.g. `{'observation': numpy.ndarray}` with `observation` of shape (batch_size, observation_space_size) or + (batch_size, observation_space_size, stack_size) or + `{'observation': numpy.ndarray, 'output_0_0': numpy.ndarray}` with `output_0_0` of shape (batch_size,)) + :param targets: targets for calculating loss. For example discounted rewards for value network + for calculating the value-network loss would be a target. Length of list and order of arrays in + the list matches that of network losses which are defined by network parameters + :param additional_fetches: list of additional values to fetch and return. The type of each list + element is framework dependent. + :param importance_weights: ndarray of shape (batch_size,) to multiply with batch loss. + :param no_accumulation: if True, set gradient values to the new gradients, otherwise sum with previously + calculated gradients + :return: tuple of total_loss, losses, norm_unclipped_grads, fetched_tensors + total_loss (float): sum of all head losses + losses (list of float): list of all losses. The order is list of target losses followed by list of + regularization losses. The specifics of the losses are dependent on the network parameters + (number of heads, etc.) + norm_unclipped_grads (float): global norm of all gradients before any gradient clipping is applied + fetched_tensors: all values for additional_fetches + """ + raise NotImplementedError
+ +
[docs] def apply_and_reset_gradients(self, gradients: List[np.ndarray], scaler: float=1.) -> None: + """ + Applies the given gradients to the network weights and resets the gradient accumulations. + Has the same impact as calling `apply_gradients`, then `reset_accumulated_gradients`. + + :param gradients: gradients for the parameter weights, taken from `accumulated_gradients` property + of an identical network (either self or another identical network) + :param scaler: A scaling factor that allows rescaling the gradients before applying them + """ + raise NotImplementedError
+ +
[docs] def apply_gradients(self, gradients: List[np.ndarray], scaler: float=1.) -> None: + """ + Applies the given gradients to the network weights. + Will be performed sync or async depending on `network_parameters.async_training` + + :param gradients: gradients for the parameter weights, taken from `accumulated_gradients` property + of an identical network (either self or another identical network) + :param scaler: A scaling factor that allows rescaling the gradients before applying them + """ + raise NotImplementedError
+ +
[docs] def get_variable_value(self, variable: Any) -> np.ndarray: + """ + Gets the value of a specified variable. The type of the variable is dependent on the framework. + An example of a variable is head.kl_coefficient, which could be a symbol for evaluation + or could be a string representing the value. + + :param variable: variable of interest + :return: value of the specified variable + """ + raise NotImplementedError
+ +
[docs] def set_variable_value(self, assign_op: Any, value: np.ndarray, placeholder: Any): + """ + Updates the value of a specified variable. The type of assign_op is dependent on the framework + and is a unique identifier for assigning a value to a variable. For example, an agent may use + head.assign_kl_coefficient. There is a one-to-one mapping between assign_op and placeholder + (in the example above, placeholder would be head.kl_coefficient_ph). + + :param assign_op: a parameter representing the operation for assigning value to a specific variable + :param value: value of the specified variable used for update + :param placeholder: a placeholder for binding the value to assign_op. + """ + raise NotImplementedError
+
+ +
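The rate argument of set_weights above describes a soft (Polyak-style) mixture; a tiny NumPy illustration of that formula, with made-up weight values:

import numpy as np

rate = 0.1                                  # hypothetical soft-update rate
old_weight = np.array([1.0, 2.0, 3.0])      # this network's current weights
given_weight = np.array([2.0, 4.0, 6.0])    # weights received from the source network
new_weight = rate * given_weight + (1 - rate) * old_weight
print(new_weight)  # [1.1 2.2 3.3]; rate=1.0 would reproduce an exact copy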
\ No newline at end of file
diff --git a/docs/_modules/rl_coach/architectures/network_wrapper.html b/docs/_modules/rl_coach/architectures/network_wrapper.html
new file mode 100644
index 0000000..24ec906
--- /dev/null
+++ b/docs/_modules/rl_coach/architectures/network_wrapper.html
@@ -0,0 +1,480 @@
+ rl_coach.architectures.network_wrapper — Reinforcement Learning Coach 0.11.0 documentation

Source code for rl_coach.architectures.network_wrapper

+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from typing import List, Tuple
+
+from rl_coach.base_parameters import Frameworks, AgentParameters
+from rl_coach.logger import failed_imports
+from rl_coach.spaces import SpacesDefinition
+try:
+    import tensorflow as tf
+    from rl_coach.architectures.tensorflow_components.general_network import GeneralTensorFlowNetwork
+except ImportError:
+    failed_imports.append("tensorflow")
+
+try:
+    import mxnet as mx
+    from rl_coach.architectures.mxnet_components.general_network import GeneralMxnetNetwork
+except ImportError:
+    failed_imports.append("mxnet")
+
+
+
[docs]class NetworkWrapper(object): + """ + The network wrapper contains multiple copies of the same network, each one with a different set of weights which is + updating in a different time scale. The network wrapper will always contain an online network. + It will contain an additional slow updating target network if it was requested by the user, + and it will contain a global network shared between different workers, if Coach is run in a single-node + multi-process distributed mode. The network wrapper contains functionality for managing these networks and syncing + between them. + """ + def __init__(self, agent_parameters: AgentParameters, has_target: bool, has_global: bool, name: str, + spaces: SpacesDefinition, replicated_device=None, worker_device=None): + self.ap = agent_parameters + self.network_parameters = self.ap.network_wrappers[name] + self.has_target = has_target + self.has_global = has_global + self.name = name + self.sess = None + + if self.network_parameters.framework == Frameworks.tensorflow: + if "tensorflow" not in failed_imports: + general_network = GeneralTensorFlowNetwork + else: + raise Exception('Install tensorflow before using it as framework') + elif self.network_parameters.framework == Frameworks.mxnet: + if "mxnet" not in failed_imports: + general_network = GeneralMxnetNetwork + else: + raise Exception('Install mxnet before using it as framework') + else: + raise Exception("{} Framework is not supported" + .format(Frameworks().to_string(self.network_parameters.framework))) + + with tf.variable_scope("{}/{}".format(self.ap.full_name_id, name)): + + # Global network - the main network shared between threads + self.global_network = None + if self.has_global: + # we assign the parameters of this network on the parameters server + with tf.device(replicated_device): + self.global_network = general_network(agent_parameters=agent_parameters, + name='{}/global'.format(name), + global_network=None, + network_is_local=False, + spaces=spaces, + network_is_trainable=True) + + # Online network - local copy of the main network used for playing + self.online_network = None + with tf.device(worker_device): + self.online_network = general_network(agent_parameters=agent_parameters, + name='{}/online'.format(name), + global_network=self.global_network, + network_is_local=True, + spaces=spaces, + network_is_trainable=True) + + # Target network - a local, slow updating network used for stabilizing the learning + self.target_network = None + if self.has_target: + with tf.device(worker_device): + self.target_network = general_network(agent_parameters=agent_parameters, + name='{}/target'.format(name), + global_network=self.global_network, + network_is_local=True, + spaces=spaces, + network_is_trainable=False) + +
[docs] def sync(self): + """ + Initializes the weights of the networks to match each other + + :return: + """ + self.update_online_network() + self.update_target_network()
+ +
[docs] def update_target_network(self, rate=1.0): + """ + Copy weights: online network >>> target network + + :param rate: the rate of copying the weights - 1 for copying exactly + """ + if self.target_network: + self.target_network.set_weights(self.online_network.get_weights(), rate)
+ +
[docs] def update_online_network(self, rate=1.0): + """ + Copy weights: global network >>> online network + + :param rate: the rate of copying the weights - 1 for copying exactly + """ + if self.global_network: + self.online_network.set_weights(self.global_network.get_weights(), rate)
+ +
[docs] def apply_gradients_to_global_network(self, gradients=None): + """ + Apply gradients from the online network to the global network + + :param gradients: optional gradients that will be used instead of the accumulated gradients + :return: + """ + if gradients is None: + gradients = self.online_network.accumulated_gradients + if self.network_parameters.shared_optimizer: + self.global_network.apply_gradients(gradients) + else: + self.online_network.apply_gradients(gradients)
+ +
[docs] def apply_gradients_to_online_network(self, gradients=None): + """ + Apply gradients from the online network on itself + + :return: + """ + if gradients is None: + gradients = self.online_network.accumulated_gradients + self.online_network.apply_gradients(gradients)
+ +
[docs] def train_and_sync_networks(self, inputs, targets, additional_fetches=[], importance_weights=None): + """ + A generic training function that enables multi-threaded training using a global network if necessary. + + :param inputs: The inputs for the network. + :param targets: The targets corresponding to the given inputs + :param additional_fetches: Any additional tensor the user wants to fetch + :param importance_weights: A coefficient for each sample in the batch, which will be used to rescale the loss + error of this sample. If it is not given, the sample losses won't be scaled + :return: The loss of the training iteration + """ + result = self.online_network.accumulate_gradients(inputs, targets, additional_fetches=additional_fetches, + importance_weights=importance_weights, no_accumulation=True) + self.apply_gradients_and_sync_networks(reset_gradients=False) + return result
+ +
[docs] def apply_gradients_and_sync_networks(self, reset_gradients=True): + """ + Applies the gradients accumulated in the online network to the global network or to itself and syncs the + networks if necessary + + :param reset_gradients: If set to False, the accumulated gradients won't be reset to 0 after applying them to + the network. This is useful when the accumulated gradients are overwritten (rather than + accumulated) by the accumulate_gradients function, which reduces the time + complexity of this function by around 10% + """ + if self.global_network: + self.apply_gradients_to_global_network() + if reset_gradients: + self.online_network.reset_accumulated_gradients() + self.update_online_network() + else: + if reset_gradients: + self.online_network.apply_and_reset_gradients(self.online_network.accumulated_gradients) + else: + self.online_network.apply_gradients(self.online_network.accumulated_gradients)
+ +
[docs] def parallel_prediction(self, network_input_tuples: List[Tuple]): + """ + Run several network predictions in parallel. Currently this only supports running each of the networks once. + + :param network_input_tuples: a list of tuples where the first element is the network (online_network, + target_network or global_network) and the second element is the inputs + :return: the outputs of all the networks in the same order as the inputs were given + """ + return type(self.online_network).parallel_predict(self.sess, network_input_tuples)
+ +
[docs] def get_local_variables(self): + """ + Get all the variables that are local to the thread + + :return: a list of all the variables that are local to the thread + """ + local_variables = [v for v in tf.local_variables() if self.online_network.name in v.name] + if self.has_target: + local_variables += [v for v in tf.local_variables() if self.target_network.name in v.name] + return local_variables
+ +
[docs] def get_global_variables(self): + """ + Get all the variables that are shared between threads + + :return: a list of all the variables that are shared between threads + """ + global_variables = [v for v in tf.global_variables() if self.global_network.name in v.name] + return global_variables
+ +
[docs] def set_is_training(self, state: bool): + """ + Set the phase of the network between training and testing + + :param state: The current state (True = Training, False = Testing) + :return: None + """ + self.online_network.set_is_training(state) + if self.has_target: + self.target_network.set_is_training(state)
+ + def set_session(self, sess): + self.sess = sess + self.online_network.set_session(sess) + if self.global_network: + self.global_network.set_session(sess) + if self.target_network: + self.target_network.set_session(sess) + + def __str__(self): + sub_networks = [] + if self.global_network: + sub_networks.append("global network") + if self.online_network: + sub_networks.append("online network") + if self.target_network: + sub_networks.append("target network") + + result = [] + result.append("Network: {}, Copies: {} ({})".format(self.name, len(sub_networks), ' | '.join(sub_networks))) + result.append("-"*len(result[-1])) + result.append(str(self.online_network)) + result.append("") + return '\n'.join(result)
+
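Taken together, the wrapper is what the agents above call when training: one call accumulates and applies gradients on the online copy (and the global copy, in distributed mode), and the target copy is refreshed separately. A schematic sketch, assuming network is an already-constructed NetworkWrapper and inputs/targets are prepared batches:

# one training step on the online network (and the global network, if one exists)
total_loss, losses, unclipped_grads = network.train_and_sync_networks(inputs, targets)[:3]

# periodically copy the online weights into the slow-moving target network
network.update_target_network(rate=1.0)   # a rate below 1.0 gives a soft update instead of a hard copy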
\ No newline at end of file
diff --git a/docs/_modules/rl_coach/base_parameters.html b/docs/_modules/rl_coach/base_parameters.html
new file mode 100644
index 0000000..adbd3d3
--- /dev/null
+++ b/docs/_modules/rl_coach/base_parameters.html
@@ -0,0 +1,801 @@
+ rl_coach.base_parameters — Reinforcement Learning Coach 0.11.0 documentation

Source code for rl_coach.base_parameters

+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import inspect
+import json
+import os
+import sys
+import types
+from collections import OrderedDict
+from enum import Enum
+from typing import Dict, List, Union
+
+from rl_coach.core_types import TrainingSteps, EnvironmentSteps, GradientClippingMethod, RunPhase, \
+    SelectedPhaseOnlyDumpFilter, MaxDumpFilter
+from rl_coach.filters.filter import NoInputFilter
+
+
+class Frameworks(Enum):
+    tensorflow = "TensorFlow"
+    mxnet = "MXNet"
+
+
+class EmbedderScheme(Enum):
+    Empty = "Empty"
+    Shallow = "Shallow"
+    Medium = "Medium"
+    Deep = "Deep"
+
+
+class MiddlewareScheme(Enum):
+    Empty = "Empty"
+    Shallow = "Shallow"
+    Medium = "Medium"
+    Deep = "Deep"
+
+
+class EmbeddingMergerType(Enum):
+    Concat = 0
+    Sum = 1
+    #ConcatDepthWise = 2
+    #Multiply = 3
+
+
+# DistributedCoachSynchronizationType provides the synchronization type for distributed Coach.
+# The default value is None, which means the algorithm or preset cannot be used with distributed Coach.
+class DistributedCoachSynchronizationType(Enum):
+    # In SYNC mode, the trainer waits for all the experiences to be gathered from distributed rollout workers before
+    # training a new policy and the rollout workers wait for a new policy before gathering experiences.
+    SYNC = "sync"
+
+    # In ASYNC mode, the trainer doesn't wait for any set of experiences to be gathered from distributed rollout workers
+    # and the rollout workers continuously gather experiences, loading new policies whenever they become available.
+    ASYNC = "async"
+
+
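As the comments above note, an algorithm opts into distributed Coach by assigning one of these values on its algorithm parameters (the default of None in AlgorithmParameters below disables it). A hedged example, assuming DQNAgentParameters is imported from rl_coach.agents.dqn_agent as in the agents above:

# illustrative preset snippet: allow this agent to run with distributed Coach in synchronous mode
agent_params = DQNAgentParameters()
agent_params.algorithm.distributed_coach_synchronization_type = DistributedCoachSynchronizationType.SYNC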
+def iterable_to_items(obj):
+    if isinstance(obj, dict) or isinstance(obj, OrderedDict) or isinstance(obj, types.MappingProxyType):
+        items = obj.items()
+    elif isinstance(obj, list):
+        items = enumerate(obj)
+    else:
+        raise ValueError("The given object is not a dict or a list")
+    return items
+
+
+def unfold_dict_or_list(obj: Union[Dict, List, OrderedDict]):
+    """
+    Recursively unfolds all the parameters in dictionaries and lists
+    :param obj: a dictionary or list to unfold
+    :return: the unfolded parameters dictionary
+    """
+    parameters = OrderedDict()
+    items = iterable_to_items(obj)
+    for k, v in items:
+        if isinstance(v, dict) or isinstance(v, list) or isinstance(v, OrderedDict):
+            if 'tensorflow.' not in str(v.__class__):
+                parameters[k] = unfold_dict_or_list(v)
+        elif 'tensorflow.' in str(v.__class__):
+            parameters[k] = v
+        elif hasattr(v, '__dict__'):
+            sub_params = v.__dict__
+            if '__objclass__' not in sub_params.keys():
+                try:
+                    parameters[k] = unfold_dict_or_list(sub_params)
+                except RecursionError:
+                    parameters[k] = sub_params
+                parameters[k]['__class__'] = v.__class__.__name__
+            else:
+                # unfolding this type of object will result in infinite recursion
+                parameters[k] = sub_params
+        else:
+            parameters[k] = v
+    if not isinstance(obj, OrderedDict) and not isinstance(obj, list):
+        parameters = OrderedDict(sorted(parameters.items()))
+    return parameters
+
+
+class Parameters(object):
+    def __setattr__(self, key, value):
+        caller_name = sys._getframe(1).f_code.co_name
+
+        if caller_name != '__init__' and not hasattr(self, key):
+            raise TypeError("Parameter '{}' does not exist in {}. Parameters are only to be defined in a constructor of"
+                            " a class inheriting from Parameters. In order to explicitly register a new parameter "
+                            "outside of a constructor use register_var().".
+                            format(key, self.__class__))
+        object.__setattr__(self, key, value)
+
+    @property
+    def path(self):
+        if hasattr(self, 'parameterized_class_name'):
+            module_path = os.path.relpath(inspect.getfile(self.__class__), os.getcwd())[:-3] + '.py'
+
+            return ':'.join([module_path, self.parameterized_class_name])
+        else:
+            raise ValueError("The parameters class does not have an attached class it parameterizes. "
+                             "The self.parameterized_class_name should be set to the parameterized class.")
+
+    def register_var(self, key, value):
+        if hasattr(self, key):
+            raise TypeError("Cannot register an already existing parameter '{}'. ".format(key))
+        object.__setattr__(self, key, value)
+
+    def __str__(self):
+        result = "\"{}\" {}\n".format(self.__class__.__name__,
+                                   json.dumps(unfold_dict_or_list(self.__dict__), indent=4, default=repr))
+        return result
+
+
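The __setattr__ guard above means attributes may only be introduced inside a constructor; register_var is the explicit escape hatch. A small hypothetical illustration:

class MyParams(Parameters):
    def __init__(self):
        self.alpha = 0.1           # allowed: defined inside a constructor

p = MyParams()
p.alpha = 0.2                      # allowed: the attribute already exists
# p.beta = 1.0                     # would raise TypeError: 'beta' was never defined
p.register_var('beta', 1.0)        # the explicit way to add a new parameter afterwards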
+class AlgorithmParameters(Parameters):
+    def __init__(self):
+        # Architecture parameters
+        self.use_accumulated_reward_as_measurement = False
+
+        # Agent parameters
+        self.num_consecutive_playing_steps = EnvironmentSteps(1)
+        self.num_consecutive_training_steps = 1  # TODO: update this to TrainingSteps
+
+        self.heatup_using_network_decisions = False
+        self.discount = 0.99
+        self.apply_gradients_every_x_episodes = 5
+        self.num_steps_between_copying_online_weights_to_target = TrainingSteps(0)
+        self.rate_for_copying_weights_to_target = 1.0
+        self.load_memory_from_file_path = None
+        self.store_transitions_only_when_episodes_are_terminated = False
+
+        # HRL / HER related params
+        self.in_action_space = None
+
+        # distributed agents params
+        self.share_statistics_between_workers = True
+
+        # intrinsic reward
+        self.scale_external_reward_by_intrinsic_reward_value = False
+
+        # n-step returns
+        self.n_step = -1  # calculate the total return (no bootstrap, by default)
+
+        # Distributed Coach params
+        self.distributed_coach_synchronization_type = None
+
+
+
[docs]class PresetValidationParameters(Parameters): + def __init__(self, + test=False, + min_reward_threshold=0, + max_episodes_to_achieve_reward=1, + num_workers=1, + reward_test_level=None, + test_using_a_trace_test=True, + trace_test_levels=None, + trace_max_env_steps=5000): + """ + :param test: + A flag which specifies if the preset should be tested as part of the validation process. + :param min_reward_threshold: + The minimum reward that the agent should pass after max_episodes_to_achieve_reward episodes when the + preset is run. + :param max_episodes_to_achieve_reward: + The maximum number of episodes that the agent should train using the preset in order to achieve the + reward specified by min_reward_threshold. + :param num_workers: + The number of workers that should be used when running this preset in the test suite for validation. + :param reward_test_level: + The environment level or levels, given by a list of strings, that should be tested as part of the + reward tests suite. + :param test_using_a_trace_test: + A flag that specifies if the preset should be run as part of the trace tests suite. + :param trace_test_levels: + The environment level or levels, given by a list of strings, that should be tested as part of the + trace tests suite. + :param trace_max_env_steps: + An integer representing the maximum number of environment steps to run when running this preset as part + of the trace tests suite. + """ + super().__init__() + + # setting a seed will only work for non-parallel algorithms. Parallel algorithms add uncontrollable noise in + # the form of different workers starting at different times, and getting different assignments of CPU + # time from the OS. + + # Testing parameters + self.test = test + self.min_reward_threshold = min_reward_threshold + self.max_episodes_to_achieve_reward = max_episodes_to_achieve_reward + self.num_workers = num_workers + self.reward_test_level = reward_test_level + self.test_using_a_trace_test = test_using_a_trace_test + self.trace_test_levels = trace_test_levels + self.trace_max_env_steps = trace_max_env_steps
+ + +
[docs]class NetworkParameters(Parameters): + def __init__(self, + force_cpu=False, + async_training=False, + shared_optimizer=True, + scale_down_gradients_by_number_of_workers_for_sync_training=True, + clip_gradients=None, + gradients_clipping_method=GradientClippingMethod.ClipByGlobalNorm, + l2_regularization=0, + learning_rate=0.00025, + learning_rate_decay_rate=0, + learning_rate_decay_steps=0, + input_embedders_parameters={}, + embedding_merger_type=EmbeddingMergerType.Concat, + middleware_parameters=None, + heads_parameters=[], + use_separate_networks_per_head=False, + optimizer_type='Adam', + optimizer_epsilon=0.0001, + adam_optimizer_beta1=0.9, + adam_optimizer_beta2=0.99, + rms_prop_optimizer_decay=0.9, + batch_size=32, + replace_mse_with_huber_loss=False, + create_target_network=False, + tensorflow_support=True): + """ + :param force_cpu: + Force the neural networks to run on the CPU even if a GPU is available + :param async_training: + If set to True, asynchronous training will be used, meaning that each worker will progress at its own + speed, without waiting for the rest of the workers to calculate their gradients. + :param shared_optimizer: + If set to True, a central optimizer which will be shared with all the workers will be used for applying + gradients to the network. Otherwise, each worker will have its own optimizer with its own internal + parameters that will only be affected by the gradients calculated by that worker + :param scale_down_gradients_by_number_of_workers_for_sync_training: + If set to True, in synchronous training, the gradients of each worker will be scaled down by the + number of workers. This essentially means that the gradients applied to the network are the average + of the gradients over all the workers. + :param clip_gradients: + A value that will be used for clipping the gradients of the network. If set to None, no gradient clipping + will be applied. Otherwise, the gradients will be clipped according to the gradients_clipping_method. + :param gradients_clipping_method: + A gradient clipping method, defined by a GradientClippingMethod enum, that will be used to clip the + gradients of the network. This will only be used if the clip_gradients value is defined as a value other + than None. + :param l2_regularization: + An L2 regularization weight that will be applied to the network weights while calculating the loss function + :param learning_rate: + The learning rate for the network + :param learning_rate_decay_rate: + If this value is larger than 0, an exponential decay will be applied to the network learning rate. + The rate of the decay is defined by this parameter, and the number of training steps the decay will be + applied is defined by learning_rate_decay_steps. Notice that both parameters should be defined in order + for this to work correctly. + :param learning_rate_decay_steps: + If the learning_rate_decay_rate of the network is larger than 0, an exponential decay will be applied to + the network learning rate. The number of steps the decay will be applied is defined by this parameter. + Notice that both this parameter, as well as learning_rate_decay_rate should be defined in order for the + learning rate decay to work correctly. + :param input_embedders_parameters: + A dictionary mapping between input names and input embedders (InputEmbedderParameters) to use for the + network. Each of the keys is an input name as returned from the environment in the state. + For example, if the environment returns a state containing 'observation' and 'measurements', then + the keys for the input embedders dictionary can be either 'observation' to use the observation as input, + 'measurements' to use the measurements as input, or both. + The embedder type will be automatically selected according to the input type. Vector inputs will + produce a fully connected embedder, and image inputs will produce a convolutional embedder. + :param embedding_merger_type: + The type of embedding merging to use, given by one of the EmbeddingMergerType enum values. + This will be used to merge the outputs of all the input embedders into a single embedding. + :param middleware_parameters: + The parameters of the middleware to use, given by a MiddlewareParameters object. + Each network will have only a single middleware embedder which will take the merged embeddings from the + input embedders and pass them through more neural network layers. + :param heads_parameters: + A list of heads for the network given by their corresponding HeadParameters. + Each network can have one or multiple network heads, where each one will take the output of the middleware + and make some additional computation on top of it. Additionally, each head calculates a weighted loss value, + and the loss values from all the heads will be summed later on. + :param use_separate_networks_per_head: + A flag that allows using different copies of the input embedders and middleware for each one of the heads. + Normally, the heads will have a shared input, but in the case where use_separate_networks_per_head is set + to True, each one of the heads will get a different input. + :param optimizer_type: + A string specifying the optimizer type to use for updating the network. The available optimizers are + Adam, RMSProp and LBFGS. + :param optimizer_epsilon: + An internal optimizer parameter used for Adam and RMSProp. + :param adam_optimizer_beta1: + A beta1 internal optimizer parameter used for Adam. It will be used only if Adam was selected as the + optimizer for the network. + :param adam_optimizer_beta2: + A beta2 internal optimizer parameter used for Adam. It will be used only if Adam was selected as the + optimizer for the network. + :param rms_prop_optimizer_decay: + The decay value for the RMSProp optimizer, which will be used only in case the RMSProp optimizer was + selected for this network. + :param batch_size: + The batch size to use when updating the network. + :param replace_mse_with_huber_loss: + :param create_target_network: + If this flag is set to True, an additional copy of the network will be created and initialized with the + same weights as the online network. It can then be queried, and its weights can be synced from the + online network at will. + :param tensorflow_support: + A flag which specifies if the network is supported by the TensorFlow framework.
+ """ + super().__init__() + self.framework = Frameworks.tensorflow + self.sess = None + + # hardware parameters + self.force_cpu = force_cpu + + # distributed training options + self.async_training = async_training + self.shared_optimizer = shared_optimizer + self.scale_down_gradients_by_number_of_workers_for_sync_training = scale_down_gradients_by_number_of_workers_for_sync_training + + # regularization + self.clip_gradients = clip_gradients + self.gradients_clipping_method = gradients_clipping_method + self.l2_regularization = l2_regularization + + # learning rate + self.learning_rate = learning_rate + self.learning_rate_decay_rate = learning_rate_decay_rate + self.learning_rate_decay_steps = learning_rate_decay_steps + + # structure + self.input_embedders_parameters = input_embedders_parameters + self.embedding_merger_type = embedding_merger_type + self.middleware_parameters = middleware_parameters + self.heads_parameters = heads_parameters + self.use_separate_networks_per_head = use_separate_networks_per_head + self.optimizer_type = optimizer_type + self.optimizer_epsilon = optimizer_epsilon + self.adam_optimizer_beta1 = adam_optimizer_beta1 + self.adam_optimizer_beta2 = adam_optimizer_beta2 + self.rms_prop_optimizer_decay = rms_prop_optimizer_decay + self.batch_size = batch_size + self.replace_mse_with_huber_loss = replace_mse_with_huber_loss + self.create_target_network = create_target_network + + # Framework support + self.tensorflow_support = tensorflow_support
+ + +class NetworkComponentParameters(Parameters): + def __init__(self, dense_layer): + self.dense_layer = dense_layer + + +
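A hedged example of filling in these network parameters; InputEmbedderParameters and QHeadParameters are assumed to be importable from the rl_coach.architectures subpackages (they are not defined in this file), and the values are placeholders rather than recommended settings:

my_network = NetworkParameters(
    learning_rate=0.0001,
    batch_size=64,
    input_embedders_parameters={'observation': InputEmbedderParameters()},  # assumed class
    middleware_parameters=FCMiddlewareParameters(),                         # as imported by the Rainbow agent above
    heads_parameters=[QHeadParameters()],                                   # assumed class
    create_target_network=True,
)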
[docs]class VisualizationParameters(Parameters): + def __init__(self, + print_networks_summary=False, + dump_csv=True, + dump_signals_to_csv_every_x_episodes=5, + dump_gifs=False, + dump_mp4=False, + video_dump_methods=None, + dump_in_episode_signals=False, + dump_parameters_documentation=True, + render=False, + native_rendering=False, + max_fps_for_human_control=10, + tensorboard=False, + add_rendered_image_to_env_response=False): + """ + :param print_networks_summary: + If set to True, a summary of all the networks structure will be printed at the beginning of the experiment + :param dump_csv: + If set to True, the logger will dump logs to a csv file once in every dump_signals_to_csv_every_x_episodes + episodes. The logs can be later used to visualize the training process using Coach Dashboard. + :param dump_signals_to_csv_every_x_episodes: + Defines the number of episodes between writing new data to the csv log files. Lower values can affect + performance, as writing to disk may take time, and it is done synchronously. + :param dump_gifs: + If set to True, GIF videos of the environment will be stored into the experiment directory according to + the filters defined in video_dump_methods. + :param dump_mp4: + If set to True, MP4 videos of the environment will be stored into the experiment directory according to + the filters defined in video_dump_methods. + :param dump_in_episode_signals: + If set to True, csv files will be dumped for each episode for inspecting different metrics within the + episode. This means that for each step in each episode, different metrics such as the reward, the + future return, etc. will be saved. Setting this to True may affect performance severely, and therefore + this should be used only for debugging purposes. + :param dump_parameters_documentation: + If set to True, a json file containing all the agent parameters will be saved in the experiment directory. + This may be very useful for inspecting the values defined for each parameters and making sure that all + the parameters are defined as expected. + :param render: + If set to True, the environment render function will be called for each step, rendering the image of the + environment. This may affect the performance of training, and is highly dependent on the environment. + By default, Coach uses PyGame to render the environment image instead of the environment specific rendered. + To change this, use the native_rendering flag. + :param native_rendering: + If set to True, the environment native renderer will be used for rendering the environment image. + In some cases this can be slower than rendering using PyGame through Coach, but in other cases the + environment opens its native renderer by default, so rendering with PyGame is an unnecessary overhead. + :param max_fps_for_human_control: + The maximum number of frames per second used while playing the environment as a human. This only has + effect while using the --play flag for Coach. + :param tensorboard: + If set to True, TensorBoard summaries will be stored in the experiment directory. This can later be + loaded in TensorBoard in order to visualize the training process. + :param video_dump_methods: + A list of dump methods that will be used as filters for deciding when to save videos. + The filters in the list will be checked one after the other until the first dump method that returns + false for should_dump() in the environment class. This list will only be used if dump_mp4 or dump_gif are + set to True. 
+ :param add_rendered_image_to_env_response: + Some environments have a different observation compared to the one displayed while rendering. + For some cases it can be useful to pass the rendered image to the agent for visualization purposes. + If this flag is set to True, the rendered image will be added to the environment EnvResponse object, + which will be passed to the agent and allow using those images. + """ + super().__init__() + if video_dump_methods is None: + video_dump_methods = [SelectedPhaseOnlyDumpFilter(RunPhase.TEST), MaxDumpFilter()] + self.print_networks_summary = print_networks_summary + self.dump_csv = dump_csv + self.dump_gifs = dump_gifs + self.dump_mp4 = dump_mp4 + self.dump_signals_to_csv_every_x_episodes = dump_signals_to_csv_every_x_episodes + self.dump_in_episode_signals = dump_in_episode_signals + self.dump_parameters_documentation = dump_parameters_documentation + self.render = render + self.native_rendering = native_rendering + self.max_fps_for_human_control = max_fps_for_human_control + self.tensorboard = tensorboard + self.video_dump_filters = video_dump_methods + self.add_rendered_image_to_env_response = add_rendered_image_to_env_response
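A minimal usage sketch of the class above (the values are illustrative, not recommendations), reusing the same dump filters that serve as its defaults:

# Dump MP4 videos only for record-breaking evaluation episodes, write CSV logs
# every 10 episodes instead of 5, and enable TensorBoard summaries.
vis_params = VisualizationParameters(
    dump_mp4=True,
    video_dump_methods=[SelectedPhaseOnlyDumpFilter(RunPhase.TEST), MaxDumpFilter()],
    dump_signals_to_csv_every_x_episodes=10,
    tensorboard=True)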
+ + +
[docs]class AgentParameters(Parameters):
+    def __init__(self, algorithm: AlgorithmParameters, exploration: 'ExplorationParameters', memory: 'MemoryParameters',
+                 networks: Dict[str, NetworkParameters], visualization: VisualizationParameters=VisualizationParameters()):
+        """
+        :param algorithm:
+            A class inheriting AlgorithmParameters.
+            The parameters used for the specific algorithm used by the agent.
+            These parameters can be later referenced in the agent implementation through self.ap.algorithm.
+        :param exploration:
+            Either a class inheriting ExplorationParameters or a dictionary mapping between action
+            space types and their corresponding ExplorationParameters. If a dictionary is used, the correct
+            exploration policy parameters will be chosen when the agent is instantiated, according to the
+            actual type of the environment action space.
+            These parameters will be used to instantiate the exploration policy.
+        :param memory:
+            A class inheriting MemoryParameters. It defines all the parameters used by the memory module.
+        :param networks:
+            A dictionary mapping between network names and their corresponding network parameters, defined
+            as a class inheriting NetworkParameters. Each element will be used in order to instantiate
+            a NetworkWrapper class, and all the network wrappers will be stored in the agent under
+            self.network_wrappers. self.network_wrappers is a dict mapping between the network name that
+            was given in the networks dict, and the instantiated network wrapper.
+        :param visualization:
+            A class inheriting VisualizationParameters and defining various parameters that can be
+            used for visualization purposes, such as printing to the screen, rendering, and saving videos.
+        """
+        super().__init__()
+        self.visualization = visualization
+        self.algorithm = algorithm
+        self.exploration = exploration
+        self.memory = memory
+        self.network_wrappers = networks
+        self.input_filter = None
+        self.output_filter = None
+        self.pre_network_filter = NoInputFilter()
+        self.full_name_id = None  # TODO: do we really want to hold this parameter here?
+        self.name = None
+        self.is_a_highest_level_agent = True
+        self.is_a_lowest_level_agent = True
+        self.task_parameters = None
+
+    @property
+    def path(self):
+        return 'rl_coach.agents.agent:Agent'
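A schematic sketch of how a concrete agent parameters class is typically assembled. MyAlgorithmParameters, MyExplorationParameters, MyMemoryParameters, MyNetworkParameters and my_package.my_agent:MyAgent are hypothetical placeholders, not part of Coach:

class MyAgentParameters(AgentParameters):
    def __init__(self):
        # each building block is a Parameters subclass; "main" becomes the key in self.network_wrappers
        super().__init__(algorithm=MyAlgorithmParameters(),
                         exploration=MyExplorationParameters(),
                         memory=MyMemoryParameters(),
                         networks={"main": MyNetworkParameters()})

    @property
    def path(self):
        # module:class of the agent implementation that will consume these parameters
        return 'my_package.my_agent:MyAgent'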
+ + +
[docs]class TaskParameters(Parameters): + def __init__(self, framework_type: Frameworks=Frameworks.tensorflow, evaluate_only: bool=False, use_cpu: bool=False, + experiment_path='/tmp', seed=None, checkpoint_save_secs=None, checkpoint_restore_dir=None, + checkpoint_save_dir=None, export_onnx_graph: bool=False): + """ + :param framework_type: deep learning framework type. currently only tensorflow is supported + :param evaluate_only: the task will be used only for evaluating the model + :param use_cpu: use the cpu for this task + :param experiment_path: the path to the directory which will store all the experiment outputs + :param seed: a seed to use for the random numbers generator + :param checkpoint_save_secs: the number of seconds between each checkpoint saving + :param checkpoint_restore_dir: the directory to restore the checkpoints from + :param checkpoint_save_dir: the directory to store the checkpoints in + :param export_onnx_graph: If set to True, this will export an onnx graph each time a checkpoint is saved + """ + self.framework_type = framework_type + self.task_index = 0 # TODO: not really needed + self.evaluate_only = evaluate_only + self.use_cpu = use_cpu + self.experiment_path = experiment_path + self.checkpoint_save_secs = checkpoint_save_secs + self.checkpoint_restore_dir = checkpoint_restore_dir + self.checkpoint_save_dir = checkpoint_save_dir + self.seed = seed + self.export_onnx_graph = export_onnx_graph
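For illustration (the paths are placeholders), a task that only evaluates a previously trained model on the CPU:

task_params = TaskParameters(evaluate_only=True,
                             use_cpu=True,
                             experiment_path='/tmp/my_experiment',
                             checkpoint_restore_dir='/tmp/my_experiment/checkpoints')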
+ + +
[docs]class DistributedTaskParameters(TaskParameters): + def __init__(self, framework_type: Frameworks, parameters_server_hosts: str, worker_hosts: str, job_type: str, + task_index: int, evaluate_only: bool=False, num_tasks: int=None, + num_training_tasks: int=None, use_cpu: bool=False, experiment_path=None, dnd=None, + shared_memory_scratchpad=None, seed=None, checkpoint_save_secs=None, checkpoint_restore_dir=None, + checkpoint_save_dir=None, export_onnx_graph: bool=False): + """ + :param framework_type: deep learning framework type. currently only tensorflow is supported + :param evaluate_only: the task will be used only for evaluating the model + :param parameters_server_hosts: comma-separated list of hostname:port pairs to which the parameter servers are + assigned + :param worker_hosts: comma-separated list of hostname:port pairs to which the workers are assigned + :param job_type: the job type - either ps (short for parameters server) or worker + :param task_index: the index of the process + :param num_tasks: the number of total tasks that are running (not including the parameters server) + :param num_training_tasks: the number of tasks that are training (not including the parameters server) + :param use_cpu: use the cpu for this task + :param experiment_path: the path to the directory which will store all the experiment outputs + :param dnd: an external DND to use for NEC. This is a workaround needed for a shared DND not using the scratchpad. + :param seed: a seed to use for the random numbers generator + :param checkpoint_save_secs: the number of seconds between each checkpoint saving + :param checkpoint_restore_dir: the directory to restore the checkpoints from + :param checkpoint_save_dir: the directory to store the checkpoints in + :param export_onnx_graph: If set to True, this will export an onnx graph each time a checkpoint is saved + """ + super().__init__(framework_type=framework_type, evaluate_only=evaluate_only, use_cpu=use_cpu, + experiment_path=experiment_path, seed=seed, checkpoint_save_secs=checkpoint_save_secs, + checkpoint_restore_dir=checkpoint_restore_dir, checkpoint_save_dir=checkpoint_save_dir, + export_onnx_graph=export_onnx_graph) + self.parameters_server_hosts = parameters_server_hosts + self.worker_hosts = worker_hosts + self.job_type = job_type + self.task_index = task_index + self.num_tasks = num_tasks + self.num_training_tasks = num_training_tasks + self.device = None # the replicated device which will be used for the global parameters + self.worker_target = None + self.dnd = dnd + self.shared_memory_scratchpad = shared_memory_scratchpad
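An illustrative worker configuration for a run with one parameter server and three workers (hosts and ports are placeholders):

worker_params = DistributedTaskParameters(
    framework_type=Frameworks.tensorflow,
    parameters_server_hosts="localhost:2220",
    worker_hosts="localhost:2221,localhost:2222,localhost:2223",
    job_type="worker",
    task_index=0,           # this process is the first worker
    num_tasks=3,
    num_training_tasks=3)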
\ No newline at end of file
diff --git a/docs/_modules/rl_coach/core_types.html b/docs/_modules/rl_coach/core_types.html
new file mode 100644
index 0000000..a783c7d
--- /dev/null
+++ b/docs/_modules/rl_coach/core_types.html
@@ -0,0 +1,1092 @@
+ rl_coach.core_types — Reinforcement Learning Coach 0.11.0 documentation

Source code for rl_coach.core_types

+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+import copy
+from enum import Enum
+from random import shuffle
+from typing import List, Union, Dict, Any, Type
+
+import numpy as np
+
+from rl_coach.utils import force_list
+
+ActionType = Union[int, float, np.ndarray, List]
+GoalType = Union[None, np.ndarray]
+ObservationType = np.ndarray
+RewardType = Union[int, float, np.ndarray]
+StateType = Dict[str, np.ndarray]
+
+
+class GoalTypes(Enum):
+    Embedding = 1
+    EmbeddingChange = 2
+    Observation = 3
+    Measurements = 4
+
+
+# step methods
+
+class StepMethod(object):
+    def __init__(self, num_steps: int):
+        self._num_steps = self.num_steps = num_steps
+
+    @property
+    def num_steps(self) -> int:
+        return self._num_steps
+
+    @num_steps.setter
+    def num_steps(self, val: int) -> None:
+        self._num_steps = val
+
+
+class Frames(StepMethod):
+    def __init__(self, num_steps):
+        super().__init__(num_steps)
+
+
+class EnvironmentSteps(StepMethod):
+    def __init__(self, num_steps):
+        super().__init__(num_steps)
+
+
+class EnvironmentEpisodes(StepMethod):
+    def __init__(self, num_steps):
+        super().__init__(num_steps)
+
+
+class TrainingSteps(StepMethod):
+    def __init__(self, num_steps):
+        super().__init__(num_steps)
+
+
+class Time(StepMethod):
+    def __init__(self, num_steps):
+        super().__init__(num_steps)
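All of the step classes above wrap a plain integer; the class itself only encodes the unit that schedules are expressed in. A small illustration:

heatup = EnvironmentSteps(1000)      # 1000 individual environment steps
evaluation = EnvironmentEpisodes(5)  # 5 complete episodes
training = TrainingSteps(100)        # 100 network training iterations

assert heatup.num_steps == 1000
heatup.num_steps += 500              # mutable through the num_steps property setter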
+
+
+class PredictionType(object):
+    pass
+
+
+class VStateValue(PredictionType):
+    pass
+
+
+class QActionStateValue(PredictionType):
+    pass
+
+
+class ActionProbabilities(PredictionType):
+    pass
+
+
+class Embedding(PredictionType):
+    pass
+
+
+class InputEmbedding(Embedding):
+    pass
+
+
+class MiddlewareEmbedding(Embedding):
+    pass
+
+
+class InputImageEmbedding(InputEmbedding):
+    pass
+
+
+class InputVectorEmbedding(InputEmbedding):
+    pass
+
+
+class Middleware_FC_Embedding(MiddlewareEmbedding):
+    pass
+
+
+class Middleware_LSTM_Embedding(MiddlewareEmbedding):
+    pass
+
+
+class Measurements(PredictionType):
+    pass
+
+
+PlayingStepsType = Union[EnvironmentSteps, EnvironmentEpisodes, Frames]
+
+
+# run phases
+class RunPhase(Enum):
+    HEATUP = "Heatup"
+    TRAIN = "Training"
+    TEST = "Testing"
+    UNDEFINED = "Undefined"
+
+
+# transitions
+
+
[docs]class Transition(object): + def __init__(self, state: Dict[str, np.ndarray]=None, action: ActionType=None, reward: RewardType=None, + next_state: Dict[str, np.ndarray]=None, game_over: bool=None, info: Dict=None): + """ + A transition is a tuple containing the information of a single step of interaction + between the agent and the environment. The most basic version should contain the following values: + (current state, action, reward, next state, game over) + For imitation learning algorithms, if the reward, next state or game over is not known, + it is sufficient to store the current state and action taken by the expert. + + :param state: The current state. Assumed to be a dictionary where the observation + is located at state['observation'] + :param action: The current action that was taken + :param reward: The reward received from the environment + :param next_state: The next state of the environment after applying the action. + The next state should be similar to the state in its structure. + :param game_over: A boolean which should be True if the episode terminated after + the execution of the action. + :param info: A dictionary containing any additional information to be stored in the transition + """ + + self._state = self.state = state + self._action = self.action = action + self._reward = self.reward = reward + self._n_step_discounted_rewards = self.n_step_discounted_rewards = None + if not next_state: + next_state = state + self._next_state = self._next_state = next_state + self._game_over = self.game_over = game_over + if info is None: + self.info = {} + else: + self.info = info + + def __repr__(self): + return str(self.__dict__) + + @property + def state(self): + if self._state is None: + raise Exception("The state was not filled by any of the modules between the environment and the agent") + return self._state + + @state.setter + def state(self, val): + self._state = val + + @property + def action(self): + if self._action is None: + raise Exception("The action was not filled by any of the modules between the environment and the agent") + return self._action + + @action.setter + def action(self, val): + self._action = val + + @property + def reward(self): + + if self._reward is None: + raise Exception("The reward was not filled by any of the modules between the environment and the agent") + return self._reward + + @reward.setter + def reward(self, val): + self._reward = val + + @property + def n_step_discounted_rewards(self): + if self._n_step_discounted_rewards is None: + raise Exception("The n_step_discounted_rewards were not filled by any of the modules between the " + "environment and the agent. 
Make sure that you are using an episodic experience replay.") + return self._n_step_discounted_rewards + + @n_step_discounted_rewards.setter + def n_step_discounted_rewards(self, val): + self._n_step_discounted_rewards = val + + @property + def game_over(self): + if self._game_over is None: + raise Exception("The done flag was not filled by any of the modules between the environment and the agent") + return self._game_over + + @game_over.setter + def game_over(self, val): + self._game_over = val + + @property + def next_state(self): + if self._next_state is None: + raise Exception("The next state was not filled by any of the modules between the environment and the agent") + return self._next_state + + @next_state.setter + def next_state(self, val): + self._next_state = val + + def add_info(self, new_info: Dict[str, Any]) -> None: + if not new_info.keys().isdisjoint(self.info.keys()): + raise ValueError("The new info dictionary can not be appended to the existing info dictionary since there " + "are overlapping keys between the two. old keys: {}, new keys: {}" + .format(self.info.keys(), new_info.keys())) + self.info.update(new_info) + + def __copy__(self): + new_transition = type(self)() + new_transition.__dict__.update(self.__dict__) + new_transition.state = copy.copy(new_transition.state) + new_transition.next_state = copy.copy(new_transition.next_state) + new_transition.info = copy.copy(new_transition.info) + return new_transition
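An illustrative construction of a single transition (all values are made up):

transition = Transition(state={'observation': np.array([0.1, 0.2])},
                        action=1,
                        reward=0.5,
                        next_state={'observation': np.array([0.2, 0.3])},
                        game_over=False,
                        info={'max_action_value': 1.3})
transition.add_info({'timestep': 7})  # adding an overlapping key would raise ValueError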
+ + +
[docs]class EnvResponse(object):
+    def __init__(self, next_state: Dict[str, ObservationType], reward: RewardType, game_over: bool, info: Dict=None,
+                 goal: ObservationType=None):
+        """
+        An env response is a collection containing the information returned from the environment after a single action
+        has been performed on it.
+
+        :param next_state: The new state that the environment has transitioned into. Assumed to be a dictionary where the
+                           observation is located at state['observation']
+        :param reward: The reward received from the environment
+        :param game_over: A boolean which should be True if the episode terminated after
+                          the execution of the action.
+        :param info: any additional info from the environment
+        :param goal: a goal defined by the environment
+        """
+        self._next_state = self.next_state = next_state
+        self._reward = self.reward = reward
+        self._game_over = self.game_over = game_over
+        self._goal = self.goal = goal
+        if info is None:
+            self.info = {}
+        else:
+            self.info = info
+
+    def __repr__(self):
+        return str(self.__dict__)
+
+    @property
+    def next_state(self):
+        return self._next_state
+
+    @next_state.setter
+    def next_state(self, val):
+        self._next_state = val
+
+    @property
+    def reward(self):
+        return self._reward
+
+    @reward.setter
+    def reward(self, val):
+        self._reward = val
+
+    @property
+    def game_over(self):
+        return self._game_over
+
+    @game_over.setter
+    def game_over(self, val):
+        self._game_over = val
+
+    @property
+    def goal(self):
+        return self._goal
+
+    @goal.setter
+    def goal(self, val):
+        self._goal = val
+
+    def add_info(self, info: Dict[str, Any]) -> None:
+        # raise only when the two dictionaries share keys (mirrors Transition.add_info)
+        if not info.keys().isdisjoint(self.info.keys()):
+            raise ValueError("The new info dictionary can not be appended to the existing info dictionary since there "
+                             "are overlapping keys between the two")
+        self.info.update(info)
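For illustration, a response describing a terminal step with no extra info:

env_response = EnvResponse(next_state={'observation': np.zeros(4)},
                           reward=-1.0,
                           game_over=True)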
+ + +
[docs]class ActionInfo(object): + """ + Action info is a class that holds an action and various additional information details about it + """ + + def __init__(self, action: ActionType, action_probability: float=0, + action_value: float=0., state_value: float=0., max_action_value: float=None, + action_intrinsic_reward: float=0): + """ + :param action: the action + :param action_probability: the probability that the action was given when selecting it + :param action_value: the state-action value (Q value) of the action + :param state_value: the state value (V value) of the state where the action was taken + :param max_action_value: in case this is an action that was selected randomly, this is the value of the action + that received the maximum value. if no value is given, the action is assumed to be the + action with the maximum value + :param action_intrinsic_reward: can contain any intrinsic reward that the agent wants to add to this action + selection + """ + self.action = action + self.action_probability = action_probability + self.action_value = action_value + self.state_value = state_value + if not max_action_value: + self.max_action_value = action_value + else: + self.max_action_value = max_action_value + self.action_intrinsic_reward = action_intrinsic_reward
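As an illustration, the information a value-based agent might attach to a greedily selected action:

action_info = ActionInfo(action=2,
                         action_probability=1.0,
                         action_value=0.87,   # Q(s, a) of the chosen action
                         state_value=0.87)    # V(s) estimate; max_action_value defaults to action_value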
+ + +
[docs]class Batch(object): + """ + A wrapper around a list of transitions that helps extracting batches of parameters from it. + For example, one can extract a list of states corresponding to the list of transitions. + The class uses lazy evaluation in order to return each of the available parameters. + """ + def __init__(self, transitions: List[Transition]): + """ + :param transitions: a list of transitions to extract the batch from + """ + self.transitions = transitions + self._states = {} + self._actions = None + self._rewards = None + self._n_step_discounted_rewards = None + self._game_overs = None + self._next_states = {} + self._goals = None + self._info = {} + +
[docs] def slice(self, start, end) -> None: + """ + Keep a slice from the batch and discard the rest of the batch + + :param start: the start index in the slice + :param end: the end index in the slice + :return: None + """ + + self.transitions = self.transitions[start:end] + for k, v in self._states.items(): + self._states[k] = v[start:end] + if self._actions is not None: + self._actions = self._actions[start:end] + if self._rewards is not None: + self._rewards = self._rewards[start:end] + if self._n_step_discounted_rewards is not None: + self._n_step_discounted_rewards = self._n_step_discounted_rewards[start:end] + if self._game_overs is not None: + self._game_overs = self._game_overs[start:end] + for k, v in self._next_states.items(): + self._next_states[k] = v[start:end] + if self._goals is not None: + self._goals = self._goals[start:end] + for k, v in self._info.items(): + self._info[k] = v[start:end]
+ +
[docs] def shuffle(self) -> None: + """ + Shuffle all the transitions in the batch + + :return: None + """ + batch_order = list(range(self.size)) + shuffle(batch_order) + self.transitions = [self.transitions[i] for i in batch_order] + self._states = {} + self._actions = None + self._rewards = None + self._n_step_discounted_rewards = None + self._game_overs = None + self._next_states = {} + self._goals = None + self._info = {}
+ + # This seems to be slower + # for k, v in self._states.items(): + # self._states[k] = [v[i] for i in batch_order] + # if self._actions is not None: + # self._actions = [self._actions[i] for i in batch_order] + # if self._rewards is not None: + # self._rewards = [self._rewards[i] for i in batch_order] + # if self._total_returns is not None: + # self._total_returns = [self._total_returns[i] for i in batch_order] + # if self._game_overs is not None: + # self._game_overs = [self._game_overs[i] for i in batch_order] + # for k, v in self._next_states.items(): + # self._next_states[k] = [v[i] for i in batch_order] + # if self._goals is not None: + # self._goals = [self._goals[i] for i in batch_order] + # for k, v in self._info.items(): + # self._info[k] = [v[i] for i in batch_order] + +
[docs] def states(self, fetches: List[str], expand_dims=False) -> Dict[str, np.ndarray]: + """ + follow the keys in fetches to extract the corresponding items from the states in the batch + if these keys were not already extracted before. return only the values corresponding to those keys + + :param fetches: the keys of the state dictionary to extract + :param expand_dims: add an extra dimension to each of the value batches + :return: a dictionary containing a batch of values correponding to each of the given fetches keys + """ + current_states = {} + # there are cases (e.g. ddpg) where the state does not contain all the information needed for running + # through the network and this has to be added externally (e.g. ddpg where the action needs to be given in + # addition to the current_state, so that all the inputs of the network will be filled) + for key in set(fetches).intersection(self.transitions[0].state.keys()): + if key not in self._states.keys(): + self._states[key] = np.array([np.array(transition.state[key]) for transition in self.transitions]) + if expand_dims: + current_states[key] = np.expand_dims(self._states[key], -1) + else: + current_states[key] = self._states[key] + return current_states
+ +
[docs] def actions(self, expand_dims=False) -> np.ndarray: + """ + if the actions were not converted to a batch before, extract them to a batch and then return the batch + + :param expand_dims: add an extra dimension to the actions batch + :return: a numpy array containing all the actions of the batch + """ + if self._actions is None: + self._actions = np.array([transition.action for transition in self.transitions]) + if expand_dims: + return np.expand_dims(self._actions, -1) + return self._actions
+ +
[docs] def rewards(self, expand_dims=False) -> np.ndarray: + """ + if the rewards were not converted to a batch before, extract them to a batch and then return the batch + + :param expand_dims: add an extra dimension to the rewards batch + :return: a numpy array containing all the rewards of the batch + """ + if self._rewards is None: + self._rewards = np.array([transition.reward for transition in self.transitions]) + if expand_dims: + return np.expand_dims(self._rewards, -1) + return self._rewards
+ +
[docs] def n_step_discounted_rewards(self, expand_dims=False) -> np.ndarray: + """ + if the n_step_discounted_rewards were not converted to a batch before, extract them to a batch and then return + the batch + if the n step discounted rewards were not filled, this will raise an exception + :param expand_dims: add an extra dimension to the total_returns batch + :return: a numpy array containing all the total return values of the batch + """ + if self._n_step_discounted_rewards is None: + self._n_step_discounted_rewards = np.array([transition.n_step_discounted_rewards for transition in + self.transitions]) + if expand_dims: + return np.expand_dims(self._n_step_discounted_rewards, -1) + return self._n_step_discounted_rewards
+ +
[docs] def game_overs(self, expand_dims=False) -> np.ndarray: + """ + if the game_overs were not converted to a batch before, extract them to a batch and then return the batch + + :param expand_dims: add an extra dimension to the game_overs batch + :return: a numpy array containing all the game over flags of the batch + """ + if self._game_overs is None: + self._game_overs = np.array([transition.game_over for transition in self.transitions]) + if expand_dims: + return np.expand_dims(self._game_overs, -1) + return self._game_overs
+ +
[docs] def next_states(self, fetches: List[str], expand_dims=False) -> Dict[str, np.ndarray]: + """ + follow the keys in fetches to extract the corresponding items from the next states in the batch + if these keys were not already extracted before. return only the values corresponding to those keys + + :param fetches: the keys of the state dictionary to extract + :param expand_dims: add an extra dimension to each of the value batches + :return: a dictionary containing a batch of values correponding to each of the given fetches keys + """ + next_states = {} + # there are cases (e.g. ddpg) where the state does not contain all the information needed for running + # through the network and this has to be added externally (e.g. ddpg where the action needs to be given in + # addition to the current_state, so that all the inputs of the network will be filled) + for key in set(fetches).intersection(self.transitions[0].next_state.keys()): + if key not in self._next_states.keys(): + self._next_states[key] = np.array( + [np.array(transition.next_state[key]) for transition in self.transitions]) + if expand_dims: + next_states[key] = np.expand_dims(self._next_states[key], -1) + else: + next_states[key] = self._next_states[key] + return next_states
+ +
[docs] def goals(self, expand_dims=False) -> np.ndarray: + """ + if the goals were not converted to a batch before, extract them to a batch and then return the batch + if the goal was not filled, this will raise an exception + + :param expand_dims: add an extra dimension to the goals batch + :return: a numpy array containing all the goals of the batch + """ + if self._goals is None: + self._goals = np.array([transition.goal for transition in self.transitions]) + if expand_dims: + return np.expand_dims(self._goals, -1) + return self._goals
+ +
[docs]    def info_as_list(self, key) -> list:
+        """
+        get the info values for the given key and store them internally as a list, if they were not stored before.
+        return them as a list
+
+        :param key: the key of the info dictionary to extract from each transition
+        :return: a list containing all the info values of the batch corresponding to the given key
+        """
+        if key not in self._info.keys():
+            self._info[key] = [transition.info[key] for transition in self.transitions]
+        return self._info[key]
+ +
[docs] def info(self, key, expand_dims=False) -> np.ndarray: + """ + if the given info dictionary key was not converted to a batch before, extract it to a batch and then return the + batch. if the key is not part of the keys in the info dictionary, this will raise an exception + + :param expand_dims: add an extra dimension to the info batch + :return: a numpy array containing all the info values of the batch corresponding to the given key + """ + info_list = self.info_as_list(key) + + if expand_dims: + return np.expand_dims(info_list, -1) + return np.array(info_list)
+ + @property + def size(self) -> int: + """ + :return: the size of the batch + """ + return len(self.transitions) + + def __getitem__(self, key): + """ + get an item from the transitions list + + :param key: index of the transition in the batch + :return: the transition corresponding to the given index + """ + return self.transitions[key] + + def __setitem__(self, key, item): + """ + set an item in the transition list + + :param key: index of the transition in the batch + :param item: the transition to place in the given index + :return: None + """ + self.transitions[key] = item
+ + +class TotalStepsCounter(object): + """ + A wrapper around a dictionary counting different StepMethods steps done. + """ + + def __init__(self): + self.counters = { + EnvironmentEpisodes: 0, + EnvironmentSteps: 0, + TrainingSteps: 0 + } + + def __getitem__(self, key: Type[StepMethod]) -> int: + """ + get counter value + + :param key: counter type + :return: the counter value + """ + return self.counters[key] + + def __setitem__(self, key: StepMethod, item: int) -> None: + """ + set an item in the transition list + + :param key: counter type + :param item: an integer representing the new counter value + :return: None + """ + self.counters[key] = item + + def __add__(self, other: Type[StepMethod]) -> Type[StepMethod]: + return other.__class__(self.counters[other.__class__] + other.num_steps) + + def __lt__(self, other: Type[StepMethod]): + return self.counters[other.__class__] < other.num_steps + + +class GradientClippingMethod(Enum): + ClipByGlobalNorm = 0 + ClipByNorm = 1 + ClipByValue = 2 + + +
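A usage sketch for the Batch wrapper defined above; my_transitions stands in for any list of Transition objects:

batch = Batch(my_transitions)
batch.shuffle()
states = batch.states(['observation'])     # dict of numpy arrays keyed by the requested state keys
actions = batch.actions(expand_dims=True)  # shape (batch_size, 1)
dones = batch.game_overs()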
[docs]class Episode(object): + """ + An Episode represents a set of sequential transitions, that end with a terminal state. + """ + def __init__(self, discount: float=0.99, bootstrap_total_return_from_old_policy: bool=False, n_step: int=-1): + """ + :param discount: the discount factor to use when calculating total returns + :param bootstrap_total_return_from_old_policy: should the total return be bootstrapped from the values in the + memory + :param n_step: the number of future steps to sum the reward over before bootstrapping + """ + self.transitions = [] + self._length = 0 + self.discount = discount + self.bootstrap_total_return_from_old_policy = bootstrap_total_return_from_old_policy + self.n_step = n_step + self.is_complete = False + +
[docs] def insert(self, transition: Transition) -> None: + """ + Insert a new transition to the episode. If the game_over flag in the transition is set to True, + the episode will be marked as complete. + + :param transition: The new transition to insert to the episode + :return: None + """ + self.transitions.append(transition) + self._length += 1 + if transition.game_over: + self.is_complete = True
+ +
[docs] def is_empty(self) -> bool: + """ + Check if the episode is empty + + :return: A boolean value determining if the episode is empty or not + """ + return self.length() == 0
+ +
[docs] def length(self) -> int: + """ + Return the length of the episode, which is the number of transitions it holds. + + :return: The number of transitions in the episode + """ + return self._length
+ + def __len__(self): + return self.length() + +
[docs] def get_transition(self, transition_idx: int) -> Transition: + """ + Get a specific transition by its index. + + :param transition_idx: The index of the transition to get + :return: The transition which is stored in the given index + """ + return self.transitions[transition_idx]
+ +
[docs] def get_last_transition(self) -> Transition: + """ + Get the last transition in the episode, or None if there are no transition available + + :return: The last transition in the episode + """ + return self.get_transition(-1) if self.length() > 0 else None
+ +
[docs] def get_first_transition(self) -> Transition: + """ + Get the first transition in the episode, or None if there are no transitions available + + :return: The first transition in the episode + """ + return self.get_transition(0) if self.length() > 0 else None
+ +
[docs] def update_discounted_rewards(self): + """ + Update the discounted returns for all the transitions in the episode. + The returns will be calculated according to the rewards of each transition, together with the number of steps + to bootstrap from and the discount factor, as defined by n_step and discount respectively when initializing + the episode. + + :return: None + """ + if self.n_step == -1 or self.n_step > self.length(): + curr_n_step = self.length() + else: + curr_n_step = self.n_step + + rewards = np.array([t.reward for t in self.transitions]) + rewards = rewards.astype('float') + discounted_rewards = rewards.copy() + current_discount = self.discount + for i in range(1, curr_n_step): + discounted_rewards += current_discount * np.pad(rewards[i:], (0, i), 'constant', constant_values=0) + current_discount *= self.discount + + # calculate the bootstrapped returns + if self.bootstrap_total_return_from_old_policy: + bootstraps = np.array([np.squeeze(t.info['max_action_value']) for t in self.transitions[curr_n_step:]]) + bootstrapped_return = discounted_rewards + current_discount * np.pad(bootstraps, (0, curr_n_step), + 'constant', constant_values=0) + discounted_rewards = bootstrapped_return + + for transition_idx in range(self.length()): + self.transitions[transition_idx].n_step_discounted_rewards = discounted_rewards[transition_idx]
+ + def update_transitions_rewards_and_bootstrap_data(self): + if not isinstance(self.n_step, int) or (self.n_step < 1 and self.n_step != -1): + raise ValueError("n-step should be an integer with value >= 1, or set to -1 for always setting to episode" + " length.") + elif self.n_step > 1: + curr_n_step = self.n_step if self.n_step < self.length() else self.length() + + for idx, transition in enumerate(self.transitions): + next_n_step_transition_idx = (idx + curr_n_step) + if next_n_step_transition_idx < len(self.transitions): + # next state will now point to the n-step next state + transition.next_state = self.transitions[next_n_step_transition_idx].state + transition.info['should_bootstrap_next_state'] = True + else: + transition.next_state = self.transitions[-1].next_state + transition.info['should_bootstrap_next_state'] = False + + self.update_discounted_rewards() + + + +
[docs] def get_transitions_attribute(self, attribute_name: str) -> List[Any]: + """ + Get the values for some transition attribute from all the transitions in the episode. + For example, this allows getting the rewards for all the transitions as a list by calling + get_transitions_attribute('reward') + + :param attribute_name: The name of the attribute to extract from all the transitions + :return: A list of values from all the transitions according to the attribute given in attribute_name + """ + if len(self.transitions) > 0 and hasattr(self.transitions[0], attribute_name): + return [getattr(t, attribute_name) for t in self.transitions] + elif len(self.transitions) == 0: + return [] + else: + raise ValueError("The transitions have no such attribute name")
+ + def __getitem__(self, sliced): + return self.transitions[sliced]
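An illustrative use of the Episode class, computing 3-step discounted returns once the episode has terminated (my_transitions is a hypothetical list of Transition objects):

episode = Episode(discount=0.99, n_step=3)
for transition in my_transitions:
    episode.insert(transition)                  # marks the episode complete on a game_over transition
if episode.is_complete:
    episode.update_discounted_rewards()         # fills transition.n_step_discounted_rewards
    returns = episode.get_transitions_attribute('n_step_discounted_rewards')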
+ + +""" +Video Dumping Methods +""" + + +class VideoDumpFilter(object): + """ + Method used to decide when to dump videos + """ + def should_dump(self, episode_terminated=False, **kwargs): + raise NotImplementedError("") + + +class AlwaysDumpFilter(VideoDumpFilter): + """ + Dump video for every episode + """ + def __init__(self): + super().__init__() + + def should_dump(self, episode_terminated=False, **kwargs): + return True + + +class MaxDumpFilter(VideoDumpFilter): + """ + Dump video every time a new max total reward has been achieved + """ + def __init__(self): + super().__init__() + self.max_reward_achieved = -np.inf + + def should_dump(self, episode_terminated=False, **kwargs): + # if the episode has not finished yet we want to be prepared for dumping a video + if not episode_terminated: + return True + if kwargs['total_reward_in_current_episode'] > self.max_reward_achieved: + self.max_reward_achieved = kwargs['total_reward_in_current_episode'] + return True + else: + return False + + +class EveryNEpisodesDumpFilter(object): + """ + Dump videos once in every N episodes + """ + def __init__(self, num_episodes_between_dumps: int): + super().__init__() + self.num_episodes_between_dumps = num_episodes_between_dumps + self.last_dumped_episode = 0 + if num_episodes_between_dumps < 1: + raise ValueError("the number of episodes between dumps should be a positive number") + + def should_dump(self, episode_terminated=False, **kwargs): + if kwargs['episode_idx'] >= self.last_dumped_episode + self.num_episodes_between_dumps - 1: + self.last_dumped_episode = kwargs['episode_idx'] + return True + else: + return False + + +class SelectedPhaseOnlyDumpFilter(object): + """ + Dump videos when the phase of the environment matches a predefined phase + """ + def __init__(self, run_phases: Union[RunPhase, List[RunPhase]]): + self.run_phases = force_list(run_phases) + + def should_dump(self, episode_terminated=False, **kwargs): + if kwargs['_phase'] in self.run_phases: + return True + else: + return False +
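The filters above are normally queried by the Environment class rather than called directly; purely as an illustration of their interface (argument values are made up, and combining them with all() is an assumption for the example):

filters = [SelectedPhaseOnlyDumpFilter(RunPhase.TEST), EveryNEpisodesDumpFilter(20)]
should_dump = all(f.should_dump(episode_terminated=True,
                                _phase=RunPhase.TEST,
                                episode_idx=19,
                                total_reward_in_current_episode=100.0)
                  for f in filters)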
\ No newline at end of file
diff --git a/docs/_modules/rl_coach/environments/carla_environment.html b/docs/_modules/rl_coach/environments/carla_environment.html
new file mode 100644
index 0000000..ff99f1c
--- /dev/null
+++ b/docs/_modules/rl_coach/environments/carla_environment.html
@@ -0,0 +1,695 @@
+ rl_coach.environments.carla_environment — Reinforcement Learning Coach 0.11.0 documentation

Source code for rl_coach.environments.carla_environment

+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import random
+import sys
+from os import path, environ
+
+from rl_coach.logger import screen
+from rl_coach.filters.action.partial_discrete_action_space_map import PartialDiscreteActionSpaceMap
+from rl_coach.filters.observation.observation_rgb_to_y_filter import ObservationRGBToYFilter
+from rl_coach.filters.observation.observation_to_uint8_filter import ObservationToUInt8Filter
+
+try:
+    if 'CARLA_ROOT' in environ:
+        sys.path.append(path.join(environ.get('CARLA_ROOT'), 'PythonClient'))
+    else:
+        screen.error("CARLA_ROOT was not defined. Please set it to point to the CARLA root directory and try again.")
+    from carla.client import CarlaClient
+    from carla.settings import CarlaSettings
+    from carla.tcp import TCPConnectionError
+    from carla.sensor import Camera
+    from carla.client import VehicleControl
+    from carla.planner.planner import Planner
+    from carla.driving_benchmark.experiment_suites.experiment_suite import ExperimentSuite
+except ImportError:
+    from rl_coach.logger import failed_imports
+    failed_imports.append("CARLA")
+
+import logging
+import subprocess
+from rl_coach.environments.environment import Environment, EnvironmentParameters, LevelSelection
+from rl_coach.spaces import BoxActionSpace, ImageObservationSpace, StateSpace, \
+    VectorObservationSpace
+from rl_coach.utils import get_open_port, force_list
+from enum import Enum
+import os
+import signal
+from typing import List, Union
+from rl_coach.base_parameters import VisualizationParameters
+from rl_coach.filters.filter import InputFilter, NoOutputFilter
+from rl_coach.filters.observation.observation_rescale_to_size_filter import ObservationRescaleToSizeFilter
+from rl_coach.filters.observation.observation_stacking_filter import ObservationStackingFilter
+import numpy as np
+
+
+# enum of the available levels and their path
+class CarlaLevel(Enum):
+    TOWN1 = {"map_name": "Town01", "map_path": "/Game/Maps/Town01"}
+    TOWN2 = {"map_name": "Town02", "map_path": "/Game/Maps/Town02"}
+
+key_map = {
+    'BRAKE': (274,),  # down arrow
+    'GAS': (273,),  # up arrow
+    'TURN_LEFT': (276,),  # left arrow
+    'TURN_RIGHT': (275,),  # right arrow
+    'GAS_AND_TURN_LEFT': (273, 276),
+    'GAS_AND_TURN_RIGHT': (273, 275),
+    'BRAKE_AND_TURN_LEFT': (274, 276),
+    'BRAKE_AND_TURN_RIGHT': (274, 275),
+}
+
+CarlaInputFilter = InputFilter(is_a_reference_filter=True)
+CarlaInputFilter.add_observation_filter('forward_camera', 'rescaling',
+                                        ObservationRescaleToSizeFilter(ImageObservationSpace(np.array([128, 180, 3]),
+                                                                                             high=255)))
+CarlaInputFilter.add_observation_filter('forward_camera', 'to_grayscale', ObservationRGBToYFilter())
+CarlaInputFilter.add_observation_filter('forward_camera', 'to_uint8', ObservationToUInt8Filter(0, 255))
+CarlaInputFilter.add_observation_filter('forward_camera', 'stacking', ObservationStackingFilter(4))
+
+CarlaOutputFilter = NoOutputFilter()
+
+
+class CameraTypes(Enum):
+    FRONT = "forward_camera"
+    LEFT = "left_camera"
+    RIGHT = "right_camera"
+    SEGMENTATION = "segmentation"
+    DEPTH = "depth"
+    LIDAR = "lidar"
+
+
+class CarlaEnvironmentParameters(EnvironmentParameters):
+    class Quality(Enum):
+        LOW = "Low"
+        EPIC = "Epic"
+
+    def __init__(self, level="town1"):
+        super().__init__(level=level)
+        self.frame_skip = 3  # the frame skip affects the fps of the server directly. fps = 30 / frameskip
+        self.server_height = 512
+        self.server_width = 720
+        self.camera_height = 128
+        self.camera_width = 180
+        self.experiment_suite = None  # an optional CARLA experiment suite to use
+        self.config = None
+        self.level = level
+        self.quality = self.Quality.LOW
+        self.cameras = [CameraTypes.FRONT]
+        self.weather_id = [1]
+        self.verbose = True
+        self.episode_max_time = 100000  # milliseconds for each episode
+        self.allow_braking = False
+        self.separate_actions_for_throttle_and_brake = False
+        self.num_speedup_steps = 30
+        self.max_speed = 35.0  # km/h
+        self.default_input_filter = CarlaInputFilter
+        self.default_output_filter = CarlaOutputFilter
+
+    @property
+    def path(self):
+        return 'rl_coach.environments.carla_environment:CarlaEnvironment'
+
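A hypothetical configuration of the parameters class above, as it might appear in a preset (the surrounding preset/graph-manager wiring is not shown):

env_params = CarlaEnvironmentParameters(level='town1')
env_params.cameras = [CameraTypes.FRONT, CameraTypes.SEGMENTATION]
env_params.quality = CarlaEnvironmentParameters.Quality.LOW
env_params.allow_braking = True
env_params.max_speed = 30.0  # km/h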
+
+
[docs]class CarlaEnvironment(Environment): + def __init__(self, level: LevelSelection, + seed: int, frame_skip: int, human_control: bool, custom_reward_threshold: Union[int, float], + visualization_parameters: VisualizationParameters, + server_height: int, server_width: int, camera_height: int, camera_width: int, + verbose: bool, experiment_suite: ExperimentSuite, config: str, episode_max_time: int, + allow_braking: bool, quality: CarlaEnvironmentParameters.Quality, + cameras: List[CameraTypes], weather_id: List[int], experiment_path: str, + separate_actions_for_throttle_and_brake: bool, + num_speedup_steps: int, max_speed: float, target_success_rate: float = 1.0, **kwargs): + super().__init__(level, seed, frame_skip, human_control, custom_reward_threshold, visualization_parameters, target_success_rate) + + # server configuration + self.server_height = server_height + self.server_width = server_width + self.port = get_open_port() + self.host = 'localhost' + self.map_name = CarlaLevel[level.upper()].value['map_name'] + self.map_path = CarlaLevel[level.upper()].value['map_path'] + self.experiment_path = experiment_path + + # client configuration + self.verbose = verbose + self.quality = quality + self.cameras = cameras + self.weather_id = weather_id + self.episode_max_time = episode_max_time + self.allow_braking = allow_braking + self.separate_actions_for_throttle_and_brake = separate_actions_for_throttle_and_brake + self.camera_width = camera_width + self.camera_height = camera_height + + # setup server settings + self.experiment_suite = experiment_suite + self.config = config + if self.config: + # load settings from file + with open(self.config, 'r') as fp: + self.settings = fp.read() + else: + # hard coded settings + self.settings = CarlaSettings() + self.settings.set( + SynchronousMode=True, + SendNonPlayerAgentsInfo=False, + NumberOfVehicles=15, + NumberOfPedestrians=30, + WeatherId=random.choice(force_list(self.weather_id)), + QualityLevel=self.quality.value, + SeedVehicles=seed, + SeedPedestrians=seed) + if seed is None: + self.settings.randomize_seeds() + + self.settings = self._add_cameras(self.settings, self.cameras, self.camera_width, self.camera_height) + + # open the server + self.server = self._open_server() + + logging.disable(40) + + # open the client + self.game = CarlaClient(self.host, self.port, timeout=99999999) + self.game.connect() + if self.experiment_suite: + self.current_experiment_idx = 0 + self.current_experiment = self.experiment_suite.get_experiments()[self.current_experiment_idx] + self.scene = self.game.load_settings(self.current_experiment.conditions) + else: + self.scene = self.game.load_settings(self.settings) + + # get available start positions + self.positions = self.scene.player_start_spots + self.num_positions = len(self.positions) + self.current_start_position_idx = 0 + self.current_pose = 0 + + # state space + self.state_space = StateSpace({ + "measurements": VectorObservationSpace(4, measurements_names=["forward_speed", "x", "y", "z"]) + }) + for camera in self.scene.sensors: + self.state_space[camera.name] = ImageObservationSpace( + shape=np.array([self.camera_height, self.camera_width, 3]), + high=255) + + # action space + if self.separate_actions_for_throttle_and_brake: + self.action_space = BoxActionSpace(shape=3, low=np.array([-1, 0, 0]), high=np.array([1, 1, 1]), + descriptions=["steer", "gas", "brake"]) + else: + self.action_space = BoxActionSpace(shape=2, low=np.array([-1, -1]), high=np.array([1, 1]), + descriptions=["steer", "gas_and_brake"]) 
+ + # human control + if self.human_control: + # convert continuous action space to discrete + self.steering_strength = 0.5 + self.gas_strength = 1.0 + self.brake_strength = 0.5 + # TODO: reverse order of actions + self.action_space = PartialDiscreteActionSpaceMap( + target_actions=[[0., 0.], + [0., -self.steering_strength], + [0., self.steering_strength], + [self.gas_strength, 0.], + [-self.brake_strength, 0], + [self.gas_strength, -self.steering_strength], + [self.gas_strength, self.steering_strength], + [self.brake_strength, -self.steering_strength], + [self.brake_strength, self.steering_strength]], + descriptions=['NO-OP', 'TURN_LEFT', 'TURN_RIGHT', 'GAS', 'BRAKE', + 'GAS_AND_TURN_LEFT', 'GAS_AND_TURN_RIGHT', + 'BRAKE_AND_TURN_LEFT', 'BRAKE_AND_TURN_RIGHT'] + ) + + # map keyboard keys to actions + for idx, action in enumerate(self.action_space.descriptions): + for key in key_map.keys(): + if action == key: + self.key_to_action[key_map[key]] = idx + + self.num_speedup_steps = num_speedup_steps + self.max_speed = max_speed + + # measurements + self.autopilot = None + self.planner = Planner(self.map_name) + + # env initialization + self.reset_internal_state(True) + + # render + if self.is_rendered: + image = self.get_rendered_image() + self.renderer.create_screen(image.shape[1], image.shape[0]) + + self.target_success_rate = target_success_rate + + def _add_cameras(self, settings, cameras, camera_width, camera_height): + # add a front facing camera + if CameraTypes.FRONT in cameras: + camera = Camera(CameraTypes.FRONT.value) + camera.set(FOV=100) + camera.set_image_size(camera_width, camera_height) + camera.set_position(2.0, 0, 1.4) + camera.set_rotation(-15.0, 0, 0) + settings.add_sensor(camera) + + # add a left facing camera + if CameraTypes.LEFT in cameras: + camera = Camera(CameraTypes.LEFT.value) + camera.set(FOV=100) + camera.set_image_size(camera_width, camera_height) + camera.set_position(2.0, 0, 1.4) + camera.set_rotation(-15.0, -30, 0) + settings.add_sensor(camera) + + # add a right facing camera + if CameraTypes.RIGHT in cameras: + camera = Camera(CameraTypes.RIGHT.value) + camera.set(FOV=100) + camera.set_image_size(camera_width, camera_height) + camera.set_position(2.0, 0, 1.4) + camera.set_rotation(-15.0, 30, 0) + settings.add_sensor(camera) + + # add a front facing depth camera + if CameraTypes.DEPTH in cameras: + camera = Camera(CameraTypes.DEPTH.value) + camera.set_image_size(camera_width, camera_height) + camera.set_position(0.2, 0, 1.3) + camera.set_rotation(8, 30, 0) + camera.PostProcessing = 'Depth' + settings.add_sensor(camera) + + # add a front facing semantic segmentation camera + if CameraTypes.SEGMENTATION in cameras: + camera = Camera(CameraTypes.SEGMENTATION.value) + camera.set_image_size(camera_width, camera_height) + camera.set_position(0.2, 0, 1.3) + camera.set_rotation(8, 30, 0) + camera.PostProcessing = 'SemanticSegmentation' + settings.add_sensor(camera) + + return settings + + def _get_directions(self, current_point, end_point): + """ + Class that should return the directions to reach a certain goal + """ + + directions = self.planner.get_next_command( + (current_point.location.x, + current_point.location.y, 0.22), + (current_point.orientation.x, + current_point.orientation.y, + current_point.orientation.z), + (end_point.location.x, end_point.location.y, 0.22), + (end_point.orientation.x, end_point.orientation.y, end_point.orientation.z)) + return directions + + def _open_server(self): + log_path = path.join(self.experiment_path if self.experiment_path 
is not None else '.', 'logs', + "CARLA_LOG_{}.txt".format(self.port)) + if not os.path.exists(os.path.dirname(log_path)): + os.makedirs(os.path.dirname(log_path)) + with open(log_path, "wb") as out: + cmd = [path.join(environ.get('CARLA_ROOT'), 'CarlaUE4.sh'), self.map_path, + "-benchmark", "-carla-server", "-fps={}".format(30 / self.frame_skip), + "-world-port={}".format(self.port), + "-windowed -ResX={} -ResY={}".format(self.server_width, self.server_height), + "-carla-no-hud"] + + if self.config: + cmd.append("-carla-settings={}".format(self.config)) + p = subprocess.Popen(cmd, stdout=out, stderr=out) + + return p + + def _close_server(self): + os.killpg(os.getpgid(self.server.pid), signal.SIGKILL) + + def _update_state(self): + # get measurements and observations + measurements = [] + while type(measurements) == list: + measurements, sensor_data = self.game.read_data() + self.state = {} + + for camera in self.scene.sensors: + self.state[camera.name] = sensor_data[camera.name].data + + self.location = [measurements.player_measurements.transform.location.x, + measurements.player_measurements.transform.location.y, + measurements.player_measurements.transform.location.z] + + self.distance_from_goal = np.linalg.norm(np.array(self.location[:2]) - + [self.current_goal.location.x, self.current_goal.location.y]) + + is_collision = measurements.player_measurements.collision_vehicles != 0 \ + or measurements.player_measurements.collision_pedestrians != 0 \ + or measurements.player_measurements.collision_other != 0 + + speed_reward = measurements.player_measurements.forward_speed - 1 + if speed_reward > 30.: + speed_reward = 30. + self.reward = speed_reward \ + - (measurements.player_measurements.intersection_otherlane * 5) \ + - (measurements.player_measurements.intersection_offroad * 5) \ + - is_collision * 100 \ + - np.abs(self.control.steer) * 10 + + # update measurements + self.measurements = [measurements.player_measurements.forward_speed] + self.location + self.autopilot = measurements.player_measurements.autopilot_control + + # The directions to reach the goal (0 Follow lane, 1 Left, 2 Right, 3 Straight) + directions = int(self._get_directions(measurements.player_measurements.transform, self.current_goal) - 2) + self.state['high_level_command'] = directions + + if (measurements.game_timestamp >= self.episode_max_time) or is_collision: + self.done = True + + self.state['measurements'] = np.array(self.measurements) + + def _take_action(self, action): + self.control = VehicleControl() + + if self.separate_actions_for_throttle_and_brake: + self.control.steer = np.clip(action[0], -1, 1) + self.control.throttle = np.clip(action[1], 0, 1) + self.control.brake = np.clip(action[2], 0, 1) + else: + # transform the 2 value action (steer, throttle - brake) into a 3 value action (steer, throttle, brake) + self.control.steer = np.clip(action[0], -1, 1) + self.control.throttle = np.clip(action[1], 0, 1) + self.control.brake = np.abs(np.clip(action[1], -1, 0)) + + # prevent braking + if not self.allow_braking or self.control.brake < 0.1 or self.control.throttle > self.control.brake: + self.control.brake = 0 + + # prevent over speeding + if hasattr(self, 'measurements') and self.measurements[0] * 3.6 > self.max_speed and self.control.brake == 0: + self.control.throttle = 0.0 + + self.control.hand_brake = False + self.control.reverse = False + + self.game.send_control(self.control) + + def _load_experiment(self, experiment_idx): + self.current_experiment = 
self.experiment_suite.get_experiments()[experiment_idx] + self.scene = self.game.load_settings(self.current_experiment.conditions) + self.positions = self.scene.player_start_spots + self.num_positions = len(self.positions) + self.current_start_position_idx = 0 + self.current_pose = 0 + + def _restart_environment_episode(self, force_environment_reset=False): + # select start and end positions + if self.experiment_suite: + # if an expeirent suite is available, follow its given poses + if self.current_pose >= len(self.current_experiment.poses): + # load a new experiment + self.current_experiment_idx = (self.current_experiment_idx + 1) % len(self.experiment_suite.get_experiments()) + self._load_experiment(self.current_experiment_idx) + + self.current_start_position_idx = self.current_experiment.poses[self.current_pose][0] + self.current_goal = self.positions[self.current_experiment.poses[self.current_pose][1]] + self.current_pose += 1 + else: + # go over all the possible positions in a cyclic manner + self.current_start_position_idx = (self.current_start_position_idx + 1) % self.num_positions + + # choose a random goal destination + self.current_goal = random.choice(self.positions) + + try: + self.game.start_episode(self.current_start_position_idx) + except: + self.game.connect() + self.game.start_episode(self.current_start_position_idx) + + # start the game with some initial speed + for i in range(self.num_speedup_steps): + self.control = VehicleControl(throttle=1.0, brake=0, steer=0, hand_brake=False, reverse=False) + self.game.send_control(VehicleControl()) + + def get_rendered_image(self) -> np.ndarray: + """ + Return a numpy array containing the image that will be rendered to the screen. + This can be different from the observation. For example, mujoco's observation is a measurements vector. + :return: numpy array containing the image that will be rendered to the screen + """ + image = [self.state[camera.name] for camera in self.scene.sensors] + image = np.vstack(image) + return image + + def get_target_success_rate(self) -> float: + return self.target_success_rate
\ No newline at end of file
diff --git a/docs/_modules/rl_coach/environments/control_suite_environment.html b/docs/_modules/rl_coach/environments/control_suite_environment.html
new file mode 100644
index 0000000..a43cb1d
--- /dev/null
+++ b/docs/_modules/rl_coach/environments/control_suite_environment.html
@@ -0,0 +1,426 @@
+ rl_coach.environments.control_suite_environment — Reinforcement Learning Coach 0.11.0 documentation

Source code for rl_coach.environments.control_suite_environment

+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+
+import random
+from enum import Enum
+from typing import Union
+
+import numpy as np
+
+try:
+    from dm_control import suite
+    from dm_control.suite.wrappers import pixels
+except ImportError:
+    from rl_coach.logger import failed_imports
+    failed_imports.append("DeepMind Control Suite")
+
+from rl_coach.base_parameters import VisualizationParameters
+from rl_coach.environments.environment import Environment, EnvironmentParameters, LevelSelection
+from rl_coach.filters.filter import NoInputFilter, NoOutputFilter
+from rl_coach.spaces import BoxActionSpace, ImageObservationSpace, VectorObservationSpace, StateSpace
+
+
+class ObservationType(Enum):
+    Measurements = 1
+    Image = 2
+    Image_and_Measurements = 3
+
+
+# Parameters
+class ControlSuiteEnvironmentParameters(EnvironmentParameters):
+    def __init__(self, level=None):
+        super().__init__(level=level)
+        self.observation_type = ObservationType.Measurements
+        self.default_input_filter = ControlSuiteInputFilter
+        self.default_output_filter = ControlSuiteOutputFilter
+
+    @property
+    def path(self):
+        return 'rl_coach.environments.control_suite_environment:ControlSuiteEnvironment'
+
+
+"""
+ControlSuite Environment Components
+"""
+ControlSuiteInputFilter = NoInputFilter()
+ControlSuiteOutputFilter = NoOutputFilter()
+
+control_suite_envs = {':'.join(env): ':'.join(env) for env in suite.BENCHMARKING}
+
+
+# Environment
+
[docs]class ControlSuiteEnvironment(Environment): + def __init__(self, level: LevelSelection, frame_skip: int, visualization_parameters: VisualizationParameters, + target_success_rate: float=1.0, seed: Union[None, int]=None, human_control: bool=False, + observation_type: ObservationType=ObservationType.Measurements, + custom_reward_threshold: Union[int, float]=None, **kwargs): + """ + :param level: (str) + A string representing the control suite level to run. This can also be a LevelSelection object. + For example, cartpole:swingup. + + :param frame_skip: (int) + The number of frames to skip between any two actions given by the agent. The action will be repeated + for all the skipped frames. + + :param visualization_parameters: (VisualizationParameters) + The parameters used for visualizing the environment, such as the render flag, storing videos etc. + + :param target_success_rate: (float) + Stop experiment if given target success rate was achieved. + + :param seed: (int) + A seed to use for the random number generator when running the environment. + + :param human_control: (bool) + A flag that allows controlling the environment using the keyboard keys. + + :param observation_type: (ObservationType) + An enum which defines which observation to use. The current options are to use: + * Measurements only - a vector of joint torques and similar measurements + * Image only - an image of the environment as seen by a camera attached to the simulator + * Measurements & Image - both type of observations will be returned in the state using the keys + 'measurements' and 'pixels' respectively. + + :param custom_reward_threshold: (float) + Allows defining a custom reward that will be used to decide when the agent succeeded in passing the environment. + + """ + super().__init__(level, seed, frame_skip, human_control, custom_reward_threshold, visualization_parameters, target_success_rate) + + self.observation_type = observation_type + + # load and initialize environment + domain_name, task_name = self.env_id.split(":") + self.env = suite.load(domain_name=domain_name, task_name=task_name, task_kwargs={'random': seed}) + + if observation_type != ObservationType.Measurements: + self.env = pixels.Wrapper(self.env, pixels_only=observation_type == ObservationType.Image) + + # seed + if self.seed is not None: + np.random.seed(self.seed) + random.seed(self.seed) + + self.state_space = StateSpace({}) + + # image observations + if observation_type != ObservationType.Measurements: + self.state_space['pixels'] = ImageObservationSpace(shape=self.env.observation_spec()['pixels'].shape, + high=255) + + # measurements observations + if observation_type != ObservationType.Image: + measurements_space_size = 0 + measurements_names = [] + for observation_space_name, observation_space in self.env.observation_spec().items(): + if len(observation_space.shape) == 0: + measurements_space_size += 1 + measurements_names.append(observation_space_name) + elif len(observation_space.shape) == 1: + measurements_space_size += observation_space.shape[0] + measurements_names.extend(["{}_{}".format(observation_space_name, i) for i in + range(observation_space.shape[0])]) + self.state_space['measurements'] = VectorObservationSpace(shape=measurements_space_size, + measurements_names=measurements_names) + + # actions + self.action_space = BoxActionSpace( + shape=self.env.action_spec().shape[0], + low=self.env.action_spec().minimum, + high=self.env.action_spec().maximum + ) + + # initialize the state by getting a new state from the 
environment + self.reset_internal_state(True) + + # render + if self.is_rendered: + image = self.get_rendered_image() + scale = 1 + if self.human_control: + scale = 2 + if not self.native_rendering: + self.renderer.create_screen(image.shape[1]*scale, image.shape[0]*scale) + + self.target_success_rate = target_success_rate + + def _update_state(self): + self.state = {} + + if self.observation_type != ObservationType.Measurements: + self.pixels = self.last_result.observation['pixels'] + self.state['pixels'] = self.pixels + + if self.observation_type != ObservationType.Image: + self.measurements = np.array([]) + for sub_observation in self.last_result.observation.values(): + if isinstance(sub_observation, np.ndarray) and len(sub_observation.shape) == 1: + self.measurements = np.concatenate((self.measurements, sub_observation)) + else: + self.measurements = np.concatenate((self.measurements, np.array([sub_observation]))) + self.state['measurements'] = self.measurements + + self.reward = self.last_result.reward if self.last_result.reward is not None else 0 + + self.done = self.last_result.last() + + def _take_action(self, action): + if type(self.action_space) == BoxActionSpace: + action = self.action_space.clip_action_to_space(action) + + self.last_result = self.env.step(action) + + def _restart_environment_episode(self, force_environment_reset=False): + self.last_result = self.env.reset() + + def _render(self): + pass + + def get_rendered_image(self): + return self.env.physics.render(camera_id=0) + + def get_target_success_rate(self) -> float: + return self.target_success_rate
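The 'measurements' space above is built by flattening dm_control's per-key observation spec into one vector. A short sketch of that flattening against the suite directly (assumes dm_control is installed; cartpole:swingup is just an example level):

from dm_control import suite

env = suite.load(domain_name="cartpole", task_name="swingup")

# mirror the loop above: scalars contribute one entry, 1-D arrays one entry per element
measurement_names = []
for name, spec in env.observation_spec().items():
    if len(spec.shape) == 0:
        measurement_names.append(name)
    elif len(spec.shape) == 1:
        measurement_names.extend("{}_{}".format(name, i) for i in range(spec.shape[0]))

print(len(measurement_names), measurement_names)   # size and names of the flattened measurements vector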
+ + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/docs/_modules/rl_coach/environments/doom_environment.html b/docs/_modules/rl_coach/environments/doom_environment.html new file mode 100644 index 0000000..a71d746 --- /dev/null +++ b/docs/_modules/rl_coach/environments/doom_environment.html @@ -0,0 +1,495 @@ + + + + + + + + + + + rl_coach.environments.doom_environment — Reinforcement Learning Coach 0.11.0 documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Source code for rl_coach.environments.doom_environment

+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+try:
+    import vizdoom
+except ImportError:
+    from rl_coach.logger import failed_imports
+    failed_imports.append("ViZDoom")
+
+import os
+from enum import Enum
+from os import path, environ
+from typing import Union, List
+
+import numpy as np
+
+from rl_coach.base_parameters import VisualizationParameters
+from rl_coach.environments.environment import Environment, EnvironmentParameters, LevelSelection
+from rl_coach.filters.action.full_discrete_action_space_map import FullDiscreteActionSpaceMap
+from rl_coach.filters.filter import InputFilter, OutputFilter
+from rl_coach.filters.observation.observation_rescale_to_size_filter import ObservationRescaleToSizeFilter
+from rl_coach.filters.observation.observation_rgb_to_y_filter import ObservationRGBToYFilter
+from rl_coach.filters.observation.observation_stacking_filter import ObservationStackingFilter
+from rl_coach.filters.observation.observation_to_uint8_filter import ObservationToUInt8Filter
+from rl_coach.spaces import MultiSelectActionSpace, ImageObservationSpace, \
+    VectorObservationSpace, StateSpace
+
+
+# enum of the available levels and their config file paths
+class DoomLevel(Enum):
+    BASIC = "basic.cfg"
+    DEFEND = "defend_the_center.cfg"
+    DEATHMATCH = "deathmatch.cfg"
+    MY_WAY_HOME = "my_way_home.cfg"
+    TAKE_COVER = "take_cover.cfg"
+    HEALTH_GATHERING = "health_gathering.cfg"
+    HEALTH_GATHERING_SUPREME_COACH_LOCAL = "D2_navigation.cfg"  # from https://github.com/IntelVCL/DirectFuturePrediction/tree/master/maps
+    DEFEND_THE_LINE = "defend_the_line.cfg"
+    DEADLY_CORRIDOR = "deadly_corridor.cfg"
+    BATTLE_COACH_LOCAL = "D3_battle.cfg"  # from https://github.com/IntelVCL/DirectFuturePrediction/tree/master/maps
+
+key_map = {
+    'NO-OP': 96,  # `
+    'ATTACK': 13,  # enter
+    'CROUCH': 306,  # ctrl
+    'DROP_SELECTED_ITEM': ord("t"),
+    'DROP_SELECTED_WEAPON': ord("t"),
+    'JUMP': 32,  # spacebar
+    'LAND': ord("l"),
+    'LOOK_DOWN': 274,  # down arrow
+    'LOOK_UP': 273,  # up arrow
+    'MOVE_BACKWARD': ord("s"),
+    'MOVE_DOWN': ord("s"),
+    'MOVE_FORWARD': ord("w"),
+    'MOVE_LEFT': 276,
+    'MOVE_RIGHT': 275,
+    'MOVE_UP': ord("w"),
+    'RELOAD': ord("r"),
+    'SELECT_NEXT_WEAPON': ord("q"),
+    'SELECT_PREV_WEAPON': ord("e"),
+    'SELECT_WEAPON0': ord("0"),
+    'SELECT_WEAPON1': ord("1"),
+    'SELECT_WEAPON2': ord("2"),
+    'SELECT_WEAPON3': ord("3"),
+    'SELECT_WEAPON4': ord("4"),
+    'SELECT_WEAPON5': ord("5"),
+    'SELECT_WEAPON6': ord("6"),
+    'SELECT_WEAPON7': ord("7"),
+    'SELECT_WEAPON8': ord("8"),
+    'SELECT_WEAPON9': ord("9"),
+    'SPEED': 304,  # shift
+    'STRAFE': 9,  # tab
+    'TURN180': ord("u"),
+    'TURN_LEFT': ord("a"),  # a
+    'TURN_RIGHT': ord("d"),  # d
+    'USE': ord("f"),
+}
+
+
+DoomInputFilter = InputFilter(is_a_reference_filter=True)
+DoomInputFilter.add_observation_filter('observation', 'rescaling',
+                                       ObservationRescaleToSizeFilter(ImageObservationSpace(np.array([60, 76, 3]),
+                                                                                            high=255)))
+DoomInputFilter.add_observation_filter('observation', 'to_grayscale', ObservationRGBToYFilter())
+DoomInputFilter.add_observation_filter('observation', 'to_uint8', ObservationToUInt8Filter(0, 255))
+DoomInputFilter.add_observation_filter('observation', 'stacking', ObservationStackingFilter(3))
+
+
+DoomOutputFilter = OutputFilter(is_a_reference_filter=True)
+DoomOutputFilter.add_action_filter('to_discrete', FullDiscreteActionSpaceMap())
+
+
+class DoomEnvironmentParameters(EnvironmentParameters):
+    def __init__(self, level=None):
+        super().__init__(level=level)
+        self.default_input_filter = DoomInputFilter
+        self.default_output_filter = DoomOutputFilter
+        self.cameras = [DoomEnvironment.CameraTypes.OBSERVATION]
+
+    @property
+    def path(self):
+        return 'rl_coach.environments.doom_environment:DoomEnvironment'
+
+
+
[docs]class DoomEnvironment(Environment): + class CameraTypes(Enum): + OBSERVATION = ("observation", "screen_buffer") + DEPTH = ("depth", "depth_buffer") + LABELS = ("labels", "labels_buffer") + MAP = ("map", "automap_buffer") + + def __init__(self, level: LevelSelection, seed: int, frame_skip: int, human_control: bool, + custom_reward_threshold: Union[int, float], visualization_parameters: VisualizationParameters, + cameras: List[CameraTypes], target_success_rate: float=1.0, **kwargs): + """ + :param level: (str) + A string representing the doom level to run. This can also be a LevelSelection object. + This should be one of the levels defined in the DoomLevel enum. For example, HEALTH_GATHERING. + + :param seed: (int) + A seed to use for the random number generator when running the environment. + + :param frame_skip: (int) + The number of frames to skip between any two actions given by the agent. The action will be repeated + for all the skipped frames. + + :param human_control: (bool) + A flag that allows controlling the environment using the keyboard keys. + + :param custom_reward_threshold: (float) + Allows defining a custom reward that will be used to decide when the agent succeeded in passing the environment. + + :param visualization_parameters: (VisualizationParameters) + The parameters used for visualizing the environment, such as the render flag, storing videos etc. + + :param cameras: (List[CameraTypes]) + A list of camera types to use as observation in the state returned from the environment. + Each camera should be an enum from CameraTypes, and there are several options like an RGB observation, + a depth map, a segmentation map, and a top-down map of the environment. + + :param target_success_rate: (float) + Stop experiment if given target success rate was achieved.
+ + """ + super().__init__(level, seed, frame_skip, human_control, custom_reward_threshold, visualization_parameters, target_success_rate) + + self.cameras = cameras + + # load the emulator with the required level + self.level = DoomLevel[level.upper()] + local_scenarios_path = path.join(os.path.dirname(os.path.realpath(__file__)), 'doom') + if 'COACH_LOCAL' in level: + self.scenarios_dir = local_scenarios_path + elif 'VIZDOOM_ROOT' in environ: + self.scenarios_dir = path.join(environ.get('VIZDOOM_ROOT'), 'scenarios') + else: + self.scenarios_dir = path.join(os.path.dirname(os.path.realpath(vizdoom.__file__)), 'scenarios') + + self.game = vizdoom.DoomGame() + self.game.load_config(path.join(self.scenarios_dir, self.level.value)) + self.game.set_window_visible(False) + self.game.add_game_args("+vid_forcesurface 1") + + self.wait_for_explicit_human_action = True + if self.human_control: + self.game.set_screen_resolution(vizdoom.ScreenResolution.RES_640X480) + elif self.is_rendered: + self.game.set_screen_resolution(vizdoom.ScreenResolution.RES_320X240) + else: + # lower resolution since we actually take only 76x60 and we don't need to render + self.game.set_screen_resolution(vizdoom.ScreenResolution.RES_160X120) + + self.game.set_render_hud(False) + self.game.set_render_crosshair(False) + self.game.set_render_decals(False) + self.game.set_render_particles(False) + for camera in self.cameras: + if hasattr(self.game, 'set_{}_enabled'.format(camera.value[1])): + getattr(self.game, 'set_{}_enabled'.format(camera.value[1]))(True) + self.game.init() + + # actions + actions_description = ['NO-OP'] + actions_description += [str(action).split(".")[1] for action in self.game.get_available_buttons()] + actions_description = actions_description[::-1] + self.action_space = MultiSelectActionSpace(self.game.get_available_buttons_size(), + max_simultaneous_selected_actions=1, + descriptions=actions_description, + allow_no_action_to_be_selected=True) + + # human control + if self.human_control: + # TODO: add this to the action space + # map keyboard keys to actions + for idx, action in enumerate(self.action_space.descriptions): + if action in key_map.keys(): + self.key_to_action[(key_map[action],)] = idx + + # states + self.state_space = StateSpace({ + "measurements": VectorObservationSpace(self.game.get_state().game_variables.shape[0], + measurements_names=[str(m) for m in + self.game.get_available_game_variables()]) + }) + for camera in self.cameras: + self.state_space[camera.value[0]] = ImageObservationSpace( + shape=np.array([self.game.get_screen_height(), self.game.get_screen_width(), 3]), + high=255) + + # seed + if seed is not None: + self.game.set_seed(seed) + self.reset_internal_state() + + # render + if self.is_rendered: + image = self.get_rendered_image() + self.renderer.create_screen(image.shape[1], image.shape[0]) + + self.target_success_rate = target_success_rate + + def _update_state(self): + # extract all data from the current state + state = self.game.get_state() + if state is not None and state.screen_buffer is not None: + self.measurements = state.game_variables + self.state = {'measurements': self.measurements} + for camera in self.cameras: + observation = getattr(state, camera.value[1]) + if len(observation.shape) == 3: + self.state[camera.value[0]] = np.transpose(observation, (1, 2, 0)) + elif len(observation.shape) == 2: + self.state[camera.value[0]] = np.repeat(np.expand_dims(observation, -1), 3, axis=-1) + + self.reward = self.game.get_last_reward() + self.done = 
self.game.is_episode_finished() + + def _take_action(self, action): + self.game.make_action(list(action), self.frame_skip) + + def _restart_environment_episode(self, force_environment_reset=False): + self.game.new_episode() + + def get_rendered_image(self) -> np.ndarray: + """ + Return a numpy array containing the image that will be rendered to the screen. + This can be different from the observation. For example, mujoco's observation is a measurements vector. + :return: numpy array containing the image that will be rendered to the screen + """ + image = [self.state[camera.value[0]] for camera in self.cameras] + image = np.vstack(image) + return image + + def get_target_success_rate(self) -> float: + return self.target_success_rate
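For orientation (a usage sketch, not an official preset from this diff): the parameters class above takes one of the DoomLevel names as a string, which the environment upper-cases and resolves through the DoomLevel enum, and extra camera buffers can be requested through the cameras list:

from rl_coach.environments.doom_environment import DoomEnvironment, DoomEnvironmentParameters

env_params = DoomEnvironmentParameters(level='basic')   # resolved as DoomLevel['BASIC'] -> basic.cfg
# ask for the depth buffer in addition to the default RGB screen buffer
env_params.cameras = [DoomEnvironment.CameraTypes.OBSERVATION,
                      DoomEnvironment.CameraTypes.DEPTH]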
+ + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/docs/_modules/rl_coach/environments/environment.html b/docs/_modules/rl_coach/environments/environment.html new file mode 100644 index 0000000..3f30f9f --- /dev/null +++ b/docs/_modules/rl_coach/environments/environment.html @@ -0,0 +1,721 @@ + + + + + + + + + + + rl_coach.environments.environment — Reinforcement Learning Coach 0.11.0 documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Source code for rl_coach.environments.environment

+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import operator
+import time
+from collections import OrderedDict
+from typing import Union, List, Tuple, Dict
+
+import numpy as np
+
+from rl_coach import logger
+from rl_coach.base_parameters import Parameters
+from rl_coach.base_parameters import VisualizationParameters
+from rl_coach.core_types import GoalType, ActionType, EnvResponse, RunPhase
+from rl_coach.environments.environment_interface import EnvironmentInterface
+from rl_coach.logger import screen
+from rl_coach.renderer import Renderer
+from rl_coach.spaces import ActionSpace, ObservationSpace, DiscreteActionSpace, RewardSpace, StateSpace
+from rl_coach.utils import squeeze_list, force_list
+
+
+class LevelSelection(object):
+    def __init__(self, level: str):
+        self.selected_level = level
+
+    def select(self, level: str):
+        self.selected_level = level
+
+    def __str__(self):
+        if self.selected_level is None:
+            logger.screen.error("No level has been selected. Please select a level using the -lvl command line flag, "
+                                "or change the level in the preset.", crash=True)
+        return self.selected_level
+
+
+class SingleLevelSelection(LevelSelection):
+    def __init__(self, levels: Union[str, List[str], Dict[str, str]]):
+        super().__init__(None)
+        self.levels = levels
+        if isinstance(levels, list):
+            self.levels = {level: level for level in levels}
+        if isinstance(levels, str):
+            self.levels = {levels: levels}
+
+    def __str__(self):
+        if self.selected_level is None:
+            logger.screen.error("No level has been selected. Please select a level using the -lvl command line flag, "
+                                "or change the level in the preset. \nThe available levels are: \n{}"
+                                .format(', '.join(sorted(self.levels.keys()))), crash=True)
+        if self.selected_level not in self.levels.keys():
+            logger.screen.error("The selected level ({}) is not part of the available levels ({})"
+                                .format(self.selected_level, ', '.join(self.levels.keys())), crash=True)
+        return self.levels[self.selected_level]
+
+
+# class SingleLevelPerPhase(LevelSelection):
+#     def __init__(self, levels: Dict[RunPhase, str]):
+#         super().__init__(None)
+#         self.levels = levels
+#
+#     def __str__(self):
+#         super().__str__()
+#         if self.selected_level not in self.levels.keys():
+#             logger.screen.error("The selected level ({}) is not part of the available levels ({})"
+#                                 .format(self.selected_level, self.levels.keys()), crash=True)
+#         return self.levels[self.selected_level]
+
+
+class CustomWrapper(object):
+    def __init__(self, environment):
+        super().__init__()
+        self.environment = environment
+
+    def __getattr__(self, attr):
+        if attr in self.__dict__:
+            return self.__dict__[attr]
+        else:
+            return getattr(self.environment, attr, False)
+
+
+class EnvironmentParameters(Parameters):
+    def __init__(self, level=None):
+        super().__init__()
+        self.level = level
+        self.frame_skip = 4
+        self.seed = None
+        self.human_control = False
+        self.custom_reward_threshold = None
+        self.default_input_filter = None
+        self.default_output_filter = None
+        self.experiment_path = None
+
+        # Set target reward and target_success if present
+        self.target_success_rate = 1.0
+
+    @property
+    def path(self):
+        return 'rl_coach.environments.environment:Environment'
+
+
+
[docs]class Environment(EnvironmentInterface): + def __init__(self, level: LevelSelection, seed: int, frame_skip: int, human_control: bool, + custom_reward_threshold: Union[int, float], visualization_parameters: VisualizationParameters, + target_success_rate: float=1.0, **kwargs): + """ + :param level: The environment level. Each environment can have multiple levels + :param seed: a seed for the random number generator of the environment + :param frame_skip: number of frames to skip (while repeating the same action) between each two agent directives + :param human_control: human should control the environment + :param visualization_parameters: a blob of parameters used for visualization of the environment + :param **kwargs: as the class is instantiated by EnvironmentParameters, this is used to support having + additional arguments which will be ignored by this class, but might be used by others + """ + super().__init__() + + # env initialization + + self.game = [] + + self.state = {} + self.observation = None + self.goal = None + self.reward = 0 + self.done = False + self.info = {} + self._last_env_response = None + self.last_action = 0 + self.episode_idx = 0 + self.total_steps_counter = 0 + self.current_episode_steps_counter = 0 + self.last_episode_time = time.time() + self.key_to_action = {} + self.last_episode_images = [] + + # rewards + self.total_reward_in_current_episode = 0 + self.max_reward_achieved = -np.inf + self.reward_success_threshold = custom_reward_threshold + + # spaces + self.state_space = self._state_space = None + self.goal_space = self._goal_space = None + self.action_space = self._action_space = None + self.reward_space = RewardSpace(1, reward_success_threshold=self.reward_success_threshold) # TODO: add a getter and setter + + self.env_id = str(level) + self.seed = seed + self.frame_skip = frame_skip + + # human interaction and visualization + self.human_control = human_control + self.wait_for_explicit_human_action = False + self.is_rendered = visualization_parameters.render or self.human_control + self.native_rendering = visualization_parameters.native_rendering and not self.human_control + self.visualization_parameters = visualization_parameters + if not self.native_rendering: + self.renderer = Renderer() + + # Set target reward and target_success if present + self.target_success_rate = target_success_rate + + @property + def action_space(self) -> Union[List[ActionSpace], ActionSpace]: + """ + Get the action space of the environment + + :return: the action space + """ + return self._action_space + + @action_space.setter + def action_space(self, val: Union[List[ActionSpace], ActionSpace]): + """ + Set the action space of the environment + + :return: None + """ + self._action_space = val + + @property + def state_space(self) -> Union[List[StateSpace], StateSpace]: + """ + Get the state space of the environment + + :return: the observation space + """ + return self._state_space + + @state_space.setter + def state_space(self, val: Union[List[StateSpace], StateSpace]): + """ + Set the state space of the environment + + :return: None + """ + self._state_space = val + + @property + def goal_space(self) -> Union[List[ObservationSpace], ObservationSpace]: + """ + Get the state space of the environment + + :return: the observation space + """ + return self._goal_space + + @goal_space.setter + def goal_space(self, val: Union[List[ObservationSpace], ObservationSpace]): + """ + Set the goal space of the environment + + :return: None + """ + self._goal_space = val + +
[docs] def get_action_from_user(self) -> ActionType: + """ + Get an action from the user keyboard + + :return: action index + """ + if self.wait_for_explicit_human_action: + while len(self.renderer.pressed_keys) == 0: + self.renderer.get_events() + + if self.key_to_action == {}: + # the keys are the numbers on the keyboard corresponding to the action index + if len(self.renderer.pressed_keys) > 0: + action_idx = self.renderer.pressed_keys[0] - ord("1") + if 0 <= action_idx < self.action_space.shape[0]: + return action_idx + else: + # the keys are mapped through the environment to more intuitive keyboard keys + # key = tuple(self.renderer.pressed_keys) + # for key in self.renderer.pressed_keys: + for env_keys in self.key_to_action.keys(): + if set(env_keys) == set(self.renderer.pressed_keys): + return self.action_space.actions[self.key_to_action[env_keys]] + + # return the default action 0 so that the environment will continue running + return self.action_space.default_action
+ + @property + def last_env_response(self) -> Union[List[EnvResponse], EnvResponse]: + """ + Get the last environment response + + :return: a dictionary that contains the state, reward, etc. + """ + return squeeze_list(self._last_env_response) + + @last_env_response.setter + def last_env_response(self, val: Union[List[EnvResponse], EnvResponse]): + """ + Set the last environment response + + :param val: the last environment response + """ + self._last_env_response = force_list(val) + +
[docs] def step(self, action: ActionType) -> EnvResponse: + """ + Make a single step in the environment using the given action + + :param action: an action to use for stepping the environment. Should follow the definition of the action space. + :return: the environment response as returned in get_last_env_response + """ + action = self.action_space.clip_action_to_space(action) + if self.action_space and not self.action_space.val_matches_space_definition(action): + raise ValueError("The given action does not match the action space definition. " + "Action = {}, action space definition = {}".format(action, self.action_space)) + + # store the last agent action done and allow passing None actions to repeat the previously done action + if action is None: + action = self.last_action + self.last_action = action + if self.visualization_parameters.add_rendered_image_to_env_response: + current_rendered_image = self.get_rendered_image() + + self.current_episode_steps_counter += 1 + if self.phase != RunPhase.UNDEFINED: + self.total_steps_counter += 1 + + # act + self._take_action(action) + + # observe + self._update_state() + + if self.is_rendered: + self.render() + + self.total_reward_in_current_episode += self.reward + + if self.visualization_parameters.add_rendered_image_to_env_response: + self.info['image'] = current_rendered_image + + self.last_env_response = \ + EnvResponse( + reward=self.reward, + next_state=self.state, + goal=self.goal, + game_over=self.done, + info=self.info + ) + + # store observations for video / gif dumping + if self.should_dump_video_of_the_current_episode(episode_terminated=False) and \ + (self.visualization_parameters.dump_mp4 or self.visualization_parameters.dump_gifs): + self.last_episode_images.append(self.get_rendered_image()) + + return self.last_env_response
+ +
[docs] def render(self) -> None: + """ + Call the environment function for rendering to the screen + + :return: None + """ + if self.native_rendering: + self._render() + else: + self.renderer.render_image(self.get_rendered_image())
+ +
[docs] def handle_episode_ended(self) -> None: + """ + End an episode + + :return: None + """ + self.dump_video_of_last_episode_if_needed()
+ +
[docs] def reset_internal_state(self, force_environment_reset=False) -> EnvResponse: + """ + Reset the environment and all the variables of the wrapper + + :param force_environment_reset: forces environment reset even when the game did not end + :return: the environment response (EnvResponse) containing the observation, reward, done flag, action and measurements + """ + + self._restart_environment_episode(force_environment_reset) + self.last_episode_time = time.time() + + if self.current_episode_steps_counter > 0 and self.phase != RunPhase.UNDEFINED: + self.episode_idx += 1 + + self.done = False + self.total_reward_in_current_episode = self.reward = 0.0 + self.last_action = 0 + self.current_episode_steps_counter = 0 + self.last_episode_images = [] + self._update_state() + + # render before the preprocessing of the observation, so that the image will be in its original quality + if self.is_rendered: + self.render() + + self.last_env_response = \ + EnvResponse( + reward=self.reward, + next_state=self.state, + goal=self.goal, + game_over=self.done, + info=self.info + ) + + return self.last_env_response
+ +
[docs] def get_random_action(self) -> ActionType: + """ + Returns an action picked uniformly from the available actions + + :return: a numpy array with a random action + """ + return self.action_space.sample()
+ +
[docs] def get_available_keys(self) -> List[Tuple[str, ActionType]]: + """ + Return a list of tuples mapping between action names and the keyboard key that triggers them + + :return: a list of tuples mapping between action names and the keyboard key that triggers them + """ + available_keys = [] + if self.key_to_action != {}: + for key, idx in sorted(self.key_to_action.items(), key=operator.itemgetter(1)): + if key != (): + key_names = [self.renderer.get_key_names([k])[0] for k in key] + available_keys.append((self.action_space.descriptions[idx], ' + '.join(key_names))) + elif type(self.action_space) == DiscreteActionSpace: + for action in range(self.action_space.shape): + available_keys.append(("Action {}".format(action + 1), action + 1)) + return available_keys
+ +
[docs] def get_goal(self) -> GoalType: + """ + Get the current goal that the agent needs to achieve in the environment + + :return: The goal + """ + return self.goal
+ +
[docs] def set_goal(self, goal: GoalType) -> None: + """ + Set the current goal that the agent needs to achieve in the environment + + :param goal: the goal that needs to be achieved + :return: None + """ + self.goal = goal
+ + def should_dump_video_of_the_current_episode(self, episode_terminated=False): + if self.visualization_parameters.video_dump_filters: + for video_dump_filter in force_list(self.visualization_parameters.video_dump_filters): + if not video_dump_filter.should_dump(episode_terminated, **self.__dict__): + return False + return True + return True + + def dump_video_of_last_episode_if_needed(self): + if self.last_episode_images != [] and self.should_dump_video_of_the_current_episode(episode_terminated=True): + self.dump_video_of_last_episode() + + def dump_video_of_last_episode(self): + frame_skipping = max(1, int(5 / self.frame_skip)) + file_name = 'episode-{}_score-{}'.format(self.episode_idx, self.total_reward_in_current_episode) + fps = 10 + if self.visualization_parameters.dump_gifs: + logger.create_gif(self.last_episode_images[::frame_skipping], name=file_name, fps=fps) + if self.visualization_parameters.dump_mp4: + logger.create_mp4(self.last_episode_images[::frame_skipping], name=file_name, fps=fps) + + # The following functions define the interaction with the environment. + # Any new environment that inherits the Environment class should use these signatures. + # Some of these functions are optional - please read their description for more details. + + def _take_action(self, action_idx: ActionType) -> None: + """ + An environment dependent function that sends an action to the simulator. + + :param action_idx: the action to perform on the environment + :return: None + """ + raise NotImplementedError("") + + def _update_state(self) -> None: + """ + Updates the state from the environment. + Should update self.observation, self.reward, self.done, self.measurements and self.info + + :return: None + """ + raise NotImplementedError("") + + def _restart_environment_episode(self, force_environment_reset=False) -> None: + """ + Restarts the simulator episode + + :param force_environment_reset: Force the environment to reset even if the episode is not done yet. + :return: None + """ + raise NotImplementedError("") + + def _render(self) -> None: + """ + Renders the environment using the native simulator renderer + + :return: None + """ + pass + +
[docs] def get_rendered_image(self) -> np.ndarray: + """ + Return a numpy array containing the image that will be rendered to the screen. + This can be different from the observation. For example, mujoco's observation is a measurements vector. + + :return: numpy array containing the image that will be rendered to the screen + """ + return np.transpose(self.state['observation'], [1, 2, 0])
+ + def get_target_success_rate(self) -> float: + return self.target_success_rate
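The three abstract methods at the bottom (_take_action, _update_state, _restart_environment_episode) are the minimal surface a new environment must implement. A toy sketch under that contract follows; the CountingEnvironment itself is made up purely for illustration:

import numpy as np

from rl_coach.environments.environment import Environment
from rl_coach.spaces import DiscreteActionSpace, StateSpace, VectorObservationSpace


class CountingEnvironment(Environment):
    """Toy environment: the observation is a counter, the episode ends once it reaches 10."""
    def __init__(self, level, seed, frame_skip, human_control, custom_reward_threshold,
                 visualization_parameters, target_success_rate=1.0, **kwargs):
        super().__init__(level, seed, frame_skip, human_control, custom_reward_threshold,
                         visualization_parameters, target_success_rate)
        self.counter = 0
        self.state_space = StateSpace({'observation': VectorObservationSpace(shape=1)})
        self.action_space = DiscreteActionSpace(num_actions=2, descriptions=['stay', 'increment'])
        self.reset_internal_state(True)

    def _take_action(self, action_idx):
        # action 0 keeps the counter, action 1 increments it
        self.counter += int(action_idx)

    def _update_state(self):
        self.state = {'observation': np.array([self.counter])}
        self.reward = float(self.counter)
        self.done = self.counter >= 10

    def _restart_environment_episode(self, force_environment_reset=False):
        self.counter = 0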
+ + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/docs/_modules/rl_coach/environments/gym_environment.html b/docs/_modules/rl_coach/environments/gym_environment.html new file mode 100644 index 0000000..eea80ec --- /dev/null +++ b/docs/_modules/rl_coach/environments/gym_environment.html @@ -0,0 +1,703 @@ + + + + + + + + + + + rl_coach.environments.gym_environment — Reinforcement Learning Coach 0.11.0 documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Source code for rl_coach.environments.gym_environment

+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import gym
+import numpy as np
+import scipy.ndimage
+
+from rl_coach.graph_managers.graph_manager import ScheduleParameters
+from rl_coach.utils import lower_under_to_upper, short_dynamic_import
+
+try:
+    import roboschool
+    from OpenGL import GL
+except ImportError:
+    from rl_coach.logger import failed_imports
+    failed_imports.append("RoboSchool")
+
+try:
+    from rl_coach.gym_extensions.continuous import mujoco
+except:
+    from rl_coach.logger import failed_imports
+    failed_imports.append("GymExtensions")
+
+try:
+    import pybullet_envs
+except ImportError:
+    from rl_coach.logger import failed_imports
+    failed_imports.append("PyBullet")
+
+from typing import Dict, Any, Union
+from rl_coach.core_types import RunPhase, EnvironmentSteps
+from rl_coach.environments.environment import Environment, EnvironmentParameters, LevelSelection
+from rl_coach.spaces import DiscreteActionSpace, BoxActionSpace, ImageObservationSpace, VectorObservationSpace, \
+    StateSpace, RewardSpace
+from rl_coach.filters.filter import NoInputFilter, NoOutputFilter
+from rl_coach.filters.reward.reward_clipping_filter import RewardClippingFilter
+from rl_coach.filters.observation.observation_rescale_to_size_filter import ObservationRescaleToSizeFilter
+from rl_coach.filters.observation.observation_stacking_filter import ObservationStackingFilter
+from rl_coach.filters.observation.observation_rgb_to_y_filter import ObservationRGBToYFilter
+from rl_coach.filters.observation.observation_to_uint8_filter import ObservationToUInt8Filter
+from rl_coach.filters.filter import InputFilter
+import random
+from rl_coach.base_parameters import VisualizationParameters
+from rl_coach.logger import screen
+
+
+# Parameters
+class GymEnvironmentParameters(EnvironmentParameters):
+    def __init__(self, level=None):
+        super().__init__(level=level)
+        self.random_initialization_steps = 0
+        self.max_over_num_frames = 1
+        self.additional_simulator_parameters = {}
+
+    @property
+    def path(self):
+        return 'rl_coach.environments.gym_environment:GymEnvironment'
+
+
+# Generic parameters for vector environments such as mujoco, roboschool, bullet, etc.
+class GymVectorEnvironment(GymEnvironmentParameters):
+    def __init__(self, level=None):
+        super().__init__(level=level)
+        self.frame_skip = 1
+        self.default_input_filter = NoInputFilter()
+        self.default_output_filter = NoOutputFilter()
+
+
+# Roboschool
+gym_roboschool_envs = ['inverted_pendulum', 'inverted_pendulum_swingup', 'inverted_double_pendulum', 'reacher',
+                       'hopper', 'walker2d', 'half_cheetah', 'ant', 'humanoid', 'humanoid_flagrun',
+                       'humanoid_flagrun_harder', 'pong']
+roboschool_v0 = {e: "{}".format(lower_under_to_upper(e) + '-v0') for e in gym_roboschool_envs}
+
+# Mujoco
+gym_mujoco_envs = ['inverted_pendulum', 'inverted_double_pendulum', 'reacher', 'hopper', 'walker2d', 'half_cheetah',
+                   'ant', 'swimmer', 'humanoid', 'humanoid_standup', 'pusher', 'thrower', 'striker']
+
+mujoco_v2 = {e: "{}".format(lower_under_to_upper(e) + '-v2') for e in gym_mujoco_envs}
+mujoco_v2['walker2d'] = 'Walker2d-v2'
+
+# Fetch
+gym_fetch_envs = ['reach', 'slide', 'push', 'pick_and_place']
+fetch_v1 = {e: "{}".format('Fetch' + lower_under_to_upper(e) + '-v1') for e in gym_fetch_envs}
+
+
+"""
+Atari Environment Components
+"""
+
+AtariInputFilter = InputFilter(is_a_reference_filter=True)
+AtariInputFilter.add_reward_filter('clipping', RewardClippingFilter(-1.0, 1.0))
+AtariInputFilter.add_observation_filter('observation', 'rescaling',
+                                        ObservationRescaleToSizeFilter(ImageObservationSpace(np.array([84, 84, 3]),
+                                                                                             high=255)))
+AtariInputFilter.add_observation_filter('observation', 'to_grayscale', ObservationRGBToYFilter())
+AtariInputFilter.add_observation_filter('observation', 'to_uint8', ObservationToUInt8Filter(0, 255))
+AtariInputFilter.add_observation_filter('observation', 'stacking', ObservationStackingFilter(4))
+AtariOutputFilter = NoOutputFilter()
+
+
+class Atari(GymEnvironmentParameters):
+    def __init__(self, level=None):
+        super().__init__(level=level)
+        self.frame_skip = 4
+        self.max_over_num_frames = 2
+        self.random_initialization_steps = 30
+        self.default_input_filter = AtariInputFilter
+        self.default_output_filter = AtariOutputFilter
+
+
+gym_atari_envs = ['air_raid', 'alien', 'amidar', 'assault', 'asterix', 'asteroids', 'atlantis',
+                  'bank_heist', 'battle_zone', 'beam_rider', 'berzerk', 'bowling', 'boxing', 'breakout', 'carnival',
+                  'centipede', 'chopper_command', 'crazy_climber', 'demon_attack', 'double_dunk',
+                  'elevator_action', 'enduro', 'fishing_derby', 'freeway', 'frostbite', 'gopher', 'gravitar',
+                  'hero', 'ice_hockey', 'jamesbond', 'journey_escape', 'kangaroo', 'krull', 'kung_fu_master',
+                  'montezuma_revenge', 'ms_pacman', 'name_this_game', 'phoenix', 'pitfall', 'pong', 'pooyan',
+                  'private_eye', 'qbert', 'riverraid', 'road_runner', 'robotank', 'seaquest', 'skiing',
+                  'solaris', 'space_invaders', 'star_gunner', 'tennis', 'time_pilot', 'tutankham', 'up_n_down',
+                  'venture', 'video_pinball', 'wizard_of_wor', 'yars_revenge', 'zaxxon']
+atari_deterministic_v4 = {e: "{}".format(lower_under_to_upper(e) + 'Deterministic-v4') for e in gym_atari_envs}
+atari_no_frameskip_v4 = {e: "{}".format(lower_under_to_upper(e) + 'NoFrameskip-v4') for e in gym_atari_envs}
+
+
+# default atari schedule used in the DeepMind papers
+atari_schedule = ScheduleParameters()
+atari_schedule.improve_steps = EnvironmentSteps(50000000)
+atari_schedule.steps_between_evaluation_periods = EnvironmentSteps(250000)
+atari_schedule.evaluation_steps = EnvironmentSteps(135000)
+atari_schedule.heatup_steps = EnvironmentSteps(1)
+
+
+class MaxOverFramesAndFrameskipEnvWrapper(gym.Wrapper):
+    def __init__(self, env, frameskip=4, max_over_num_frames=2):
+        super().__init__(env)
+        self.max_over_num_frames = max_over_num_frames
+        self.observations_stack = []
+        self.frameskip = frameskip
+        self.first_frame_to_max_over = self.frameskip - self.max_over_num_frames
+
+    def reset(self):
+        return self.env.reset()
+
+    def step(self, action):
+        total_reward = 0.0
+        done = None
+        info = None
+        self.observations_stack = []
+        for i in range(self.frameskip):
+            observation, reward, done, info = self.env.step(action)
+            if i >= self.first_frame_to_max_over:
+                self.observations_stack.append(observation)
+            total_reward += reward
+            if done:
+                # deal with last state in episode
+                if not self.observations_stack:
+                    self.observations_stack.append(observation)
+                break
+
+        max_over_frames_observation = np.max(self.observations_stack, axis=0)
+
+        return max_over_frames_observation, total_reward, done, info
+
+
+# Environment
+
[docs]class GymEnvironment(Environment): + def __init__(self, level: LevelSelection, frame_skip: int, visualization_parameters: VisualizationParameters, + target_success_rate: float=1.0, additional_simulator_parameters: Dict[str, Any] = {}, seed: Union[None, int]=None, + human_control: bool=False, custom_reward_threshold: Union[int, float]=None, + random_initialization_steps: int=1, max_over_num_frames: int=1, **kwargs): + """ + :param level: (str) + A string representing the gym level to run. This can also be a LevelSelection object. + For example, BreakoutDeterministic-v0 + + :param frame_skip: (int) + The number of frames to skip between any two actions given by the agent. The action will be repeated + for all the skipped frames. + + :param visualization_parameters: (VisualizationParameters) + The parameters used for visualizing the environment, such as the render flag, storing videos etc. + + :param additional_simulator_parameters: (Dict[str, Any]) + Any additional parameters that the user can pass to the Gym environment. These parameters should be + accepted by the __init__ function of the implemented Gym environment. + + :param seed: (int) + A seed to use for the random number generator when running the environment. + + :param human_control: (bool) + A flag that allows controlling the environment using the keyboard keys. + + :param custom_reward_threshold: (float) + Allows defining a custom reward that will be used to decide when the agent succeeded in passing the environment. + If not set, this value will be taken from the Gym environment definition. + + :param random_initialization_steps: (int) + The number of random steps that will be taken in the environment after each reset. + This is a feature presented in the DQN paper, which improves the variability of the episodes the agent sees. + + :param max_over_num_frames: (int) + This value will be used for merging multiple frames into a single frame by taking the maximum value for each + of the pixels in the frame. This is particularly used in Atari games, where the frames flicker, and objects + can be seen in one frame but disappear in the next. + """ + super().__init__(level, seed, frame_skip, human_control, custom_reward_threshold, + visualization_parameters, target_success_rate) + + self.random_initialization_steps = random_initialization_steps + self.max_over_num_frames = max_over_num_frames + self.additional_simulator_parameters = additional_simulator_parameters + + # hide warnings + gym.logger.set_level(40) + + """ + load and initialize environment + environment ids can be defined in 3 ways: + 1. Native gym environments like BreakoutDeterministic-v0 for example + 2. Custom gym environments written and installed as python packages. + This environments should have a python module with a class inheriting gym.Env, implementing the + relevant functions (_reset, _step, _render) and defining the observation and action space + For example: my_environment_package:MyEnvironmentClass will run an environment defined in the + MyEnvironmentClass class + 3. Custom gym environments written as an independent module which is not installed. + This environments should have a python module with a class inheriting gym.Env, implementing the + relevant functions (_reset, _step, _render) and defining the observation and action space. 
+ For example: path_to_my_environment.sub_directory.my_module:MyEnvironmentClass will run an + environment defined in the MyEnvironmentClass class which is located in the module in the relative path + path_to_my_environment.sub_directory.my_module + """ + if ':' in self.env_id: + # custom environments + if '/' in self.env_id or '.' in self.env_id: + # environment in a an absolute path module written as a unix path or in a relative path module + # written as a python import path + env_class = short_dynamic_import(self.env_id) + else: + # environment in a python package + env_class = gym.envs.registration.load(self.env_id) + + # instantiate the environment + try: + self.env = env_class(**self.additional_simulator_parameters) + except: + screen.error("Failed to instantiate Gym environment class %s with arguments %s" % + (env_class, self.additional_simulator_parameters), crash=False) + raise + else: + self.env = gym.make(self.env_id) + + # for classic control we want to use the native renderer because otherwise we will get 2 renderer windows + environment_to_always_use_with_native_rendering = ['classic_control', 'mujoco', 'robotics'] + self.native_rendering = self.native_rendering or \ + any([env in str(self.env.unwrapped.__class__) + for env in environment_to_always_use_with_native_rendering]) + if self.native_rendering: + if hasattr(self, 'renderer'): + self.renderer.close() + + # seed + if self.seed is not None: + self.env.seed(self.seed) + np.random.seed(self.seed) + random.seed(self.seed) + + # frame skip and max between consecutive frames + self.is_robotics_env = 'robotics' in str(self.env.unwrapped.__class__) + self.is_mujoco_env = 'mujoco' in str(self.env.unwrapped.__class__) + self.is_atari_env = 'Atari' in str(self.env.unwrapped.__class__) + self.timelimit_env_wrapper = self.env + if self.is_atari_env: + self.env.unwrapped.frameskip = 1 # this accesses the atari env that is wrapped with a timelimit wrapper env + if self.env_id == "SpaceInvadersDeterministic-v4" and self.frame_skip == 4: + screen.warning("Warning: The frame-skip for Space Invaders was automatically updated from 4 to 3. " + "This is following the DQN paper where it was noticed that a frame-skip of 3 makes the " + "laser rays disappear. 
To force frame-skip of 4, please use SpaceInvadersNoFrameskip-v4.") + self.frame_skip = 3 + self.env = MaxOverFramesAndFrameskipEnvWrapper(self.env, + frameskip=self.frame_skip, + max_over_num_frames=self.max_over_num_frames) + else: + self.env.unwrapped.frameskip = self.frame_skip + + self.state_space = StateSpace({}) + + # observations + if not isinstance(self.env.observation_space, gym.spaces.dict_space.Dict): + state_space = {'observation': self.env.observation_space} + else: + state_space = self.env.observation_space.spaces + + for observation_space_name, observation_space in state_space.items(): + if len(observation_space.shape) == 3: + # we assume gym has image observations (with arbitrary number of channels) where their values are + # within 0-255, and where the channel dimension is the last dimension + self.state_space[observation_space_name] = ImageObservationSpace( + shape=np.array(observation_space.shape), + high=255, + channels_axis=-1 + ) + else: + self.state_space[observation_space_name] = VectorObservationSpace( + shape=observation_space.shape[0], + low=observation_space.low, + high=observation_space.high + ) + if 'desired_goal' in state_space.keys(): + self.goal_space = self.state_space['desired_goal'] + + # actions + if type(self.env.action_space) == gym.spaces.box.Box: + self.action_space = BoxActionSpace( + shape=self.env.action_space.shape, + low=self.env.action_space.low, + high=self.env.action_space.high + ) + elif type(self.env.action_space) == gym.spaces.discrete.Discrete: + actions_description = [] + if hasattr(self.env.unwrapped, 'get_action_meanings'): + actions_description = self.env.unwrapped.get_action_meanings() + self.action_space = DiscreteActionSpace( + num_actions=self.env.action_space.n, + descriptions=actions_description + ) + + if self.human_control: + # TODO: add this to the action space + # map keyboard keys to actions + self.key_to_action = {} + if hasattr(self.env.unwrapped, 'get_keys_to_action'): + self.key_to_action = self.env.unwrapped.get_keys_to_action() + else: + screen.error("Error: Environment {} does not support human control.".format(self.env), crash=True) + + # initialize the state by getting a new state from the environment + self.reset_internal_state(True) + + # render + if self.is_rendered: + image = self.get_rendered_image() + scale = 1 + if self.human_control: + scale = 2 + if not self.native_rendering: + self.renderer.create_screen(image.shape[1]*scale, image.shape[0]*scale) + + # measurements + if self.env.spec is not None: + self.timestep_limit = self.env.spec.timestep_limit + else: + self.timestep_limit = None + + # the info is only updated after the first step + self.state = self.step(self.action_space.default_action).next_state + self.state_space['measurements'] = VectorObservationSpace(shape=len(self.info.keys())) + + if self.env.spec and custom_reward_threshold is None: + self.reward_success_threshold = self.env.spec.reward_threshold + self.reward_space = RewardSpace(1, reward_success_threshold=self.reward_success_threshold) + + self.target_success_rate = target_success_rate + + def _wrap_state(self, state): + if not isinstance(self.env.observation_space, gym.spaces.Dict): + return {'observation': state} + return state + + def _update_state(self): + if self.is_atari_env and hasattr(self, 'current_ale_lives') \ + and self.current_ale_lives != self.env.unwrapped.ale.lives(): + if self.phase == RunPhase.TRAIN or self.phase == RunPhase.HEATUP: + # signal termination for life loss + self.done = True + elif self.phase == 
RunPhase.TEST and not self.done: + # the episode is not terminated in evaluation, but we need to press fire again + self._press_fire() + self._update_ale_lives() + # TODO: update the measurements + if self.state and "desired_goal" in self.state.keys(): + self.goal = self.state['desired_goal'] + + def _take_action(self, action): + if type(self.action_space) == BoxActionSpace: + action = self.action_space.clip_action_to_space(action) + + self.state, self.reward, self.done, self.info = self.env.step(action) + self.state = self._wrap_state(self.state) + + def _random_noop(self): + # simulate a random initial environment state by stepping for a random number of times between 0 and 30 + step_count = 0 + random_initialization_steps = random.randint(0, self.random_initialization_steps) + while self.action_space is not None and (self.state is None or step_count < random_initialization_steps): + step_count += 1 + self.step(self.action_space.default_action) + + def _press_fire(self): + fire_action = 1 + if self.is_atari_env and self.env.unwrapped.get_action_meanings()[fire_action] == 'FIRE': + self.current_ale_lives = self.env.unwrapped.ale.lives() + self.step(fire_action) + if self.done: + self.reset_internal_state() + + def _update_ale_lives(self): + if self.is_atari_env: + self.current_ale_lives = self.env.unwrapped.ale.lives() + + def _restart_environment_episode(self, force_environment_reset=False): + # prevent reset of environment if there are ale lives left + if (self.is_atari_env and self.env.unwrapped.ale.lives() > 0) \ + and not force_environment_reset and not self.timelimit_env_wrapper._past_limit(): + self.step(self.action_space.default_action) + else: + self.state = self.env.reset() + self.state = self._wrap_state(self.state) + self._update_ale_lives() + + if self.is_atari_env: + self._random_noop() + self._press_fire() + + # initialize the number of lives + self._update_ale_lives() + + def _set_mujoco_camera(self, camera_idx: int): + """ + This function can be used to set the camera for rendering the mujoco simulator + :param camera_idx: The index of the camera to use. Should be defined in the model + :return: None + """ + if self.env.unwrapped.viewer is not None and self.env.unwrapped.viewer.cam.fixedcamid != camera_idx and\ + self.env.unwrapped.viewer._ncam > camera_idx: + from mujoco_py.generated import const + self.env.unwrapped.viewer.cam.type = const.CAMERA_FIXED + self.env.unwrapped.viewer.cam.fixedcamid = camera_idx + + def _get_robotics_image(self): + self.env.render() + image = self.env.unwrapped._get_viewer().read_pixels(1600, 900, depth=False)[::-1, :, :] + image = scipy.misc.imresize(image, (270, 480, 3)) + return image + + def _render(self): + self.env.render(mode='human') + # required for setting up a fixed camera for mujoco + if self.is_mujoco_env: + self._set_mujoco_camera(0) + + def get_rendered_image(self): + if self.is_robotics_env: + # necessary for fetch since the rendered image is cropped to an irrelevant part of the simulator + image = self._get_robotics_image() + else: + image = self.env.render(mode='rgb_array') + # required for setting up a fixed camera for mujoco + if self.is_mujoco_env: + self._set_mujoco_camera(0) + return image + + def get_target_success_rate(self) -> float: + return self.target_success_rate
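For reference (typical usage, not taken from this diff): presets instantiate the parameter classes defined above, using GymVectorEnvironment for vector-observation levels and Atari together with the level dictionaries for Atari games:

from rl_coach.environments.gym_environment import Atari, GymVectorEnvironment, atari_deterministic_v4

# a classic-control / mujoco style level with vector observations
cartpole_params = GymVectorEnvironment(level='CartPole-v0')

# an Atari level taken from the atari_deterministic_v4 dictionary ('BreakoutDeterministic-v4')
breakout_params = Atari(level=atari_deterministic_v4['breakout'])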
+ + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/docs/_modules/rl_coach/environments/starcraft2_environment.html b/docs/_modules/rl_coach/environments/starcraft2_environment.html new file mode 100644 index 0000000..3e3d8c8 --- /dev/null +++ b/docs/_modules/rl_coach/environments/starcraft2_environment.html @@ -0,0 +1,478 @@ + + + + + + + + + + + rl_coach.environments.starcraft2_environment — Reinforcement Learning Coach 0.11.0 documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + +
+ + + + + +
+ +
+ + + + + + + + + + + + + + + + + +
+ +
    + +
  • Docs »
  • + +
  • Module code »
  • + +
  • rl_coach.environments.starcraft2_environment
  • + + +
  • + +
  • + +
+ + +
+
+
+
+ +

Source code for rl_coach.environments.starcraft2_environment

+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+from enum import Enum
+from typing import Union, List
+
+import numpy as np
+
+from rl_coach.filters.observation.observation_move_axis_filter import ObservationMoveAxisFilter
+
+try:
+    from pysc2 import maps
+    from pysc2.env import sc2_env
+    from pysc2.env import available_actions_printer
+    from pysc2.lib import actions
+    from pysc2.lib import features
+    from pysc2.env import environment
+    from absl import app
+    from absl import flags
+except ImportError:
+    from rl_coach.logger import failed_imports
+    failed_imports.append("PySc2")
+
+from rl_coach.environments.environment import Environment, EnvironmentParameters, LevelSelection
+from rl_coach.base_parameters import VisualizationParameters
+from rl_coach.spaces import BoxActionSpace, VectorObservationSpace, PlanarMapsObservationSpace, StateSpace, CompoundActionSpace, \
+    DiscreteActionSpace
+from rl_coach.filters.filter import InputFilter, OutputFilter
+from rl_coach.filters.observation.observation_rescale_to_size_filter import ObservationRescaleToSizeFilter
+from rl_coach.filters.action.linear_box_to_box_map import LinearBoxToBoxMap
+from rl_coach.filters.observation.observation_to_uint8_filter import ObservationToUInt8Filter
+
+FLAGS = flags.FLAGS
+FLAGS(['coach.py'])
+
+SCREEN_SIZE = 84  # will also impact the action space size
+
+# Starcraft Constants
+_NOOP = actions.FUNCTIONS.no_op.id
+_MOVE_SCREEN = actions.FUNCTIONS.Move_screen.id
+_SELECT_ARMY = actions.FUNCTIONS.select_army.id
+_PLAYER_RELATIVE = features.SCREEN_FEATURES.player_relative.index
+_NOT_QUEUED = [0]
+_SELECT_ALL = [0]
+
+
+class StarcraftObservationType(Enum):
+    Features = 0
+    RGB = 1
+
+
+StarcraftInputFilter = InputFilter(is_a_reference_filter=True)
+StarcraftInputFilter.add_observation_filter('screen', 'move_axis', ObservationMoveAxisFilter(0, -1))
+StarcraftInputFilter.add_observation_filter('screen', 'rescaling',
+                                            ObservationRescaleToSizeFilter(
+                                                PlanarMapsObservationSpace(np.array([84, 84, 1]),
+                                                                           low=0, high=255, channels_axis=-1)))
+StarcraftInputFilter.add_observation_filter('screen', 'to_uint8', ObservationToUInt8Filter(0, 255))
+
+StarcraftInputFilter.add_observation_filter('minimap', 'move_axis', ObservationMoveAxisFilter(0, -1))
+StarcraftInputFilter.add_observation_filter('minimap', 'rescaling',
+                                            ObservationRescaleToSizeFilter(
+                                                PlanarMapsObservationSpace(np.array([64, 64, 1]),
+                                                                           low=0, high=255, channels_axis=-1)))
+StarcraftInputFilter.add_observation_filter('minimap', 'to_uint8', ObservationToUInt8Filter(0, 255))
+
+
+StarcraftNormalizingOutputFilter = OutputFilter(is_a_reference_filter=True)
+StarcraftNormalizingOutputFilter.add_action_filter(
+    'normalization', LinearBoxToBoxMap(input_space_low=-SCREEN_SIZE / 2, input_space_high=SCREEN_SIZE / 2 - 1))
+
+
+class StarCraft2EnvironmentParameters(EnvironmentParameters):
+    def __init__(self, level=None):
+        super().__init__(level=level)
+        self.screen_size = 84
+        self.minimap_size = 64
+        self.feature_minimap_maps_to_use = range(7)
+        self.feature_screen_maps_to_use = range(17)
+        self.observation_type = StarcraftObservationType.Features
+        self.disable_fog = False
+        self.auto_select_all_army = True
+        self.default_input_filter = StarcraftInputFilter
+        self.default_output_filter = StarcraftNormalizingOutputFilter
+        self.use_full_action_space = False
+
+
+    @property
+    def path(self):
+        return 'rl_coach.environments.starcraft2_environment:StarCraft2Environment'
+
+
+# Environment
+
[docs]class StarCraft2Environment(Environment): + def __init__(self, level: LevelSelection, frame_skip: int, visualization_parameters: VisualizationParameters, + target_success_rate: float=1.0, seed: Union[None, int]=None, human_control: bool=False, + custom_reward_threshold: Union[int, float]=None, + screen_size: int=84, minimap_size: int=64, + feature_minimap_maps_to_use: List=range(7), feature_screen_maps_to_use: List=range(17), + observation_type: StarcraftObservationType=StarcraftObservationType.Features, + disable_fog: bool=False, auto_select_all_army: bool=True, + use_full_action_space: bool=False, **kwargs): + super().__init__(level, seed, frame_skip, human_control, custom_reward_threshold, visualization_parameters, target_success_rate) + + self.screen_size = screen_size + self.minimap_size = minimap_size + self.feature_minimap_maps_to_use = feature_minimap_maps_to_use + self.feature_screen_maps_to_use = feature_screen_maps_to_use + self.observation_type = observation_type + self.features_screen_size = None + self.feature_minimap_size = None + self.rgb_screen_size = None + self.rgb_minimap_size = None + if self.observation_type == StarcraftObservationType.Features: + self.features_screen_size = screen_size + self.feature_minimap_size = minimap_size + elif self.observation_type == StarcraftObservationType.RGB: + self.rgb_screen_size = screen_size + self.rgb_minimap_size = minimap_size + self.disable_fog = disable_fog + self.auto_select_all_army = auto_select_all_army + self.use_full_action_space = use_full_action_space + + # step_mul is the equivalent to frame skipping. Not sure if it repeats actions in between or not though. + self.env = sc2_env.SC2Env(map_name=self.env_id, step_mul=frame_skip, + visualize=self.is_rendered, + agent_interface_format=sc2_env.AgentInterfaceFormat( + feature_dimensions=sc2_env.Dimensions( + screen=self.features_screen_size, + minimap=self.feature_minimap_size + ) + # rgb_dimensions=sc2_env.Dimensions( + # screen=self.rgb_screen_size, + # minimap=self.rgb_screen_size + # ) + ), + # feature_screen_size=self.features_screen_size, + # feature_minimap_size=self.feature_minimap_size, + # rgb_screen_size=self.rgb_screen_size, + # rgb_minimap_size=self.rgb_screen_size, + disable_fog=disable_fog, + random_seed=self.seed + ) + + # print all the available actions + # self.env = available_actions_printer.AvailableActionsPrinter(self.env) + + self.reset_internal_state(True) + + """ + feature_screen: [height_map, visibility_map, creep, power, player_id, player_relative, unit_type, selected, + unit_hit_points, unit_hit_points_ratio, unit_energy, unit_energy_ratio, unit_shields, + unit_shields_ratio, unit_density, unit_density_aa, effects] + feature_minimap: [height_map, visibility_map, creep, camera, player_id, player_relative, selecte + d] + player: [player_id, minerals, vespene, food_cap, food_army, food_workers, idle_worker_dount, + army_count, warp_gate_count, larva_count] + """ + self.screen_shape = np.array(self.env.observation_spec()[0]['feature_screen']) + self.screen_shape[0] = len(self.feature_screen_maps_to_use) + self.minimap_shape = np.array(self.env.observation_spec()[0]['feature_minimap']) + self.minimap_shape[0] = len(self.feature_minimap_maps_to_use) + self.state_space = StateSpace({ + "screen": PlanarMapsObservationSpace(shape=self.screen_shape, low=0, high=255, channels_axis=0), + "minimap": PlanarMapsObservationSpace(shape=self.minimap_shape, low=0, high=255, channels_axis=0), + "measurements": 
VectorObservationSpace(self.env.observation_spec()[0]["player"][0]) + }) + if self.use_full_action_space: + action_identifiers = list(self.env.action_spec()[0].functions) + num_action_identifiers = len(action_identifiers) + action_arguments = [(arg.name, arg.sizes) for arg in self.env.action_spec()[0].types] + sub_action_spaces = [DiscreteActionSpace(num_action_identifiers)] + for argument in action_arguments: + for dimension in argument[1]: + sub_action_spaces.append(DiscreteActionSpace(dimension)) + self.action_space = CompoundActionSpace(sub_action_spaces) + else: + self.action_space = BoxActionSpace(2, 0, self.screen_size - 1, ["X-Axis, Y-Axis"], + default_action=np.array([self.screen_size/2, self.screen_size/2])) + + self.target_success_rate = target_success_rate + + def _update_state(self): + timestep = 0 + self.screen = self.last_result[timestep].observation.feature_screen + # extract only the requested segmentation maps from the observation + self.screen = np.take(self.screen, self.feature_screen_maps_to_use, axis=0) + self.minimap = self.last_result[timestep].observation.feature_minimap + self.measurements = self.last_result[timestep].observation.player + self.reward = self.last_result[timestep].reward + self.done = self.last_result[timestep].step_type == environment.StepType.LAST + self.state = { + 'screen': self.screen, + 'minimap': self.minimap, + 'measurements': self.measurements + } + + def _take_action(self, action): + if self.use_full_action_space: + action_identifier = action[0] + action_arguments = action[1:] + action = actions.FunctionCall(action_identifier, action_arguments) + else: + coord = np.array(action[0:2]) + noop = False + coord = coord.round() + coord = np.clip(coord, 0, SCREEN_SIZE - 1) + self.last_action_idx = coord + + if noop: + action = actions.FunctionCall(_NOOP, []) + else: + action = actions.FunctionCall(_MOVE_SCREEN, [_NOT_QUEUED, coord]) + + self.last_result = self.env.step(actions=[action]) + + def _restart_environment_episode(self, force_environment_reset=False): + # reset the environment + self.last_result = self.env.reset() + + # select all the units on the screen + if self.auto_select_all_army: + self.env.step(actions=[actions.FunctionCall(_SELECT_ARMY, [_SELECT_ALL])]) + + def get_rendered_image(self): + screen = np.squeeze(np.tile(np.expand_dims(self.screen, -1), (1, 1, 3))) + screen = screen / np.max(screen) * 255 + return screen.astype('uint8') + + def dump_video_of_last_episode(self): + from rl_coach.logger import experiment_path + self.env._run_config.replay_dir = experiment_path + self.env.save_replay('replays') + super().dump_video_of_last_episode() + + def get_target_success_rate(self): + return self.target_success_rate
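In the default (non-full) action space above, the agent outputs a 2-D screen coordinate that is clipped, rounded and wrapped into a pysc2 Move_screen call. A minimal sketch of that mapping, using only pysc2 symbols already imported by this module; the sample coordinates are arbitrary:

import numpy as np
from pysc2.lib import actions

_MOVE_SCREEN = actions.FUNCTIONS.Move_screen.id
_NOT_QUEUED = [0]
SCREEN_SIZE = 84

def coord_to_sc2_action(action):
    # clip and round the continuous (x, y) output to valid screen coordinates,
    # mirroring the simple branch of _take_action above
    coord = np.clip(np.round(np.array(action[0:2])), 0, SCREEN_SIZE - 1)
    return actions.FunctionCall(_MOVE_SCREEN, [_NOT_QUEUED, coord])

print(coord_to_sc2_action([41.7, 12.2]))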
\ No newline at end of file
diff --git a/docs/_modules/rl_coach/exploration_policies/additive_noise.html b/docs/_modules/rl_coach/exploration_policies/additive_noise.html
new file mode 100644
index 0000000..a27fc99
--- /dev/null
+++ b/docs/_modules/rl_coach/exploration_policies/additive_noise.html
@@ -0,0 +1,330 @@
+ rl_coach.exploration_policies.additive_noise — Reinforcement Learning Coach 0.11.0 documentation
Source code for rl_coach.exploration_policies.additive_noise

+#
+# Copyright (c) 2017 Intel Corporation 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from typing import List
+
+import numpy as np
+
+from rl_coach.core_types import RunPhase, ActionType
+from rl_coach.exploration_policies.exploration_policy import ExplorationPolicy, ExplorationParameters
+from rl_coach.schedules import Schedule, LinearSchedule
+from rl_coach.spaces import ActionSpace, BoxActionSpace
+
+
+# TODO: consider renaming to gaussian sampling
+class AdditiveNoiseParameters(ExplorationParameters):
+    def __init__(self):
+        super().__init__()
+        self.noise_percentage_schedule = LinearSchedule(0.1, 0.1, 50000)
+        self.evaluation_noise_percentage = 0.05
+
+    @property
+    def path(self):
+        return 'rl_coach.exploration_policies.additive_noise:AdditiveNoise'
+
+
+
[docs]class AdditiveNoise(ExplorationPolicy): + """ + AdditiveNoise is an exploration policy intended for continuous action spaces. It takes the action from the agent + and adds a Gaussian distributed noise to it. The amount of noise added to the action follows the noise amount that + can be given in two different ways: + 1. Specified by the user as a noise schedule which is taken in percentiles out of the action space size + 2. Specified by the agents action. In case the agents action is a list with 2 values, the 1st one is assumed to + be the mean of the action, and 2nd is assumed to be its standard deviation. + """ + def __init__(self, action_space: ActionSpace, noise_percentage_schedule: Schedule, + evaluation_noise_percentage: float): + """ + :param action_space: the action space used by the environment + :param noise_percentage_schedule: the schedule for the noise variance percentage relative to the absolute range + of the action space + :param evaluation_noise_percentage: the noise variance percentage that will be used during evaluation phases + """ + super().__init__(action_space) + self.noise_percentage_schedule = noise_percentage_schedule + self.evaluation_noise_percentage = evaluation_noise_percentage + + if not isinstance(action_space, BoxActionSpace): + raise ValueError("Additive noise exploration works only for continuous controls." + "The given action space is of type: {}".format(action_space.__class__.__name__)) + + if not np.all(-np.inf < action_space.high) or not np.all(action_space.high < np.inf)\ + or not np.all(-np.inf < action_space.low) or not np.all(action_space.low < np.inf): + raise ValueError("Additive noise exploration requires bounded actions") + + # TODO: allow working with unbounded actions by defining the noise in terms of range and not percentage + + def get_action(self, action_values: List[ActionType]) -> ActionType: + # TODO-potential-bug consider separating internally defined stdev and externally defined stdev into 2 policies + + # set the current noise percentage + if self.phase == RunPhase.TEST: + current_noise_precentage = self.evaluation_noise_percentage + else: + current_noise_precentage = self.noise_percentage_schedule.current_value + + # scale the noise to the action space range + action_values_std = current_noise_precentage * (self.action_space.high - self.action_space.low) + + # extract the mean values + if isinstance(action_values, list): + # the action values are expected to be a list with the action mean and optionally the action stdev + action_values_mean = action_values[0].squeeze() + else: + # the action values are expected to be a numpy array representing the action mean + action_values_mean = action_values.squeeze() + + # step the noise schedule + if self.phase == RunPhase.TRAIN: + self.noise_percentage_schedule.step() + # the second element of the list is assumed to be the standard deviation + if isinstance(action_values, list) and len(action_values) > 1: + action_values_std = action_values[1].squeeze() + + # add noise to the action means + action = np.random.normal(action_values_mean, action_values_std) + + return action + + def get_control_param(self): + return np.ones(self.action_space.shape)*self.noise_percentage_schedule.current_value
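The scaling described in the docstring (the noise standard deviation is a percentage of the absolute action range, centered on the agent's mean action) can be illustrated in isolation. A minimal NumPy sketch; the bounds and mean action are arbitrary, and the noise percentage matches the default schedule value of 0.1:

import numpy as np

low, high = np.array([-1.0, -1.0]), np.array([1.0, 1.0])  # assumed action-space bounds
noise_percentage = 0.1                                     # e.g. the schedule's current value
action_mean = np.array([0.3, -0.5])                        # action proposed by the agent

# the standard deviation is a percentage of the absolute action range, as in get_action above
action_std = noise_percentage * (high - low)
noisy_action = np.random.normal(action_mean, action_std)
print(noisy_action)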
\ No newline at end of file
diff --git a/docs/_modules/rl_coach/exploration_policies/boltzmann.html b/docs/_modules/rl_coach/exploration_policies/boltzmann.html
new file mode 100644
index 0000000..4affe8f
--- /dev/null
+++ b/docs/_modules/rl_coach/exploration_policies/boltzmann.html
@@ -0,0 +1,292 @@
+ rl_coach.exploration_policies.boltzmann — Reinforcement Learning Coach 0.11.0 documentation
Source code for rl_coach.exploration_policies.boltzmann

+#
+# Copyright (c) 2017 Intel Corporation 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from typing import List
+
+import numpy as np
+
+from rl_coach.core_types import RunPhase, ActionType
+from rl_coach.exploration_policies.exploration_policy import ExplorationPolicy, ExplorationParameters
+from rl_coach.schedules import Schedule
+from rl_coach.spaces import ActionSpace
+
+
+class BoltzmannParameters(ExplorationParameters):
+    def __init__(self):
+        super().__init__()
+        self.temperature_schedule = None
+
+    @property
+    def path(self):
+        return 'rl_coach.exploration_policies.boltzmann:Boltzmann'
+
+
+
+
[docs]class Boltzmann(ExplorationPolicy): + """ + The Boltzmann exploration policy is intended for discrete action spaces. It assumes that each of the possible + actions has some value assigned to it (such as the Q value), and uses a softmax function to convert these values + into a distribution over the actions. It then samples the action for playing out of the calculated distribution. + An additional temperature schedule can be given by the user, and will control the steepness of the softmax function. + """ + def __init__(self, action_space: ActionSpace, temperature_schedule: Schedule): + """ + :param action_space: the action space used by the environment + :param temperature_schedule: the schedule for the temperature parameter of the softmax + """ + super().__init__(action_space) + self.temperature_schedule = temperature_schedule + + def get_action(self, action_values: List[ActionType]) -> ActionType: + if self.phase == RunPhase.TRAIN: + self.temperature_schedule.step() + # softmax calculation + exp_probabilities = np.exp(action_values / self.temperature_schedule.current_value) + probabilities = exp_probabilities / np.sum(exp_probabilities) + # make sure probs sum to 1 + probabilities[-1] = 1 - np.sum(probabilities[:-1]) + # choose actions according to the probabilities + return np.random.choice(range(self.action_space.shape), p=probabilities) + + def get_control_param(self): + return self.temperature_schedule.current_value
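The Boltzmann rule divides the action values by the temperature, applies a softmax and samples from the resulting distribution. A minimal NumPy sketch with arbitrary Q-values and temperature:

import numpy as np

q_values = np.array([1.0, 2.0, 0.5, 1.5])  # assumed action values from the agent
temperature = 0.5                           # lower temperature -> greedier distribution

exp_values = np.exp(q_values / temperature)
probabilities = exp_values / np.sum(exp_values)
action = np.random.choice(len(q_values), p=probabilities)
print(probabilities, action)

In practice the maximum value is often subtracted before exponentiating to avoid overflow; the implementation above uses the direct form.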
\ No newline at end of file
diff --git a/docs/_modules/rl_coach/exploration_policies/bootstrapped.html b/docs/_modules/rl_coach/exploration_policies/bootstrapped.html
new file mode 100644
index 0000000..d1d3821
--- /dev/null
+++ b/docs/_modules/rl_coach/exploration_policies/bootstrapped.html
@@ -0,0 +1,315 @@
+ rl_coach.exploration_policies.bootstrapped — Reinforcement Learning Coach 0.11.0 documentation
Source code for rl_coach.exploration_policies.bootstrapped

+#
+# Copyright (c) 2017 Intel Corporation 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from typing import List
+
+import numpy as np
+
+from rl_coach.core_types import RunPhase, ActionType
+from rl_coach.exploration_policies.additive_noise import AdditiveNoiseParameters
+from rl_coach.exploration_policies.e_greedy import EGreedy, EGreedyParameters
+from rl_coach.exploration_policies.exploration_policy import ExplorationParameters
+from rl_coach.schedules import Schedule, LinearSchedule
+from rl_coach.spaces import ActionSpace
+
+
+class BootstrappedParameters(EGreedyParameters):
+    def __init__(self):
+        super().__init__()
+        self.architecture_num_q_heads = 10
+        self.bootstrapped_data_sharing_probability = 1.0
+        self.epsilon_schedule = LinearSchedule(1, 0.01, 1000000)
+
+    @property
+    def path(self):
+        return 'rl_coach.exploration_policies.bootstrapped:Bootstrapped'
+
+
+
[docs]class Bootstrapped(EGreedy): + """ + Bootstrapped exploration policy is currently only used for discrete action spaces along with the + Bootstrapped DQN agent. It assumes that there is an ensemble of network heads, where each one predicts the + values for all the possible actions. For each episode, a single head is selected to lead the agent, according + to its value predictions. In evaluation, the action is selected using a majority vote over all the heads + predictions. + + .. note:: + This exploration policy will only work for Discrete action spaces with Bootstrapped DQN style agents, + since it requires the agent to have a network with multiple heads. + """ + def __init__(self, action_space: ActionSpace, epsilon_schedule: Schedule, evaluation_epsilon: float, + architecture_num_q_heads: int, + continuous_exploration_policy_parameters: ExplorationParameters = AdditiveNoiseParameters(),): + """ + :param action_space: the action space used by the environment + :param epsilon_schedule: a schedule for the epsilon values + :param evaluation_epsilon: the epsilon value to use for evaluation phases + :param continuous_exploration_policy_parameters: the parameters of the continuous exploration policy to use + if the e-greedy is used for a continuous policy + :param architecture_num_q_heads: the number of q heads to select from + """ + super().__init__(action_space, epsilon_schedule, evaluation_epsilon, continuous_exploration_policy_parameters) + self.num_heads = architecture_num_q_heads + self.selected_head = 0 + self.last_action_values = 0 + + def select_head(self): + self.selected_head = np.random.randint(self.num_heads) + + def get_action(self, action_values: List[ActionType]) -> ActionType: + # action values are none in case the exploration policy is going to select a random action + if action_values is not None: + if self.phase == RunPhase.TRAIN: + action_values = action_values[self.selected_head] + else: + # ensemble voting for evaluation + top_action_votings = np.argmax(action_values, axis=-1) + counts = np.bincount(top_action_votings.squeeze()) + top_action = np.argmax(counts) + # convert the top action to a one hot vector and pass it to e-greedy + action_values = np.eye(len(self.action_space.actions))[[top_action]] + self.last_action_values = action_values + return super().get_action(action_values) + + def get_control_param(self): + return self.selected_head
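The evaluation-time ensemble vote described in the docstring (each head proposes its argmax action and the most common vote wins) can be shown on its own. A minimal NumPy sketch with an arbitrary set of head predictions:

import numpy as np

# assumed predictions of 5 heads over 4 actions
head_q_values = np.array([[1.0, 2.0, 0.5, 1.5],
                          [0.9, 2.1, 0.4, 1.4],
                          [1.1, 1.0, 0.6, 2.0],
                          [0.8, 2.2, 0.3, 1.2],
                          [1.0, 1.9, 0.5, 1.6]])

# each head votes for its best action and the majority wins, as in get_action above
votes = np.argmax(head_q_values, axis=-1)
top_action = int(np.argmax(np.bincount(votes)))
print(votes, top_action)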
\ No newline at end of file
diff --git a/docs/_modules/rl_coach/exploration_policies/categorical.html b/docs/_modules/rl_coach/exploration_policies/categorical.html
new file mode 100644
index 0000000..e901c8e
--- /dev/null
+++ b/docs/_modules/rl_coach/exploration_policies/categorical.html
@@ -0,0 +1,281 @@
+ rl_coach.exploration_policies.categorical — Reinforcement Learning Coach 0.11.0 documentation
Source code for rl_coach.exploration_policies.categorical

+#
+# Copyright (c) 2017 Intel Corporation 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from typing import List
+
+import numpy as np
+
+from rl_coach.core_types import RunPhase, ActionType
+from rl_coach.exploration_policies.exploration_policy import ExplorationPolicy, ExplorationParameters
+from rl_coach.spaces import ActionSpace
+
+
+class CategoricalParameters(ExplorationParameters):
+    @property
+    def path(self):
+        return 'rl_coach.exploration_policies.categorical:Categorical'
+
+
+
[docs]class Categorical(ExplorationPolicy): + """ + Categorical exploration policy is intended for discrete action spaces. It expects the action values to + represent a probability distribution over the action, from which a single action will be sampled. + In evaluation, the action that has the highest probability will be selected. This is particularly useful for + actor-critic schemes, where the actors output is a probability distribution over the actions. + """ + def __init__(self, action_space: ActionSpace): + """ + :param action_space: the action space used by the environment + """ + super().__init__(action_space) + + def get_action(self, action_values: List[ActionType]) -> ActionType: + if self.phase == RunPhase.TRAIN: + # choose actions according to the probabilities + return np.random.choice(self.action_space.actions, p=action_values) + else: + # take the action with the highest probability + return np.argmax(action_values) + + def get_control_param(self): + return 0
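The selection rule is simply: sample from the probability vector during training, take its argmax during evaluation. A minimal NumPy sketch with an arbitrary action distribution:

import numpy as np

probabilities = np.array([0.1, 0.6, 0.3])  # assumed policy output (sums to 1)

train_action = np.random.choice(len(probabilities), p=probabilities)  # training: sample
eval_action = int(np.argmax(probabilities))                           # evaluation: greedy
print(train_action, eval_action)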
\ No newline at end of file
diff --git a/docs/_modules/rl_coach/exploration_policies/continuous_entropy.html b/docs/_modules/rl_coach/exploration_policies/continuous_entropy.html
new file mode 100644
index 0000000..71451b9
--- /dev/null
+++ b/docs/_modules/rl_coach/exploration_policies/continuous_entropy.html
@@ -0,0 +1,265 @@
+ rl_coach.exploration_policies.continuous_entropy — Reinforcement Learning Coach 0.11.0 documentation
Source code for rl_coach.exploration_policies.continuous_entropy

+#
+# Copyright (c) 2017 Intel Corporation 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from rl_coach.exploration_policies.additive_noise import AdditiveNoise, AdditiveNoiseParameters
+
+
+class ContinuousEntropyParameters(AdditiveNoiseParameters):
+    @property
+    def path(self):
+        return 'rl_coach.exploration_policies.continuous_entropy:ContinuousEntropy'
+
+
+
[docs]class ContinuousEntropy(AdditiveNoise): + """ + Continuous entropy is an exploration policy that is actually implemented as part of the network. + The exploration policy class is only a placeholder for choosing this policy. The exploration policy is + implemented by adding a regularization factor to the network loss, which regularizes the entropy of the action. + This exploration policy is only intended for continuous action spaces, and assumes that the entire calculation + is implemented as part of the head. + + .. warning:: + This exploration policy expects the agent or the network to implement the exploration functionality. + There are only a few heads that actually are relevant and implement the entropy regularization factor. + """ + pass
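Since the class itself is a placeholder, the exploration comes from an entropy bonus added to the policy head's loss. As a rough illustration only (the coefficient name and the loss wiring are assumptions, not the Coach head implementation), the quantity such a regularizer maximizes for a diagonal Gaussian policy is:

import numpy as np

log_std = np.array([-0.5, -1.0])  # assumed per-dimension log standard deviations of the policy

# differential entropy of a diagonal Gaussian: sum over dims of 0.5 * log(2*pi*e) + log(sigma_i)
entropy = np.sum(0.5 * np.log(2 * np.pi * np.e) + log_std)

beta = 0.01                        # assumed regularization coefficient
loss_bonus = -beta * entropy       # subtracted from the loss to encourage wider action distributions
print(entropy, loss_bonus)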
\ No newline at end of file
diff --git a/docs/_modules/rl_coach/exploration_policies/e_greedy.html b/docs/_modules/rl_coach/exploration_policies/e_greedy.html
new file mode 100644
index 0000000..66112ff
--- /dev/null
+++ b/docs/_modules/rl_coach/exploration_policies/e_greedy.html
@@ -0,0 +1,342 @@
+ rl_coach.exploration_policies.e_greedy — Reinforcement Learning Coach 0.11.0 documentation
Source code for rl_coach.exploration_policies.e_greedy

+#
+# Copyright (c) 2017 Intel Corporation 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from typing import List
+
+import numpy as np
+
+from rl_coach.core_types import RunPhase, ActionType
+from rl_coach.exploration_policies.additive_noise import AdditiveNoiseParameters
+from rl_coach.exploration_policies.exploration_policy import ExplorationParameters
+from rl_coach.exploration_policies.exploration_policy import ExplorationPolicy
+from rl_coach.schedules import Schedule, LinearSchedule
+from rl_coach.spaces import ActionSpace, DiscreteActionSpace, BoxActionSpace
+from rl_coach.utils import dynamic_import_and_instantiate_module_from_params
+
+
+class EGreedyParameters(ExplorationParameters):
+    def __init__(self):
+        super().__init__()
+        self.epsilon_schedule = LinearSchedule(0.5, 0.01, 50000)
+        self.evaluation_epsilon = 0.05
+        self.continuous_exploration_policy_parameters = AdditiveNoiseParameters()
+        self.continuous_exploration_policy_parameters.noise_percentage_schedule = LinearSchedule(0.1, 0.1, 50000)
+        # for continuous control -
+        # (see http://www.cs.ubc.ca/~van/papers/2017-TOG-deepLoco/2017-TOG-deepLoco.pdf)
+
+    @property
+    def path(self):
+        return 'rl_coach.exploration_policies.e_greedy:EGreedy'
+
+
+
[docs]class EGreedy(ExplorationPolicy): + """ + e-greedy is an exploration policy that is intended for both discrete and continuous action spaces. + + For discrete action spaces, it assumes that each action is assigned a value, and it selects the action with the + highest value with probability 1 - epsilon. Otherwise, it selects a action sampled uniformly out of all the + possible actions. The epsilon value is given by the user and can be given as a schedule. + In evaluation, a different epsilon value can be specified. + + For continuous action spaces, it assumes that the mean action is given by the agent. With probability epsilon, + it samples a random action out of the action space bounds. Otherwise, it selects the action according to a + given continuous exploration policy, which is set to AdditiveNoise by default. In evaluation, the action is + always selected according to the given continuous exploration policy (where its phase is set to evaluation as well). + """ + def __init__(self, action_space: ActionSpace, epsilon_schedule: Schedule, + evaluation_epsilon: float, + continuous_exploration_policy_parameters: ExplorationParameters=AdditiveNoiseParameters()): + """ + :param action_space: the action space used by the environment + :param epsilon_schedule: a schedule for the epsilon values + :param evaluation_epsilon: the epsilon value to use for evaluation phases + :param continuous_exploration_policy_parameters: the parameters of the continuous exploration policy to use + if the e-greedy is used for a continuous policy + """ + super().__init__(action_space) + self.epsilon_schedule = epsilon_schedule + self.evaluation_epsilon = evaluation_epsilon + + if isinstance(self.action_space, BoxActionSpace): + # for continuous e-greedy (see http://www.cs.ubc.ca/~van/papers/2017-TOG-deepLoco/2017-TOG-deepLoco.pdf) + continuous_exploration_policy_parameters.action_space = action_space + self.continuous_exploration_policy = \ + dynamic_import_and_instantiate_module_from_params(continuous_exploration_policy_parameters) + + self.current_random_value = np.random.rand() + + def requires_action_values(self): + epsilon = self.evaluation_epsilon if self.phase == RunPhase.TEST else self.epsilon_schedule.current_value + return self.current_random_value >= epsilon + + def get_action(self, action_values: List[ActionType]) -> ActionType: + epsilon = self.evaluation_epsilon if self.phase == RunPhase.TEST else self.epsilon_schedule.current_value + + if isinstance(self.action_space, DiscreteActionSpace): + top_action = np.argmax(action_values) + if self.current_random_value < epsilon: + chosen_action = self.action_space.sample() + else: + chosen_action = top_action + else: + if self.current_random_value < epsilon and self.phase == RunPhase.TRAIN: + chosen_action = self.action_space.sample() + else: + chosen_action = self.continuous_exploration_policy.get_action(action_values) + + # step the epsilon schedule and generate a new random value for next time + if self.phase == RunPhase.TRAIN: + self.epsilon_schedule.step() + self.current_random_value = np.random.rand() + return chosen_action + + def get_control_param(self): + if isinstance(self.action_space, DiscreteActionSpace): + return self.evaluation_epsilon if self.phase == RunPhase.TEST else self.epsilon_schedule.current_value + elif isinstance(self.action_space, BoxActionSpace): + return self.continuous_exploration_policy.get_control_param() + + def change_phase(self, phase): + super().change_phase(phase) + if isinstance(self.action_space, BoxActionSpace): + 
self.continuous_exploration_policy.change_phase(phase)
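For the discrete branch above, the rule reduces to: with probability epsilon take a uniformly random action, otherwise take the argmax. A minimal NumPy sketch with arbitrary action values and epsilon:

import numpy as np

q_values = np.array([1.0, 2.0, 0.5, 1.5])  # assumed action values
epsilon = 0.1                               # e.g. the epsilon schedule's current value

if np.random.rand() < epsilon:
    action = np.random.randint(len(q_values))  # explore: uniform random action
else:
    action = int(np.argmax(q_values))          # exploit: greedy action
print(action)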
\ No newline at end of file
diff --git a/docs/_modules/rl_coach/exploration_policies/exploration_policy.html b/docs/_modules/rl_coach/exploration_policies/exploration_policy.html
new file mode 100644
index 0000000..df2bf93
--- /dev/null
+++ b/docs/_modules/rl_coach/exploration_policies/exploration_policy.html
@@ -0,0 +1,311 @@
+ rl_coach.exploration_policies.exploration_policy — Reinforcement Learning Coach 0.11.0 documentation
Source code for rl_coach.exploration_policies.exploration_policy

+#
+# Copyright (c) 2017 Intel Corporation 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from typing import List
+
+from rl_coach.base_parameters import Parameters
+from rl_coach.core_types import RunPhase, ActionType
+from rl_coach.spaces import ActionSpace
+
+
+class ExplorationParameters(Parameters):
+    def __init__(self):
+        self.action_space = None
+
+    @property
+    def path(self):
+        return 'rl_coach.exploration_policies.exploration_policy:ExplorationPolicy'
+
+
+
[docs]class ExplorationPolicy(object): + """ + An exploration policy takes the predicted actions or action values from the agent, and selects the action to + actually apply to the environment using some predefined algorithm. + """ + def __init__(self, action_space: ActionSpace): + """ + :param action_space: the action space used by the environment + """ + self.phase = RunPhase.HEATUP + self.action_space = action_space + +
[docs] def reset(self): + """ + Used for resetting the exploration policy parameters when needed + :return: None + """ + pass
+ +
[docs] def get_action(self, action_values: List[ActionType]) -> ActionType: + """ + Given a list of values corresponding to each action, + choose one actions according to the exploration policy + :param action_values: A list of action values + :return: The chosen action + """ + if self.__class__ == ExplorationPolicy: + raise ValueError("The ExplorationPolicy class is an abstract class and should not be used directly. " + "Please set the exploration parameters to point to an inheriting class like EGreedy or " + "AdditiveNoise") + else: + raise ValueError("The get_action function should be overridden in the inheriting exploration class")
+ +
[docs] def change_phase(self, phase): + """ + Change between running phases of the algorithm + :param phase: Either Heatup or Train + :return: none + """ + self.phase = phase
+ +
[docs] def requires_action_values(self) -> bool: + """ + Allows exploration policies to define if they require the action values for the current step. + This can save up a lot of computation. For example in e-greedy, if the random value generated is smaller + than epsilon, the action is completely random, and the action values don't need to be calculated + :return: True if the action values are required. False otherwise + """ + return True
+ + def get_control_param(self): + return 0
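A new policy only needs to subclass ExplorationPolicy and override get_action (and optionally requires_action_values and get_control_param). A minimal illustrative subclass, assuming only the imports already used in this module; it is a sketch, not a policy shipped with Coach:

from typing import List

import numpy as np

from rl_coach.core_types import ActionType
from rl_coach.exploration_policies.exploration_policy import ExplorationPolicy
from rl_coach.spaces import ActionSpace


class AlwaysArgmax(ExplorationPolicy):
    """Hypothetical policy that always picks the highest-valued action."""
    def __init__(self, action_space: ActionSpace):
        super().__init__(action_space)

    def get_action(self, action_values: List[ActionType]) -> ActionType:
        return np.argmax(action_values)

    def get_control_param(self):
        return 0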
\ No newline at end of file
diff --git a/docs/_modules/rl_coach/exploration_policies/greedy.html b/docs/_modules/rl_coach/exploration_policies/greedy.html
new file mode 100644
index 0000000..b031dbe
--- /dev/null
+++ b/docs/_modules/rl_coach/exploration_policies/greedy.html
@@ -0,0 +1,278 @@
+ rl_coach.exploration_policies.greedy — Reinforcement Learning Coach 0.11.0 documentation
Source code for rl_coach.exploration_policies.greedy

+#
+# Copyright (c) 2017 Intel Corporation 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from typing import List
+
+import numpy as np
+
+from rl_coach.core_types import ActionType
+from rl_coach.exploration_policies.exploration_policy import ExplorationPolicy, ExplorationParameters
+from rl_coach.spaces import ActionSpace, DiscreteActionSpace, BoxActionSpace
+
+
+class GreedyParameters(ExplorationParameters):
+    @property
+    def path(self):
+        return 'rl_coach.exploration_policies.greedy:Greedy'
+
+
+
[docs]class Greedy(ExplorationPolicy): + """ + The Greedy exploration policy is intended for both discrete and continuous action spaces. + For discrete action spaces, it always selects the action with the maximum value, as given by the agent. + For continuous action spaces, it always return the exact action, as it was given by the agent. + """ + def __init__(self, action_space: ActionSpace): + """ + :param action_space: the action space used by the environment + """ + super().__init__(action_space) + + def get_action(self, action_values: List[ActionType]) -> ActionType: + if type(self.action_space) == DiscreteActionSpace: + return np.argmax(action_values) + if type(self.action_space) == BoxActionSpace: + return action_values + + def get_control_param(self): + return 0
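Greedy has no tunable parameters: argmax for discrete action spaces, pass-through for continuous ones. A two-line NumPy sketch with arbitrary values:

import numpy as np

discrete_action = int(np.argmax(np.array([1.0, 2.0, 0.5])))  # discrete: highest-valued action
continuous_action = np.array([0.3, -0.5])                    # continuous: the agent's action, unchanged
print(discrete_action, continuous_action)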
\ No newline at end of file
diff --git a/docs/_modules/rl_coach/exploration_policies/ou_process.html b/docs/_modules/rl_coach/exploration_policies/ou_process.html
new file mode 100644
index 0000000..390dcba
--- /dev/null
+++ b/docs/_modules/rl_coach/exploration_policies/ou_process.html
@@ -0,0 +1,313 @@
+ rl_coach.exploration_policies.ou_process — Reinforcement Learning Coach 0.11.0 documentation
Source code for rl_coach.exploration_policies.ou_process

+#
+# Copyright (c) 2017 Intel Corporation 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from typing import List
+
+import numpy as np
+
+from rl_coach.core_types import RunPhase, ActionType
+from rl_coach.exploration_policies.exploration_policy import ExplorationPolicy, ExplorationParameters
+from rl_coach.spaces import ActionSpace, BoxActionSpace, GoalsSpace
+
+
+# Based on the description in:
+# https://math.stackexchange.com/questions/1287634/implementing-ornstein-uhlenbeck-in-matlab
+class OUProcessParameters(ExplorationParameters):
+    def __init__(self):
+        super().__init__()
+        self.mu = 0
+        self.theta = 0.15
+        self.sigma = 0.2
+        self.dt = 0.01
+
+    @property
+    def path(self):
+        return 'rl_coach.exploration_policies.ou_process:OUProcess'
+
+
+# Ornstein-Uhlenbeck process
+
[docs]class OUProcess(ExplorationPolicy): + """ + OUProcess exploration policy is intended for continuous action spaces, and selects the action according to + an Ornstein-Uhlenbeck process. The Ornstein-Uhlenbeck process implements the action as a Gaussian process, where + the samples are correlated between consequent time steps. + """ + def __init__(self, action_space: ActionSpace, mu: float=0, theta: float=0.15, sigma: float=0.2, dt: float=0.01): + """ + :param action_space: the action space used by the environment + """ + super().__init__(action_space) + self.mu = float(mu) * np.ones(self.action_space.shape) + self.theta = float(theta) + self.sigma = float(sigma) * np.ones(self.action_space.shape) + self.state = np.zeros(self.action_space.shape) + self.dt = dt + + if not (isinstance(action_space, BoxActionSpace) or isinstance(action_space, GoalsSpace)): + raise ValueError("OU process exploration works only for continuous controls." + "The given action space is of type: {}".format(action_space.__class__.__name__)) + + def reset(self): + self.state = np.zeros(self.action_space.shape) + + def noise(self): + x = self.state + dx = self.theta * (self.mu - x) * self.dt + self.sigma * np.random.randn(len(x)) * np.sqrt(self.dt) + self.state = x + dx + return self.state + + def get_action(self, action_values: List[ActionType]) -> ActionType: + if self.phase == RunPhase.TRAIN: + noise = self.noise() + else: + noise = np.zeros(self.action_space.shape) + + action = action_values.squeeze() + noise + + return action + + def get_control_param(self): + if self.phase == RunPhase.TRAIN: + return self.state + else: + return np.zeros(self.action_space.shape)
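The noise() recursion above implements dx = theta * (mu - x) * dt + sigma * sqrt(dt) * N(0, 1), which produces noise that is correlated across consecutive steps. A minimal NumPy sketch generating a short trajectory with the default parameters from OUProcessParameters; the action dimension is arbitrary:

import numpy as np

mu, theta, sigma, dt = 0.0, 0.15, 0.2, 0.01  # defaults from OUProcessParameters above
action_dim = 2                                # assumed action dimension

state = np.zeros(action_dim)
trajectory = []
for _ in range(5):
    dx = theta * (mu - state) * dt + sigma * np.sqrt(dt) * np.random.randn(action_dim)
    state = state + dx
    trajectory.append(state.copy())
print(np.array(trajectory))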
\ No newline at end of file
diff --git a/docs/_modules/rl_coach/exploration_policies/parameter_noise.html b/docs/_modules/rl_coach/exploration_policies/parameter_noise.html
new file mode 100644
index 0000000..79a24c8
--- /dev/null
+++ b/docs/_modules/rl_coach/exploration_policies/parameter_noise.html
@@ -0,0 +1,314 @@
+ rl_coach.exploration_policies.parameter_noise — Reinforcement Learning Coach 0.11.0 documentation
Source code for rl_coach.exploration_policies.parameter_noise

+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from typing import List, Dict
+
+import numpy as np
+
+from rl_coach.agents.dqn_agent import DQNAgentParameters
+from rl_coach.architectures.tensorflow_components.layers import NoisyNetDense
+from rl_coach.base_parameters import AgentParameters, NetworkParameters
+from rl_coach.spaces import ActionSpace, BoxActionSpace, DiscreteActionSpace
+
+from rl_coach.core_types import ActionType
+from rl_coach.exploration_policies.exploration_policy import ExplorationPolicy, ExplorationParameters
+
+
+class ParameterNoiseParameters(ExplorationParameters):
+    def __init__(self, agent_params: AgentParameters):
+        super().__init__()
+        if not isinstance(agent_params, DQNAgentParameters):
+            raise ValueError("Currently only DQN variants are supported for using an exploration type of "
+                             "ParameterNoise.")
+
+        self.network_params = agent_params.network_wrappers
+
+    @property
+    def path(self):
+        return 'rl_coach.exploration_policies.parameter_noise:ParameterNoise'
+
+
+
[docs]class ParameterNoise(ExplorationPolicy): + """ + The ParameterNoise exploration policy is intended for both discrete and continuous action spaces. + It applies the exploration policy by replacing all the dense network layers with noisy layers. + The noisy layers have both weight means and weight standard deviations, and for each forward pass of the network + the weights are sampled from a normal distribution that follows the learned weights mean and standard deviation + values. + + Warning: currently supported only by DQN variants + """ + def __init__(self, network_params: Dict[str, NetworkParameters], action_space: ActionSpace): + """ + :param action_space: the action space used by the environment + """ + super().__init__(action_space) + self.network_params = network_params + self._replace_network_dense_layers() + + def get_action(self, action_values: List[ActionType]) -> ActionType: + if type(self.action_space) == DiscreteActionSpace: + return np.argmax(action_values) + elif type(self.action_space) == BoxActionSpace: + action_values_mean = action_values[0].squeeze() + action_values_std = action_values[1].squeeze() + return np.random.normal(action_values_mean, action_values_std) + else: + raise ValueError("ActionSpace type {} is not supported for ParameterNoise.".format(type(self.action_space))) + + def get_control_param(self): + return 0 + + def _replace_network_dense_layers(self): + # replace the dense type for all the networks components (embedders, mw, heads) with a NoisyNetDense + + # NOTE: we are changing network params in a non-params class (an already instantiated class), this could have + # been prone to a bug, but since the networks are created very late in the game + # (after agent.init_environment_dependent()_modules is called) - then we are fine. + + for network_wrapper_params in self.network_params.values(): + for component_params in list(network_wrapper_params.input_embedders_parameters.values()) + \ + [network_wrapper_params.middleware_parameters] + \ + network_wrapper_params.heads_parameters: + component_params.dense_layer = NoisyNetDense
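The underlying mechanism replaces each dense layer with a noisy layer whose weights are re-sampled from learned (mu, sigma) pairs on every forward pass. A very rough NumPy sketch of a single noisy dense forward pass; the shapes, initial sigma value and independent-noise form are assumptions for illustration and do not describe Coach's NoisyNetDense internals:

import numpy as np

in_dim, out_dim = 4, 3
x = np.random.randn(in_dim)

# learned parameters: a mean and a standard deviation for every weight and bias
w_mu = np.random.randn(in_dim, out_dim) * 0.1
w_sigma = np.full((in_dim, out_dim), 0.017)   # assumed initial sigma
b_mu, b_sigma = np.zeros(out_dim), np.full(out_dim, 0.017)

# sample fresh weights for this forward pass, so exploration comes from the parameters themselves
w = w_mu + w_sigma * np.random.randn(in_dim, out_dim)
b = b_mu + b_sigma * np.random.randn(out_dim)
print(x @ w + b)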
\ No newline at end of file
diff --git a/docs/_modules/rl_coach/exploration_policies/truncated_normal.html b/docs/_modules/rl_coach/exploration_policies/truncated_normal.html
new file mode 100644
index 0000000..62b5033
--- /dev/null
+++ b/docs/_modules/rl_coach/exploration_policies/truncated_normal.html
@@ -0,0 +1,337 @@
+ rl_coach.exploration_policies.truncated_normal — Reinforcement Learning Coach 0.11.0 documentation
Source code for rl_coach.exploration_policies.truncated_normal

+#
+# Copyright (c) 2017 Intel Corporation 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from typing import List
+
+import numpy as np
+from scipy.stats import truncnorm
+
+from rl_coach.core_types import RunPhase, ActionType
+from rl_coach.exploration_policies.exploration_policy import ExplorationPolicy, ExplorationParameters
+from rl_coach.schedules import Schedule, LinearSchedule
+from rl_coach.spaces import ActionSpace, BoxActionSpace
+
+
+class TruncatedNormalParameters(ExplorationParameters):
+    def __init__(self):
+        super().__init__()
+        self.noise_percentage_schedule = LinearSchedule(0.1, 0.1, 50000)
+        self.evaluation_noise_percentage = 0.05
+        self.clip_low = 0
+        self.clip_high = 1
+
+    @property
+    def path(self):
+        return 'rl_coach.exploration_policies.truncated_normal:TruncatedNormal'
+
+
+
[docs]class TruncatedNormal(ExplorationPolicy): + """ + The TruncatedNormal exploration policy is intended for continuous action spaces. It samples the action from a + normal distribution, where the mean action is given by the agent, and the standard deviation can be given in t + wo different ways: + 1. Specified by the user as a noise schedule which is taken in percentiles out of the action space size + 2. Specified by the agents action. In case the agents action is a list with 2 values, the 1st one is assumed to + be the mean of the action, and 2nd is assumed to be its standard deviation. + When the sampled action is outside of the action bounds given by the user, it is sampled again and again, until it + is within the bounds. + """ + def __init__(self, action_space: ActionSpace, noise_percentage_schedule: Schedule, + evaluation_noise_percentage: float, clip_low: float, clip_high: float): + """ + :param action_space: the action space used by the environment + :param noise_percentage_schedule: the schedule for the noise variance percentage relative to the absolute range + of the action space + :param evaluation_noise_percentage: the noise variance percentage that will be used during evaluation phases + """ + super().__init__(action_space) + self.noise_percentage_schedule = noise_percentage_schedule + self.evaluation_noise_percentage = evaluation_noise_percentage + self.clip_low = clip_low + self.clip_high = clip_high + + if not isinstance(action_space, BoxActionSpace): + raise ValueError("Truncated normal exploration works only for continuous controls." + "The given action space is of type: {}".format(action_space.__class__.__name__)) + + if not np.all(-np.inf < action_space.high) or not np.all(action_space.high < np.inf)\ + or not np.all(-np.inf < action_space.low) or not np.all(action_space.low < np.inf): + raise ValueError("Additive noise exploration requires bounded actions") + + # TODO: allow working with unbounded actions by defining the noise in terms of range and not percentage + + def get_action(self, action_values: List[ActionType]) -> ActionType: + # set the current noise percentage + if self.phase == RunPhase.TEST: + current_noise_precentage = self.evaluation_noise_percentage + else: + current_noise_precentage = self.noise_percentage_schedule.current_value + + # scale the noise to the action space range + action_values_std = current_noise_precentage * (self.action_space.high - self.action_space.low) + + # extract the mean values + if isinstance(action_values, list): + # the action values are expected to be a list with the action mean and optionally the action stdev + action_values_mean = action_values[0].squeeze() + else: + # the action values are expected to be a numpy array representing the action mean + action_values_mean = action_values.squeeze() + + # step the noise schedule + if self.phase == RunPhase.TRAIN: + self.noise_percentage_schedule.step() + # the second element of the list is assumed to be the standard deviation + if isinstance(action_values, list) and len(action_values) > 1: + action_values_std = action_values[1].squeeze() + + # sample from truncated normal distribution + normalized_low = (self.clip_low - action_values_mean) / action_values_std + normalized_high = (self.clip_high - action_values_mean) / action_values_std + distribution = truncnorm(normalized_low, normalized_high, loc=action_values_mean, scale=action_values_std) + action = distribution.rvs(1) + + return action + + def get_control_param(self): + return 
np.ones(self.action_space.shape)*self.noise_percentage_schedule.current_value
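The re-sampling described in the docstring is realized with scipy.stats.truncnorm, which expects the clip limits expressed in units of standard deviations from the mean. A minimal sketch with an arbitrary mean and standard deviation and the default clip range of [0, 1]:

import numpy as np
from scipy.stats import truncnorm

action_mean, action_std = 0.4, 0.2  # assumed mean and standard deviation of the action
clip_low, clip_high = 0.0, 1.0      # defaults from TruncatedNormalParameters above

# normalize the bounds to standard-deviation units, as in get_action above
a = (clip_low - action_mean) / action_std
b = (clip_high - action_mean) / action_std
action = truncnorm(a, b, loc=action_mean, scale=action_std).rvs(1)
print(action)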
\ No newline at end of file
diff --git a/docs/_modules/rl_coach/exploration_policies/ucb.html b/docs/_modules/rl_coach/exploration_policies/ucb.html
new file mode 100644
index 0000000..b340ae9
--- /dev/null
+++ b/docs/_modules/rl_coach/exploration_policies/ucb.html
@@ -0,0 +1,319 @@
+ rl_coach.exploration_policies.ucb — Reinforcement Learning Coach 0.11.0 documentation
Source code for rl_coach.exploration_policies.ucb

+#
+# Copyright (c) 2017 Intel Corporation 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from typing import List
+
+import numpy as np
+
+from rl_coach.core_types import RunPhase, ActionType, EnvironmentSteps
+from rl_coach.exploration_policies.additive_noise import AdditiveNoiseParameters
+from rl_coach.exploration_policies.e_greedy import EGreedy, EGreedyParameters
+from rl_coach.exploration_policies.exploration_policy import ExplorationParameters
+from rl_coach.schedules import Schedule, LinearSchedule, PieceWiseSchedule
+from rl_coach.spaces import ActionSpace
+
+
+class UCBParameters(EGreedyParameters):
+    def __init__(self):
+        super().__init__()
+        self.architecture_num_q_heads = 10
+        self.bootstrapped_data_sharing_probability = 1.0
+        self.epsilon_schedule = PieceWiseSchedule([
+            (LinearSchedule(1, 0.1, 1000000), EnvironmentSteps(1000000)),
+            (LinearSchedule(0.1, 0.01, 4000000), EnvironmentSteps(4000000))
+        ])
+        self.lamb = 0.1
+
+    @property
+    def path(self):
+        return 'rl_coach.exploration_policies.ucb:UCB'
+
+
+
+class UCB(EGreedy):
+    """
+    The UCB exploration policy follows the upper confidence bound heuristic to sample actions in discrete action
+    spaces. It assumes that there are multiple network heads predicting action values, and that the standard
+    deviation between the heads' predictions represents the uncertainty of the agent in each of the actions.
+    It then updates the action value estimates to be mean(actions) + lambda * stdev(actions), where lambda is
+    given by the user. This exploration policy aims to take advantage of the uncertainty of the agent in its
+    predictions, and selects actions according to the tradeoff between how uncertain the agent is and how large
+    it predicts the outcome from those actions to be.
+    """
+    def __init__(self, action_space: ActionSpace, epsilon_schedule: Schedule, evaluation_epsilon: float,
+                 architecture_num_q_heads: int, lamb: float,
+                 continuous_exploration_policy_parameters: ExplorationParameters = AdditiveNoiseParameters()):
+        """
+        :param action_space: the action space used by the environment
+        :param epsilon_schedule: a schedule for the epsilon values
+        :param evaluation_epsilon: the epsilon value to use for evaluation phases
+        :param architecture_num_q_heads: the number of q heads to select from
+        :param lamb: lambda coefficient for taking the standard deviation into account
+        :param continuous_exploration_policy_parameters: the parameters of the continuous exploration policy to use
+                                                         if the e-greedy is used for a continuous policy
+        """
+        super().__init__(action_space, epsilon_schedule, evaluation_epsilon,
+                         continuous_exploration_policy_parameters)
+        self.num_heads = architecture_num_q_heads
+        self.lamb = lamb
+        self.std = 0
+        self.last_action_values = 0
+
+    def select_head(self):
+        pass
+
+    def get_action(self, action_values: List[ActionType]) -> ActionType:
+        # action values are None in case the exploration policy is going to select a random action
+        if action_values is not None:
+            if self.requires_action_values():
+                mean = np.mean(action_values, axis=0)
+                if self.phase == RunPhase.TRAIN:
+                    self.std = np.std(action_values, axis=0)
+                    self.last_action_values = mean + self.lamb * self.std
+                else:
+                    self.last_action_values = mean
+        return super().get_action(self.last_action_values)
+
+    def get_control_param(self):
+        if self.phase == RunPhase.TRAIN:
+            return np.mean(self.std)
+        else:
+            return 0
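As a standalone illustration of the scoring rule above, the following numpy sketch computes UCB values for a batch of hypothetical Q-head predictions.

import numpy as np

lamb = 0.1                                  # lambda coefficient, as in UCBParameters
action_values = np.random.randn(10, 4)      # hypothetical predictions: 10 Q-heads x 4 actions

mean = np.mean(action_values, axis=0)
std = np.std(action_values, axis=0)         # disagreement between heads = uncertainty
ucb_values = mean + lamb * std              # optimistic estimate used for action selection
greedy_action = int(np.argmax(ucb_values))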
\ No newline at end of file
diff --git a/docs/_modules/rl_coach/filters/action/attention_discretization.html b/docs/_modules/rl_coach/filters/action/attention_discretization.html
new file mode 100644
index 0000000..7c11399
--- /dev/null
+++ b/docs/_modules/rl_coach/filters/action/attention_discretization.html
@@ -0,0 +1,300 @@
Source code for rl_coach.filters.action.attention_discretization

+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from typing import Union, List
+
+import numpy as np
+
+from rl_coach.filters.action.box_discretization import BoxDiscretization
+from rl_coach.filters.action.partial_discrete_action_space_map import PartialDiscreteActionSpaceMap
+from rl_coach.spaces import AttentionActionSpace, BoxActionSpace, DiscreteActionSpace
+
+
+
[docs]class AttentionDiscretization(PartialDiscreteActionSpaceMap): + """ + Discretizes an **AttentionActionSpace**. The attention action space defines the actions + as choosing sub-boxes in a given box. For example, consider an image of size 100x100, where the action is choosing + a crop window of size 20x20 to attend to in the image. AttentionDiscretization allows discretizing the possible crop + windows to choose into a finite number of options, and map a discrete action space into those crop windows. + + Warning! this will currently only work for attention spaces with 2 dimensions. + """ + def __init__(self, num_bins_per_dimension: Union[int, List[int]], force_int_bins=False): + """ + :param num_bins_per_dimension: Number of discrete bins to use for each dimension of the action space + :param force_int_bins: If set to True, all the bins will represent integer coordinates in space. + """ + # we allow specifying either a single number for all dimensions, or a single number per dimension in the target + # action space + self.num_bins_per_dimension = num_bins_per_dimension + + self.force_int_bins = force_int_bins + + # TODO: this will currently only work for attention spaces with 2 dimensions. generalize it. + + super().__init__() + + def validate_output_action_space(self, output_action_space: AttentionActionSpace): + if not isinstance(output_action_space, AttentionActionSpace): + raise ValueError("AttentionActionSpace discretization only works with an output space of type AttentionActionSpace. " + "The given output space is {}".format(output_action_space)) + + def get_unfiltered_action_space(self, output_action_space: AttentionActionSpace) -> DiscreteActionSpace: + if isinstance(self.num_bins_per_dimension, int): + self.num_bins_per_dimension = [self.num_bins_per_dimension] * output_action_space.shape[0] + + # create a discrete to linspace map to ease the extraction of attention actions + discrete_to_box = BoxDiscretization([n+1 for n in self.num_bins_per_dimension], + self.force_int_bins) + discrete_to_box.get_unfiltered_action_space(BoxActionSpace(output_action_space.shape, + output_action_space.low, + output_action_space.high), ) + + rows, cols = self.num_bins_per_dimension + start_ind = [i * (cols + 1) + j for i in range(rows + 1) if i < rows for j in range(cols + 1) if j < cols] + end_ind = [i + cols + 2 for i in start_ind] + self.target_actions = [np.array([discrete_to_box.target_actions[start], + discrete_to_box.target_actions[end]]) + for start, end in zip(start_ind, end_ind)] + + return super().get_unfiltered_action_space(output_action_space)
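The window enumeration above can be hard to follow from the index arithmetic alone, so here is a self-contained numpy sketch of the same bookkeeping for a 100x100 image split into a 2x2 grid of crop windows (the image size and grid are hypothetical).

import numpy as np
from itertools import product

rows, cols = 2, 2
# corner grid of (rows + 1) x (cols + 1) points, as BoxDiscretization would produce
corners = [np.array(p) for p in product(np.linspace(0, 100, rows + 1),
                                         np.linspace(0, 100, cols + 1))]
start_ind = [i * (cols + 1) + j for i in range(rows) for j in range(cols)]
end_ind = [i + cols + 2 for i in start_ind]
crop_windows = [np.array([corners[s], corners[e]]) for s, e in zip(start_ind, end_ind)]
# crop_windows[0] is [[0, 0], [50, 50]] -- the top-left 50x50 window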
\ No newline at end of file
diff --git a/docs/_modules/rl_coach/filters/action/box_discretization.html b/docs/_modules/rl_coach/filters/action/box_discretization.html
new file mode 100644
index 0000000..553dbd2
--- /dev/null
+++ b/docs/_modules/rl_coach/filters/action/box_discretization.html
@@ -0,0 +1,300 @@
Source code for rl_coach.filters.action.box_discretization

+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from itertools import product
+from typing import Union, List
+
+import numpy as np
+
+from rl_coach.filters.action.partial_discrete_action_space_map import PartialDiscreteActionSpaceMap
+from rl_coach.spaces import BoxActionSpace, DiscreteActionSpace
+
+
+
[docs]class BoxDiscretization(PartialDiscreteActionSpaceMap): + """ + Discretizes a continuous action space into a discrete action space, allowing the usage of + agents such as DQN for continuous environments such as MuJoCo. Given the number of bins to discretize into, the + original continuous action space is uniformly separated into the given number of bins, each mapped to a discrete + action index. Each discrete action is mapped to a single N dimensional action in the BoxActionSpace action space. + For example, if the original actions space is between -1 and 1 and 5 bins were selected, the new action + space will consist of 5 actions mapped to -1, -0.5, 0, 0.5 and 1. + """ + def __init__(self, num_bins_per_dimension: Union[int, List[int]], force_int_bins=False): + """ + :param num_bins_per_dimension: The number of bins to use for each dimension of the target action space. + The bins will be spread out uniformly over this space + :param force_int_bins: force the bins to represent only integer actions. for example, if the action space is in + the range 0-10 and there are 5 bins, then the bins will be placed at 0, 2, 5, 7, 10, + instead of 0, 2.5, 5, 7.5, 10. + """ + # we allow specifying either a single number for all dimensions, or a single number per dimension in the target + # action space + self.num_bins_per_dimension = num_bins_per_dimension + self.force_int_bins = force_int_bins + super().__init__() + + def validate_output_action_space(self, output_action_space: BoxActionSpace): + if not isinstance(output_action_space, BoxActionSpace): + raise ValueError("BoxActionSpace discretization only works with an output space of type BoxActionSpace. " + "The given output space is {}".format(output_action_space)) + + if len(self.num_bins_per_dimension) != output_action_space.shape: + # TODO: this check is not sufficient. it does not deal with actions spaces with more than one axis + raise ValueError("The length of the list of bins per dimension ({}) does not match the number of " + "dimensions in the action space ({})" + .format(len(self.num_bins_per_dimension), output_action_space)) + + def get_unfiltered_action_space(self, output_action_space: BoxActionSpace) -> DiscreteActionSpace: + if isinstance(self.num_bins_per_dimension, int): + self.num_bins_per_dimension = np.ones(output_action_space.shape) * self.num_bins_per_dimension + + bins = [] + for i in range(len(output_action_space.low)): + dim_bins = np.linspace(output_action_space.low[i], output_action_space.high[i], + self.num_bins_per_dimension[i]) + if self.force_int_bins: + dim_bins = dim_bins.astype(int) + bins.append(dim_bins) + self.target_actions = [list(action) for action in list(product(*bins))] + + return super().get_unfiltered_action_space(output_action_space)
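The 5-bin example from the docstring, checked directly with numpy and itertools (same logic as get_unfiltered_action_space, without the rl_coach wrappers):

import numpy as np
from itertools import product

low, high, num_bins = np.array([-1.0]), np.array([1.0]), [5]
bins = [np.linspace(low[i], high[i], num_bins[i]) for i in range(len(low))]
target_actions = [list(a) for a in product(*bins)]
# target_actions == [[-1.0], [-0.5], [0.0], [0.5], [1.0]]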
\ No newline at end of file
diff --git a/docs/_modules/rl_coach/filters/action/box_masking.html b/docs/_modules/rl_coach/filters/action/box_masking.html
new file mode 100644
index 0000000..552de5d
--- /dev/null
+++ b/docs/_modules/rl_coach/filters/action/box_masking.html
@@ -0,0 +1,308 @@
Source code for rl_coach.filters.action.box_masking

+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from typing import Union
+
+import numpy as np
+
+from rl_coach.core_types import ActionType
+from rl_coach.filters.action.action_filter import ActionFilter
+from rl_coach.spaces import BoxActionSpace
+
+
+
[docs]class BoxMasking(ActionFilter): + """ + Masks part of the action space to enforce the agent to work in a defined space. For example, + if the original action space is between -1 and 1, then this filter can be used in order to constrain the agent actions + to the range 0 and 1 instead. This essentially masks the range -1 and 0 from the agent. + The resulting action space will be shifted and will always start from 0 and have the size of the unmasked area. + """ + def __init__(self, + masked_target_space_low: Union[None, int, float, np.ndarray], + masked_target_space_high: Union[None, int, float, np.ndarray]): + """ + :param masked_target_space_low: the lowest values that can be chosen in the target action space + :param masked_target_space_high: the highest values that can be chosen in the target action space + """ + self.masked_target_space_low = masked_target_space_low + self.masked_target_space_high = masked_target_space_high + self.offset = masked_target_space_low + super().__init__() + + def set_masking(self, masked_target_space_low: Union[None, int, float, np.ndarray], + masked_target_space_high: Union[None, int, float, np.ndarray]): + self.masked_target_space_low = masked_target_space_low + self.masked_target_space_high = masked_target_space_high + self.offset = masked_target_space_low + if self.output_action_space: + self.validate_output_action_space(self.output_action_space) + self.input_action_space = BoxActionSpace(self.output_action_space.shape, + low=0, + high=self.masked_target_space_high - self.masked_target_space_low) + + def validate_output_action_space(self, output_action_space: BoxActionSpace): + if not isinstance(output_action_space, BoxActionSpace): + raise ValueError("BoxActionSpace discretization only works with an output space of type BoxActionSpace. " + "The given output space is {}".format(output_action_space)) + if self.masked_target_space_low is None or self.masked_target_space_high is None: + raise ValueError("The masking target space size was not set. Please call set_masking.") + if not (np.all(output_action_space.low <= self.masked_target_space_low) + and np.all(self.masked_target_space_low <= output_action_space.high)): + raise ValueError("The low values for masking the action space ({}) are not within the range of the " + "target space (low = {}, high = {})" + .format(self.masked_target_space_low, output_action_space.low, output_action_space.high)) + if not (np.all(output_action_space.low <= self.masked_target_space_high) + and np.all(self.masked_target_space_high <= output_action_space.high)): + raise ValueError("The high values for masking the action space ({}) are not within the range of the " + "target space (low = {}, high = {})" + .format(self.masked_target_space_high, output_action_space.low, output_action_space.high)) + + def get_unfiltered_action_space(self, output_action_space: BoxActionSpace) -> BoxActionSpace: + self.output_action_space = output_action_space + self.input_action_space = BoxActionSpace(output_action_space.shape, + low=0, + high=self.masked_target_space_high - self.masked_target_space_low) + return self.input_action_space + + def filter(self, action: ActionType) -> ActionType: + return action + self.offset
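A hedged usage sketch of the example in the docstring: the BoxMasking constructor and filter() come from the code above, while the BoxActionSpace(shape, low, high) call mirrors how the space is constructed elsewhere in these modules and should be treated as an assumption.

import numpy as np
from rl_coach.filters.action.box_masking import BoxMasking
from rl_coach.spaces import BoxActionSpace

env_space = BoxActionSpace(1, np.array([-1.0]), np.array([1.0]))   # environment acts in [-1, 1]
masking = BoxMasking(masked_target_space_low=0.0, masked_target_space_high=1.0)

agent_space = masking.get_unfiltered_action_space(env_space)       # agent sees [0, 1]
env_action = masking.filter(np.array([0.25]))                      # shifted back by the offset (0.0 here)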
\ No newline at end of file
diff --git a/docs/_modules/rl_coach/filters/action/full_discrete_action_space_map.html b/docs/_modules/rl_coach/filters/action/full_discrete_action_space_map.html
new file mode 100644
index 0000000..a6c6e9c
--- /dev/null
+++ b/docs/_modules/rl_coach/filters/action/full_discrete_action_space_map.html
@@ -0,0 +1,261 @@
Source code for rl_coach.filters.action.full_discrete_action_space_map

+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from rl_coach.filters.action.partial_discrete_action_space_map import PartialDiscreteActionSpaceMap
+from rl_coach.spaces import ActionSpace, DiscreteActionSpace
+
+
+
[docs]class FullDiscreteActionSpaceMap(PartialDiscreteActionSpaceMap): + """ + Full map of two countable action spaces. This works in a similar way to the + PartialDiscreteActionSpaceMap, but maps the entire source action space into the entire target action space, without + masking any actions. + For example, if there are 10 multiselect actions in the output space, the actions 0-9 will be mapped to those + multiselect actions. + """ + def __init__(self): + super().__init__() + + def get_unfiltered_action_space(self, output_action_space: ActionSpace) -> DiscreteActionSpace: + self.target_actions = output_action_space.actions + return super().get_unfiltered_action_space(output_action_space)
\ No newline at end of file
diff --git a/docs/_modules/rl_coach/filters/action/linear_box_to_box_map.html b/docs/_modules/rl_coach/filters/action/linear_box_to_box_map.html
new file mode 100644
index 0000000..1ae9263
--- /dev/null
+++ b/docs/_modules/rl_coach/filters/action/linear_box_to_box_map.html
@@ -0,0 +1,289 @@
Source code for rl_coach.filters.action.linear_box_to_box_map

+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from typing import Union
+
+import numpy as np
+
+from rl_coach.core_types import ActionType
+from rl_coach.filters.action.action_filter import ActionFilter
+from rl_coach.spaces import BoxActionSpace
+
+
+
[docs]class LinearBoxToBoxMap(ActionFilter): + """ + A linear mapping of two box action spaces. For example, if the action space of the + environment consists of continuous actions between 0 and 1, and we want the agent to choose actions between -1 and 1, + the LinearBoxToBoxMap can be used to map the range -1 and 1 to the range 0 and 1 in a linear way. This means that the + action -1 will be mapped to 0, the action 1 will be mapped to 1, and the rest of the actions will be linearly mapped + between those values. + """ + def __init__(self, + input_space_low: Union[None, int, float, np.ndarray], + input_space_high: Union[None, int, float, np.ndarray]): + """ + :param input_space_low: the low values of the desired action space + :param input_space_high: the high values of the desired action space + """ + self.input_space_low = input_space_low + self.input_space_high = input_space_high + self.rescale = None + self.offset = None + super().__init__() + + def validate_output_action_space(self, output_action_space: BoxActionSpace): + if not isinstance(output_action_space, BoxActionSpace): + raise ValueError("BoxActionSpace discretization only works with an output space of type BoxActionSpace. " + "The given output space is {}".format(output_action_space)) + + def get_unfiltered_action_space(self, output_action_space: BoxActionSpace) -> BoxActionSpace: + self.input_action_space = BoxActionSpace(output_action_space.shape, self.input_space_low, self.input_space_high) + self.rescale = \ + (output_action_space.high - output_action_space.low) / (self.input_space_high - self.input_space_low) + self.offset = output_action_space.low - self.input_space_low + self.output_action_space = output_action_space + return self.input_action_space + + def filter(self, action: ActionType) -> ActionType: + return self.output_action_space.low + (action - self.input_space_low) * self.rescale
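The mapping in filter() reduces to a single affine formula; the snippet below applies it to the [-1, 1] to [0, 1] example from the docstring.

import numpy as np

in_low, in_high = np.array([-1.0]), np.array([1.0])     # agent-facing range
out_low, out_high = np.array([0.0]), np.array([1.0])    # environment range
rescale = (out_high - out_low) / (in_high - in_low)

def to_env(action):
    return out_low + (action - in_low) * rescale

print(to_env(np.array([-1.0])), to_env(np.array([0.0])), to_env(np.array([1.0])))  # [0.] [0.5] [1.]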
\ No newline at end of file
diff --git a/docs/_modules/rl_coach/filters/action/partial_discrete_action_space_map.html b/docs/_modules/rl_coach/filters/action/partial_discrete_action_space_map.html
new file mode 100644
index 0000000..acd17e2
--- /dev/null
+++ b/docs/_modules/rl_coach/filters/action/partial_discrete_action_space_map.html
@@ -0,0 +1,286 @@
Source code for rl_coach.filters.action.partial_discrete_action_space_map

+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from typing import List
+
+from rl_coach.core_types import ActionType
+from rl_coach.filters.action.action_filter import ActionFilter
+from rl_coach.spaces import DiscreteActionSpace, ActionSpace
+
+
+
[docs]class PartialDiscreteActionSpaceMap(ActionFilter): + """ + Partial map of two countable action spaces. For example, consider an environment + with a MultiSelect action space (select multiple actions at the same time, such as jump and go right), with 8 actual + MultiSelect actions. If we want the agent to be able to select only 5 of those actions by their index (0-4), we can + map a discrete action space with 5 actions into the 5 selected MultiSelect actions. This will both allow the agent to + use regular discrete actions, and mask 3 of the actions from the agent. + """ + def __init__(self, target_actions: List[ActionType]=None, descriptions: List[str]=None): + """ + :param target_actions: A partial list of actions from the target space to map to. + :param descriptions: a list of descriptions of each of the actions + """ + self.target_actions = target_actions + self.descriptions = descriptions + super().__init__() + + def validate_output_action_space(self, output_action_space: ActionSpace): + if not self.target_actions: + raise ValueError("The target actions were not set") + for v in self.target_actions: + if not output_action_space.val_matches_space_definition(v): + raise ValueError("The values in the output actions ({}) do not match the output action " + "space definition ({})".format(v, output_action_space)) + + def get_unfiltered_action_space(self, output_action_space: ActionSpace) -> DiscreteActionSpace: + self.output_action_space = output_action_space + self.input_action_space = DiscreteActionSpace(len(self.target_actions), self.descriptions) + return self.input_action_space + + def filter(self, action: ActionType) -> ActionType: + return self.target_actions[action] + + def reverse_filter(self, action: ActionType) -> ActionType: + return [(action == x).all() for x in self.target_actions].index(True)
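A minimal numpy illustration of the lookup performed by filter() and reverse_filter() above, using three hypothetical multiselect actions exposed to the agent as indices 0-2.

import numpy as np

target_actions = [np.array([1, 0]),    # e.g. "jump"
                  np.array([0, 1]),    # e.g. "go right"
                  np.array([1, 1])]    # e.g. "jump right"

def to_env(action_index):              # same lookup as PartialDiscreteActionSpaceMap.filter
    return target_actions[action_index]

def to_agent(target_action):           # same lookup as reverse_filter
    return [(target_action == x).all() for x in target_actions].index(True)

assert to_agent(to_env(2)) == 2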
\ No newline at end of file
diff --git a/docs/_modules/rl_coach/filters/observation/observation_clipping_filter.html b/docs/_modules/rl_coach/filters/observation/observation_clipping_filter.html
new file mode 100644
index 0000000..f750adc
--- /dev/null
+++ b/docs/_modules/rl_coach/filters/observation/observation_clipping_filter.html
@@ -0,0 +1,274 @@
Source code for rl_coach.filters.observation.observation_clipping_filter

+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+import numpy as np
+
+from rl_coach.core_types import ObservationType
+from rl_coach.filters.observation.observation_filter import ObservationFilter
+from rl_coach.spaces import ObservationSpace
+
+
+
[docs]class ObservationClippingFilter(ObservationFilter): + """ + Clips the observation values to a given range of values. + For example, if the observation consists of measurements in an arbitrary range, + and we want to control the minimum and maximum values of these observations, + we can define a range and clip the values of the measurements. + """ + def __init__(self, clipping_low: float=-np.inf, clipping_high: float=np.inf): + """ + :param clipping_low: The minimum value to allow after normalizing the observation + :param clipping_high: The maximum value to allow after normalizing the observation + """ + super().__init__() + self.clip_min = clipping_low + self.clip_max = clipping_high + + def filter(self, observation: ObservationType, update_internal_state: bool=True) -> ObservationType: + observation = np.clip(observation, self.clip_min, self.clip_max) + + return observation + + def get_filtered_observation_space(self, input_observation_space: ObservationSpace) -> ObservationSpace: + return input_observation_space
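The filter body above is a direct np.clip over the observation values, for example:

import numpy as np

observation = np.array([-3.0, 0.2, 7.5])
print(np.clip(observation, -1.0, 1.0))   # [-1.   0.2  1. ]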
\ No newline at end of file
diff --git a/docs/_modules/rl_coach/filters/observation/observation_crop_filter.html b/docs/_modules/rl_coach/filters/observation/observation_crop_filter.html
new file mode 100644
index 0000000..b8f926c
--- /dev/null
+++ b/docs/_modules/rl_coach/filters/observation/observation_crop_filter.html
@@ -0,0 +1,321 @@
Source code for rl_coach.filters.observation.observation_crop_filter

+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+from typing import Union, Tuple
+
+import numpy as np
+
+from rl_coach.core_types import ObservationType
+from rl_coach.filters.observation.observation_filter import ObservationFilter
+from rl_coach.spaces import ObservationSpace
+
+
+
[docs]class ObservationCropFilter(ObservationFilter): + """ + Crops the size of the observation to a given crop window. For example, in Atari, the + observations are images with a shape of 210x160. Usually, we will want to crop the size of the observation to a + square of 160x160 before rescaling them. + """ + def __init__(self, crop_low: np.ndarray=None, crop_high: np.ndarray=None): + """ + :param crop_low: a vector where each dimension describes the start index for cropping the observation in the + corresponding dimension. a negative value of -1 will be mapped to the max size + :param crop_high: a vector where each dimension describes the end index for cropping the observation in the + corresponding dimension. a negative value of -1 will be mapped to the max size + """ + super().__init__() + if crop_low is None and crop_high is None: + raise ValueError("At least one of crop_low and crop_high should be set to a real value. ") + if crop_low is None: + crop_low = np.array([0] * len(crop_high)) + if crop_high is None: + crop_high = np.array([-1] * len(crop_low)) + + self.crop_low = crop_low + self.crop_high = crop_high + + for h, l in zip(crop_high, crop_low): + if h < l and h != -1: + raise ValueError("Some of the cropping low values are higher than cropping high values") + if np.any(crop_high < -1) or np.any(crop_low < -1): + raise ValueError("Cropping values cannot be negative") + if crop_low.shape != crop_high.shape: + raise ValueError("The low values and high values for cropping must have the same number of dimensions") + if crop_low.dtype != int or crop_high.dtype != int: + raise ValueError("The crop values should be int values, instead they are defined as: {} and {}" + .format(crop_low.dtype, crop_high.dtype)) + + def _replace_negative_one_in_crop_size(self, crop_size: np.ndarray, observation_shape: Union[Tuple, np.ndarray]): + # replace -1 with the max size + crop_size = crop_size.copy() + for i in range(len(observation_shape)): + if crop_size[i] == -1: + crop_size[i] = observation_shape[i] + return crop_size + + def validate_input_observation_space(self, input_observation_space: ObservationSpace): + crop_high = self._replace_negative_one_in_crop_size(self.crop_high, input_observation_space.shape) + crop_low = self._replace_negative_one_in_crop_size(self.crop_low, input_observation_space.shape) + if np.any(crop_high > input_observation_space.shape) or \ + np.any(crop_low > input_observation_space.shape): + raise ValueError("The cropping values are outside of the observation space") + if not input_observation_space.is_point_in_space_shape(crop_low) or \ + not input_observation_space.is_point_in_space_shape(crop_high - 1): + raise ValueError("The cropping indices are outside of the observation space") + + def filter(self, observation: ObservationType, update_internal_state: bool=True) -> ObservationType: + # replace -1 with the max size + crop_high = self._replace_negative_one_in_crop_size(self.crop_high, observation.shape) + crop_low = self._replace_negative_one_in_crop_size(self.crop_low, observation.shape) + + # crop + indices = [slice(i, j) for i, j in zip(crop_low, crop_high)] + observation = observation[indices] + return observation + + def get_filtered_observation_space(self, input_observation_space: ObservationSpace) -> ObservationSpace: + # replace -1 with the max size + crop_high = self._replace_negative_one_in_crop_size(self.crop_high, input_observation_space.shape) + crop_low = self._replace_negative_one_in_crop_size(self.crop_low, input_observation_space.shape) + + 
input_observation_space.shape = crop_high - crop_low + return input_observation_space
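The Atari example from the docstring, done with the same slicing logic (the row offset of 25 is a hypothetical choice, not something prescribed by the filter):

import numpy as np

observation = np.zeros((210, 160), dtype=np.uint8)     # raw Atari-sized frame
crop_low, crop_high = np.array([25, 0]), np.array([185, 160])

indices = tuple(slice(i, j) for i, j in zip(crop_low, crop_high))
print(observation[indices].shape)                      # (160, 160)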
\ No newline at end of file
diff --git a/docs/_modules/rl_coach/filters/observation/observation_move_axis_filter.html b/docs/_modules/rl_coach/filters/observation/observation_move_axis_filter.html
new file mode 100644
index 0000000..8a3c193
--- /dev/null
+++ b/docs/_modules/rl_coach/filters/observation/observation_move_axis_filter.html
@@ -0,0 +1,294 @@
Source code for rl_coach.filters.observation.observation_move_axis_filter

+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import numpy as np
+
+from rl_coach.core_types import ObservationType
+from rl_coach.filters.observation.observation_filter import ObservationFilter
+from rl_coach.spaces import ObservationSpace, PlanarMapsObservationSpace
+
+
+
[docs]class ObservationMoveAxisFilter(ObservationFilter): + """ + Reorders the axes of the observation. This can be useful when the observation is an + image, and we want to move the channel axis to be the last axis instead of the first axis. + """ + def __init__(self, axis_origin: int = None, axis_target: int=None): + """ + :param axis_origin: The axis to move + :param axis_target: Where to move the selected axis to + """ + super().__init__() + self.axis_origin = axis_origin + self.axis_target = axis_target + + def validate_input_observation_space(self, input_observation_space: ObservationSpace): + shape = input_observation_space.shape + if not -len(shape) <= self.axis_origin < len(shape) or not -len(shape) <= self.axis_target < len(shape): + raise ValueError("The given axis does not exist in the context of the input observation shape. ") + + def filter(self, observation: ObservationType, update_internal_state: bool=True) -> ObservationType: + return np.moveaxis(observation, self.axis_origin, self.axis_target) + + def get_filtered_observation_space(self, input_observation_space: ObservationSpace) -> ObservationSpace: + axis_size = input_observation_space.shape[self.axis_origin] + input_observation_space.shape = np.delete(input_observation_space.shape, self.axis_origin) + if self.axis_target == -1: + input_observation_space.shape = np.append(input_observation_space.shape, axis_size) + elif self.axis_target < -1: + input_observation_space.shape = np.insert(input_observation_space.shape, self.axis_target+1, axis_size) + else: + input_observation_space.shape = np.insert(input_observation_space.shape, self.axis_target, axis_size) + + # move the channels axis according to the axis change + if isinstance(input_observation_space, PlanarMapsObservationSpace): + if input_observation_space.channels_axis == self.axis_origin: + input_observation_space.channels_axis = self.axis_target + elif input_observation_space.channels_axis == self.axis_target: + input_observation_space.channels_axis = self.axis_origin + elif self.axis_origin < input_observation_space.channels_axis < self.axis_target: + input_observation_space.channels_axis -= 1 + elif self.axis_target < input_observation_space.channels_axis < self.axis_origin: + input_observation_space.channels_axis += 1 + + return input_observation_space
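For example, moving the channel axis of a channels-first image observation to the back, which is exactly what the filter does element-for-element (the 3x84x84 shape is a hypothetical example):

import numpy as np

observation = np.zeros((3, 84, 84))                  # channels-first image
print(np.moveaxis(observation, 0, -1).shape)         # (84, 84, 3)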
\ No newline at end of file
diff --git a/docs/_modules/rl_coach/filters/observation/observation_normalization_filter.html b/docs/_modules/rl_coach/filters/observation/observation_normalization_filter.html
new file mode 100644
index 0000000..bc57068
--- /dev/null
+++ b/docs/_modules/rl_coach/filters/observation/observation_normalization_filter.html
@@ -0,0 +1,302 @@
Source code for rl_coach.filters.observation.observation_normalization_filter

+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+from typing import List
+
+import numpy as np
+
+from rl_coach.architectures.tensorflow_components.shared_variables import SharedRunningStats
+from rl_coach.core_types import ObservationType
+from rl_coach.filters.observation.observation_filter import ObservationFilter
+from rl_coach.spaces import ObservationSpace
+
+
+
[docs]class ObservationNormalizationFilter(ObservationFilter): + """ + Normalizes the observation values with a running mean and standard deviation of + all the observations seen so far. The normalization is performed element-wise. Additionally, when working with + multiple workers, the statistics used for the normalization operation are accumulated over all the workers. + """ + def __init__(self, clip_min: float=-5.0, clip_max: float=5.0, name='observation_stats'): + """ + :param clip_min: The minimum value to allow after normalizing the observation + :param clip_max: The maximum value to allow after normalizing the observation + """ + super().__init__() + self.clip_min = clip_min + self.clip_max = clip_max + self.running_observation_stats = None + self.name = name + self.supports_batching = True + self.observation_space = None + + def set_device(self, device, memory_backend_params=None) -> None: + """ + An optional function that allows the filter to get the device if it is required to use tensorflow ops + :param device: the device to use + :return: None + """ + self.running_observation_stats = SharedRunningStats(device, name=self.name, create_ops=False, + pubsub_params=memory_backend_params) + + def set_session(self, sess) -> None: + """ + An optional function that allows the filter to get the session if it is required to use tensorflow ops + :param sess: the session + :return: None + """ + self.running_observation_stats.set_session(sess) + + def filter(self, observations: List[ObservationType], update_internal_state: bool=True) -> ObservationType: + observations = np.array(observations) + if update_internal_state: + self.running_observation_stats.push(observations) + self.last_mean = self.running_observation_stats.mean + self.last_stdev = self.running_observation_stats.std + + # TODO: make sure that a batch is given here + return self.running_observation_stats.normalize(observations) + + def get_filtered_observation_space(self, input_observation_space: ObservationSpace) -> ObservationSpace: + self.running_observation_stats.create_ops(shape=input_observation_space.shape, + clip_values=(self.clip_min, self.clip_max)) + return input_observation_space
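A minimal single-worker sketch of the element-wise running normalization described above, without the SharedRunningStats/TensorFlow machinery (the epsilon term is an assumption added to avoid division by zero early on):

import numpy as np

clip_min, clip_max = -5.0, 5.0
history = []

def normalize(observation):
    history.append(observation)
    mean = np.mean(history, axis=0)
    std = np.std(history, axis=0) + 1e-8   # epsilon: hypothetical guard, not in the filter above
    return np.clip((observation - mean) / std, clip_min, clip_max)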
\ No newline at end of file
diff --git a/docs/_modules/rl_coach/filters/observation/observation_reduction_by_sub_parts_name_filter.html b/docs/_modules/rl_coach/filters/observation/observation_reduction_by_sub_parts_name_filter.html
new file mode 100644
index 0000000..f997686
--- /dev/null
+++ b/docs/_modules/rl_coach/filters/observation/observation_reduction_by_sub_parts_name_filter.html
@@ -0,0 +1,308 @@
Source code for rl_coach.filters.observation.observation_reduction_by_sub_parts_name_filter

+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import copy
+from enum import Enum
+from typing import List
+
+import numpy as np
+
+from rl_coach.core_types import ObservationType
+from rl_coach.filters.observation.observation_filter import ObservationFilter
+from rl_coach.spaces import ObservationSpace, VectorObservationSpace
+
+
+
[docs]class ObservationReductionBySubPartsNameFilter(ObservationFilter): + """ + Allows keeping only parts of the observation, by specifying their + name. This is useful when the environment has a measurements vector as observation which includes several different + measurements, but you want the agent to only see some of the measurements and not all. + For example, the CARLA environment extracts multiple measurements that can be used by the agent, such as + speed and location. If we want to only use the speed, it can be done using this filter. + This will currently work only for VectorObservationSpace observations + """ + class ReductionMethod(Enum): + Keep = 0 + Discard = 1 + + def __init__(self, part_names: List[str], reduction_method: ReductionMethod): + """ + :param part_names: A list of part names to reduce + :param reduction_method: A reduction method to use - keep or discard the given parts + """ + super().__init__() + self.part_names = part_names + self.reduction_method = reduction_method + self.measurement_names = None + self.indices_to_keep = None + + def filter(self, observation: ObservationType, update_internal_state: bool=True) -> ObservationType: + if not isinstance(observation, np.ndarray): + raise ValueError("All the state values are expected to be numpy arrays") + if self.indices_to_keep is None: + raise ValueError("To use ObservationReductionBySubPartsNameFilter, the get_filtered_observation_space " + "function should be called before filtering an observation") + observation = observation[..., self.indices_to_keep] + return observation + + def validate_input_observation_space(self, input_observation_space: ObservationSpace): + if not isinstance(input_observation_space, VectorObservationSpace): + raise ValueError("The ObservationReductionBySubPartsNameFilter support only VectorObservationSpace " + "observations. The given observation space was: {}" + .format(input_observation_space.__class__)) + + def get_filtered_observation_space(self, input_observation_space: VectorObservationSpace) -> ObservationSpace: + self.measurement_names = copy.copy(input_observation_space.measurements_names) + + if self.reduction_method == self.ReductionMethod.Keep: + input_observation_space.shape[-1] = len(self.part_names) + self.indices_to_keep = [idx for idx, val in enumerate(self.measurement_names) if val in self.part_names] + input_observation_space.measurements_names = copy.copy(self.part_names) + elif self.reduction_method == self.ReductionMethod.Discard: + input_observation_space.shape[-1] -= len(self.part_names) + self.indices_to_keep = [idx for idx, val in enumerate(self.measurement_names) if val not in self.part_names] + input_observation_space.measurements_names = [val for val in input_observation_space.measurements_names if + val not in self.part_names] + else: + raise ValueError("The given reduction method is not supported") + + return input_observation_space
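The CARLA example from the docstring reduced to its core: keep only a speed entry of a measurements vector by index (the measurement names and values are hypothetical).

import numpy as np

measurement_names = ['forward_speed', 'x', 'y']
part_names = ['forward_speed']                       # ReductionMethod.Keep
indices_to_keep = [i for i, name in enumerate(measurement_names) if name in part_names]

observation = np.array([8.3, 120.0, -42.5])
print(observation[..., indices_to_keep])             # [8.3]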
\ No newline at end of file
diff --git a/docs/_modules/rl_coach/filters/observation/observation_rescale_size_by_factor_filter.html b/docs/_modules/rl_coach/filters/observation/observation_rescale_size_by_factor_filter.html
new file mode 100644
index 0000000..3e7f311
--- /dev/null
+++ b/docs/_modules/rl_coach/filters/observation/observation_rescale_size_by_factor_filter.html
@@ -0,0 +1,300 @@
Source code for rl_coach.filters.observation.observation_rescale_size_by_factor_filter

+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from enum import Enum
+
+import scipy.ndimage
+
+from rl_coach.core_types import ObservationType
+from rl_coach.filters.observation.observation_filter import ObservationFilter
+from rl_coach.spaces import ObservationSpace
+
+
+# imresize interpolation types as defined by scipy here:
+# https://docs.scipy.org/doc/scipy-0.18.1/reference/generated/scipy.misc.imresize.html
+class RescaleInterpolationType(Enum):
+    NEAREST = 'nearest'
+    LANCZOS = 'lanczos'
+    BILINEAR = 'bilinear'
+    BICUBIC = 'bicubic'
+    CUBIC = 'cubic'
+
+
+
[docs]class ObservationRescaleSizeByFactorFilter(ObservationFilter): + """ + Rescales an image observation by some factor. For example, the image size + can be reduced by a factor of 2. + Warning: this requires the input observation to be of type uint8 due to scipy requirements! + """ + def __init__(self, rescale_factor: float, rescaling_interpolation_type: RescaleInterpolationType): + """ + :param rescale_factor: the factor by which the observation will be rescaled + :param rescaling_interpolation_type: the interpolation type for rescaling + """ + super().__init__() + self.rescale_factor = float(rescale_factor) # scipy requires float scale factors + self.rescaling_interpolation_type = rescaling_interpolation_type + # TODO: allow selecting the channels dim + + def validate_input_observation_space(self, input_observation_space: ObservationSpace): + if not 2 <= input_observation_space.num_dimensions <= 3: + raise ValueError("The rescale filter only applies to image observations where the number of dimensions is" + "either 2 (grayscale) or 3 (RGB). The number of dimensions defined for the " + "output observation was {}".format(input_observation_space.num_dimensions)) + if input_observation_space.num_dimensions == 3 and input_observation_space.shape[-1] != 3: + raise ValueError("Observations with 3 dimensions must have 3 channels in the last axis (RGB)") + + def filter(self, observation: ObservationType, update_internal_state: bool=True) -> ObservationType: + # scipy works only with uint8 + observation = observation.astype('uint8') + + # rescale + observation = scipy.misc.imresize(observation, + self.rescale_factor, + interp=self.rescaling_interpolation_type.value) + + return observation + + def get_filtered_observation_space(self, input_observation_space: ObservationSpace) -> ObservationSpace: + input_observation_space.shape[:2] = (input_observation_space.shape[:2] * self.rescale_factor).astype('int') + return input_observation_space
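The effect of a 0.5 rescale factor, sketched with scipy.ndimage.zoom rather than the scipy.misc.imresize call used above (imresize was removed in SciPy 1.3, so treat this as an approximate stand-in, not the filter's own code path):

import numpy as np
import scipy.ndimage

observation = np.zeros((210, 160), dtype=np.uint8)
rescaled = scipy.ndimage.zoom(observation, 0.5, order=1)   # order=1 is roughly 'bilinear'
print(rescaled.shape)                                      # (105, 80)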
\ No newline at end of file
diff --git a/docs/_modules/rl_coach/filters/observation/observation_rescale_to_size_filter.html b/docs/_modules/rl_coach/filters/observation/observation_rescale_to_size_filter.html
new file mode 100644
index 0000000..922f0e5
--- /dev/null
+++ b/docs/_modules/rl_coach/filters/observation/observation_rescale_to_size_filter.html
@@ -0,0 +1,326 @@
Source code for rl_coach.filters.observation.observation_rescale_to_size_filter

+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import copy
+from enum import Enum
+
+import numpy as np
+import scipy.ndimage
+
+from rl_coach.core_types import ObservationType
+from rl_coach.filters.observation.observation_filter import ObservationFilter
+from rl_coach.spaces import ObservationSpace, PlanarMapsObservationSpace, ImageObservationSpace
+
+
+# imresize interpolation types as defined by scipy here:
+# https://docs.scipy.org/doc/scipy-0.18.1/reference/generated/scipy.misc.imresize.html
+class RescaleInterpolationType(Enum):
+    NEAREST = 'nearest'
+    LANCZOS = 'lanczos'
+    BILINEAR = 'bilinear'
+    BICUBIC = 'bicubic'
+    CUBIC = 'cubic'
+
+
+
[docs]class ObservationRescaleToSizeFilter(ObservationFilter): + """ + Rescales an image observation to a given size. The target size does not + necessarily keep the aspect ratio of the original observation. + Warning: this requires the input observation to be of type uint8 due to scipy requirements! + """ + def __init__(self, output_observation_space: PlanarMapsObservationSpace, + rescaling_interpolation_type: RescaleInterpolationType=RescaleInterpolationType.BILINEAR): + """ + :param output_observation_space: the output observation space + :param rescaling_interpolation_type: the interpolation type for rescaling + """ + super().__init__() + self.output_observation_space = output_observation_space + self.rescaling_interpolation_type = rescaling_interpolation_type + + if not isinstance(output_observation_space, PlanarMapsObservationSpace): + raise ValueError("The rescale filter only applies to observation spaces that inherit from " + "PlanarMapsObservationSpace. This includes observations which consist of a set of 2D " + "images or an RGB image. Instead the output observation space was defined as: {}" + .format(output_observation_space.__class__)) + + self.planar_map_output_shape = copy.copy(self.output_observation_space.shape) + self.planar_map_output_shape = np.delete(self.planar_map_output_shape, + self.output_observation_space.channels_axis) + + def validate_input_observation_space(self, input_observation_space: ObservationSpace): + if not isinstance(input_observation_space, PlanarMapsObservationSpace): + raise ValueError("The rescale filter only applies to observation spaces that inherit from " + "PlanarMapsObservationSpace. This includes observations which consist of a set of 2D " + "images or an RGB image. Instead the input observation space was defined as: {}" + .format(input_observation_space.__class__)) + if input_observation_space.shape[input_observation_space.channels_axis] \ + != self.output_observation_space.shape[self.output_observation_space.channels_axis]: + raise ValueError("The number of channels between the input and output observation spaces must match. " + "Instead the number of channels were: {}, {}" + .format(input_observation_space.shape[input_observation_space.channels_axis], + self.output_observation_space.shape[self.output_observation_space.channels_axis])) + + def filter(self, observation: ObservationType, update_internal_state: bool=True) -> ObservationType: + # scipy works only with uint8 + observation = observation.astype('uint8') + + # rescale + if isinstance(self.output_observation_space, ImageObservationSpace): + observation = scipy.misc.imresize(observation, + tuple(self.output_observation_space.shape), + interp=self.rescaling_interpolation_type.value) + else: + new_observation = [] + for i in range(self.output_observation_space.shape[self.output_observation_space.channels_axis]): + new_observation.append(scipy.misc.imresize(observation.take(i, self.output_observation_space.channels_axis), + tuple(self.planar_map_output_shape), + interp=self.rescaling_interpolation_type.value)) + new_observation = np.array(new_observation) + observation = new_observation.swapaxes(0, self.output_observation_space.channels_axis) + + return observation + + def get_filtered_observation_space(self, input_observation_space: ObservationSpace) -> ObservationSpace: + input_observation_space.shape = self.output_observation_space.shape + return input_observation_space
\ No newline at end of file
diff --git a/docs/_modules/rl_coach/filters/observation/observation_rgb_to_y_filter.html b/docs/_modules/rl_coach/filters/observation/observation_rgb_to_y_filter.html
new file mode 100644
index 0000000..cf4081b
--- /dev/null
+++ b/docs/_modules/rl_coach/filters/observation/observation_rgb_to_y_filter.html
@@ -0,0 +1,278 @@
+rl_coach.filters.observation.observation_rgb_to_y_filter — Reinforcement Learning Coach 0.11.0 documentation

Source code for rl_coach.filters.observation.observation_rgb_to_y_filter

+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from rl_coach.core_types import ObservationType
+from rl_coach.filters.observation.observation_filter import ObservationFilter
+from rl_coach.spaces import ObservationSpace
+
+
+
[docs]class ObservationRGBToYFilter(ObservationFilter):
+    """
+    Converts a color image observation specified using the RGB encoding into a grayscale
+    image observation, by keeping only the luminance (Y) channel of the YUV encoding. This can be useful if the colors
+    in the original image are not relevant for solving the task at hand.
+    The channels axis is assumed to be the last axis.
+    """
+    def __init__(self):
+        super().__init__()
+
+    def validate_input_observation_space(self, input_observation_space: ObservationSpace):
+        if input_observation_space.num_dimensions != 3:
+            raise ValueError("The RGB to Y filter only applies to image observations where the number of dimensions "
+                             "is 3 (RGB). The number of dimensions defined for the input observation was {}"
+                             .format(input_observation_space.num_dimensions))
+        if input_observation_space.shape[-1] != 3:
+            raise ValueError("The observation space is expected to have 3 channels in the last dimension. The number "
+                             "of channels received is {}".format(input_observation_space.shape[-1]))
+
+    def filter(self, observation: ObservationType, update_internal_state: bool=True) -> ObservationType:
+        # convert RGB to the Y (luminance) channel
+        r, g, b = observation[:, :, 0], observation[:, :, 1], observation[:, :, 2]
+        observation = 0.2989 * r + 0.5870 * g + 0.1140 * b
+
+        return observation
+
+    def get_filtered_observation_space(self, input_observation_space: ObservationSpace) -> ObservationSpace:
+        input_observation_space.shape = input_observation_space.shape[:-1]
+        return input_observation_space
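Since the filter is stateless and its constructor takes no arguments, it can be tried in isolation; a small sketch (the observation shape is chosen arbitrarily):

import numpy as np
from rl_coach.filters.observation.observation_rgb_to_y_filter import ObservationRGBToYFilter

rgb = np.random.randint(0, 256, size=(84, 84, 3)).astype(np.float32)
gray = ObservationRGBToYFilter().filter(rgb)  # weighted sum of the R, G and B channels (BT.601 luma weights)
assert gray.shape == (84, 84)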
\ No newline at end of file
diff --git a/docs/_modules/rl_coach/filters/observation/observation_squeeze_filter.html b/docs/_modules/rl_coach/filters/observation/observation_squeeze_filter.html
new file mode 100644
index 0000000..d9c0445
--- /dev/null
+++ b/docs/_modules/rl_coach/filters/observation/observation_squeeze_filter.html
@@ -0,0 +1,276 @@
+rl_coach.filters.observation.observation_squeeze_filter — Reinforcement Learning Coach 0.11.0 documentation

Source code for rl_coach.filters.observation.observation_squeeze_filter

+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import numpy as np
+
+from rl_coach.core_types import ObservationType
+from rl_coach.filters.observation.observation_filter import ObservationFilter
+from rl_coach.spaces import ObservationSpace
+
+
+
[docs]class ObservationSqueezeFilter(ObservationFilter): + """ + Removes redundant axes from the observation, which are axes with a dimension of 1. + """ + def __init__(self, axis: int = None): + """ + :param axis: Specifies which axis to remove. If set to None, all the axes of size 1 will be removed. + """ + super().__init__() + self.axis = axis + + def validate_input_observation_space(self, input_observation_space: ObservationSpace): + if self.axis is None: + return + + shape = input_observation_space.shape + if self.axis >= len(shape) or self.axis < -len(shape): + raise ValueError("The given axis does not exist in the context of the input observation shape. ") + + def filter(self, observation: ObservationType, update_internal_state: bool=True) -> ObservationType: + return observation.squeeze(axis=self.axis) + + def get_filtered_observation_space(self, input_observation_space: ObservationSpace) -> ObservationSpace: + dummy_tensor = np.random.rand(*tuple(input_observation_space.shape)) + input_observation_space.shape = dummy_tensor.squeeze(axis=self.axis).shape + return input_observation_space
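A quick sketch of the two modes of the filter, using an arbitrary observation shape:

import numpy as np
from rl_coach.filters.observation.observation_squeeze_filter import ObservationSqueezeFilter

observation = np.zeros((1, 84, 84, 1))
squeeze_all = ObservationSqueezeFilter()          # removes every axis of size 1
squeeze_last = ObservationSqueezeFilter(axis=-1)  # removes only the trailing singleton axis
assert squeeze_all.filter(observation).shape == (84, 84)
assert squeeze_last.filter(observation).shape == (1, 84, 84)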
\ No newline at end of file
diff --git a/docs/_modules/rl_coach/filters/observation/observation_stacking_filter.html b/docs/_modules/rl_coach/filters/observation/observation_stacking_filter.html
new file mode 100644
index 0000000..3f6764e
--- /dev/null
+++ b/docs/_modules/rl_coach/filters/observation/observation_stacking_filter.html
@@ -0,0 +1,335 @@
+rl_coach.filters.observation.observation_stacking_filter — Reinforcement Learning Coach 0.11.0 documentation

Source code for rl_coach.filters.observation.observation_stacking_filter

+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import copy
+from collections import deque
+
+import numpy as np
+
+from rl_coach.core_types import ObservationType
+from rl_coach.filters.observation.observation_filter import ObservationFilter
+from rl_coach.spaces import ObservationSpace
+
+
+class LazyStack(object):
+    """
+    A lazy version of np.stack which avoids copying the memory until it is
+    needed.
+    """
+
+    def __init__(self, history, axis=None):
+        self.history = copy.copy(history)
+        self.axis = axis
+
+    def __array__(self, dtype=None):
+        array = np.stack(self.history, axis=self.axis)
+        if dtype is not None:
+            array = array.astype(dtype)
+        return array
+
+
+
[docs]class ObservationStackingFilter(ObservationFilter): + """ + Stacks several observations on top of each other. For image observation this will + create a 3D blob. The stacking is done in a lazy manner in order to reduce memory consumption. To achieve this, + a LazyStack object is used in order to wrap the observations in the stack. For this reason, the + ObservationStackingFilter **must** be the last filter in the inputs filters stack. + This filter is stateful since it stores the previous step result and depends on it. + The filter adds an additional dimension to the output observation. + + Warning!!! The filter replaces the observation with a LazyStack object, so no filters should be + applied after this filter. applying more filters will cause the LazyStack object to be converted to a numpy array + and increase the memory footprint. + """ + def __init__(self, stack_size: int, stacking_axis: int=-1): + """ + :param stack_size: the number of previous observations in the stack + :param stacking_axis: the axis on which to stack the observation on + """ + super().__init__() + self.stack_size = stack_size + self.stacking_axis = stacking_axis + self.stack = [] + + if stack_size <= 0: + raise ValueError("The stack shape must be a positive number") + if type(stack_size) != int: + raise ValueError("The stack shape must be of int type") + + @property + def next_filter(self) -> 'InputFilter': + return self._next_filter + + @next_filter.setter + def next_filter(self, val: 'InputFilter'): + raise ValueError("ObservationStackingFilter can have no other filters after it since they break its " + "functionality") + + def validate_input_observation_space(self, input_observation_space: ObservationSpace): + if len(self.stack) > 0 and not input_observation_space.val_matches_space_definition(self.stack[-1]): + raise ValueError("The given input observation space is different than the observations already stored in" + "the filters memory") + if input_observation_space.num_dimensions <= self.stacking_axis: + raise ValueError("The stacking axis is larger than the number of dimensions in the observation space") + + def filter(self, observation: ObservationType, update_internal_state: bool=True) -> ObservationType: + + if len(self.stack) == 0: + self.stack = deque([observation] * self.stack_size, maxlen=self.stack_size) + else: + if update_internal_state: + self.stack.append(observation) + observation = LazyStack(self.stack, self.stacking_axis) + + return observation + + def get_filtered_observation_space(self, input_observation_space: ObservationSpace) -> ObservationSpace: + if self.stacking_axis == -1: + input_observation_space.shape = np.append(input_observation_space.shape, values=[self.stack_size], axis=0) + else: + input_observation_space.shape = np.insert(input_observation_space.shape, obj=self.stacking_axis, + values=[self.stack_size], axis=0) + return input_observation_space + + def reset(self) -> None: + self.stack = []
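A sketch of how the stacking behaves on raw numpy frames, outside a full InputFilter pipeline (frame shapes are arbitrary):

import numpy as np
from rl_coach.filters.observation.observation_stacking_filter import ObservationStackingFilter

stacking = ObservationStackingFilter(stack_size=4, stacking_axis=-1)

# the first frame is replicated to fill the stack; later frames push the oldest one out
stacking.filter(np.zeros((84, 84)))
stacked = stacking.filter(np.ones((84, 84)))

# the LazyStack only materializes into a real array when it is converted
assert np.array(stacked).shape == (84, 84, 4)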
\ No newline at end of file
diff --git a/docs/_modules/rl_coach/filters/observation/observation_to_uint8_filter.html b/docs/_modules/rl_coach/filters/observation/observation_to_uint8_filter.html
new file mode 100644
index 0000000..30f9317
--- /dev/null
+++ b/docs/_modules/rl_coach/filters/observation/observation_to_uint8_filter.html
@@ -0,0 +1,292 @@
+rl_coach.filters.observation.observation_to_uint8_filter — Reinforcement Learning Coach 0.11.0 documentation

Source code for rl_coach.filters.observation.observation_to_uint8_filter

+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import numpy as np
+
+from rl_coach.core_types import ObservationType
+from rl_coach.filters.observation.observation_filter import ObservationFilter
+from rl_coach.spaces import ObservationSpace
+
+
+
[docs]class ObservationToUInt8Filter(ObservationFilter): + """ + Converts a floating point observation into an unsigned int 8 bit observation. This is + mostly useful for reducing memory consumption and is usually used for image observations. The filter will first + spread the observation values over the range 0-255 and then discretize them into integer values. + """ + def __init__(self, input_low: float, input_high: float): + """ + :param input_low: The lowest value currently present in the observation + :param input_high: The highest value currently present in the observation + """ + super().__init__() + self.input_low = input_low + self.input_high = input_high + + if input_high <= input_low: + raise ValueError("The input observation space high values can be less or equal to the input observation " + "space low values") + + def validate_input_observation_space(self, input_observation_space: ObservationSpace): + if np.all(input_observation_space.low != self.input_low) or \ + np.all(input_observation_space.high != self.input_high): + raise ValueError("The observation space values range don't match the configuration of the filter." + "The configuration is: low = {}, high = {}. The actual values are: low = {}, high = {}" + .format(self.input_low, self.input_high, + input_observation_space.low, input_observation_space.high)) + + def filter(self, observation: ObservationType, update_internal_state: bool=True) -> ObservationType: + # scale to 0-1 + observation = (observation - self.input_low) / (self.input_high - self.input_low) + + # scale to 0-255 + observation *= 255 + + observation = observation.astype('uint8') + + return observation + + def get_filtered_observation_space(self, input_observation_space: ObservationSpace) -> ObservationSpace: + input_observation_space.low = 0 + input_observation_space.high = 255 + return input_observation_space
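The scaling itself is easy to verify on a toy array; a short sketch (input range chosen arbitrarily):

import numpy as np
from rl_coach.filters.observation.observation_to_uint8_filter import ObservationToUInt8Filter

to_uint8 = ObservationToUInt8Filter(input_low=0.0, input_high=1.0)
converted = to_uint8.filter(np.array([[0.0, 0.5, 1.0]]))

# values are spread over 0-255 and then truncated to integers
assert converted.dtype == np.uint8
assert converted.tolist() == [[0, 127, 255]]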
\ No newline at end of file
diff --git a/docs/_modules/rl_coach/filters/reward/reward_clipping_filter.html b/docs/_modules/rl_coach/filters/reward/reward_clipping_filter.html
new file mode 100644
index 0000000..9bc744f
--- /dev/null
+++ b/docs/_modules/rl_coach/filters/reward/reward_clipping_filter.html
@@ -0,0 +1,281 @@
+rl_coach.filters.reward.reward_clipping_filter — Reinforcement Learning Coach 0.11.0 documentation

Source code for rl_coach.filters.reward.reward_clipping_filter

+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import numpy as np
+
+from rl_coach.core_types import RewardType
+from rl_coach.filters.reward.reward_filter import RewardFilter
+from rl_coach.spaces import RewardSpace
+
+
+
[docs]class RewardClippingFilter(RewardFilter): + """ + Clips the reward values into a given range. For example, in DQN, the Atari rewards are + clipped into the range -1 and 1 in order to control the scale of the returns. + """ + def __init__(self, clipping_low: float=-np.inf, clipping_high: float=np.inf): + """ + :param clipping_low: The low threshold for reward clipping + :param clipping_high: The high threshold for reward clipping + """ + super().__init__() + self.clipping_low = clipping_low + self.clipping_high = clipping_high + + if clipping_low > clipping_high: + raise ValueError("The reward clipping low must be lower than the reward clipping max") + + def filter(self, reward: RewardType, update_internal_state: bool=True) -> RewardType: + reward = float(reward) + + if self.clipping_high: + reward = min(reward, self.clipping_high) + if self.clipping_low: + reward = max(reward, self.clipping_low) + + return reward + + def get_filtered_reward_space(self, input_reward_space: RewardSpace) -> RewardSpace: + input_reward_space.high = min(self.clipping_high, input_reward_space.high) + input_reward_space.low = max(self.clipping_low, input_reward_space.low) + return input_reward_space
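A minimal sketch of the DQN-style clipping described above:

from rl_coach.filters.reward.reward_clipping_filter import RewardClippingFilter

clipping = RewardClippingFilter(clipping_low=-1.0, clipping_high=1.0)
assert clipping.filter(7.5) == 1.0    # large positive rewards are clipped to the high threshold
assert clipping.filter(-0.3) == -0.3  # rewards inside the range pass through unchanged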
\ No newline at end of file
diff --git a/docs/_modules/rl_coach/filters/reward/reward_normalization_filter.html b/docs/_modules/rl_coach/filters/reward/reward_normalization_filter.html
new file mode 100644
index 0000000..a391dca
--- /dev/null
+++ b/docs/_modules/rl_coach/filters/reward/reward_normalization_filter.html
@@ -0,0 +1,297 @@
+rl_coach.filters.reward.reward_normalization_filter — Reinforcement Learning Coach 0.11.0 documentation

Source code for rl_coach.filters.reward.reward_normalization_filter

+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+import numpy as np
+
+from rl_coach.architectures.tensorflow_components.shared_variables import SharedRunningStats
+from rl_coach.core_types import RewardType
+from rl_coach.filters.reward.reward_filter import RewardFilter
+from rl_coach.spaces import RewardSpace
+
+
+
[docs]class RewardNormalizationFilter(RewardFilter): + """ + Normalizes the reward values with a running mean and standard deviation of + all the rewards seen so far. When working with multiple workers, the statistics used for the normalization operation + are accumulated over all the workers. + """ + def __init__(self, clip_min: float=-5.0, clip_max: float=5.0): + """ + :param clip_min: The minimum value to allow after normalizing the reward + :param clip_max: The maximum value to allow after normalizing the reward + """ + super().__init__() + self.clip_min = clip_min + self.clip_max = clip_max + self.running_rewards_stats = None + + def set_device(self, device, memory_backend_params=None) -> None: + """ + An optional function that allows the filter to get the device if it is required to use tensorflow ops + :param device: the device to use + :return: None + """ + self.running_rewards_stats = SharedRunningStats(device, name='rewards_stats', + pubsub_params=memory_backend_params) + + def set_session(self, sess) -> None: + """ + An optional function that allows the filter to get the session if it is required to use tensorflow ops + :param sess: the session + :return: None + """ + self.running_rewards_stats.set_session(sess) + + def filter(self, reward: RewardType, update_internal_state: bool=True) -> RewardType: + if update_internal_state: + self.running_rewards_stats.push(reward) + + reward = (reward - self.running_rewards_stats.mean) / \ + (self.running_rewards_stats.std + 1e-15) + reward = np.clip(reward, self.clip_min, self.clip_max) + + return reward + + def get_filtered_reward_space(self, input_reward_space: RewardSpace) -> RewardSpace: + return input_reward_space
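Because the running statistics are backed by the TensorFlow-based SharedRunningStats (and require set_device/set_session in a real preset), a fully self-contained example is awkward; as a purely illustrative numpy stand-in for the arithmetic performed in filter():

import numpy as np

rewards_seen = np.array([1.0, 3.0, -2.0, 0.5])  # stand-in for the accumulated running statistics
mean, std = rewards_seen.mean(), rewards_seen.std()

new_reward = 4.0
normalized = np.clip((new_reward - mean) / (std + 1e-15), -5.0, 5.0)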
\ No newline at end of file
diff --git a/docs/_modules/rl_coach/filters/reward/reward_rescale_filter.html b/docs/_modules/rl_coach/filters/reward/reward_rescale_filter.html
new file mode 100644
index 0000000..f24abb6
--- /dev/null
+++ b/docs/_modules/rl_coach/filters/reward/reward_rescale_filter.html
@@ -0,0 +1,271 @@
+rl_coach.filters.reward.reward_rescale_filter — Reinforcement Learning Coach 0.11.0 documentation

Source code for rl_coach.filters.reward.reward_rescale_filter

+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from rl_coach.core_types import RewardType
+from rl_coach.filters.reward.reward_filter import RewardFilter
+from rl_coach.spaces import RewardSpace
+
+
+
[docs]class RewardRescaleFilter(RewardFilter): + """ + Rescales the reward by a given factor. Rescaling the rewards of the environment has been + observed to have a large effect (negative or positive) on the behavior of the learning process. + """ + def __init__(self, rescale_factor: float): + """ + :param rescale_factor: The reward rescaling factor by which the reward will be multiplied + """ + super().__init__() + self.rescale_factor = rescale_factor + + if rescale_factor == 0: + raise ValueError("The reward rescale value can not be set to 0") + + def filter(self, reward: RewardType, update_internal_state: bool=True) -> RewardType: + reward = float(reward) * self.rescale_factor + return reward + + def get_filtered_reward_space(self, input_reward_space: RewardSpace) -> RewardSpace: + input_reward_space.high = input_reward_space.high * self.rescale_factor + input_reward_space.low = input_reward_space.low * self.rescale_factor + return input_reward_space
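A one-line sanity check of the rescaling:

from rl_coach.filters.reward.reward_rescale_filter import RewardRescaleFilter

rescale = RewardRescaleFilter(rescale_factor=1 / 100.0)
assert rescale.filter(250.0) == 2.5  # the reward is multiplied by the rescale factor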
\ No newline at end of file
diff --git a/docs/_modules/rl_coach/memories/episodic/episodic_experience_replay.html b/docs/_modules/rl_coach/memories/episodic/episodic_experience_replay.html
new file mode 100644
index 0000000..0461fc3
--- /dev/null
+++ b/docs/_modules/rl_coach/memories/episodic/episodic_experience_replay.html
@@ -0,0 +1,535 @@
+rl_coach.memories.episodic.episodic_experience_replay — Reinforcement Learning Coach 0.11.0 documentation

Source code for rl_coach.memories.episodic.episodic_experience_replay

+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from typing import List, Tuple, Union, Dict, Any
+
+import numpy as np
+
+from rl_coach.core_types import Transition, Episode
+from rl_coach.memories.memory import Memory, MemoryGranularity, MemoryParameters
+from rl_coach.utils import ReaderWriterLock
+
+
+class EpisodicExperienceReplayParameters(MemoryParameters):
+    def __init__(self):
+        super().__init__()
+        self.max_size = (MemoryGranularity.Transitions, 1000000)
+        self.n_step = -1
+
+    @property
+    def path(self):
+        return 'rl_coach.memories.episodic.episodic_experience_replay:EpisodicExperienceReplay'
+
+
+
[docs]class EpisodicExperienceReplay(Memory): + """ + A replay buffer that stores episodes of transitions. The additional structure allows performing various + calculations of total return and other values that depend on the sequential behavior of the transitions + in the episode. + """ + def __init__(self, max_size: Tuple[MemoryGranularity, int]=(MemoryGranularity.Transitions, 1000000), n_step=-1): + """ + :param max_size: the maximum number of transitions or episodes to hold in the memory + """ + super().__init__(max_size) + self.n_step = n_step + self._buffer = [Episode(n_step=self.n_step)] # list of episodes + self.transitions = [] + self._length = 1 # the episodic replay buffer starts with a single empty episode + self._num_transitions = 0 + self._num_transitions_in_complete_episodes = 0 + self.reader_writer_lock = ReaderWriterLock() + + def length(self, lock: bool=False) -> int: + """ + Get the number of episodes in the ER (even if they are not complete) + """ + length = self._length + if self._length is not 0 and self._buffer[-1].is_empty(): + length = self._length - 1 + + return length + + def num_complete_episodes(self): + """ Get the number of complete episodes in ER """ + length = self._length - 1 + + return length + + def num_transitions(self): + return self._num_transitions + + def num_transitions_in_complete_episodes(self): + return self._num_transitions_in_complete_episodes + + def sample(self, size: int) -> List[Transition]: + """ + Sample a batch of transitions form the replay buffer. If the requested size is larger than the number + of samples available in the replay buffer then the batch will return empty. + :param size: the size of the batch to sample + :return: a batch (list) of selected transitions from the replay buffer + """ + self.reader_writer_lock.lock_writing() + + if self.num_complete_episodes() >= 1: + transitions_idx = np.random.randint(self.num_transitions_in_complete_episodes(), size=size) + batch = [self.transitions[i] for i in transitions_idx] + + else: + raise ValueError("The episodic replay buffer cannot be sampled since there are no complete episodes yet. " + "There is currently 1 episodes with {} transitions".format(self._buffer[0].length())) + + self.reader_writer_lock.release_writing() + + return batch + + def _enforce_max_length(self) -> None: + """ + Make sure that the size of the replay buffer does not pass the maximum size allowed. + If it passes the max size, the oldest episode in the replay buffer will be removed. 
+ :return: None + """ + granularity, size = self.max_size + if granularity == MemoryGranularity.Transitions: + while size != 0 and self.num_transitions() > size: + self._remove_episode(0) + elif granularity == MemoryGranularity.Episodes: + while self.length() > size: + self._remove_episode(0) + + def _update_episode(self, episode: Episode) -> None: + episode.update_transitions_rewards_and_bootstrap_data() + + def verify_last_episode_is_closed(self) -> None: + """ + Verify that there is no open episodes in the replay buffer + :return: None + """ + self.reader_writer_lock.lock_writing_and_reading() + + last_episode = self.get(-1, False) + if last_episode and last_episode.length() > 0: + self.close_last_episode(lock=False) + + self.reader_writer_lock.release_writing_and_reading() + + def close_last_episode(self, lock=True) -> None: + """ + Close the last episode in the replay buffer and open a new one + :return: None + """ + if lock: + self.reader_writer_lock.lock_writing_and_reading() + + last_episode = self._buffer[-1] + + self._num_transitions_in_complete_episodes += last_episode.length() + self._length += 1 + + # create a new Episode for the next transitions to be placed into + self._buffer.append(Episode(n_step=self.n_step)) + + # if update episode adds to the buffer, a new Episode needs to be ready first + # it would be better if this were less state full + self._update_episode(last_episode) + + self._enforce_max_length() + + if lock: + self.reader_writer_lock.release_writing_and_reading() + + def store(self, transition: Transition) -> None: + """ + Store a new transition in the memory. If the transition game_over flag is on, this closes the episode and + creates a new empty episode. + Warning! using the episodic memory by storing individual transitions instead of episodes will use the default + Episode class parameters in order to create new episodes. + :param transition: a transition to store + :return: None + """ + + # Calling super.store() so that in case a memory backend is used, the memory backend can store this transition. + super().store(transition) + + self.reader_writer_lock.lock_writing_and_reading() + + if len(self._buffer) == 0: + self._buffer.append(Episode(n_step=self.n_step)) + last_episode = self._buffer[-1] + last_episode.insert(transition) + self.transitions.append(transition) + self._num_transitions += 1 + if transition.game_over: + self.close_last_episode(False) + + self._enforce_max_length() + + self.reader_writer_lock.release_writing_and_reading() + + def store_episode(self, episode: Episode, lock: bool=True) -> None: + """ + Store a new episode in the memory. + :param episode: the new episode to store + :return: None + """ + # Calling super.store() so that in case a memory backend is used, the memory backend can store this episode. + super().store_episode(episode) + + if lock: + self.reader_writer_lock.lock_writing_and_reading() + + if self._buffer[-1].length() == 0: + self._buffer[-1] = episode + else: + self._buffer.append(episode) + self.transitions.extend(episode.transitions) + self._num_transitions += episode.length() + self.close_last_episode(False) + + if lock: + self.reader_writer_lock.release_writing_and_reading() + + def get_episode(self, episode_index: int, lock: bool=True) -> Union[None, Episode]: + """ + Returns the episode in the given index. If the episode does not exist, returns None instead. 
+ :param episode_index: the index of the episode to return + :return: the corresponding episode + """ + if lock: + self.reader_writer_lock.lock_writing() + + if self.length() == 0 or episode_index >= self.length(): + episode = None + else: + episode = self._buffer[episode_index] + + if lock: + self.reader_writer_lock.release_writing() + return episode + + def _remove_episode(self, episode_index: int) -> None: + """ + Remove the episode in the given index (even if it is not complete yet) + :param episode_index: the index of the episode to remove + :return: None + """ + if len(self._buffer) > episode_index: + episode_length = self._buffer[episode_index].length() + self._length -= 1 + self._num_transitions -= episode_length + self._num_transitions_in_complete_episodes -= episode_length + del self.transitions[:episode_length] + del self._buffer[episode_index] + + def remove_episode(self, episode_index: int) -> None: + """ + Remove the episode in the given index (even if it is not complete yet) + :param episode_index: the index of the episode to remove + :return: None + """ + self.reader_writer_lock.lock_writing_and_reading() + + self._remove_episode(episode_index) + + self.reader_writer_lock.release_writing_and_reading() + + # for API compatibility + def get(self, episode_index: int, lock: bool=True) -> Union[None, Episode]: + """ + Returns the episode in the given index. If the episode does not exist, returns None instead. + :param episode_index: the index of the episode to return + :return: the corresponding episode + """ + return self.get_episode(episode_index, lock) + + def get_last_complete_episode(self) -> Union[None, Episode]: + """ + Returns the last complete episode in the memory or None if there are no complete episodes + :return: None or the last complete episode + """ + self.reader_writer_lock.lock_writing() + + last_complete_episode_index = self.num_complete_episodes() - 1 + episode = None + if last_complete_episode_index >= 0: + episode = self.get(last_complete_episode_index) + + self.reader_writer_lock.release_writing() + + return episode + + # for API compatibility + def remove(self, episode_index: int): + """ + Remove the episode in the given index (even if it is not complete yet) + :param episode_index: the index of the episode to remove + :return: None + """ + self.remove_episode(episode_index) + + def clean(self) -> None: + """ + Clean the memory by removing all the episodes + :return: None + """ + self.reader_writer_lock.lock_writing_and_reading() + + self.transitions = [] + self._buffer = [Episode(n_step=self.n_step)] + self._length = 1 + self._num_transitions = 0 + self._num_transitions_in_complete_episodes = 0 + + self.reader_writer_lock.release_writing_and_reading() + + def mean_reward(self) -> np.ndarray: + """ + Get the mean reward in the replay buffer + :return: the mean reward + """ + self.reader_writer_lock.lock_writing() + + mean = np.mean([transition.reward for transition in self.transitions]) + + self.reader_writer_lock.release_writing() + return mean
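A rough usage sketch; note that the Transition keyword arguments below are assumed for illustration and should be checked against rl_coach.core_types:

from rl_coach.core_types import Transition
from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplay
from rl_coach.memories.memory import MemoryGranularity

memory = EpisodicExperienceReplay(max_size=(MemoryGranularity.Transitions, 1000))

# storing transitions one by one; the game_over flag closes the episode
for step in range(5):
    memory.store(Transition(state={'observation': [float(step)]}, action=0, reward=1.0,
                            game_over=(step == 4)))

assert memory.num_complete_episodes() == 1
batch = memory.sample(2)  # sampling only draws from complete episodes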
\ No newline at end of file
diff --git a/docs/_modules/rl_coach/memories/episodic/episodic_hindsight_experience_replay.html b/docs/_modules/rl_coach/memories/episodic/episodic_hindsight_experience_replay.html
new file mode 100644
index 0000000..84f03f5
--- /dev/null
+++ b/docs/_modules/rl_coach/memories/episodic/episodic_hindsight_experience_replay.html
@@ -0,0 +1,375 @@
+rl_coach.memories.episodic.episodic_hindsight_experience_replay — Reinforcement Learning Coach 0.11.0 documentation

Source code for rl_coach.memories.episodic.episodic_hindsight_experience_replay

+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import copy
+from enum import Enum
+from typing import Tuple, List
+
+import numpy as np
+
+from rl_coach.core_types import Episode, Transition
+from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters, \
+    EpisodicExperienceReplay
+from rl_coach.memories.non_episodic.experience_replay import MemoryGranularity
+from rl_coach.spaces import GoalsSpace
+
+
+class HindsightGoalSelectionMethod(Enum):
+    Future = 0
+    Final = 1
+    Episode = 2
+    Random = 3
+
+
+class EpisodicHindsightExperienceReplayParameters(EpisodicExperienceReplayParameters):
+    def __init__(self):
+        super().__init__()
+        self.hindsight_transitions_per_regular_transition = None
+        self.hindsight_goal_selection_method = None
+        self.goals_space = None
+
+    @property
+    def path(self):
+        return 'rl_coach.memories.episodic.episodic_hindsight_experience_replay:EpisodicHindsightExperienceReplay'
+
+
+
[docs]class EpisodicHindsightExperienceReplay(EpisodicExperienceReplay): + """ + Implements Hindsight Experience Replay as described in the following paper: https://arxiv.org/pdf/1707.01495.pdf + + """ + def __init__(self, max_size: Tuple[MemoryGranularity, int], + hindsight_transitions_per_regular_transition: int, + hindsight_goal_selection_method: HindsightGoalSelectionMethod, + goals_space: GoalsSpace): + """ + :param max_size: The maximum size of the memory. should be defined in a granularity of Transitions + :param hindsight_transitions_per_regular_transition: The number of hindsight artificial transitions to generate + for each actual transition + :param hindsight_goal_selection_method: The method that will be used for generating the goals for the + hindsight transitions. Should be one of HindsightGoalSelectionMethod + :param goals_space: A GoalsSpace which defines the base properties of the goals space + """ + super().__init__(max_size) + + self.hindsight_transitions_per_regular_transition = hindsight_transitions_per_regular_transition + self.hindsight_goal_selection_method = hindsight_goal_selection_method + self.goals_space = goals_space + self.last_episode_start_idx = 0 + + def _sample_goal(self, episode_transitions: List, transition_index: int): + """ + Sample a single goal state according to the sampling method + :param episode_transitions: a list of all the transitions in the current episode + :param transition_index: the transition to start sampling from + :return: a goal corresponding to the sampled state + """ + if self.hindsight_goal_selection_method == HindsightGoalSelectionMethod.Future: + # states that were observed in the same episode after the transition that is being replayed + selected_transition = np.random.choice(episode_transitions[transition_index+1:]) + elif self.hindsight_goal_selection_method == HindsightGoalSelectionMethod.Final: + # the final state in the episode + selected_transition = episode_transitions[-1] + elif self.hindsight_goal_selection_method == HindsightGoalSelectionMethod.Episode: + # a random state from the episode + selected_transition = np.random.choice(episode_transitions) + elif self.hindsight_goal_selection_method == HindsightGoalSelectionMethod.Random: + # a random state from the entire replay buffer + selected_transition = np.random.choice(self.transitions) + else: + raise ValueError("Invalid goal selection method was used for the hindsight goal selection") + return self.goals_space.goal_from_state(selected_transition.state) + + def _sample_goals(self, episode_transitions: List, transition_index: int): + """ + Sample a batch of goal states according to the sampling method + :param episode_transitions: a list of all the transitions in the current episode + :param transition_index: the transition to start sampling from + :return: a goal corresponding to the sampled state + """ + return [ + self._sample_goal(episode_transitions, transition_index) + for _ in range(self.hindsight_transitions_per_regular_transition) + ] + + def store_episode(self, episode: Episode, lock: bool=True) -> None: + # generate hindsight transitions only when an episode is finished + last_episode_transitions = copy.copy(episode.transitions) + + # cannot create a future hindsight goal in the last transition of an episode + if self.hindsight_goal_selection_method == HindsightGoalSelectionMethod.Future: + relevant_base_transitions = last_episode_transitions[:-1] + else: + relevant_base_transitions = last_episode_transitions + + # for each transition in the last episode, 
create a set of hindsight transitions + for transition_index, transition in enumerate(relevant_base_transitions): + sampled_goals = self._sample_goals(last_episode_transitions, transition_index) + for goal in sampled_goals: + hindsight_transition = copy.copy(transition) + + if hindsight_transition.state['desired_goal'].shape != goal.shape: + raise ValueError(( + 'goal shape {goal_shape} already in transition is ' + 'different than the one sampled as a hindsight goal ' + '{hindsight_goal_shape}.' + ).format( + goal_shape=hindsight_transition.state['desired_goal'].shape, + hindsight_goal_shape=goal.shape, + )) + + # update the goal in the transition + hindsight_transition.state['desired_goal'] = goal + hindsight_transition.next_state['desired_goal'] = goal + + # update the reward and terminal signal according to the goal + hindsight_transition.reward, hindsight_transition.game_over = \ + self.goals_space.get_reward_for_goal_and_state(goal, hindsight_transition.next_state) + + hindsight_transition.n_step_discounted_rewards = None + episode.insert(hindsight_transition) + + super().store_episode(episode) + + def store(self, transition: Transition): + raise ValueError("An episodic HER cannot store a single transition. Only full episodes are to be stored.")
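A construction sketch with typical HER parameters; the GoalsSpace is environment specific, so it is passed in from the outside here rather than built inline:

from rl_coach.memories.episodic.episodic_hindsight_experience_replay import (
    EpisodicHindsightExperienceReplay, HindsightGoalSelectionMethod)
from rl_coach.memories.memory import MemoryGranularity


def build_her_memory(goals_space):
    # goals_space: an rl_coach.spaces.GoalsSpace configured for the task (construction omitted here)
    return EpisodicHindsightExperienceReplay(
        max_size=(MemoryGranularity.Transitions, 100000),
        hindsight_transitions_per_regular_transition=4,  # 4 artificial transitions per real one is a common choice
        hindsight_goal_selection_method=HindsightGoalSelectionMethod.Future,
        goals_space=goals_space)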
\ No newline at end of file
diff --git a/docs/_modules/rl_coach/memories/episodic/episodic_hrl_hindsight_experience_replay.html b/docs/_modules/rl_coach/memories/episodic/episodic_hrl_hindsight_experience_replay.html
new file mode 100644
index 0000000..b0b1c77
--- /dev/null
+++ b/docs/_modules/rl_coach/memories/episodic/episodic_hrl_hindsight_experience_replay.html
@@ -0,0 +1,300 @@
+rl_coach.memories.episodic.episodic_hrl_hindsight_experience_replay — Reinforcement Learning Coach 0.11.0 documentation

Source code for rl_coach.memories.episodic.episodic_hrl_hindsight_experience_replay

+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from typing import Tuple
+
+from rl_coach.core_types import Episode, Transition
+from rl_coach.memories.episodic.episodic_hindsight_experience_replay import HindsightGoalSelectionMethod, \
+    EpisodicHindsightExperienceReplay, EpisodicHindsightExperienceReplayParameters
+from rl_coach.memories.non_episodic.experience_replay import MemoryGranularity
+from rl_coach.spaces import GoalsSpace
+
+
+class EpisodicHRLHindsightExperienceReplayParameters(EpisodicHindsightExperienceReplayParameters):
+    def __init__(self):
+        super().__init__()
+
+    @property
+    def path(self):
+        return 'rl_coach.memories.episodic.episodic_hrl_hindsight_experience_replay:EpisodicHRLHindsightExperienceReplay'
+
+
+
[docs]class EpisodicHRLHindsightExperienceReplay(EpisodicHindsightExperienceReplay): + """ + Implements HRL Hindsight Experience Replay as described in the following paper: https://arxiv.org/abs/1805.08180 + + This is the memory you should use if you want a shared hindsight experience replay buffer between multiple workers + """ + def __init__(self, max_size: Tuple[MemoryGranularity, int], + hindsight_transitions_per_regular_transition: int, + hindsight_goal_selection_method: HindsightGoalSelectionMethod, + goals_space: GoalsSpace, + ): + """ + :param max_size: The maximum size of the memory. should be defined in a granularity of Transitions + :param hindsight_transitions_per_regular_transition: The number of hindsight artificial transitions to generate + for each actual transition + :param hindsight_goal_selection_method: The method that will be used for generating the goals for the + hindsight transitions. Should be one of HindsightGoalSelectionMethod + :param goals_space: A GoalsSpace which defines the properties of the goals + :param do_action_hindsight: Replace the action (sub-goal) given to a lower layer, with the actual achieved goal + """ + super().__init__(max_size, hindsight_transitions_per_regular_transition, hindsight_goal_selection_method, + goals_space) + + def store_episode(self, episode: Episode, lock: bool=True) -> None: + # for a layer producing sub-goals, we will replace in hindsight the action (sub-goal) given to the lower + # level with the actual achieved goal. the achieved goal (and observation) seen is assumed to be the same + # for all levels - we can use this level's achieved goal instead of the lower level's one + + # Calling super.store() so that in case a memory backend is used, the memory backend can store this episode. + super().store_episode(episode) + + for transition in episode.transitions: + new_achieved_goal = transition.next_state[self.goals_space.goal_name] + transition.action = new_achieved_goal + + super().store_episode(episode) + + def store(self, transition: Transition): + raise ValueError("An episodic HER cannot store a single transition. Only full episodes are to be stored.")
\ No newline at end of file
diff --git a/docs/_modules/rl_coach/memories/episodic/single_episode_buffer.html b/docs/_modules/rl_coach/memories/episodic/single_episode_buffer.html
new file mode 100644
index 0000000..844f643
--- /dev/null
+++ b/docs/_modules/rl_coach/memories/episodic/single_episode_buffer.html
@@ -0,0 +1,260 @@
+rl_coach.memories.episodic.single_episode_buffer — Reinforcement Learning Coach 0.11.0 documentation

Source code for rl_coach.memories.episodic.single_episode_buffer

+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplay
+from rl_coach.memories.memory import MemoryGranularity, MemoryParameters
+
+
+class SingleEpisodeBufferParameters(MemoryParameters):
+    def __init__(self):
+        super().__init__()
+        del self.max_size
+
+    @property
+    def path(self):
+        return 'rl_coach.memories.episodic.single_episode_buffer:SingleEpisodeBuffer'
+
+
+
[docs]class SingleEpisodeBuffer(EpisodicExperienceReplay):
+    def __init__(self):
+        super().__init__((MemoryGranularity.Episodes, 1))
\ No newline at end of file
diff --git a/docs/_modules/rl_coach/memories/non_episodic/balanced_experience_replay.html b/docs/_modules/rl_coach/memories/non_episodic/balanced_experience_replay.html
new file mode 100644
index 0000000..c1fdddc
--- /dev/null
+++ b/docs/_modules/rl_coach/memories/non_episodic/balanced_experience_replay.html
@@ -0,0 +1,400 @@
+rl_coach.memories.non_episodic.balanced_experience_replay — Reinforcement Learning Coach 0.11.0 documentation

Source code for rl_coach.memories.non_episodic.balanced_experience_replay

+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import operator
+import random
+from enum import Enum
+from typing import List, Tuple, Any, Union
+
+import numpy as np
+
+from rl_coach.core_types import Transition
+from rl_coach.memories.memory import MemoryGranularity
+from rl_coach.memories.non_episodic.experience_replay import ExperienceReplayParameters, ExperienceReplay
+from rl_coach.schedules import Schedule, ConstantSchedule
+
+
+class BalancedExperienceReplayParameters(ExperienceReplayParameters):
+    def __init__(self):
+        super().__init__()
+        self.max_size = (MemoryGranularity.Transitions, 1000000)
+        self.allow_duplicates_in_batch_sampling = False
+        self.num_classes = 0
+        self.state_key_with_the_class_index = 'class'
+
+    @property
+    def path(self):
+        return 'rl_coach.memories.non_episodic.balanced_experience_replay:BalancedExperienceReplay'
+
+
+"""
+A replay buffer which allows sampling batches which are balanced in terms of the classes that are sampled
+"""
+
[docs]class BalancedExperienceReplay(ExperienceReplay): + def __init__(self, max_size: Tuple[MemoryGranularity, int], allow_duplicates_in_batch_sampling: bool=True, + num_classes: int=0, state_key_with_the_class_index: Any='class'): + """ + :param max_size: the maximum number of transitions or episodes to hold in the memory + :param allow_duplicates_in_batch_sampling: allow having the same transition multiple times in a batch + :param num_classes: the number of classes in the replayed data + :param state_key_with_the_class_index: the class index is assumed to be a value in the state dictionary. + this parameter determines the key to retrieve the class index value + """ + super().__init__(max_size, allow_duplicates_in_batch_sampling) + self.current_class_to_sample_from = 0 + self.num_classes = num_classes + self.state_key_with_the_class_index = state_key_with_the_class_index + self.transitions = [[] for _ in range(self.num_classes)] + self.transitions_order = [] + + if self.num_classes < 2: + raise ValueError("The number of classes for a balanced replay buffer should be at least 2. " + "The number of classes that were defined are: {}".format(self.num_classes)) + + def store(self, transition: Transition, lock: bool=True) -> None: + """ + Store a new transition in the memory. + :param transition: a transition to store + :param lock: if true, will lock the readers writers lock. this can cause a deadlock if an inheriting class + locks and then calls store with lock = True + :return: None + """ + # Calling super.store() so that in case a memory backend is used, the memory backend can store this transition. + super().store(transition) + if lock: + self.reader_writer_lock.lock_writing_and_reading() + + self._num_transitions += 1 + + if self.state_key_with_the_class_index not in transition.state.keys(): + raise ValueError("The class index was not present in the state of the transition under the given key ({})" + .format(self.state_key_with_the_class_index)) + + class_idx = transition.state[self.state_key_with_the_class_index] + + if class_idx >= self.num_classes: + raise ValueError("The given class index is outside the defined number of classes for the replay buffer. " + "The given class was: {} and the number of classes defined is: {}" + .format(class_idx, self.num_classes)) + + self.transitions[class_idx].append(transition) + self.transitions_order.append(class_idx) + self._enforce_max_length() + + if lock: + self.reader_writer_lock.release_writing_and_reading() + + def sample(self, size: int) -> List[Transition]: + """ + Sample a batch of transitions form the replay buffer. If the requested size is larger than the number + of samples available in the replay buffer then the batch will return empty. + :param size: the size of the batch to sample + :return: a batch (list) of selected transitions from the replay buffer + """ + self.reader_writer_lock.lock_writing() + + if size % self.num_classes != 0: + raise ValueError("Sampling batches from a balanced replay buffer should be done only using batch sizes " + "which are a multiple of the number of classes. 
The number of classes defined is: {} " + "and the batch size requested is: {}".format(self.num_classes, size)) + + batch_size_from_each_class = size // self.num_classes + + if self.allow_duplicates_in_batch_sampling: + transitions_idx = [np.random.randint(len(class_transitions), size=batch_size_from_each_class) + for class_transitions in self.transitions] + + else: + for class_idx, class_transitions in enumerate(self.transitions): + if self.num_transitions() < batch_size_from_each_class: + raise ValueError("The replay buffer cannot be sampled since there are not enough transitions yet. " + "There are currently {} transitions for class {}" + .format(len(class_transitions), class_idx)) + + transitions_idx = [np.random.choice(len(class_transitions), size=batch_size_from_each_class, replace=False) + for class_transitions in self.transitions] + + batch = [] + for class_idx, class_transitions_idx in enumerate(transitions_idx): + batch += [self.transitions[class_idx][i] for i in class_transitions_idx] + + self.reader_writer_lock.release_writing() + + return batch + + def remove_transition(self, transition_index: int, lock: bool=True) -> None: + raise ValueError("It is not possible to remove specific transitions with a balanced replay buffer") + + def get_transition(self, transition_index: int, lock: bool=True) -> Union[None, Transition]: + raise ValueError("It is not possible to access specific transitions with a balanced replay buffer") + + def _enforce_max_length(self) -> None: + """ + Make sure that the size of the replay buffer does not pass the maximum size allowed. + If it passes the max size, the oldest transition in the replay buffer will be removed. + This function does not use locks since it is only called internally + :return: None + """ + granularity, size = self.max_size + if granularity == MemoryGranularity.Transitions: + while size != 0 and self.num_transitions() > size: + self._num_transitions -= 1 + del self.transitions[self.transitions_order[0]][0] + del self.transitions_order[0] + else: + raise ValueError("The granularity of the replay buffer can only be set in terms of transitions") + + def clean(self, lock: bool=True) -> None: + """ + Clean the memory by removing all the episodes + :return: None + """ + if lock: + self.reader_writer_lock.lock_writing_and_reading() + + self.transitions = [[] for _ in range(self.num_classes)] + self.transitions_order = [] + self._num_transitions = 0 + + if lock: + self.reader_writer_lock.release_writing_and_reading()
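A usage sketch; as in the episodic replay buffer example earlier, the Transition keyword arguments are assumed for illustration:

from rl_coach.core_types import Transition
from rl_coach.memories.memory import MemoryGranularity
from rl_coach.memories.non_episodic.balanced_experience_replay import BalancedExperienceReplay

memory = BalancedExperienceReplay(max_size=(MemoryGranularity.Transitions, 10000), num_classes=2)

# the class index travels inside the state dictionary, under the configured key ('class' by default)
for label in (0, 1, 0, 1):
    memory.store(Transition(state={'observation': [0.0], 'class': label}, action=0, reward=0.0))

batch = memory.sample(4)  # two transitions are drawn from each of the two classes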
\ No newline at end of file
diff --git a/docs/_modules/rl_coach/memories/non_episodic/differentiable_neural_dictionary.html b/docs/_modules/rl_coach/memories/non_episodic/differentiable_neural_dictionary.html
new file mode 100644
index 0000000..6b5d69e
--- /dev/null
+++ b/docs/_modules/rl_coach/memories/non_episodic/differentiable_neural_dictionary.html
@@ -0,0 +1,518 @@
+rl_coach.memories.non_episodic.differentiable_neural_dictionary — Reinforcement Learning Coach 0.11.0 documentation

Source code for rl_coach.memories.non_episodic.differentiable_neural_dictionary

+#
+# Copyright (c) 2017 Intel Corporation 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+import pickle
+
+import numpy as np
+try:
+    import annoy
+    from annoy import AnnoyIndex
+except ImportError:
+    from rl_coach.logger import failed_imports
+    failed_imports.append("annoy")
+
+
+class AnnoyDictionary(object):
+    def __init__(self, dict_size, key_width, new_value_shift_coefficient=0.1, batch_size=100, key_error_threshold=0.01,
+                 num_neighbors=50, override_existing_keys=True, rebuild_on_every_update=False):
+        self.rebuild_on_every_update = rebuild_on_every_update
+        self.max_size = dict_size
+        self.curr_size = 0
+        self.new_value_shift_coefficient = new_value_shift_coefficient
+        self.num_neighbors = num_neighbors
+        self.override_existing_keys = override_existing_keys
+
+        self.index = AnnoyIndex(key_width, metric='euclidean')
+        self.index.set_seed(1)
+
+        self.embeddings = np.zeros((dict_size, key_width))
+        self.values = np.zeros(dict_size)
+        self.additional_data = [None] * dict_size
+
+        self.lru_timestamps = np.zeros(dict_size)
+        self.current_timestamp = 0.0
+
+        # keys that are within this distance of each other will be considered to be the same key
+        self.key_error_threshold = key_error_threshold
+
+        self.initial_update_size = batch_size
+        self.min_update_size = self.initial_update_size
+        self.key_dimension = key_width
+        self.value_dimension = 1
+        self._reset_buffer()
+
+        self.built_capacity = 0
+
+    def add(self, keys, values, additional_data=None):
+        if not additional_data:
+            additional_data = [None] * len(keys)
+
+        # Adds new embeddings and values to the dictionary
+        indices = []
+        indices_to_remove = []
+        for i in range(keys.shape[0]):
+            index = self._lookup_key_index(keys[i])
+            if index and self.override_existing_keys:
+                # update existing value
+                self.values[index] += self.new_value_shift_coefficient * (values[i] - self.values[index])
+                self.additional_data[index[0][0]] = additional_data[i]
+                self.lru_timestamps[index] = self.current_timestamp
+                indices_to_remove.append(i)
+            else:
+                # add new
+                if self.curr_size >= self.max_size:
+                    # find the LRU entry
+                    index = np.argmin(self.lru_timestamps)
+                else:
+                    index = self.curr_size
+                    self.curr_size += 1
+                self.lru_timestamps[index] = self.current_timestamp
+                indices.append(index)
+
+        for i in reversed(indices_to_remove):
+            keys = np.delete(keys, i, 0)
+            values = np.delete(values, i, 0)
+            del additional_data[i]
+
+        self.buffered_keys = np.vstack((self.buffered_keys, keys))
+        self.buffered_values = np.vstack((self.buffered_values, values))
+        self.buffered_indices = self.buffered_indices + indices
+        self.buffered_additional_data = self.buffered_additional_data + additional_data
+
+        if len(self.buffered_indices) >= self.min_update_size:
+            self.min_update_size = max(self.initial_update_size, int(self.curr_size * 0.02))
+            self._rebuild_index()
+        elif self.rebuild_on_every_update:
+            self._rebuild_index()
+
+        self.current_timestamp += 1
+
+    # Returns the stored embeddings and values of the closest embeddings
+    def query(self, keys, k):
+        if not self.has_enough_entries(k):
+            # this will only happen when the DND is not yet populated with enough entries, which is only during heatup
+            # these values won't be used and therefore they are meaningless
+            return [0.0], [0.0], [0], [None]
+
+        _, indices = self._get_k_nearest_neighbors_indices(keys, k)
+
+        embeddings = []
+        values = []
+        additional_data = []
+        for ind in indices:
+            self.lru_timestamps[ind] = self.current_timestamp
+            embeddings.append(self.embeddings[ind])
+            values.append(self.values[ind])
+            curr_additional_data = []
+            for sub_ind in ind:
+                curr_additional_data.append(self.additional_data[sub_ind])
+            additional_data.append(curr_additional_data)
+
+        self.current_timestamp += 1
+
+        return embeddings, values, indices, additional_data
+
+    def has_enough_entries(self, k):
+        return self.curr_size > k and (self.built_capacity > k)
+
+    def sample_embeddings(self, num_embeddings):
+        return self.embeddings[np.random.choice(self.curr_size, num_embeddings)]
+
+    def _get_k_nearest_neighbors_indices(self, keys, k):
+        distances = []
+        indices = []
+        for key in keys:
+            index, distance = self.index.get_nns_by_vector(key, k, include_distances=True)
+            distances.append(distance)
+            indices.append(index)
+        return distances, indices
+
+    def _rebuild_index(self):
+        self.index.unbuild()
+        self.embeddings[self.buffered_indices] = self.buffered_keys
+        self.values[self.buffered_indices] = np.squeeze(self.buffered_values)
+        for i, data in zip(self.buffered_indices, self.buffered_additional_data):
+            self.additional_data[i] = data
+        for idx, key in zip(self.buffered_indices, self.buffered_keys):
+            self.index.add_item(idx, key)
+
+        self._reset_buffer()
+
+        self.index.build(self.num_neighbors)
+        self.built_capacity = self.curr_size
+
+    def _reset_buffer(self):
+        self.buffered_keys = np.zeros((0, self.key_dimension))
+        self.buffered_values = np.zeros((0, self.value_dimension))
+        self.buffered_indices = []
+        self.buffered_additional_data = []
+
+    def _lookup_key_index(self, key):
+        distance, index = self._get_k_nearest_neighbors_indices([key], 1)
+        if distance != [[]] and distance[0][0] <= self.key_error_threshold:
+            return index
+        return None
+
+
+
[docs]class QDND(object): + def __init__(self, dict_size, key_width, num_actions, new_value_shift_coefficient=0.1, key_error_threshold=0.01, + learning_rate=0.01, num_neighbors=50, return_additional_data=False, override_existing_keys=False, + rebuild_on_every_update=False): + self.dict_size = dict_size + self.key_width = key_width + self.num_actions = num_actions + self.new_value_shift_coefficient = new_value_shift_coefficient + self.key_error_threshold = key_error_threshold + self.learning_rate = learning_rate + self.num_neighbors = num_neighbors + self.return_additional_data = return_additional_data + self.override_existing_keys = override_existing_keys + self.dicts = [] + + # create a dict for each action + for a in range(num_actions): + new_dict = AnnoyDictionary(dict_size, key_width, new_value_shift_coefficient, + key_error_threshold=key_error_threshold, num_neighbors=num_neighbors, + override_existing_keys=override_existing_keys, + rebuild_on_every_update=rebuild_on_every_update) + self.dicts.append(new_dict) + + def add(self, embeddings, actions, values, additional_data=None): + # add a new set of embeddings and values to each of the underlining dictionaries + embeddings = np.array(embeddings) + actions = np.array(actions) + values = np.array(values) + for a in range(self.num_actions): + idx = np.where(actions == a) + curr_action_embeddings = embeddings[idx] + curr_action_values = np.expand_dims(values[idx], -1) + if additional_data: + curr_additional_data = [] + for i in idx[0]: + curr_additional_data.append(additional_data[i]) + else: + curr_additional_data = None + + self.dicts[a].add(curr_action_embeddings, curr_action_values, curr_additional_data) + return True + + def query(self, embeddings, action, k): + # query for nearest neighbors to the given embeddings + dnd_embeddings = [] + dnd_values = [] + dnd_indices = [] + dnd_additional_data = [] + for i in range(len(embeddings)): + embedding, value, indices, additional_data = self.dicts[action].query([embeddings[i]], k) + dnd_embeddings.append(embedding[0]) + dnd_values.append(value[0]) + dnd_indices.append(indices[0]) + dnd_additional_data.append(additional_data[0]) + + if self.return_additional_data: + return dnd_embeddings, dnd_values, dnd_indices, dnd_additional_data + else: + return dnd_embeddings, dnd_values, dnd_indices + + def has_enough_entries(self, k): + # check if each of the action dictionaries has at least k entries + for a in range(self.num_actions): + if not self.dicts[a].has_enough_entries(k): + return False + return True + + def update_keys_and_values(self, actions, key_gradients, value_gradients, indices): + # Update DND keys and values + for batch_action, batch_keys, batch_values, batch_indices in zip(actions, key_gradients, value_gradients, indices): + # Update keys (embeddings) and values in DND + for i, index in enumerate(batch_indices): + self.dicts[batch_action].embeddings[index, :] -= self.learning_rate * batch_keys[i, :] + self.dicts[batch_action].values[index] -= self.learning_rate * batch_values[i] + + def sample_embeddings(self, num_embeddings): + num_actions = len(self.dicts) + embeddings = [] + num_embeddings_per_action = int(num_embeddings/num_actions) + for action in range(num_actions): + embeddings.append(self.dicts[action].sample_embeddings(num_embeddings_per_action)) + embeddings = np.vstack(embeddings) + + # the numbers did not divide nicely, let's just randomly sample some more embeddings + if num_embeddings_per_action * num_actions < num_embeddings: + action = np.random.randint(0, 
num_actions) + extra_embeddings = self.dicts[action].sample_embeddings(num_embeddings - + num_embeddings_per_action * num_actions) + embeddings = np.vstack([embeddings, extra_embeddings]) + return embeddings + + def clean(self): + # create a new dict for each action + self.dicts = [] + for a in range(self.num_actions): + new_dict = AnnoyDictionary(self.dict_size, self.key_width, self.new_value_shift_coefficient, + key_error_threshold=self.key_error_threshold, num_neighbors=self.num_neighbors) + self.dicts.append(new_dict)
+ + +def load_dnd(model_dir): + max_id = 0 + + for f in [s for s in os.listdir(model_dir) if s.endswith('.dnd')]: + if int(f.split('.')[0]) > max_id: + max_id = int(f.split('.')[0]) + + model_path = str(max_id) + '.dnd' + with open(os.path.join(model_dir, model_path), 'rb') as f: + DND = pickle.load(f) + + for a in range(DND.num_actions): + DND.dicts[a].index = AnnoyIndex(512, metric='euclidean') + DND.dicts[a].index.set_seed(1) + + for idx, key in zip(range(DND.dicts[a].curr_size), DND.dicts[a].embeddings[:DND.dicts[a].curr_size]): + DND.dicts[a].index.add_item(idx, key) + + DND.dicts[a].index.build(50) + + return DND +
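As a rough illustration of how the dictionary above is typically exercised, the flow is add, then has_enough_entries, then query. This is a sketch only: it assumes the annoy package is installed and that the module path matches the page title.

import numpy as np

from rl_coach.memories.non_episodic.differentiable_neural_dictionary import QDND

# one AnnoyDictionary per action; rebuilding the index on every update makes queries usable immediately
dnd = QDND(dict_size=1024, key_width=8, num_actions=2, num_neighbors=5,
           rebuild_on_every_update=True)

embeddings = np.random.randn(32, 8)
actions = np.random.randint(0, 2, size=32)
values = np.random.randn(32)
dnd.add(embeddings, actions, values)

# query the 5 nearest stored embeddings for action 0, once every per-action dictionary has enough entries
if dnd.has_enough_entries(k=5):
    dnd_embeddings, dnd_values, dnd_indices = dnd.query(embeddings[:3], action=0, k=5)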
+ + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/docs/_modules/rl_coach/memories/non_episodic/experience_replay.html b/docs/_modules/rl_coach/memories/non_episodic/experience_replay.html new file mode 100644 index 0000000..660b17f --- /dev/null +++ b/docs/_modules/rl_coach/memories/non_episodic/experience_replay.html @@ -0,0 +1,467 @@ + + + + + + + + + + + rl_coach.memories.non_episodic.experience_replay — Reinforcement Learning Coach 0.11.0 documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Source code for rl_coach.memories.non_episodic.experience_replay

+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from typing import List, Tuple, Union, Dict, Any
+import pickle
+import sys
+import time
+
+import numpy as np
+
+from rl_coach.core_types import Transition
+from rl_coach.logger import screen
+from rl_coach.memories.memory import Memory, MemoryGranularity, MemoryParameters
+from rl_coach.utils import ReaderWriterLock, ProgressBar
+
+
+class ExperienceReplayParameters(MemoryParameters):
+    def __init__(self):
+        super().__init__()
+        self.max_size = (MemoryGranularity.Transitions, 1000000)
+        self.allow_duplicates_in_batch_sampling = True
+
+    @property
+    def path(self):
+        return 'rl_coach.memories.non_episodic.experience_replay:ExperienceReplay'
+
+
+
[docs]class ExperienceReplay(Memory): + """ + A regular replay buffer which stores transition without any additional structure + """ + def __init__(self, max_size: Tuple[MemoryGranularity, int], allow_duplicates_in_batch_sampling: bool=True): + """ + :param max_size: the maximum number of transitions or episodes to hold in the memory + :param allow_duplicates_in_batch_sampling: allow having the same transition multiple times in a batch + """ + super().__init__(max_size) + if max_size[0] != MemoryGranularity.Transitions: + raise ValueError("Experience replay size can only be configured in terms of transitions") + self.transitions = [] + self.allow_duplicates_in_batch_sampling = allow_duplicates_in_batch_sampling + + self.reader_writer_lock = ReaderWriterLock() + + def length(self) -> int: + """ + Get the number of transitions in the ER + """ + return self.num_transitions() + + def num_transitions(self) -> int: + """ + Get the number of transitions in the ER + """ + return len(self.transitions) + + def sample(self, size: int) -> List[Transition]: + """ + Sample a batch of transitions form the replay buffer. If the requested size is larger than the number + of samples available in the replay buffer then the batch will return empty. + :param size: the size of the batch to sample + :param beta: the beta parameter used for importance sampling + :return: a batch (list) of selected transitions from the replay buffer + """ + self.reader_writer_lock.lock_writing() + + if self.allow_duplicates_in_batch_sampling: + transitions_idx = np.random.randint(self.num_transitions(), size=size) + + else: + if self.num_transitions() >= size: + transitions_idx = np.random.choice(self.num_transitions(), size=size, replace=False) + else: + raise ValueError("The replay buffer cannot be sampled since there are not enough transitions yet. " + "There are currently {} transitions".format(self.num_transitions())) + + batch = [self.transitions[i] for i in transitions_idx] + + self.reader_writer_lock.release_writing() + return batch + + def _enforce_max_length(self) -> None: + """ + Make sure that the size of the replay buffer does not pass the maximum size allowed. + If it passes the max size, the oldest transition in the replay buffer will be removed. + This function does not use locks since it is only called internally + :return: None + """ + granularity, size = self.max_size + if granularity == MemoryGranularity.Transitions: + while size != 0 and self.num_transitions() > size: + self.remove_transition(0, False) + else: + raise ValueError("The granularity of the replay buffer can only be set in terms of transitions") + + def store(self, transition: Transition, lock: bool=True) -> None: + """ + Store a new transition in the memory. + :param transition: a transition to store + :param lock: if true, will lock the readers writers lock. this can cause a deadlock if an inheriting class + locks and then calls store with lock = True + :return: None + """ + # Calling super.store() so that in case a memory backend is used, the memory backend can store this transition. + super().store(transition) + if lock: + self.reader_writer_lock.lock_writing_and_reading() + + self.transitions.append(transition) + self._enforce_max_length() + + if lock: + self.reader_writer_lock.release_writing_and_reading() + + def get_transition(self, transition_index: int, lock: bool=True) -> Union[None, Transition]: + """ + Returns the transition in the given index. If the transition does not exist, returns None instead. 
+ :param transition_index: the index of the transition to return + :param lock: use write locking if this is a shared memory + :return: the corresponding transition + """ + if lock: + self.reader_writer_lock.lock_writing() + + if self.length() == 0 or transition_index >= self.length(): + transition = None + else: + transition = self.transitions[transition_index] + + if lock: + self.reader_writer_lock.release_writing() + + return transition + + def remove_transition(self, transition_index: int, lock: bool=True) -> None: + """ + Remove the transition in the given index. + + This does not remove the transition from the segment trees! it is just used to remove the transition + from the transitions list + :param transition_index: the index of the transition to remove + :return: None + """ + if lock: + self.reader_writer_lock.lock_writing_and_reading() + + if self.num_transitions() > transition_index: + del self.transitions[transition_index] + + if lock: + self.reader_writer_lock.release_writing_and_reading() + + # for API compatibility + def get(self, transition_index: int, lock: bool=True) -> Union[None, Transition]: + """ + Returns the transition in the given index. If the transition does not exist, returns None instead. + :param transition_index: the index of the transition to return + :return: the corresponding transition + """ + return self.get_transition(transition_index, lock) + + # for API compatibility + def remove(self, transition_index: int, lock: bool=True): + """ + Remove the transition in the given index + :param transition_index: the index of the transition to remove + :return: None + """ + self.remove_transition(transition_index, lock) + + def clean(self, lock: bool=True) -> None: + """ + Clean the memory by removing all the episodes + :return: None + """ + if lock: + self.reader_writer_lock.lock_writing_and_reading() + + self.transitions = [] + + if lock: + self.reader_writer_lock.release_writing_and_reading() + + def mean_reward(self) -> np.ndarray: + """ + Get the mean reward in the replay buffer + :return: the mean reward + """ + self.reader_writer_lock.lock_writing() + + mean = np.mean([transition.reward for transition in self.transitions]) + + self.reader_writer_lock.release_writing() + + return mean + + def save(self, file_path: str) -> None: + """ + Save the replay buffer contents to a pickle file + :param file_path: the path to the file that will be used to store the pickled transitions + """ + with open(file_path, 'wb') as file: + pickle.dump(self.transitions, file) + + def load(self, file_path: str) -> None: + """ + Restore the replay buffer contents from a pickle file. + The pickle file is assumed to include a list of transitions. + :param file_path: The path to a pickle file to restore + """ + with open(file_path, 'rb') as file: + transitions = pickle.load(file) + num_transitions = len(transitions) + if num_transitions > self.max_size[1]: + screen.warning("Warning! The number of transition to load into the replay buffer ({}) is " + "bigger than the max size of the replay buffer ({}). The excessive transitions will " + "not be stored.".format(num_transitions, self.max_size[1])) + + progress_bar = ProgressBar(num_transitions) + for transition_idx, transition in enumerate(transitions): + self.store(transition) + + # print progress + if transition_idx % 100 == 0: + progress_bar.update(transition_idx) + + progress_bar.close()
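A minimal end-to-end sketch of the buffer above (assuming Transition accepts the keyword arguments shown; the exact constructor may differ):

import numpy as np

from rl_coach.core_types import Transition
from rl_coach.memories.memory import MemoryGranularity
from rl_coach.memories.non_episodic.experience_replay import ExperienceReplay

memory = ExperienceReplay(max_size=(MemoryGranularity.Transitions, 10000))

for _ in range(100):
    memory.store(Transition(state={'observation': np.random.randn(4)},
                            action=np.random.randint(2), reward=0.0))

# with the default allow_duplicates_in_batch_sampling=True, sampling works as long as the
# buffer is non-empty, but the same transition may appear more than once in a batch
batch = memory.sample(32)

memory.save('/tmp/replay_buffer.p')   # pickle the stored transitions ...
memory.load('/tmp/replay_buffer.p')   # ... and append them back on top of the current contents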
+ + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/docs/_modules/rl_coach/memories/non_episodic/prioritized_experience_replay.html b/docs/_modules/rl_coach/memories/non_episodic/prioritized_experience_replay.html new file mode 100644 index 0000000..a516e7c --- /dev/null +++ b/docs/_modules/rl_coach/memories/non_episodic/prioritized_experience_replay.html @@ -0,0 +1,526 @@ + + + + + + + + + + + rl_coach.memories.non_episodic.prioritized_experience_replay — Reinforcement Learning Coach 0.11.0 documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Source code for rl_coach.memories.non_episodic.prioritized_experience_replay

+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import operator
+import random
+from enum import Enum
+from typing import List, Tuple, Any
+
+import numpy as np
+
+from rl_coach.core_types import Transition
+from rl_coach.memories.memory import MemoryGranularity
+from rl_coach.memories.non_episodic.experience_replay import ExperienceReplayParameters, ExperienceReplay
+from rl_coach.schedules import Schedule, ConstantSchedule
+
+
+class PrioritizedExperienceReplayParameters(ExperienceReplayParameters):
+    def __init__(self):
+        super().__init__()
+        self.max_size = (MemoryGranularity.Transitions, 1000000)
+        self.alpha = 0.6
+        self.beta = ConstantSchedule(0.4)
+        self.epsilon = 1e-6
+
+    @property
+    def path(self):
+        return 'rl_coach.memories.non_episodic.prioritized_experience_replay:PrioritizedExperienceReplay'
+
+
+class SegmentTree(object):
+    """
+    A segment tree which can be used to maintain a min / max / sum over its leaf values
+    Add or update item value - O(log N)
+    Sampling an item - O(log N)
+    """
+    class Operation(Enum):
+        MAX = {"operator": max, "initial_value": -float("inf")}
+        MIN = {"operator": min, "initial_value": float("inf")}
+        SUM = {"operator": operator.add, "initial_value": 0}
+
+    def __init__(self, size: int, operation: Operation):
+        self.next_leaf_idx_to_write = 0
+        self.size = size
+        if not (size > 0 and size & (size - 1) == 0):
+            raise ValueError("A segment tree size must be a positive power of 2. The given size is {}".format(self.size))
+        self.operation = operation
+        self.tree = np.ones(2 * size - 1) * self.operation.value['initial_value']
+        self.data = [None] * size
+
+    def _propagate(self, node_idx: int) -> None:
+        """
+        Propagate an update of a node's value to its parent node
+        :param node_idx: the index of the node that was updated
+        :return: None
+        """
+        parent = (node_idx - 1) // 2
+
+        self.tree[parent] = self.operation.value['operator'](self.tree[parent * 2 + 1], self.tree[parent * 2 + 2])
+
+        if parent != 0:
+            self._propagate(parent)
+
+    def _retrieve(self, root_node_idx: int, val: float) -> int:
+        """
+        Retrieve the leaf node whose accumulated value range contains val, searching the subtree rooted at the node
+        at index root_node_idx (used for proportional sampling from the sum tree)
+        :param root_node_idx: the index of the root node to search from
+        :param val: the value to query for
+        :return: the index of the resulting node
+        """
+        left = 2 * root_node_idx + 1
+        right = left + 1
+
+        if left >= len(self.tree):
+            return root_node_idx
+
+        if val <= self.tree[left]:
+            return self._retrieve(left, val)
+        else:
+            return self._retrieve(right, val-self.tree[left])
+
+    def total_value(self) -> float:
+        """
+        Return the total value of the tree according to the tree operation. For SUM for example, this will return
+        the total sum of the tree. For MIN, this will return the minimal value
+        :return: the total value of the tree
+        """
+        return self.tree[0]
+
+    def add(self, val: float, data: Any) -> None:
+        """
+        Add a new value to the tree with data assigned to it
+        :param val: the new value to add to the tree
+        :param data: the data that should be assigned to this value
+        :return: None
+        """
+        self.data[self.next_leaf_idx_to_write] = data
+        self.update(self.next_leaf_idx_to_write, val)
+
+        self.next_leaf_idx_to_write += 1
+        if self.next_leaf_idx_to_write >= self.size:
+            self.next_leaf_idx_to_write = 0
+
+    def update(self, leaf_idx: int, new_val: float) -> None:
+        """
+        Update the value of the node at index idx
+        :param leaf_idx: the index of the node to update
+        :param new_val: the new value of the node
+        :return: None
+        """
+        node_idx = leaf_idx + self.size - 1
+        if not 0 <= node_idx < len(self.tree):
+            raise ValueError("The given left index ({}) can not be found in the tree. The available leaves are: 0-{}"
+                             .format(leaf_idx, self.size - 1))
+
+        self.tree[node_idx] = new_val
+        self._propagate(node_idx)
+
+    def get_element_by_partial_sum(self, val: float) -> Tuple[int, float, Any]:
+        """
+        Given a value between 0 and the tree sum, return the object whose accumulated value range contains that value.
+        For example, if there are 3 leaves with the values 10, 20 and 30, then val=35 will return the 3rd leaf, since
+        accumulating the leaves in order passes 35 only within the 3rd leaf. This allows sampling leaves with a
+        probability proportional to their value.
+        :param val: a value between 0 and the tree sum
+        :return: the index of the resulting leaf in the tree, its value in the tree and
+                 the object itself
+        """
+        node_idx = self._retrieve(0, val)
+        leaf_idx = node_idx - self.size + 1
+        data_value = self.tree[node_idx]
+        data = self.data[leaf_idx]
+
+        return leaf_idx, data_value, data
+
+    def __str__(self):
+        result = ""
+        start = 0
+        size = 1
+        while size <= self.size:
+            result += "{}\n".format(self.tree[start:(start + size)])
+            start += size
+            size *= 2
+        return result
+
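A tiny, self-contained walk-through of the proportional lookup described in get_element_by_partial_sum (an editor's sketch, not part of the module; it assumes SegmentTree is importable from this module):

from rl_coach.memories.non_episodic.prioritized_experience_replay import SegmentTree

tree = SegmentTree(size=4, operation=SegmentTree.Operation.SUM)
tree.add(10.0, 'a')
tree.add(20.0, 'b')
tree.add(30.0, 'c')

assert tree.total_value() == 60.0
# a partial sum of 35 falls past the first two leaves (10 + 20), so the third leaf is returned
leaf_idx, leaf_value, data = tree.get_element_by_partial_sum(35.0)
assert (leaf_idx, leaf_value, data) == (2, 30.0, 'c')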
+
+
[docs]class PrioritizedExperienceReplay(ExperienceReplay): + """ + This is the proportional sampling variant of the prioritized experience replay as described + in https://arxiv.org/pdf/1511.05952.pdf. + """ + def __init__(self, max_size: Tuple[MemoryGranularity, int], alpha: float=0.6, beta: Schedule=ConstantSchedule(0.4), + epsilon: float=1e-6, allow_duplicates_in_batch_sampling: bool=True): + """ + :param max_size: the maximum number of transitions or episodes to hold in the memory + :param alpha: the alpha prioritization coefficient + :param beta: the beta parameter used for importance sampling + :param epsilon: a small value added to the priority of each transition + :param allow_duplicates_in_batch_sampling: allow having the same transition multiple times in a batch + """ + if max_size[0] != MemoryGranularity.Transitions: + raise ValueError("Prioritized Experience Replay currently only support setting the memory size in " + "transitions granularity.") + self.power_of_2_size = 1 + while self.power_of_2_size < max_size[1]: + self.power_of_2_size *= 2 + super().__init__((MemoryGranularity.Transitions, self.power_of_2_size), allow_duplicates_in_batch_sampling) + self.sum_tree = SegmentTree(self.power_of_2_size, SegmentTree.Operation.SUM) + self.min_tree = SegmentTree(self.power_of_2_size, SegmentTree.Operation.MIN) + self.max_tree = SegmentTree(self.power_of_2_size, SegmentTree.Operation.MAX) + self.alpha = alpha + self.beta = beta + self.epsilon = epsilon + self.maximal_priority = 1.0 + + def _update_priority(self, leaf_idx: int, error: float) -> None: + """ + Update the priority of a given transition, using its index in the tree and its error + :param leaf_idx: the index of the transition leaf in the tree + :param error: the new error value + :return: None + """ + if error < 0: + raise ValueError("The priorities must be non-negative values") + priority = (error + self.epsilon) + self.sum_tree.update(leaf_idx, priority ** self.alpha) + self.min_tree.update(leaf_idx, priority ** self.alpha) + self.max_tree.update(leaf_idx, priority) + self.maximal_priority = self.max_tree.total_value() + + def update_priorities(self, indices: List[int], error_values: List[float]) -> None: + """ + Update the priorities of a batch of transitions using their indices and their new TD error terms + :param indices: the indices of the transitions to update + :param error_values: the new error values + :return: None + """ + self.reader_writer_lock.lock_writing_and_reading() + + if len(indices) != len(error_values): + raise ValueError("The number of indexes requested for update don't match the number of error values given") + for transition_idx, error in zip(indices, error_values): + self._update_priority(transition_idx, error) + + self.reader_writer_lock.release_writing_and_reading() + + def sample(self, size: int) -> List[Transition]: + """ + Sample a batch of transitions form the replay buffer. If the requested size is larger than the number + of samples available in the replay buffer then the batch will return empty. 
+ :param size: the size of the batch to sample + :return: a batch (list) of selected transitions from the replay buffer + """ + + self.reader_writer_lock.lock_writing() + + if self.num_transitions() >= size: + # split the tree leaves to equal segments and sample one transition from each segment + batch = [] + segment_size = self.sum_tree.total_value() / size + + # get the maximum weight in the memory + min_probability = self.min_tree.total_value() / self.sum_tree.total_value() # min P(j) = min p^a / sum(p^a) + max_weight = (min_probability * self.num_transitions()) ** -self.beta.current_value # max wi + + # sample a batch + for i in range(size): + segment_start = segment_size * i + segment_end = segment_size * (i + 1) + + # sample leaf and calculate its weight + val = random.uniform(segment_start, segment_end) + leaf_idx, priority, transition = self.sum_tree.get_element_by_partial_sum(val) + priority /= self.sum_tree.total_value() # P(j) = p^a / sum(p^a) + weight = (self.num_transitions() * priority) ** -self.beta.current_value # (N * P(j)) ^ -beta + normalized_weight = weight / max_weight # wj = ((N * P(j)) ^ -beta) / max wi + + transition.info['idx'] = leaf_idx + transition.info['weight'] = normalized_weight + + batch.append(transition) + + self.beta.step() + + else: + raise ValueError("The replay buffer cannot be sampled since there are not enough transitions yet. " + "There are currently {} transitions".format(self.num_transitions())) + + self.reader_writer_lock.release_writing() + return batch + + def store(self, transition: Transition, lock=True) -> None: + """ + Store a new transition in the memory. + :param transition: a transition to store + :return: None + """ + # Calling super.store() so that in case a memory backend is used, the memory backend can store this transition. + super().store(transition) + + if lock: + self.reader_writer_lock.lock_writing_and_reading() + + transition_priority = self.maximal_priority + self.sum_tree.add(transition_priority ** self.alpha, transition) + self.min_tree.add(transition_priority ** self.alpha, transition) + self.max_tree.add(transition_priority, transition) + super().store(transition, False) + + if lock: + self.reader_writer_lock.release_writing_and_reading() + + def clean(self, lock=True) -> None: + """ + Clean the memory by removing all the episodes + :return: None + """ + if lock: + self.reader_writer_lock.lock_writing_and_reading() + + super().clean(lock=False) + self.sum_tree = SegmentTree(self.power_of_2_size, SegmentTree.Operation.SUM) + self.min_tree = SegmentTree(self.power_of_2_size, SegmentTree.Operation.MIN) + self.max_tree = SegmentTree(self.power_of_2_size, SegmentTree.Operation.MAX) + + if lock: + self.reader_writer_lock.release_writing_and_reading()
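And a short sketch of the intended store / sample / update_priorities loop for the prioritized buffer above (assuming Transition accepts the keyword arguments shown; the buffer itself attaches 'idx' and 'weight' to each sampled transition's info dict):

import numpy as np

from rl_coach.core_types import Transition
from rl_coach.memories.memory import MemoryGranularity
from rl_coach.memories.non_episodic.prioritized_experience_replay import PrioritizedExperienceReplay
from rl_coach.schedules import ConstantSchedule

# the requested size is rounded up internally to the next power of 2
memory = PrioritizedExperienceReplay(max_size=(MemoryGranularity.Transitions, 1000),
                                     alpha=0.6, beta=ConstantSchedule(0.4))

for _ in range(64):
    memory.store(Transition(state={'observation': np.random.randn(4)}, action=0, reward=0.0))

batch = memory.sample(32)

# each sampled transition carries its tree leaf index and its importance sampling weight
leaf_indices = [t.info['idx'] for t in batch]
is_weights = [t.info['weight'] for t in batch]

# after computing the new TD errors for the batch, refresh the priorities
new_errors = list(np.abs(np.random.randn(len(batch))))
memory.update_priorities(leaf_indices, new_errors)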
+ + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/docs/_modules/rl_coach/memories/non_episodic/transition_collection.html b/docs/_modules/rl_coach/memories/non_episodic/transition_collection.html new file mode 100644 index 0000000..1b18ee7 --- /dev/null +++ b/docs/_modules/rl_coach/memories/non_episodic/transition_collection.html @@ -0,0 +1,263 @@ + + + + + + + + + + + rl_coach.memories.non_episodic.transition_collection — Reinforcement Learning Coach 0.11.0 documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Source code for rl_coach.memories.non_episodic.transition_collection

+from rl_coach.core_types import Transition
+
+
+
[docs]class TransitionCollection(object): + """ + Simple python implementation of transitions collection non-episodic memories + are constructed on top of. + """ + def __init__(self): + super(TransitionCollection, self).__init__() + + def append(self, transition): + pass + + def extend(self, transitions): + for transition in transitions: + self.append(transition) + + def __len__(self): + pass + + def __del__(self, range: slice): + # NOTE: the only slice used is the form: slice(None, n) + # NOTE: if it is easier, what we really want here is the ability to + # constrain the size of the collection. as new transitions are added, + # old transitions can be removed to maintain a maximum collection size. + pass + + def __getitem__(self, key: int): + # NOTE: we can switch to a method which fetches multiple items at a time + # if that would significantly improve performance + pass + + def __iter__(self): + # this is not high priority + pass
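Since the collection above is only an interface stub, a minimal list-backed implementation might look like the following (an illustrative sketch, not part of Coach):

from rl_coach.core_types import Transition
from rl_coach.memories.non_episodic.transition_collection import TransitionCollection


class ListTransitionCollection(TransitionCollection):
    """A simple in-memory collection that keeps at most `max_size` of the newest transitions."""
    def __init__(self, max_size: int=None):
        super().__init__()
        self.max_size = max_size
        self._transitions = []

    def append(self, transition: Transition):
        self._transitions.append(transition)
        # constrain the collection size by dropping the oldest transitions
        if self.max_size is not None and len(self._transitions) > self.max_size:
            del self._transitions[:len(self._transitions) - self.max_size]

    def __len__(self):
        return len(self._transitions)

    def __getitem__(self, key: int):
        return self._transitions[key]

    def __iter__(self):
        return iter(self._transitions)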
+ + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/docs/_modules/rl_coach/spaces.html b/docs/_modules/rl_coach/spaces.html new file mode 100644 index 0000000..65f3bc5 --- /dev/null +++ b/docs/_modules/rl_coach/spaces.html @@ -0,0 +1,858 @@ + + + + + + + + + + + rl_coach.spaces — Reinforcement Learning Coach 0.11.0 documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Source code for rl_coach.spaces

+#
+# Copyright (c) 2017 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import random
+from enum import Enum
+from itertools import product
+from typing import Union, List, Dict, Tuple, Callable
+
+import numpy as np
+import scipy
+import scipy.spatial
+
+from rl_coach.core_types import ActionType, ActionInfo
+from rl_coach.utils import eps
+
+
+
[docs]class Space(object): + """ + A space defines a set of valid values + """ + def __init__(self, shape: Union[int, tuple, list, np.ndarray], low: Union[None, int, float, np.ndarray]=-np.inf, + high: Union[None, int, float, np.ndarray]=np.inf): + """ + :param shape: the shape of the space + :param low: the lowest values possible in the space. can be an array defining the lowest values per point, + or a single value defining the general lowest values + :param high: the highest values possible in the space. can be an array defining the highest values per point, + or a single value defining the general highest values + """ + + # the number of dimensions is the number of axes in the shape. it will be set in the shape setter + self.num_dimensions = 0 + + # the number of elements is the number of possible actions if the action space was discrete. + # it will be set in the shape setter + self.num_elements = 0 + + self._low = self._high = None + self._shape = self.shape = shape + self._low = self.low = low + self._high = self.high = high + + # we allow zero sized spaces which means that the space is empty. this is useful for environments with no + # measurements for example. + if type(shape) == int and shape < 0: + raise ValueError("The shape of the space must be a non-negative number") + + @property + def shape(self): + return self._shape + + @shape.setter + def shape(self, val: Union[int, tuple, list, np.ndarray]): + # convert the shape to an np.ndarray + self._shape = val + if type(self._shape) == int: + self._shape = np.array([self._shape]) + if type(self._shape) == tuple or type(self._shape) == list: + self._shape = np.array(self._shape) + + # the shape is now an np.ndarray + self.num_dimensions = len(self._shape) + self.num_elements = int(np.prod(self._shape)) + + @property + def low(self): + if hasattr(self, '_low'): + return self._low + else: + return None + + @low.setter + def low(self, val: Union[None, int, float, np.ndarray]): + if type(val) == np.ndarray and type(self.shape) == np.ndarray and np.all(val.shape != self.shape): + raise ValueError("The low values shape don't match the shape of the space") + elif self.high is not None and not np.all(self.high >= val): + raise ValueError("At least one of the axes-parallel lines defining the space has high values which " + "are lower than the given low values") + else: + self._low = val + # we allow using a number to define the low values, but we immediately convert it to an array which defines + # the low values for all the space dimensions in order to expose a consistent value type + if type(self._low) == int or type(self._low) == float: + self._low = np.ones(self.shape)*self._low + + @property + def high(self): + if hasattr(self, '_high'): + return self._high + else: + return None + + @high.setter + def high(self, val: Union[None, int, float, np.ndarray]): + if type(val) == np.ndarray and type(self.shape) == np.ndarray and np.all(val.shape != self.shape): + raise ValueError("The high values shape don't match the shape of the space") + elif self.low is not None and not np.all(self.low <= val): + raise ValueError("At least one of the axes-parallel lines defining the space has low values which " + "are higher than the given high values") + else: + self._high = val + # we allow using a number to define the high values, but we immediately convert it to an array which defines + # the high values for all the space dimensions in order to expose a consistent value type + if type(self._high) == int or type(self._high) == float: + self._high = 
np.ones(self.shape)*self._high + +
[docs] def val_matches_space_definition(self, val: Union[int, float, np.ndarray]) -> bool: + """ + Checks if the given value matches the space definition in terms of shape and values + + :param val: a value to check + :return: True / False depending on if the val matches the space definition + """ + if (type(val) == int or type(val) == float) and not np.all(self.shape == np.ones(1)): + return False + if type(val) == np.ndarray and not np.all(val.shape == self.shape): + return False + if (self.low is not None and not np.all(val >= self.low)) \ + or (self.high is not None and not np.all(val <= self.high)): + # TODO: check the performance overhead this causes + return False + return True
+ +
[docs] def is_point_in_space_shape(self, point: np.ndarray) -> bool: + """ + Checks if a given multidimensional point is within the bounds of the shape of the space + + :param point: a multidimensional point + :return: True if the point is within the shape of the space. False otherwise + """ + if len(point) != self.num_dimensions: + return False + if np.any(point < np.zeros(self.num_dimensions)) or np.any(point >= self.shape): + return False + return True
+ +
[docs] def sample(self) -> np.ndarray: + """ + Sample the defined space, either uniformly, if space bounds are defined, or Normal distributed if no + bounds are defined + + :return: A numpy array sampled from the space + """ + # if there are infinite bounds, we sample using gaussian noise with mean 0 and std 1 + if np.any(self.low == -np.inf) or np.any(self.high == np.inf): + return np.random.normal(0, 1, self.shape) + else: + return np.random.uniform(self.low, self.high, self.shape)
+ + +class RewardSpace(Space): + def __init__(self, shape: Union[int, np.ndarray], low: Union[None, int, float, np.ndarray]=-np.inf, + high: Union[None, int, float, np.ndarray]=np.inf, + reward_success_threshold: Union[None, int, float]=None): + super().__init__(shape, low, high) + self.reward_success_threshold = reward_success_threshold + + +""" +Observation Spaces +""" + + +
[docs]class ObservationSpace(Space): + def __init__(self, shape: Union[int, np.ndarray], low: Union[None, int, float, np.ndarray]=-np.inf, + high: Union[None, int, float, np.ndarray]=np.inf): + super().__init__(shape, low, high)
+ + +
[docs]class VectorObservationSpace(ObservationSpace): + """ + An observation space which is defined as a vector of elements. This can be particularly useful for environments + which return measurements, such as in robotic environments. + """ + def __init__(self, shape: int, low: Union[None, int, float, np.ndarray]=-np.inf, + high: Union[None, int, float, np.ndarray]=np.inf, measurements_names: List[str]=None): + if measurements_names is None: + measurements_names = [] + if len(measurements_names) > shape: + raise ValueError("measurement_names size {} is larger than shape {}.".format( + len(measurements_names), shape)) + + self.measurements_names = measurements_names + super().__init__(shape, low, high)
+ + +
[docs]class PlanarMapsObservationSpace(ObservationSpace): + """ + An observation space which defines a stack of 2D observations. For example, an environment which returns + a stack of segmentation maps like in Starcraft. + """ + def __init__(self, shape: Union[np.ndarray], low: int, high: int, channels_axis: int=-1): + super().__init__(shape, low, high) + self.channels_axis = channels_axis + + if not 2 <= len(shape) <= 3: + raise ValueError("Planar maps observations must have 3 dimensions - a channels dimension and 2 maps " + "dimensions, not {}".format(len(shape))) + if len(shape) == 2: + self.channels = 1 + else: + self.channels = shape[channels_axis]
+ + +
[docs]class ImageObservationSpace(PlanarMapsObservationSpace): + """ + An observation space which is a private case of the PlanarMapsObservationSpace, where the stack of 2D observations + represent a RGB image, or a grayscale image. + """ + def __init__(self, shape: Union[np.ndarray], high: int, channels_axis: int=-1): + # TODO: consider allowing arbitrary low values for images + super().__init__(shape, 0, high, channels_axis) + self.has_colors = self.channels == 3 + if not self.channels == 3 and not self.channels == 1: + raise ValueError("Image observations must have 1 or 3 channels, not {}".format(self.channels))
+ + +# TODO: mixed observation spaces (image + measurements, image + segmentation + depth map, etc.) +class StateSpace(object): + def __init__(self, sub_spaces: Dict[str, Space]): + self.sub_spaces = sub_spaces + + def __getitem__(self, item): + return self.sub_spaces[item] + + def __setitem__(self, key, value): + self.sub_spaces[key] = value + + +""" +Action Spaces +""" + + +
[docs]class ActionSpace(Space): + def __init__(self, shape: Union[int, np.ndarray], low: Union[None, int, float, np.ndarray]=-np.inf, + high: Union[None, int, float, np.ndarray]=np.inf, descriptions: Union[None, List, Dict]=None, + default_action: ActionType=None): + super().__init__(shape, low, high) + # we allow a mismatch between the number of descriptions and the number of actions. + # in this case the descriptions for the actions that were not given will be the action index + if descriptions is not None: + self.descriptions = descriptions + else: + self.descriptions = {} + self.default_action = default_action + + @property + def actions(self) -> List[ActionType]: + raise NotImplementedError("The action space does not have an explicit actions list") + +
[docs] def sample_with_info(self) -> ActionInfo: + """ + Get a random action with additional "fake" info + + :return: An action info instance + """ + return ActionInfo(self.sample())
+ +
[docs] def clip_action_to_space(self, action: ActionType) -> ActionType: + """ + Given an action, clip its values to fit to the action space ranges + + :param action: a given action + :return: the clipped action + """ + return action
+ + def get_description(self, action: np.ndarray) -> str: + raise NotImplementedError("") + + def __str__(self): + return "{}: shape = {}, low = {}, high = {}".format(self.__class__.__name__, self.shape, self.low, self.high) + + def __repr__(self): + return self.__str__()
+ + +
[docs]class AttentionActionSpace(ActionSpace): + """ + A box selection continuous action space, meaning that the actions are defined as selecting a multidimensional box + from a given range. + The actions will be in the form: + [[low_x, low_y, ...], [high_x, high_y, ...]] + """ + def __init__(self, shape: int, low: Union[None, int, float, np.ndarray]=-np.inf, + high: Union[None, int, float, np.ndarray]=np.inf, descriptions: Union[None, List, Dict]=None, + default_action: np.ndarray = None, forced_attention_size: Union[None, int, float, np.ndarray]=None): + super().__init__(shape, low, high, descriptions) + + self.forced_attention_size = forced_attention_size + if isinstance(self.forced_attention_size, int) or isinstance(self.forced_attention_size, float): + self.forced_attention_size = np.ones(self.shape) * self.forced_attention_size + + if self.forced_attention_size is not None and np.all(self.forced_attention_size > (self.high - self.low)): + raise ValueError("The forced attention size is larger than the action space") + + # default action + if default_action is None: + if self.forced_attention_size is not None: + self.default_action = [self.low*np.ones(self.shape), + (self.low+self.forced_attention_size)*np.ones(self.shape)] + else: + self.default_action = [self.low*np.ones(self.shape), self.high*np.ones(self.shape)] + else: + self.default_action = default_action + + def sample(self) -> List: + if self.forced_attention_size is not None: + sampled_low = np.random.uniform(self.low, self.high-self.forced_attention_size, self.shape) + sampled_high = sampled_low + self.forced_attention_size + else: + sampled_low = np.random.uniform(self.low, self.high, self.shape) + sampled_high = np.random.uniform(sampled_low, self.high, self.shape) + return [sampled_low, sampled_high] + + def clip_action_to_space(self, action: ActionType) -> ActionType: + action = [np.clip(action[0], self.low, self.high), np.clip(action[1], self.low, self.high)] + return action
+ + +
[docs]class BoxActionSpace(ActionSpace): + """ + A multidimensional bounded or unbounded continuous action space + """ + def __init__(self, shape: Union[int, np.ndarray], low: Union[None, int, float, np.ndarray]=-np.inf, + high: Union[None, int, float, np.ndarray]=np.inf, descriptions: Union[None, List, Dict]=None, + default_action: np.ndarray=None): + super().__init__(shape, low, high, descriptions) + self.max_abs_range = np.maximum(np.abs(self.low), np.abs(self.high)) + + # default action + if default_action is None: + if np.any(np.isinf(self.low)) or np.any(np.isinf(self.high)): + self.default_action = np.zeros(shape) + else: + self.default_action = self.low + (self.high - self.low) / 2 + else: + self.default_action = default_action + + def clip_action_to_space(self, action: ActionType) -> ActionType: + action = np.clip(action, self.low, self.high) + return action
+ + +
[docs]class DiscreteActionSpace(ActionSpace): + """ + A discrete action space with action indices as actions + """ + def __init__(self, num_actions: int, descriptions: Union[None, List, Dict]=None, default_action: np.ndarray=None): + super().__init__(1, low=0, high=num_actions-1, descriptions=descriptions) + # the number of actions is mapped to high + + # default action + if default_action is None: + self.default_action = 0 + else: + self.default_action = default_action + + @property + def actions(self) -> List[ActionType]: + return list(range(0, int(self.high[0]) + 1)) + + def sample(self) -> int: + return np.random.choice(self.actions) + + def sample_with_info(self) -> ActionInfo: + return ActionInfo(self.sample(), action_probability=1. / (self.high[0] - self.low[0] + 1)) + + def get_description(self, action: int) -> str: + if type(self.descriptions) == list and 0 <= action < len(self.descriptions): + return self.descriptions[action] + elif type(self.descriptions) == dict and action in self.descriptions.keys(): + return self.descriptions[action] + elif 0 <= action < self.shape: + return str(action) + else: + raise ValueError("The given action is outside of the action space")
+ + +
[docs]class MultiSelectActionSpace(ActionSpace): + """ + A discrete action space where multiple actions can be selected at once. The actions are encoded as multi-hot vectors + """ + def __init__(self, size: int, max_simultaneous_selected_actions: int=1, descriptions: Union[None, List, Dict]=None, + default_action: np.ndarray=None, allow_no_action_to_be_selected=True): + super().__init__(size, low=None, high=None, descriptions=descriptions) + self.max_simultaneous_selected_actions = max_simultaneous_selected_actions + + if max_simultaneous_selected_actions > size: + raise ValueError("The maximum simultaneous selected actions can't be larger the max number of actions") + + # create all combinations of actions as a list of actions + I = [np.eye(size)]*self.max_simultaneous_selected_actions + self._actions = [] + if allow_no_action_to_be_selected: + self._actions.append(np.zeros(size)) + self._actions.extend(list(np.unique([np.clip(np.sum(t, axis=0), 0, 1) for t in product(*I)], axis=0))) + + # default action + if default_action is None: + self.default_action = self._actions[0] + else: + self.default_action = default_action + + @property + def actions(self) -> List[ActionType]: + return self._actions + + def sample(self) -> np.ndarray: + # samples a multi-hot vector + return random.choice(self.actions) + + def sample_with_info(self) -> ActionInfo: + return ActionInfo(self.sample(), action_probability=1. / len(self.actions)) + + def get_description(self, action: np.ndarray) -> str: + if np.sum(len(np.where(action == 0)[0])) + np.sum(len(np.where(action == 1)[0])) != self.shape or \ + np.sum(len(np.where(action == 1)[0])) > self.max_simultaneous_selected_actions: + raise ValueError("The given action is not in the action space") + selected_actions = np.where(action == 1)[0] + description = [self.descriptions[a] for a in selected_actions] + if len(description) == 0: + description = ['no-op'] + return ' + '.join(description)
+ + +
[docs]class CompoundActionSpace(ActionSpace): + """ + An action space which consists of multiple sub-action spaces. + For example, in Starcraft the agent should choose an action identifier from ~550 options (Discrete(550)), + but it also needs to choose 13 different arguments for the selected action identifier, where each argument is + by itself an action space. In Starcraft, the arguments are Discrete action spaces as well, but this is not mandatory. + """ + def __init__(self, sub_spaces: List[ActionSpace]): + super().__init__(0) + self.sub_action_spaces = sub_spaces + # TODO: define the shape, low and high value in a better way + + @property + def actions(self) -> List[ActionType]: + return [action_space.actions for action_space in self.sub_action_spaces] + + def sample(self) -> ActionType: + return [action_space.sample() for action_space in self.sub_action_spaces] + + def clip_action_to_space(self, actions: List[ActionType]) -> ActionType: + if not isinstance(actions, list) or len(actions) != len(self.sub_action_spaces): + raise ValueError("The actions to be clipped must be a list with the same number of sub-actions as " + "defined in the compound action space.") + for idx in range(len(self.sub_action_spaces)): + actions[idx] = self.sub_action_spaces[idx].clip_action_to_space(actions[idx]) + return actions + + def get_description(self, actions: np.ndarray) -> str: + description = [action_space.get_description(action) for action_space, action in zip(self.sub_action_spaces, actions)] + return ' + '.join(description)
+ + +""" +Goals +""" + + +class GoalToRewardConversion(object): + def __init__(self, goal_reaching_reward: float=0): + self.goal_reaching_reward = goal_reaching_reward + + def convert_distance_to_reward(self, distance: Union[float, np.ndarray]) -> Tuple[float, bool]: + """ + Given a distance from the goal, return a reward and a flag representing if the goal was reached + + :param distance: the distance from the goal + :return: + """ + raise NotImplementedError("") + + +class ReachingGoal(GoalToRewardConversion): + """ + get a reward if the goal was reached and 0 otherwise + """ + def __init__(self, distance_from_goal_threshold: Union[float, np.ndarray], goal_reaching_reward: float=0, + default_reward: float=-1): + """ + :param distance_from_goal_threshold: consider getting to this distance from the goal the same as getting + to the goal + :param goal_reaching_reward: the reward the agent will get when reaching the goal + :param default_reward: the reward the agent will get until it reaches the goal + """ + super().__init__(goal_reaching_reward) + self.distance_from_goal_threshold = distance_from_goal_threshold + self.default_reward = default_reward + + def convert_distance_to_reward(self, distance: Union[float, np.ndarray]) -> Tuple[float, bool]: + if np.all(distance <= self.distance_from_goal_threshold): + return self.goal_reaching_reward, True + else: + return self.default_reward, False + + +class InverseDistanceFromGoal(GoalToRewardConversion): + """ + get a reward inversely proportional to the distance from the goal + """ + def __init__(self, distance_from_goal_threshold: Union[float, np.ndarray], max_reward: float=1): + """ + :param distance_from_goal_threshold: consider getting to this distance from the goal the same as getting + to the goal + :param max_reward: the max reward the agent can get + """ + super().__init__(goal_reaching_reward=max_reward) + self.distance_from_goal_threshold = distance_from_goal_threshold + self.max_reward = max_reward + + def convert_distance_to_reward(self, distance: Union[float, np.ndarray]) -> Tuple[float, bool]: + return min(self.max_reward, 1 / (distance + eps)), distance <= self.distance_from_goal_threshold + + +
[docs]class GoalsSpace(VectorObservationSpace, ActionSpace): + """ + A multidimensional space with a goal type definition. It also behaves as an action space, so that hierarchical + agents can use it as an output action space. + The class acts as a wrapper to the target space. So after setting the target space, all the values of the class + will match the values of the target space (the shape, low, high, etc.) + """ +
[docs] class DistanceMetric(Enum): + Euclidean = 0 + Cosine = 1 + Manhattan = 2
+ + def __init__(self, goal_name: str, reward_type: GoalToRewardConversion, + distance_metric: Union[DistanceMetric, Callable]): + """ + :param goal_name: the name of the observation space to use as the achieved goal. + :param reward_type: the reward type to use for converting distances from goal to rewards + :param distance_metric: the distance metric to use. could be either one of the distances in the + DistanceMetric enum, or a custom function that gets two vectors as input and + returns the distance between them + """ + super().__init__(0) + self.goal_name = goal_name + self.distance_metric = distance_metric + self.reward_type = reward_type + self.target_space = None + self.max_abs_range = None + + def set_target_space(self, target_space: Space) -> None: + self.target_space = target_space + super().__init__(self.target_space.shape, self.target_space.low, self.target_space.high) + self.max_abs_range = np.maximum(np.abs(self.low), np.abs(self.high)) + +
[docs] def goal_from_state(self, state: Dict): + """ + Given a state, extract an observation according to the goal_name + + :param state: a dictionary of observations + :return: the observation corresponding to the goal_name + """ + return state[self.goal_name]
+ +
[docs] def distance_from_goal(self, goal: np.ndarray, state: dict) -> float: + """ + Given a state, check its distance from the goal + + :param goal: a numpy array representing the goal + :param state: a dict representing the state + :return: the distance from the goal + """ + state_value = self.goal_from_state(state) + + # calculate distance + if self.distance_metric == self.DistanceMetric.Cosine: + dist = scipy.spatial.distance.cosine(goal, state_value) + elif self.distance_metric == self.DistanceMetric.Euclidean: + dist = scipy.spatial.distance.euclidean(goal, state_value) + elif self.distance_metric == self.DistanceMetric.Manhattan: + dist = scipy.spatial.distance.cityblock(goal, state_value) + elif callable(self.distance_metric): + dist = self.distance_metric(goal, state_value) + else: + raise ValueError("The given distance metric for the goal is not valid.") + + return dist
+ +
[docs] def get_reward_for_goal_and_state(self, goal: np.ndarray, state: dict) -> Tuple[float, bool]: + """ + Given a state, check if the goal was reached and return a reward accordingly + + :param goal: a numpy array representing the goal + :param state: a dict representing the state + :return: the reward for the current goal and state pair and a boolean representing if the goal was reached + """ + dist = self.distance_from_goal(goal, state) + return self.reward_type.convert_distance_to_reward(dist)
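# A minimal usage sketch of GoalsSpace, assuming the goal is taken from a hypothetical
# 'achieved_goal' observation and that the target space is a 3-dimensional vector
# (both are assumptions made purely for illustration).
def _goals_space_usage_sketch():
    goals_space = GoalsSpace(goal_name='achieved_goal',
                             reward_type=ReachingGoal(distance_from_goal_threshold=0.05),
                             distance_metric=GoalsSpace.DistanceMetric.Euclidean)
    goals_space.set_target_space(VectorObservationSpace(3))
    goal = np.zeros(3)
    state = {'achieved_goal': np.array([0.0, 0.01, 0.02])}
    distance = goals_space.distance_from_goal(goal, state)                     # Euclidean distance, ~0.022
    reward, reached = goals_space.get_reward_for_goal_and_state(goal, state)   # (0, True): within the threshold
    return distance, reward, reached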
+ + +class AgentSelection(DiscreteActionSpace): + """ + A discrete action space which is bounded by the number of agents to select from + """ + def __init__(self, num_agents: int): + super().__init__(num_agents) + + +class SpacesDefinition(object): + """ + A container class that allows passing the definitions of all the spaces at once + """ + def __init__(self, + state: StateSpace, + goal: ObservationSpace, + action: ActionSpace, + reward: RewardSpace): + self.state = state + self.goal = goal + self.action = action + self.reward = reward +
+ + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/docs/_sources/components/additional_parameters.rst.txt b/docs/_sources/components/additional_parameters.rst.txt new file mode 100644 index 0000000..cfa8bb4 --- /dev/null +++ b/docs/_sources/components/additional_parameters.rst.txt @@ -0,0 +1,18 @@ +Additional Parameters +===================== + +VisualizationParameters +----------------------- +.. autoclass:: rl_coach.base_parameters.VisualizationParameters + +PresetValidationParameters +-------------------------- +.. autoclass:: rl_coach.base_parameters.PresetValidationParameters + +TaskParameters +-------------- +.. autoclass:: rl_coach.base_parameters.TaskParameters + +DistributedTaskParameters +------------------------- +.. autoclass:: rl_coach.base_parameters.DistributedTaskParameters diff --git a/docs/_sources/components/agents/imitation/bc.rst.txt b/docs/_sources/components/agents/imitation/bc.rst.txt new file mode 100644 index 0000000..3f9c06f --- /dev/null +++ b/docs/_sources/components/agents/imitation/bc.rst.txt @@ -0,0 +1,29 @@ +Behavioral Cloning +================== + +**Actions space:** Discrete | Continuous + +Network Structure +----------------- + +.. image:: /_static/img/design_imgs/pg.png + :align: center + + +Algorithm Description +--------------------- + +Training the network +++++++++++++++++++++ + +The replay buffer contains the expert demonstrations for the task. +These demonstrations are given as state, action tuples, and with no reward. +The training goal is to reduce the difference between the actions predicted by the network and the actions taken by +the expert for each state. + +1. Sample a batch of transitions from the replay buffer. +2. Use the current states as input to the network, and the expert actions as the targets of the network. +3. For the network head, we use the policy head, which uses the cross entropy loss function. + + +.. autoclass:: rl_coach.agents.bc_agent.BCAlgorithmParameters \ No newline at end of file diff --git a/docs/_sources/components/agents/imitation/cil.rst.txt b/docs/_sources/components/agents/imitation/cil.rst.txt new file mode 100644 index 0000000..b48c0d4 --- /dev/null +++ b/docs/_sources/components/agents/imitation/cil.rst.txt @@ -0,0 +1,36 @@ +Conditional Imitation Learning +============================== + +**Actions space:** Discrete | Continuous + +**References:** `End-to-end Driving via Conditional Imitation Learning `_ + +Network Structure +----------------- + +.. image:: /_static/img/design_imgs/cil.png + :align: center + + +Algorithm Description +--------------------- + +Training the network +++++++++++++++++++++ + +The replay buffer contains the expert demonstrations for the task. +These demonstrations are given as state, action tuples, and with no reward. +The training goal is to reduce the difference between the actions predicted by the network and the actions taken by +the expert for each state. +In conditional imitation learning, each transition is assigned a class, which determines the goal that was pursuit +in that transitions. For example, 3 possible classes can be: turn right, turn left and follow lane. + +1. Sample a batch of transitions from the replay buffer, where the batch is balanced, meaning that an equal number + of transitions will be sampled from each class index. +2. Use the current states as input to the network, and assign the expert actions as the targets of the network heads + corresponding to the state classes. 
For the other heads, set the targets to match the currently predicted values, + so that the loss for the other heads will be zeroed out. +3. We use a regression head, that minimizes the MSE loss between the network predicted values and the target values. + + +.. autoclass:: rl_coach.agents.cil_agent.CILAlgorithmParameters \ No newline at end of file diff --git a/docs/_sources/components/agents/index.rst.txt b/docs/_sources/components/agents/index.rst.txt new file mode 100644 index 0000000..1a5cd42 --- /dev/null +++ b/docs/_sources/components/agents/index.rst.txt @@ -0,0 +1,43 @@ +Agents +====== + +Coach supports many state-of-the-art reinforcement learning algorithms, which are separated into three main classes - +value optimization, policy optimization and imitation learning. +A detailed description of those algorithms can be found by navigating to each of the algorithm pages. + +.. image:: /_static/img/algorithms.png + :width: 600px + :align: center + +.. toctree:: + :maxdepth: 1 + :caption: Agents + + policy_optimization/ac + imitation/bc + value_optimization/bs_dqn + value_optimization/categorical_dqn + imitation/cil + policy_optimization/cppo + policy_optimization/ddpg + other/dfp + value_optimization/double_dqn + value_optimization/dqn + value_optimization/dueling_dqn + value_optimization/mmc + value_optimization/n_step + value_optimization/naf + value_optimization/nec + value_optimization/pal + policy_optimization/pg + policy_optimization/ppo + value_optimization/rainbow + value_optimization/qr_dqn + + +.. autoclass:: rl_coach.base_parameters.AgentParameters + +.. autoclass:: rl_coach.agents.agent.Agent + :members: + :inherited-members: + diff --git a/docs/_sources/components/agents/other/dfp.rst.txt b/docs/_sources/components/agents/other/dfp.rst.txt new file mode 100644 index 0000000..6640f56 --- /dev/null +++ b/docs/_sources/components/agents/other/dfp.rst.txt @@ -0,0 +1,39 @@ +Direct Future Prediction +======================== + +**Actions space:** Discrete + +**References:** `Learning to Act by Predicting the Future `_ + +Network Structure +----------------- + +.. image:: /_static/img/design_imgs/dfp.png + :width: 600px + :align: center + + +Algorithm Description +--------------------- +Choosing an action +++++++++++++++++++ + +1. The current states (observations and measurements) and the corresponding goal vector are passed as an input to the network. + The output of the network is the predicted future measurements for time-steps :math:`t+1,t+2,t+4,t+8,t+16` and + :math:`t+32` for each possible action. +2. For each action, the measurements of each predicted time-step are multiplied by the goal vector, + and the result is a single vector of future values for each action. +3. Then, a weighted sum of the future values of each action is calculated, and the result is a single value for each action. +4. The action values are passed to the exploration policy to decide on the action to use. + +Training the network +++++++++++++++++++++ + +Given a batch of transitions, run them through the network to get the current predictions of the future measurements +per action, and set them as the initial targets for training the network. For each transition +:math:`(s_t,a_t,r_t,s_{t+1} )` in the batch, the target of the network for the action that was taken, is the actual + measurements that were seen in time-steps :math:`t+1,t+2,t+4,t+8,t+16` and :math:`t+32`. + For the actions that were not taken, the targets are the current values. + + +.. 
autoclass:: rl_coach.agents.dfp_agent.DFPAlgorithmParameters diff --git a/docs/_sources/components/agents/policy_optimization/ac.rst.txt b/docs/_sources/components/agents/policy_optimization/ac.rst.txt new file mode 100644 index 0000000..c748e50 --- /dev/null +++ b/docs/_sources/components/agents/policy_optimization/ac.rst.txt @@ -0,0 +1,40 @@ +Actor-Critic +============ + +**Actions space:** Discrete | Continuous + +**References:** `Asynchronous Methods for Deep Reinforcement Learning `_ + +Network Structure +----------------- + +.. image:: /_static/img/design_imgs/ac.png + :width: 500px + :align: center + +Algorithm Description +--------------------- + +Choosing an action - Discrete actions ++++++++++++++++++++++++++++++++++++++ + +The policy network is used in order to predict action probabilites. While training, a sample is taken from a categorical +distribution assigned with these probabilities. When testing, the action with the highest probability is used. + +Training the network +++++++++++++++++++++ +A batch of :math:`T_{max}` transitions is used, and the advantages are calculated upon it. + +Advantages can be calculated by either of the following methods (configured by the selected preset) - + +1. **A_VALUE** - Estimating advantage directly: + :math:`A(s_t, a_t) = \underbrace{\sum_{i=t}^{i=t + k - 1} \gamma^{i-t}r_i +\gamma^{k} V(s_{t+k})}_{Q(s_t, a_t)} - V(s_t)` + where :math:`k` is :math:`T_{max} - State\_Index` for each state in the batch. + +2. **GAE** - By following the `Generalized Advantage Estimation `_ paper. + +The advantages are then used in order to accumulate gradients according to +:math:`L = -\mathop{\mathbb{E}} [log (\pi) \cdot A]` + + +.. autoclass:: rl_coach.agents.actor_critic_agent.ActorCriticAlgorithmParameters \ No newline at end of file diff --git a/docs/_sources/components/agents/policy_optimization/cppo.rst.txt b/docs/_sources/components/agents/policy_optimization/cppo.rst.txt new file mode 100644 index 0000000..b2cba5d --- /dev/null +++ b/docs/_sources/components/agents/policy_optimization/cppo.rst.txt @@ -0,0 +1,44 @@ +Clipped Proximal Policy Optimization +==================================== + +**Actions space:** Discrete | Continuous + +**References:** `Proximal Policy Optimization Algorithms `_ + +Network Structure +----------------- + +.. image:: /_static/img/design_imgs/ppo.png + :align: center + +Algorithm Description +--------------------- +Choosing an action - Continuous action +++++++++++++++++++++++++++++++++++++++ + +Same as in PPO. + +Training the network +++++++++++++++++++++ + +Very similar to PPO, with several small (but very simplifying) changes: + +1. Train both the value and policy networks, simultaneously, by defining a single loss function, + which is the sum of each of the networks loss functions. Then, back propagate gradients only once from this unified loss function. + +2. The unified network's optimizer is set to Adam (instead of L-BFGS for the value network as in PPO). + +3. Value targets are now also calculated based on the GAE advantages. + In this method, the :math:`V` values are predicted from the critic network, and then added to the GAE based advantages, + in order to get a :math:`Q` value for each action. Now, since our critic network is predicting a :math:`V` value for + each state, setting the :math:`Q` calculated action-values as a target, will on average serve as a :math:`V` state-value target. + +4. 
Instead of adapting the penalizing KL divergence coefficient used in PPO, the likelihood ratio + :math:`r_t(\theta) =\frac{\pi_{\theta}(a|s)}{\pi_{\theta_{old}}(a|s)}` is clipped, to achieve a similar effect. + This is done by defining the policy's loss function to be the minimum between the standard surrogate loss and an epsilon + clipped surrogate loss: + + :math:`L^{CLIP}(\theta)=E_{t}[min(r_t(\theta)\cdot \hat{A}_t, clip(r_t(\theta), 1-\epsilon, 1+\epsilon) \cdot \hat{A}_t)]` + + +.. autoclass:: rl_coach.agents.clipped_ppo_agent.ClippedPPOAlgorithmParameters \ No newline at end of file diff --git a/docs/_sources/components/agents/policy_optimization/ddpg.rst.txt b/docs/_sources/components/agents/policy_optimization/ddpg.rst.txt new file mode 100644 index 0000000..d136ab4 --- /dev/null +++ b/docs/_sources/components/agents/policy_optimization/ddpg.rst.txt @@ -0,0 +1,50 @@ +Deep Deterministic Policy Gradient +================================== + +**Actions space:** Continuous + +**References:** `Continuous control with deep reinforcement learning `_ + +Network Structure +----------------- + +.. image:: /_static/img/design_imgs/ddpg.png + :align: center + +Algorithm Description +--------------------- +Choosing an action +++++++++++++++++++ + +Pass the current states through the actor network, and get an action mean vector :math:`\mu`. +While in training phase, use a continuous exploration policy, such as the Ornstein-Uhlenbeck process, +to add exploration noise to the action. When testing, use the mean vector :math:`\mu` as-is. + +Training the network +++++++++++++++++++++ + +Start by sampling a batch of transitions from the experience replay. + +* To train the **critic network**, use the following targets: + + :math:`y_t=r(s_t,a_t )+\gamma \cdot Q(s_{t+1},\mu(s_{t+1} ))` + + First run the actor target network, using the next states as the inputs, and get :math:`\mu (s_{t+1} )`. + Next, run the critic target network using the next states and :math:`\mu (s_{t+1} )`, and use the output to + calculate :math:`y_t` according to the equation above. To train the network, use the current states and actions + as the inputs, and :math:`y_t` as the targets. + +* To train the **actor network**, use the following equation: + + :math:`\nabla_{\theta^\mu } J \approx E_{s_t \tilde{} \rho^\beta } [\nabla_a Q(s,a)|_{s=s_t,a=\mu (s_t ) } \cdot \nabla_{\theta^\mu} \mu(s)|_{s=s_t} ]` + + Use the actor's online network to get the action mean values using the current states as the inputs. + Then, use the critic online network in order to get the gradients of the critic output with respect to the + action mean values :math:`\nabla _a Q(s,a)|_{s=s_t,a=\mu(s_t ) }`. + Using the chain rule, calculate the gradients of the actor's output, with respect to the actor weights, + given :math:`\nabla_a Q(s,a)`. Finally, apply those gradients to the actor network. + +After every training step, do a soft update of the critic and actor target networks' weights from the online networks. + + +.. 
autoclass:: rl_coach.agents.ddpg_agent.DDPGAlgorithmParameters \ No newline at end of file diff --git a/docs/_sources/components/agents/policy_optimization/hac.rst.txt b/docs/_sources/components/agents/policy_optimization/hac.rst.txt new file mode 100644 index 0000000..b177b53 --- /dev/null +++ b/docs/_sources/components/agents/policy_optimization/hac.rst.txt @@ -0,0 +1,24 @@ +Hierarchical Actor Critic +========================= + +**Actions space:** Continuous + +**References:** `Hierarchical Reinforcement Learning with Hindsight `_ + +Network Structure +----------------- + +.. image:: /_static/img/design_imgs/ddpg.png + :align: center + +Algorithm Description +--------------------- +Choosing an action +++++++++++++++++++ + +Pass the current states through the actor network, and get an action mean vector :math:`\mu`. +While in training phase, use a continuous exploration policy, such as the Ornstein-Uhlenbeck process, +to add exploration noise to the action. When testing, use the mean vector :math:`\mu` as-is. + +Training the network +++++++++++++++++++++ diff --git a/docs/_sources/components/agents/policy_optimization/pg.rst.txt b/docs/_sources/components/agents/policy_optimization/pg.rst.txt new file mode 100644 index 0000000..ac0feaa --- /dev/null +++ b/docs/_sources/components/agents/policy_optimization/pg.rst.txt @@ -0,0 +1,39 @@ +Policy Gradient +=============== + +**Actions space:** Discrete | Continuous + +**References:** `Simple Statistical Gradient-Following Algorithms for Connectionist Reinforcement Learning `_ + +Network Structure +----------------- + +.. image:: /_static/img/design_imgs/pg.png + :align: center + +Algorithm Description +--------------------- +Choosing an action - Discrete actions ++++++++++++++++++++++++++++++++++++++ +Run the current states through the network and get a policy distribution over the actions. +While training, sample from the policy distribution. When testing, take the action with the highest probability. + +Training the network +++++++++++++++++++++ +The policy head loss is defined as :math:`L=-log (\pi) \cdot PolicyGradientRescaler`. +The :code:`PolicyGradientRescaler` is used in order to reduce the policy gradient variance, which might be very noisy. +This is done in order to reduce the variance of the updates, since noisy gradient updates might destabilize the policy's +convergence. The rescaler is a configurable parameter and there are few options to choose from: + +* **Total Episode Return** - The sum of all the discounted rewards during the episode. +* **Future Return** - Return from each transition until the end of the episode. +* **Future Return Normalized by Episode** - Future returns across the episode normalized by the episode's mean and standard deviation. +* **Future Return Normalized by Timestep** - Future returns normalized using running means and standard deviations, + which are calculated seperately for each timestep, across different episodes. + +Gradients are accumulated over a number of full played episodes. The gradients accumulation over several episodes +serves the same purpose - reducing the update variance. After accumulating gradients for several episodes, +the gradients are then applied to the network. + + +.. 
autoclass:: rl_coach.agents.policy_gradients_agent.PolicyGradientAlgorithmParameters \ No newline at end of file diff --git a/docs/_sources/components/agents/policy_optimization/ppo.rst.txt b/docs/_sources/components/agents/policy_optimization/ppo.rst.txt new file mode 100644 index 0000000..ea4ee39 --- /dev/null +++ b/docs/_sources/components/agents/policy_optimization/ppo.rst.txt @@ -0,0 +1,45 @@ +Proximal Policy Optimization +============================ + +**Actions space:** Discrete | Continuous + +**References:** `Proximal Policy Optimization Algorithms `_ + +Network Structure +----------------- + +.. image:: /_static/img/design_imgs/ppo.png + :align: center + + +Algorithm Description +--------------------- +Choosing an action - Continuous actions ++++++++++++++++++++++++++++++++++++++++ +Run the observation through the policy network, and get the mean and standard deviation vectors for this observation. +While in training phase, sample from a multi-dimensional Gaussian distribution with these mean and standard deviation values. +When testing, just take the mean values predicted by the network. + +Training the network +++++++++++++++++++++ + +1. Collect a big chunk of experience (in the order of thousands of transitions, sampled from multiple episodes). + +2. Calculate the advantages for each transition, using the *Generalized Advantage Estimation* method (Schulman '2015). + +3. Run a single training iteration of the value network using an L-BFGS optimizer. Unlike first order optimizers, + the L-BFGS optimizer runs on the entire dataset at once, without batching. + It continues running until some low loss threshold is reached. To prevent overfitting to the current dataset, + the value targets are updated in a soft manner, using an Exponentially Weighted Moving Average, based on the total + discounted returns of each state in each episode. + +4. Run several training iterations of the policy network. This is done by using the previously calculated advantages as + targets. The loss function penalizes policies that deviate too far from the old policy (the policy that was used *before* + starting to run the current set of training iterations) using a regularization term. + +5. After training is done, the last sampled KL divergence value will be compared with the *target KL divergence* value, + in order to adapt the penalty coefficient used in the policy loss. If the KL divergence went too high, + increase the penalty, if it went too low, reduce it. Otherwise, leave it unchanged. + + +.. autoclass:: rl_coach.agents.ppo_agent.PPOAlgorithmParameters \ No newline at end of file diff --git a/docs/_sources/components/agents/value_optimization/bs_dqn.rst.txt b/docs/_sources/components/agents/value_optimization/bs_dqn.rst.txt new file mode 100644 index 0000000..0b92eae --- /dev/null +++ b/docs/_sources/components/agents/value_optimization/bs_dqn.rst.txt @@ -0,0 +1,43 @@ +Bootstrapped DQN +================ + +**Actions space:** Discrete + +**References:** `Deep Exploration via Bootstrapped DQN `_ + +Network Structure +----------------- + +.. image:: /_static/img/design_imgs/bs_dqn.png + :align: center + +Algorithm Description +--------------------- +Choosing an action +++++++++++++++++++ +The current states are used as the input to the network. The network contains several $Q$ heads, which are used +for returning different estimations of the action :math:`Q` values. For each episode, the bootstrapped exploration policy +selects a single head to play with during the episode. 
According to the selected head, only the relevant +output :math:`Q` values are used. Using those :math:`Q` values, the exploration policy then selects the action for acting. + +Storing the transitions ++++++++++++++++++++++++ +For each transition, a Binomial mask is generated according to a predefined probability, and the number of output heads. +The mask is a binary vector where each element holds a 0 for heads that shouldn't train on the specific transition, +and 1 for heads that should use the transition for training. The mask is stored as part of the transition info in +the replay buffer. + +Training the network +++++++++++++++++++++ +First, sample a batch of transitions from the replay buffer. Run the current states through the network and get the +current :math:`Q` value predictions for all the heads and all the actions. For each transition in the batch, +and for each output head, if the transition mask is 1 - change the targets of the played action to :math:`y_t`, +according to the standard DQN update rule: + +:math:`y_t=r(s_t,a_t )+\gamma\cdot max_a Q(s_{t+1},a)` + +Otherwise, leave it intact so that the transition does not affect the learning of this head. +Then, train the online network according to the calculated targets. + +As in DQN, once in every few thousand steps, copy the weights from the online network to the target network. + diff --git a/docs/_sources/components/agents/value_optimization/categorical_dqn.rst.txt b/docs/_sources/components/agents/value_optimization/categorical_dqn.rst.txt new file mode 100644 index 0000000..dc07872 --- /dev/null +++ b/docs/_sources/components/agents/value_optimization/categorical_dqn.rst.txt @@ -0,0 +1,39 @@ +Categorical DQN +=============== + +**Actions space:** Discrete + +**References:** `A Distributional Perspective on Reinforcement Learning `_ + +Network Structure +----------------- + +.. image:: /_static/img/design_imgs/distributional_dqn.png + :align: center + +Algorithm Description +--------------------- + +Training the network +++++++++++++++++++++ + +1. Sample a batch of transitions from the replay buffer. + +2. The Bellman update is projected to the set of atoms representing the :math:`Q` values distribution, such + that the :math:`i-th` component of the projected update is calculated as follows: + + :math:`(\Phi \hat{T} Z_{\theta}(s_t,a_t))_i=\sum_{j=0}^{N-1}\Big[1-\frac{\lvert[\hat{T}_{z_{j}}]^{V_{MAX}}_{V_{MIN}}-z_i\rvert}{\Delta z}\Big]^1_0 \ p_j(s_{t+1}, \pi(s_{t+1}))` + + where: + * :math:`[ \cdot ]` bounds its argument in the range :math:`[a, b]` + * :math:`\hat{T}_{z_{j}}` is the Bellman update for atom :math:`z_j`: :math:`\hat{T}_{z_{j}} := r+\gamma z_j` + + +3. Network is trained with the cross entropy loss between the resulting probability distribution and the target + probability distribution. Only the target of the actions that were actually taken is updated. + +4. Once in every few thousand steps, weights are copied from the online network to the target network. + + + +.. 
autoclass:: rl_coach.agents.categorical_dqn_agent.CategoricalDQNAlgorithmParameters diff --git a/docs/_sources/components/agents/value_optimization/double_dqn.rst.txt b/docs/_sources/components/agents/value_optimization/double_dqn.rst.txt new file mode 100644 index 0000000..cb29797 --- /dev/null +++ b/docs/_sources/components/agents/value_optimization/double_dqn.rst.txt @@ -0,0 +1,35 @@ +Double DQN +========== + +**Actions space:** Discrete + +**References:** `Deep Reinforcement Learning with Double Q-learning `_ + +Network Structure +----------------- + +.. image:: /_static/img/design_imgs/dqn.png + :align: center + +Algorithm Description +--------------------- + +Training the network +++++++++++++++++++++ + +1. Sample a batch of transitions from the replay buffer. + +2. Using the next states from the sampled batch, run the online network in order to find the $Q$ maximizing + action :math:`argmax_a Q(s_{t+1},a)`. For these actions, use the corresponding next states and run the target + network to calculate :math:`Q(s_{t+1},argmax_a Q(s_{t+1},a))`. + +3. In order to zero out the updates for the actions that were not played (resulting from zeroing the MSE loss), + use the current states from the sampled batch, and run the online network to get the current Q values predictions. + Set those values as the targets for the actions that were not actually played. + +4. For each action that was played, use the following equation for calculating the targets of the network: + :math:`y_t=r(s_t,a_t )+\gamma \cdot Q(s_{t+1},argmax_a Q(s_{t+1},a))` + +5. Finally, train the online network using the current states as inputs, and with the aforementioned targets. + +6. Once in every few thousand steps, copy the weights from the online network to the target network. diff --git a/docs/_sources/components/agents/value_optimization/dqn.rst.txt b/docs/_sources/components/agents/value_optimization/dqn.rst.txt new file mode 100644 index 0000000..4882e38 --- /dev/null +++ b/docs/_sources/components/agents/value_optimization/dqn.rst.txt @@ -0,0 +1,37 @@ +Deep Q Networks +=============== + +**Actions space:** Discrete + +**References:** `Playing Atari with Deep Reinforcement Learning `_ + +Network Structure +----------------- + +.. image:: /_static/img/design_imgs/dqn.png + :align: center + +Algorithm Description +--------------------- + +Training the network +++++++++++++++++++++ + +1. Sample a batch of transitions from the replay buffer. + +2. Using the next states from the sampled batch, run the target network to calculate the :math:`Q` values for each of + the actions :math:`Q(s_{t+1},a)`, and keep only the maximum value for each state. + +3. In order to zero out the updates for the actions that were not played (resulting from zeroing the MSE loss), + use the current states from the sampled batch, and run the online network to get the current Q values predictions. + Set those values as the targets for the actions that were not actually played. + +4. For each action that was played, use the following equation for calculating the targets of the network:​ $$ y_t=r(s_t,a_t)+γ\cdot max_a {Q(s_{t+1},a)} $$ + :math:`y_t=r(s_t,a_t )+\gamma \cdot max_a Q(s_{t+1})` + +5. Finally, train the online network using the current states as inputs, and with the aforementioned targets. + +6. Once in every few thousand steps, copy the weights from the online network to the target network. + + +.. 
autoclass:: rl_coach.agents.dqn_agent.DQNAlgorithmParameters diff --git a/docs/_sources/components/agents/value_optimization/dueling_dqn.rst.txt b/docs/_sources/components/agents/value_optimization/dueling_dqn.rst.txt new file mode 100644 index 0000000..d29b305 --- /dev/null +++ b/docs/_sources/components/agents/value_optimization/dueling_dqn.rst.txt @@ -0,0 +1,27 @@ +Dueling DQN +=========== + +**Actions space:** Discrete + +**References:** `Dueling Network Architectures for Deep Reinforcement Learning `_ + +Network Structure +----------------- + +.. image:: /_static/img/design_imgs/dueling_dqn.png + :align: center + +General Description +------------------- +Dueling DQN presents a change in the network structure comparing to DQN. + +Dueling DQN uses a specialized *Dueling Q Head* in order to separate :math:`Q` to an :math:`A` (advantage) +stream and a :math:`V` stream. Adding this type of structure to the network head allows the network to better differentiate +actions from one another, and significantly improves the learning. + +In many states, the values of the different actions are very similar, and it is less important which action to take. +This is especially important in environments where there are many actions to choose from. In DQN, on each training +iteration, for each of the states in the batch, we update the :ath:`Q` values only for the specific actions taken in +those states. This results in slower learning as we do not learn the :math:`Q` values for actions that were not taken yet. +On dueling architecture, on the other hand, learning is faster - as we start learning the state-value even if only a +single action has been taken at this state. \ No newline at end of file diff --git a/docs/_sources/components/agents/value_optimization/mmc.rst.txt b/docs/_sources/components/agents/value_optimization/mmc.rst.txt new file mode 100644 index 0000000..c96b4ca --- /dev/null +++ b/docs/_sources/components/agents/value_optimization/mmc.rst.txt @@ -0,0 +1,37 @@ +Mixed Monte Carlo +================= + +**Actions space:** Discrete + +**References:** `Count-Based Exploration with Neural Density Models `_ + +Network Structure +----------------- + +.. image:: /_static/img/design_imgs/dqn.png + :align: center + +Algorithm Description +--------------------- +Training the network +++++++++++++++++++++ + +In MMC, targets are calculated as a mixture between Double DQN targets and full Monte Carlo samples (total discounted returns). + +The DDQN targets are calculated in the same manner as in the DDQN agent: + +:math:`y_t^{DDQN}=r(s_t,a_t )+\gamma Q(s_{t+1},argmax_a Q(s_{t+1},a))` + +The Monte Carlo targets are calculated by summing up the discounted rewards across the entire episode: + +:math:`y_t^{MC}=\sum_{j=0}^T\gamma^j r(s_{t+j},a_{t+j} )` + +A mixing ratio $\alpha$ is then used to get the final targets: + +:math:`y_t=(1-\alpha)\cdot y_t^{DDQN}+\alpha \cdot y_t^{MC}` + +Finally, the online network is trained using the current states as inputs, and the calculated targets. +Once in every few thousand steps, copy the weights from the online network to the target network. + + +.. 
autoclass:: rl_coach.agents.mmc_agent.MixedMonteCarloAlgorithmParameters diff --git a/docs/_sources/components/agents/value_optimization/n_step.rst.txt b/docs/_sources/components/agents/value_optimization/n_step.rst.txt new file mode 100644 index 0000000..6ff0722 --- /dev/null +++ b/docs/_sources/components/agents/value_optimization/n_step.rst.txt @@ -0,0 +1,35 @@ +N-Step Q Learning +================= + +**Actions space:** Discrete + +**References:** `Asynchronous Methods for Deep Reinforcement Learning `_ + +Network Structure +----------------- + +.. image:: /_static/img/design_imgs/dqn.png + :align: center + +Algorithm Description +--------------------- + +Training the network +++++++++++++++++++++ + +The :math:`N`-step Q learning algorithm works in similar manner to DQN except for the following changes: + +1. No replay buffer is used. Instead of sampling random batches of transitions, the network is trained every + :math:`N` steps using the latest :math:`N` steps played by the agent. + +2. In order to stabilize the learning, multiple workers work together to update the network. + This creates the same effect as uncorrelating the samples used for training. + +3. Instead of using single-step Q targets for the network, the rewards from $N$ consequent steps are accumulated + to form the :math:`N`-step Q targets, according to the following equation: + :math:`R(s_t, a_t) = \sum_{i=t}^{i=t + k - 1} \gamma^{i-t}r_i +\gamma^{k} V(s_{t+k})` + where :math:`k` is :math:`T_{max} - State\_Index` for each state in the batch + + + +.. autoclass:: rl_coach.agents.n_step_q_agent.NStepQAlgorithmParameters diff --git a/docs/_sources/components/agents/value_optimization/naf.rst.txt b/docs/_sources/components/agents/value_optimization/naf.rst.txt new file mode 100644 index 0000000..8d7df05 --- /dev/null +++ b/docs/_sources/components/agents/value_optimization/naf.rst.txt @@ -0,0 +1,33 @@ +Normalized Advantage Functions +============================== + +**Actions space:** Continuous + +**References:** `Continuous Deep Q-Learning with Model-based Acceleration `_ + +Network Structure +----------------- + +.. image:: /_static/img/design_imgs/naf.png + :width: 600px + :align: center + +Algorithm Description +--------------------- +Choosing an action +++++++++++++++++++ +The current state is used as an input to the network. The action mean :math:`\mu(s_t )` is extracted from the output head. +It is then passed to the exploration policy which adds noise in order to encourage exploration. + +Training the network +++++++++++++++++++++ +The network is trained by using the following targets: +:math:`y_t=r(s_t,a_t )+\gamma\cdot V(s_{t+1})` +Use the next states as the inputs to the target network and extract the :math:`V` value, from within the head, +to get :math:`V(s_{t+1} )`. Then, update the online network using the current states and actions as inputs, +and :math:`y_t` as the targets. +After every training step, use a soft update in order to copy the weights from the online network to the target network. + + + +.. 
autoclass:: rl_coach.agents.naf_agent.NAFAlgorithmParameters diff --git a/docs/_sources/components/agents/value_optimization/nec.rst.txt b/docs/_sources/components/agents/value_optimization/nec.rst.txt new file mode 100644 index 0000000..7410a9e --- /dev/null +++ b/docs/_sources/components/agents/value_optimization/nec.rst.txt @@ -0,0 +1,50 @@ +Neural Episodic Control +======================= + +**Actions space:** Discrete + +**References:** `Neural Episodic Control `_ + +Network Structure +----------------- + +.. image:: /_static/img/design_imgs/nec.png + :width: 500px + :align: center + +Algorithm Description +--------------------- +Choosing an action +++++++++++++++++++ + +1. Use the current state as an input to the online network and extract the state embedding, which is the intermediate + output from the middleware. + +2. For each possible action :math:`a_i`, run the DND head using the state embedding and the selected action :math:`a_i` as inputs. + The DND is queried and returns the :math:`P` nearest neighbor keys and values. The keys and values are used to calculate + and return the action :math:`Q` value from the network. + +3. Pass all the :math:`Q` values to the exploration policy and choose an action accordingly. + +4. Store the state embeddings and actions taken during the current episode in a small buffer :math:`B`, in order to + accumulate transitions until it is possible to calculate the total discounted returns over the entire episode. + +Finalizing an episode ++++++++++++++++++++++ +For each step in the episode, the state embeddings and the taken actions are stored in the buffer :math:`B`. +When the episode is finished, the replay buffer calculates the :math:`N`-step total return of each transition in the +buffer, bootstrapped using the maximum :math:`Q` value of the :math:`N`-th transition. Those values are inserted +along with the total return into the DND, and the buffer :math:`B` is reset. + +Training the network +++++++++++++++++++++ +Train the network only when the DND has enough entries for querying. + +To train the network, the current states are used as the inputs and the :math:`N`-step returns are used as the targets. +The :math:`N`-step return used takes into account :math:`N` consecutive steps, and bootstraps the last value from +the network if necessary: +:math:`y_t=\sum_{j=0}^{N-1}\gamma^j r(s_{t+j},a_{t+j} ) +\gamma^N max_a Q(s_{t+N},a)` + + + +.. autoclass:: rl_coach.agents.nec_agent.NECAlgorithmParameters diff --git a/docs/_sources/components/agents/value_optimization/pal.rst.txt b/docs/_sources/components/agents/value_optimization/pal.rst.txt new file mode 100644 index 0000000..9ebcba6 --- /dev/null +++ b/docs/_sources/components/agents/value_optimization/pal.rst.txt @@ -0,0 +1,45 @@ +Persistent Advantage Learning +============================= + +**Actions space:** Discrete + +**References:** `Increasing the Action Gap: New Operators for Reinforcement Learning `_ + +Network Structure +----------------- + +.. image:: /_static/img/design_imgs/dqn.png + :align: center + +Algorithm Description +--------------------- +Training the network +++++++++++++++++++++ + +1. Sample a batch of transitions from the replay buffer. + +2. Start by calculating the initial target values in the same manner as they are calculated in DDQN + :math:`y_t^{DDQN}=r(s_t,a_t )+\gamma Q(s_{t+1},argmax_a Q(s_{t+1},a))` + +3. The action gap :math:`V(s_t )-Q(s_t,a_t)` should then be subtracted from each of the calculated targets. 
+ To calculate the action gap, run the target network using the current states and get the :math:`Q` values + for all the actions. Then estimate :math:`V` as the maximum predicted :math:`Q` value for the current state: + :math:`V(s_t )=max_a Q(s_t,a)` + +4. For *advantage learning (AL)*, reduce the action gap weighted by a predefined parameter :math:`\alpha` from + the targets :math:`y_t^{DDQN}`: + :math:`y_t=y_t^{DDQN}-\alpha \cdot (V(s_t )-Q(s_t,a_t ))` + +5. For *persistent advantage learning (PAL)*, the target network is also used in order to calculate the action + gap for the next state: + :math:`V(s_{t+1} )-Q(s_{t+1},a_{t+1})` + where :math:`a_{t+1}` is chosen by running the next states through the online network and choosing the action that + has the highest predicted :math:`Q` value. Finally, the targets will be defined as - + :math:`y_t=y_t^{DDQN}-\alpha \cdot min(V(s_t )-Q(s_t,a_t ),V(s_{t+1} )-Q(s_{t+1},a_{t+1} ))` + +6. Train the online network using the current states as inputs, and with the aforementioned targets. + +7. Once in every few thousand steps, copy the weights from the online network to the target network. + + +.. autoclass:: rl_coach.agents.pal_agent.PALAlgorithmParameters diff --git a/docs/_sources/components/agents/value_optimization/qr_dqn.rst.txt b/docs/_sources/components/agents/value_optimization/qr_dqn.rst.txt new file mode 100644 index 0000000..88bb5c3 --- /dev/null +++ b/docs/_sources/components/agents/value_optimization/qr_dqn.rst.txt @@ -0,0 +1,33 @@ +Quantile Regression DQN +======================= + +**Actions space:** Discrete + +**References:** `Distributional Reinforcement Learning with Quantile Regression `_ + +Network Structure +----------------- + +.. image:: /_static/img/design_imgs/qr_dqn.png + :align: center + +Algorithm Description +--------------------- + +Training the network +++++++++++++++++++++ + +1. Sample a batch of transitions from the replay buffer. + +2. First, the next state quantiles are predicted. These are used in order to calculate the targets for the network, + by following the Bellman equation. + Next, the current quantile locations for the current states are predicted, sorted, and used for calculating the + quantile midpoints targets. + +3. The network is trained with the quantile regression loss between the resulting quantile locations and the target + quantile locations. Only the targets of the actions that were actually taken are updated. + +4. Once in every few thousand steps, weights are copied from the online network to the target network. + + +.. autoclass:: rl_coach.agents.qr_dqn_agent.QuantileRegressionDQNAlgorithmParameters \ No newline at end of file diff --git a/docs/_sources/components/agents/value_optimization/rainbow.rst.txt b/docs/_sources/components/agents/value_optimization/rainbow.rst.txt new file mode 100644 index 0000000..5c2b443 --- /dev/null +++ b/docs/_sources/components/agents/value_optimization/rainbow.rst.txt @@ -0,0 +1,51 @@ +Rainbow +======= + +**Actions space:** Discrete + +**References:** `Rainbow: Combining Improvements in Deep Reinforcement Learning `_ + +Network Structure +----------------- + +.. image:: /_static/img/design_imgs/rainbow.png + :align: center + +Algorithm Description +--------------------- + +Rainbow combines 6 recent advancements in reinforcement learning: + +* N-step returns +* Distributional state-action value learning +* Dueling networks +* Noisy Networks +* Double DQN +* Prioritized Experience Replay + +Training the network +++++++++++++++++++++ + +1. 
Sample a batch of transitions from the replay buffer. + +2. The Bellman update is projected to the set of atoms representing the :math:`Q` values distribution, such + that the :math:`i-th` component of the projected update is calculated as follows: + + :math:`(\Phi \hat{T} Z_{\theta}(s_t,a_t))_i=\sum_{j=0}^{N-1}\Big[1-\frac{\lvert[\hat{T}_{z_{j}}]^{V_{MAX}}_{V_{MIN}}-z_i\rvert}{\Delta z}\Big]^1_0 \ p_j(s_{t+1}, \pi(s_{t+1}))` + + where: + * :math:`[ \cdot ]` bounds its argument in the range :math:`[a, b]` + * :math:`\hat{T}_{z_{j}}` is the Bellman update for atom + :math:`z_j`: :math:`\hat{T}_{z_{j}} := r_t+\gamma r_{t+1} + ... + \gamma r_{t+n-1} + \gamma^{n-1} z_j` + + +3. Network is trained with the cross entropy loss between the resulting probability distribution and the target + probability distribution. Only the target of the actions that were actually taken is updated. + +4. Once in every few thousand steps, weights are copied from the online network to the target network. + +5. After every training step, the priorities of the batch transitions are updated in the prioritized replay buffer + using the KL divergence loss that is returned from the network. + + +.. autoclass:: rl_coach.agents.rainbow_dqn_agent.RainbowDQNAlgorithmParameters diff --git a/docs/_sources/components/architectures/index.rst.txt b/docs/_sources/components/architectures/index.rst.txt new file mode 100644 index 0000000..3e3fa83 --- /dev/null +++ b/docs/_sources/components/architectures/index.rst.txt @@ -0,0 +1,27 @@ +Architectures +============= + +Architectures contain all the classes that implement the neural network related stuff for the agent. +Since Coach is intended to work with multiple neural network frameworks, each framework will implement its +own components under a dedicated directory. For example, tensorflow components will contain all the neural network +parts that are implemented using TensorFlow. + +.. autoclass:: rl_coach.base_parameters.NetworkParameters + +Architecture +------------ +.. autoclass:: rl_coach.architectures.architecture.Architecture + :members: + :inherited-members: + +NetworkWrapper +-------------- + +.. image:: /_static/img/distributed.png + :width: 600px + :align: center + +.. autoclass:: rl_coach.architectures.network_wrapper.NetworkWrapper + :members: + :inherited-members: + diff --git a/docs/_sources/components/core_types.rst.txt b/docs/_sources/components/core_types.rst.txt new file mode 100644 index 0000000..5202c22 --- /dev/null +++ b/docs/_sources/components/core_types.rst.txt @@ -0,0 +1,33 @@ +Core Types +========== + +ActionInfo +---------- +.. autoclass:: rl_coach.core_types.ActionInfo + :members: + :inherited-members: + +Batch +----- +.. autoclass:: rl_coach.core_types.Batch + :members: + :inherited-members: + +EnvResponse +----------- +.. autoclass:: rl_coach.core_types.EnvResponse + :members: + :inherited-members: + +Episode +------- +.. autoclass:: rl_coach.core_types.Episode + :members: + :inherited-members: + +Transition +---------- +.. autoclass:: rl_coach.core_types.Transition + :members: + :inherited-members: + diff --git a/docs/_sources/components/environments/index.rst.txt b/docs/_sources/components/environments/index.rst.txt new file mode 100644 index 0000000..5f0d20f --- /dev/null +++ b/docs/_sources/components/environments/index.rst.txt @@ -0,0 +1,70 @@ +Environments +============ + +.. 
autoclass:: rl_coach.environments.environment.Environment + :members: + :inherited-members: + +DeepMind Control Suite +---------------------- + +A set of reinforcement learning environments powered by the MuJoCo physics engine. + +Website: `DeepMind Control Suite `_ + +.. autoclass:: rl_coach.environments.control_suite_environment.ControlSuiteEnvironment + + +Blizzard Starcraft II +--------------------- + +A popular strategy game which was wrapped with a python interface by DeepMind. + +Website: `Blizzard Starcraft II `_ + +.. autoclass:: rl_coach.environments.starcraft2_environment.StarCraft2Environment + + +ViZDoom +-------- + +A Doom-based AI research platform for reinforcement learning from raw visual information. + +Website: `ViZDoom `_ + +.. autoclass:: rl_coach.environments.doom_environment.DoomEnvironment + + +CARLA +----- + +An open-source simulator for autonomous driving research. + +Website: `CARLA `_ + +.. autoclass:: rl_coach.environments.carla_environment.CarlaEnvironment + +OpenAI Gym +---------- + +A library which consists of a set of environments, from games to robotics. +Additionally, it can be extended using the API defined by the authors. + +Website: `OpenAI Gym `_ + +In Coach, we support all the native environments in Gym, along with several extensions such as: + +* `Roboschool `_ - a set of environments powered by the PyBullet engine, + that offer a free alternative to MuJoCo. + +* `Gym Extensions `_ - a set of environments that extends Gym for + auxiliary tasks (multitask learning, transfer learning, inverse reinforcement learning, etc.) + +* `PyBullet `_ - a physics engine that + includes a set of robotics environments. + + +.. autoclass:: rl_coach.environments.gym_environment.GymEnvironment + + + diff --git a/docs/_sources/components/exploration_policies/index.rst.txt b/docs/_sources/components/exploration_policies/index.rst.txt new file mode 100644 index 0000000..10b6c77 --- /dev/null +++ b/docs/_sources/components/exploration_policies/index.rst.txt @@ -0,0 +1,87 @@ +Exploration Policies +==================== + +Exploration policies are a component that allow the agent to tradeoff exploration and exploitation according to a +predefined policy. This is one of the most important aspects of reinforcement learning agents, and can require some +tuning to get it right. Coach supports several pre-defined exploration policies, and it can be easily extended with +custom policies. Note that not all exploration policies are expected to work for both discrete and continuous action +spaces. + +.. role:: green +.. 
role:: red + ++----------------------+-----------------------+------------------+ +| Exploration Policy | Discrete Action Space | Box Action Space | ++======================+=======================+==================+ +| AdditiveNoise | :red:`X` | :green:`V` | ++----------------------+-----------------------+------------------+ +| Boltzmann | :green:`V` | :red:`X` | ++----------------------+-----------------------+------------------+ +| Bootstrapped | :green:`V` | :red:`X` | ++----------------------+-----------------------+------------------+ +| Categorical | :green:`V` | :red:`X` | ++----------------------+-----------------------+------------------+ +| ContinuousEntropy | :red:`X` | :green:`V` | ++----------------------+-----------------------+------------------+ +| EGreedy | :green:`V` | :green:`V` | ++----------------------+-----------------------+------------------+ +| Greedy | :green:`V` | :green:`V` | ++----------------------+-----------------------+------------------+ +| OUProcess | :red:`X` | :green:`V` | ++----------------------+-----------------------+------------------+ +| ParameterNoise | :green:`V` | :green:`V` | ++----------------------+-----------------------+------------------+ +| TruncatedNormal | :red:`X` | :green:`V` | ++----------------------+-----------------------+------------------+ +| UCB | :green:`V` | :red:`X` | ++----------------------+-----------------------+------------------+ + +ExplorationPolicy +----------------- +.. autoclass:: rl_coach.exploration_policies.ExplorationPolicy + :members: + :inherited-members: + +AdditiveNoise +------------- +.. autoclass:: rl_coach.exploration_policies.AdditiveNoise + +Boltzmann +--------- +.. autoclass:: rl_coach.exploration_policies.Boltzmann + +Bootstrapped +------------ +.. autoclass:: rl_coach.exploration_policies.Bootstrapped + +Categorical +----------- +.. autoclass:: rl_coach.exploration_policies.Categorical + +ContinuousEntropy +----------------- +.. autoclass:: rl_coach.exploration_policies.ContinuousEntropy + +EGreedy +------- +.. autoclass:: rl_coach.exploration_policies.EGreedy + +Greedy +------ +.. autoclass:: rl_coach.exploration_policies.Greedy + +OUProcess +--------- +.. autoclass:: rl_coach.exploration_policies.OUProcess + +ParameterNoise +-------------- +.. autoclass:: rl_coach.exploration_policies.ParameterNoise + +TruncatedNormal +--------------- +.. autoclass:: rl_coach.exploration_policies.TruncatedNormal + +UCB +--- +.. autoclass:: rl_coach.exploration_policies.UCB \ No newline at end of file diff --git a/docs/_sources/components/filters/index.rst.txt b/docs/_sources/components/filters/index.rst.txt new file mode 100644 index 0000000..1e4c7f5 --- /dev/null +++ b/docs/_sources/components/filters/index.rst.txt @@ -0,0 +1,28 @@ +Filters +======= + +.. toctree:: + :maxdepth: 1 + :caption: Filters + + input_filters + output_filters + +Filters are a mechanism in Coach that allows doing pre-processing and post-processing of the internal agent information. +There are two filter categories - + +* **Input filters** - these are filters that process the information passed **into** the agent from the environment. + This information includes the observation and the reward. Input filters therefore allow rescaling observations, + normalizing rewards, stack observations, etc. + +* **Output filters** - these are filters that process the information going **out** of the agent into the environment. + This information includes the action the agent chooses to take. 
Output filters therefore allow conversion of + actions from one space into another. For example, the agent can take :math:`N` discrete actions, that will be mapped by + the output filter onto :math:`N` continuous actions. + +Filters can be stacked on top of each other in order to build complex processing flows of the inputs or outputs. + +.. image:: /_static/img/filters.png + :width: 350px + :align: center + diff --git a/docs/_sources/components/filters/input_filters.rst.txt b/docs/_sources/components/filters/input_filters.rst.txt new file mode 100644 index 0000000..3b0d4b1 --- /dev/null +++ b/docs/_sources/components/filters/input_filters.rst.txt @@ -0,0 +1,67 @@ +Input Filters +============= + +The input filters are separated into two categories - **observation filters** and **reward filters**. + +Observation Filters +------------------- + +ObservationClippingFilter ++++++++++++++++++++++++++ +.. autoclass:: rl_coach.filters.observation.ObservationClippingFilter + +ObservationCropFilter ++++++++++++++++++++++ +.. autoclass:: rl_coach.filters.observation.ObservationCropFilter + +ObservationMoveAxisFilter ++++++++++++++++++++++++++ +.. autoclass:: rl_coach.filters.observation.ObservationMoveAxisFilter + +ObservationNormalizationFilter +++++++++++++++++++++++++++++++ +.. autoclass:: rl_coach.filters.observation.ObservationNormalizationFilter + +ObservationReductionBySubPartsNameFilter +++++++++++++++++++++++++++++++++++++++++ +.. autoclass:: rl_coach.filters.observation.ObservationReductionBySubPartsNameFilter + +ObservationRescaleSizeByFactorFilter +++++++++++++++++++++++++++++++++++++ +.. autoclass:: rl_coach.filters.observation.ObservationRescaleSizeByFactorFilter + +ObservationRescaleToSizeFilter +++++++++++++++++++++++++++++++ +.. autoclass:: rl_coach.filters.observation.ObservationRescaleToSizeFilter + +ObservationRGBToYFilter ++++++++++++++++++++++++ +.. autoclass:: rl_coach.filters.observation.ObservationRGBToYFilter + +ObservationSqueezeFilter +++++++++++++++++++++++++ +.. autoclass:: rl_coach.filters.observation.ObservationSqueezeFilter + +ObservationStackingFilter ++++++++++++++++++++++++++ +.. autoclass:: rl_coach.filters.observation.ObservationStackingFilter + +ObservationToUInt8Filter +++++++++++++++++++++++++ +.. autoclass:: rl_coach.filters.observation.ObservationToUInt8Filter + + +Reward Filters +-------------- + +RewardClippingFilter +++++++++++++++++++++ +.. autoclass:: rl_coach.filters.reward.RewardClippingFilter + +RewardNormalizationFilter ++++++++++++++++++++++++++ +.. autoclass:: rl_coach.filters.reward.RewardNormalizationFilter + +RewardRescaleFilter ++++++++++++++++++++ +.. autoclass:: rl_coach.filters.reward.RewardRescaleFilter diff --git a/docs/_sources/components/filters/output_filters.rst.txt b/docs/_sources/components/filters/output_filters.rst.txt new file mode 100644 index 0000000..1a2f460 --- /dev/null +++ b/docs/_sources/components/filters/output_filters.rst.txt @@ -0,0 +1,37 @@ +Output Filters +-------------- + +The output filters only process the actions. + +Action Filters +++++++++++++++ + +.. autoclass:: rl_coach.filters.action.AttentionDiscretization + +.. image:: /_static/img/attention_discretization.png + :align: center + +.. autoclass:: rl_coach.filters.action.BoxDiscretization + +.. image:: /_static/img/box_discretization.png + :align: center + +.. autoclass:: rl_coach.filters.action.BoxMasking + +.. image:: /_static/img/box_masking.png + :align: center + +.. autoclass:: rl_coach.filters.action.PartialDiscreteActionSpaceMap + +.. 
image:: /_static/img/partial_discrete_action_space_map.png + :align: center + +.. autoclass:: rl_coach.filters.action.FullDiscreteActionSpaceMap + +.. image:: /_static/img/full_discrete_action_space_map.png + :align: center + +.. autoclass:: rl_coach.filters.action.LinearBoxToBoxMap + +.. image:: /_static/img/linear_box_to_box_map.png + :align: center \ No newline at end of file diff --git a/docs/_sources/components/memories/index.rst.txt b/docs/_sources/components/memories/index.rst.txt new file mode 100644 index 0000000..2575a32 --- /dev/null +++ b/docs/_sources/components/memories/index.rst.txt @@ -0,0 +1,44 @@ +Memories +======== + +Episodic Memories +----------------- + +EpisodicExperienceReplay +++++++++++++++++++++++++ +.. autoclass:: rl_coach.memories.episodic.EpisodicExperienceReplay + +EpisodicHindsightExperienceReplay ++++++++++++++++++++++++++++++++++ +.. autoclass:: rl_coach.memories.episodic.EpisodicHindsightExperienceReplay + +EpisodicHRLHindsightExperienceReplay +++++++++++++++++++++++++++++++++++++ +.. autoclass:: rl_coach.memories.episodic.EpisodicHRLHindsightExperienceReplay + +SingleEpisodeBuffer ++++++++++++++++++++ +.. autoclass:: rl_coach.memories.episodic.SingleEpisodeBuffer + + +Non-Episodic Memories +--------------------- +BalancedExperienceReplay +++++++++++++++++++++++++ +.. autoclass:: rl_coach.memories.non_episodic.BalancedExperienceReplay + +QDND +++++ +.. autoclass:: rl_coach.memories.non_episodic.QDND + +ExperienceReplay +++++++++++++++++ +.. autoclass:: rl_coach.memories.non_episodic.ExperienceReplay + +PrioritizedExperienceReplay ++++++++++++++++++++++++++++ +.. autoclass:: rl_coach.memories.non_episodic.PrioritizedExperienceReplay + +TransitionCollection +++++++++++++++++++++ +.. autoclass:: rl_coach.memories.non_episodic.TransitionCollection diff --git a/docs/_sources/components/spaces.rst.txt b/docs/_sources/components/spaces.rst.txt new file mode 100644 index 0000000..4adf3f5 --- /dev/null +++ b/docs/_sources/components/spaces.rst.txt @@ -0,0 +1,64 @@ +Spaces +====== + +Space +----- +.. autoclass:: rl_coach.spaces.Space + :members: + :inherited-members: + + + +Observation Spaces +------------------ +.. autoclass:: rl_coach.spaces.ObservationSpace + :members: + :inherited-members: + +VectorObservationSpace +++++++++++++++++++++++ +.. autoclass:: rl_coach.spaces.VectorObservationSpace + +PlanarMapsObservationSpace +++++++++++++++++++++++++++ +.. autoclass:: rl_coach.spaces.PlanarMapsObservationSpace + +ImageObservationSpace ++++++++++++++++++++++ +.. autoclass:: rl_coach.spaces.ImageObservationSpace + + + +Action Spaces +------------- +.. autoclass:: rl_coach.spaces.ActionSpace + :members: + :inherited-members: + +AttentionActionSpace +++++++++++++++++++++ +.. autoclass:: rl_coach.spaces.AttentionActionSpace + +BoxActionSpace +++++++++++++++ +.. autoclass:: rl_coach.spaces.BoxActionSpace + +DiscreteActionSpace +++++++++++++++++++++ +.. autoclass:: rl_coach.spaces.DiscreteActionSpace + +MultiSelectActionSpace +++++++++++++++++++++++ +.. autoclass:: rl_coach.spaces.MultiSelectActionSpace + +CompoundActionSpace ++++++++++++++++++++ +.. autoclass:: rl_coach.spaces.CompoundActionSpace + + + +Goal Spaces +----------- +.. 
autoclass:: rl_coach.spaces.GoalsSpace + :members: + :inherited-members: diff --git a/docs/_sources/contributing/add_agent.rst.txt b/docs/_sources/contributing/add_agent.rst.txt new file mode 100644 index 0000000..52a95b4 --- /dev/null +++ b/docs/_sources/contributing/add_agent.rst.txt @@ -0,0 +1,80 @@ +Adding a New Agent +================== + +Coach's modularity makes adding an agent a simple and clean task. +We suggest using the following +`Jupyter notebook tutorial `_ +to ramp up on this process. In general, it involves the following steps: + +1. Implement your algorithm in a new file. The agent can inherit base classes such as **ValueOptimizationAgent** or + **ActorCriticAgent**, or the more generic **Agent** base class. + + .. note:: + **ValueOptimizationAgent**, **PolicyOptimizationAgent** and **Agent** are abstract classes. + :code:`learn_from_batch()` should be overridden with the desired behavior for the algorithm being implemented. + If you decide to inherit from **Agent**, :code:`choose_action()` should also be overridden. + + .. code-block:: python + + def learn_from_batch(self, batch) -> Tuple[float, List, List]: + """ + Given a batch of transitions, calculates their target values and updates the network. + :param batch: A list of transitions + :return: The total loss of the training, the loss per head and the unclipped gradients + """ + + def choose_action(self, curr_state): + """ + Choose an action to act with in the current episode being played. Different behavior might be exhibited when training + or testing. + + :param curr_state: the current state to act upon. + :return: chosen action, some action value describing the action (q-value, probability, etc) + """ + +2. Implement your agent's specific network head, if needed, in the implementation for the framework of your choice. + For example **architectures/neon_components/heads.py**. The head will inherit the generic base class Head. + A new output type should be added to configurations.py, and a mapping between the new head and output type should + be defined in the get_output_head() function at **architectures/neon_components/general_network.py** + +3. Define a new parameters class that inherits AgentParameters. + The parameters class defines all the hyperparameters for the agent, and is initialized with 4 main components: + + * **algorithm**: A class inheriting AlgorithmParameters which defines any algorithm specific parameters + + * **exploration**: A class inheriting ExplorationParameters which defines the exploration policy parameters. + There are several common exploration policies built-in which you can use, and are defined under + the exploration sub directory. You can also define your own custom exploration policy. + + * **memory**: A class inheriting MemoryParameters which defines the memory parameters. + There are several common memory types built-in which you can use, and are defined under the memories + sub directory. You can also define your own custom memory. + + * **networks**: A dictionary defining all the networks that will be used by the agent. The keys of the dictionary + define the network name and will be used to access each network through the agent class. + Each dictionary value is a class inheriting NetworkParameters, which defines the network structure + and parameters. + + + Additionally, set the path property to return the path to your agent class in the following format: + + :code:`:` + + For example, + + .. 
code-block:: python + + class RainbowAgentParameters(AgentParameters): + def __init__(self): + super().__init__(algorithm=RainbowAlgorithmParameters(), + exploration=RainbowExplorationParameters(), + memory=RainbowMemoryParameters(), + networks={"main": RainbowNetworkParameters()}) + + @property + def path(self): + return 'rainbow.rainbow_agent:RainbowAgent' + +4. (Optional) Define a preset using the new agent type with a given environment, and the hyper-parameters that should + be used for training on that environment. + diff --git a/docs_raw/docs/contributing/add_env.md b/docs/_sources/contributing/add_env.rst.txt similarity index 68% rename from docs_raw/docs/contributing/add_env.md rename to docs/_sources/contributing/add_env.rst.txt index 25a7f2c..ed2777f 100644 --- a/docs_raw/docs/contributing/add_env.md +++ b/docs/_sources/contributing/add_env.rst.txt @@ -1,29 +1,41 @@ -Adding a new environment to Coach is as easy as solving CartPole. +Adding a New Environment +======================== + +Adding a new environment to Coach is as easy as solving CartPole. There are essentially two ways to integrate new environments to Coach: -## Using the OpenAI Gym API +Using the OpenAI Gym API +------------------------ If your environment is already using the OpenAI Gym API, you are already good to go. -When selecting the environment parameters in the preset, use GymEnvironmentParameters(), +When selecting the environment parameters in the preset, use :code:`GymEnvironmentParameters()`, and pass the path to your environment source code using the level parameter. You can specify additional parameters for your environment using the additional_simulator_parameters parameter. -Take for example the definition used in the Pendulum_HAC preset: +Take for example the definition used in the :code:`Pendulum_HAC` preset: + +.. code-block:: python env_params = GymEnvironmentParameters() env_params.level = "rl_coach.environments.mujoco.pendulum_with_goals:PendulumWithGoals" env_params.additional_simulator_parameters = {"time_limit": 1000} -## Using the Coach API +Using the Coach API +------------------- There are a few simple steps to follow, and we will walk through them one by one. +As an alternative, we highly recommend following the corresponding +`tutorial `_ +in the GitHub repo. -1. Create a new class for your environment, and inherit the Environment class. +1. Create a new class for your environment, and inherit the Environment class. -2. Coach defines a simple API for implementing a new environment, which are defined in environment/environment.py. - There are several functions to implement, but only some of them are mandatory. +2. Coach defines a simple API for implementing a new environment, which are defined in environment/environment.py. + There are several functions to implement, but only some of them are mandatory. - Here are the important ones: + Here are the important ones: + + .. code-block:: python def _take_action(self, action_idx: ActionType) -> None: """ @@ -59,10 +71,12 @@ There are a few simple steps to follow, and we will walk through them one by one :return: numpy array containing the image that will be rendered to the screen """ -3. Create a new parameters class for your environment, which inherits the EnvironmentParameters class. - In the __init__ of your class, define all the parameters you used in your Environment class. - Additionally, fill the path property of the class with the path to your Environment class. 
- For example, take a look at the EnvironmentParameters class used for Doom: +3. Create a new parameters class for your environment, which inherits the EnvironmentParameters class. + In the __init__ of your class, define all the parameters you used in your Environment class. + Additionally, fill the path property of the class with the path to your Environment class. + For example, take a look at the EnvironmentParameters class used for Doom: + + .. code-block:: python class DoomEnvironmentParameters(EnvironmentParameters): def __init__(self): diff --git a/docs_raw/docs/dashboard.md b/docs/_sources/dashboard.rst.txt similarity index 66% rename from docs_raw/docs/dashboard.md rename to docs/_sources/dashboard.rst.txt index c00a1ac..2d89e3e 100644 --- a/docs_raw/docs/dashboard.md +++ b/docs/_sources/dashboard.rst.txt @@ -1,86 +1,63 @@ -Reinforcement learning algorithms are neat. That is - when they work. But when they don't, RL algorithms are often quite tricky to debug. +Coach Dashboard +=============== + +Reinforcement learning algorithms are neat. That is - when they work. But when they don't, RL algorithms are often quite tricky to debug. Finding the root cause for why things break in RL is rather difficult. Moreover, different RL algorithms shine in some aspects, but then lack in others. Comparing the algorithms faithfully is also a hard task, which requires the right tools. Coach Dashboard is a visualization tool which simplifies the analysis of the training process. Each run of Coach extracts a lot of information from within the algorithm and stores it in the experiment directory. This information is very valuable for debugging, analyzing and comparing different algorithms. But without a good visualization tool, this information cannot be utilized. This is where Coach Dashboard comes into play. -### Visualizing Signals +Visualizing Signals +------------------- Coach Dashboard exposes a convenient user interface for visualizing the training signals. The signals are dynamically updated - during the agent training. Additionally, it allows selecting a subset of the available signals, and then overlaying them on top of each other. -
+.. image:: /_static/img/updating_dynamically.gif + :width: 800px + :align: center * Holding the CTRL key, while selecting signals, will allow visualizing more than one signal. * Signals can be visualized, using either of the Y-axes, in order to visualize signals with different scales. To move a signal to the second Y-axis, select it and press the 'Toggle Second Axis' button. -### Tracking Statistics +Tracking Statistics +------------------- -When running parallel algorithms, such as A3C, it often helps visualizing the learning of all the workers, at the same time. Coach Dashboard allows viewing multiple signals (and even smooth them out, if required) from multiple workers. In addition, it supports viewing the mean and standard deviation of the same signal, across different workers, using Bollinger bands. +When running parallel algorithms, such as A3C, it often helps visualizing the learning of all the workers, at the same time. Coach Dashboard allows viewing multiple signals (and even smooth them out, if required) from multiple workers. In addition, it supports viewing the mean and standard deviation of the same signal, across different workers, using Bollinger bands. -
+.. figure:: /_static/img/bollinger_bands.png + :width: 800px + :align: center + + **Displaying Bollinger Bands** +.. figure:: /_static/img/separate_signals.png + :width: 800px + :align: center + **Displaying all the Workers** - -
- - - -### Comparing Runs +Comparing Runs +-------------- Reinforcement learning algorithms are notoriously known as unstable, and suffer from high run-to-run variance. This makes benchmarking and comparing different algorithms even harder. To ease this process, it is common to execute several runs of the same algorithm and average over them. This is easy to do with Coach Dashboard, by centralizing all the experiment directories in a single directory, and then loading them as a single group. Loading several groups of different algorithms then allows comparing the averaged signals, such as the total episode reward. In RL, there are several interesting performance metrics to consider, and this is easy to do by controlling the X-axis units in Coach Dashboard. It is possible to switch between several options such as the total number of steps or the total training time. -
+ +.. figure:: /_static/img/compare_by_time.png + :width: 800px + :align: center + + **Comparing Several Algorithms According to the Time Passed** + + +.. figure:: /_static/img/compare_by_num_episodes.png + :width: 800px + :align: center + + **Comparing Several Algorithms According to the Number of Episodes Played** - - - - - - - -
- - diff --git a/docs_raw/docs/design/control_flow.md b/docs/_sources/design/control_flow.rst.txt similarity index 73% rename from docs_raw/docs/design/control_flow.md rename to docs/_sources/design/control_flow.rst.txt index b21132f..b41ddfd 100644 --- a/docs_raw/docs/design/control_flow.md +++ b/docs/_sources/design/control_flow.rst.txt @@ -1,35 +1,34 @@ - -# Coach Control Flow +Control Flow +============ Coach is built in a modular way, encouraging modules reuse and reducing the amount of boilerplate code needed for developing new algorithms or integrating a new challenge as an environment. On the other hand, it can be overwhelming for new users to ramp up on the code. To help with that, here's a short overview of the control flow. -## Graph Manager +Graph Manager +------------- -The main entry point for Coach is **coach.py**. +The main entry point for Coach is :code:`coach.py`. The main functionality of this script is to parse the command line arguments and invoke all the sub-processes needed for the given experiment. -**coach.py** executes the given **preset** file which returns a **GraphManager** object. +:code:`coach.py` executes the given **preset** file which returns a :code:`GraphManager` object. A **preset** is a design pattern that is intended for concentrating the entire definition of an experiment in a single file. This helps with experiments reproducibility, improves readability and prevents confusion. -The outcome of a preset is a **GraphManager** which will usually be instantiated in the final lines of the preset. +The outcome of a preset is a :code:`GraphManager` which will usually be instantiated in the final lines of the preset. -A **GraphManager** is an object that holds all the agents and environments of an experiment, and is mostly responsible +A :code:`GraphManager` is an object that holds all the agents and environments of an experiment, and is mostly responsible for scheduling their work. Why is it called a **graph** manager? Because agents and environments are structured into a graph of interactions. For example, in hierarchical reinforcement learning schemes, there will often be a master policy agent, that will control a sub-policy agent, which will interact with the environment. Other schemes can have much more complex graphs of control, such as several hierarchy layers, each with multiple agents. The graph manager's main loop is the improve loop. -
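+Before looking at the improve loop (illustrated below), it helps to see what such a preset might contain. The following is a minimal sketch only - the class names and import paths are assumptions based on typical Coach presets (such as the CartPole presets) and may differ between Coach versions:
+
+.. code-block:: python
+
+    from rl_coach.agents.dqn_agent import DQNAgentParameters
+    from rl_coach.environments.gym_environment import GymVectorEnvironment
+    from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
+    from rl_coach.graph_managers.graph_manager import SimpleSchedule
+
+    # the agent: a DQN agent with its default hyper-parameters
+    agent_params = DQNAgentParameters()
+
+    # the environment: CartPole, accessed through the OpenAI Gym API
+    env_params = GymVectorEnvironment(level='CartPole-v0')
+
+    # the graph manager ties the agent and the environment together and
+    # schedules the heatup, training and evaluation phases
+    graph_manager = BasicRLGraphManager(agent_params=agent_params,
+                                        env_params=env_params,
+                                        schedule_params=SimpleSchedule())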
+.. image:: /_static/img/improve.png + :width: 400px + :align: center The improve loop skips between 3 main phases - heatup, training and evaluation: @@ -48,7 +47,8 @@ The improve loop skips between 3 main phases - heatup, training and evaluation: evaluation will be averaged in order to reduce the stochasticity effects of all the components. -## Level Manager +Level Manager +------------- In each of the 3 phases described above, the graph manager will invoke all the hierarchy levels in the graph in a synchronized manner. In Coach, agents do not interact directly with the environment. Instead, they go through a @@ -63,32 +63,40 @@ level can be seen as an interaction between an agent and an environment, even if a lower hierarchy level. -## Agent +Agent +----- The base agent class has 3 main function that will be used during those phases - observe, act and train. * **Observe** - this function gets the latest response from the environment as input, and updates the internal state of the agent with the new information. The environment response will - be first passed through the agent's **InputFilter** object, which will process the values in the response, according + be first passed through the agent's :code:`InputFilter` object, which will process the values in the response, according to the specific agent definition. The environment response will then be converted into a - **Transition** which will contain the information from a single step - ($ s_{t}, a_{t}, r_{t}, s_{t+1}, terminal signal $), and store it in the memory. + :code:`Transition` which will contain the information from a single step + :math:`(s_{t}, a_{t}, r_{t}, s_{t+1}, \textrm{terminal signal})`, and store it in the memory. + +.. image:: /_static/img/observe.png + :width: 700px + :align: center -Observe * **Act** - this function uses the current internal state of the agent in order to select the next action to take on - the environment. This function will call the per-agent custom function **choose_action** that will use the network + the environment. This function will call the per-agent custom function :code:`choose_action` that will use the network and the exploration policy in order to select an action. The action will be stored, together with any additional - information (like the action value for example) in an **ActionInfo** object. The ActionInfo object will then be - passed through the agent's **OutputFilter** to allow any processing of the action (like discretization, + information (like the action value for example) in an :code:`ActionInfo` object. The ActionInfo object will then be + passed through the agent's :code:`OutputFilter` to allow any processing of the action (like discretization, or shifting, for example), before passing it to the environment. -Act +.. image:: /_static/img/act.png + :width: 700px + :align: center * **Train** - this function will sample a batch from the memory and train on it. The batch of transitions will be - first wrapped into a **Batch** object to allow efficient querying of the batch values. It will then be passed into - the agent specific **learn_from_batch** function, that will extract network target values from the batch and will + first wrapped into a :code:`Batch` object to allow efficient querying of the batch values. It will then be passed into + the agent specific :code:`learn_from_batch` function, that will extract network target values from the batch and will train the networks accordingly. 
Lastly, if there's a target network defined for the agent, it will sync the target network weights with the online network. -Train +.. image:: /_static/img/train.png + :width: 700px + :align: center diff --git a/docs_raw/docs/design/horizontal_scaling.md b/docs/_sources/design/horizontal_scaling.rst.txt similarity index 100% rename from docs_raw/docs/design/horizontal_scaling.md rename to docs/_sources/design/horizontal_scaling.rst.txt diff --git a/docs/_sources/design/network.rst.txt b/docs/_sources/design/network.rst.txt new file mode 100644 index 0000000..aa45b76 --- /dev/null +++ b/docs/_sources/design/network.rst.txt @@ -0,0 +1,56 @@ +Network Design +============== + +Each agent has at least one neural network, used as the function approximator, for choosing the actions. +The network is designed in a modular way to allow reusability in different agents. +It is separated into three main parts: + +* **Input Embedders** - This is the first stage of the network, meant to convert the input into a feature vector representation. + It is possible to combine several instances of any of the supported embedders, in order to allow varied combinations of inputs. + + There are two main types of input embedders: + + 1. Image embedder - Convolutional neural network. + 2. Vector embedder - Multi-layer perceptron. + + +* **Middlewares** - The middleware gets the output of the input embedder, and processes it into a different representation domain, + before sending it through the output head. The goal of the middleware is to enable processing the combined outputs of + several input embedders, and pass them through some extra processing. + This, for instance, might include an LSTM or just a plain simple FC layer. + +* **Output Heads** - The output head is used in order to predict the values required from the network. + These might include action-values, state-values or a policy. As with the input embedders, + it is possible to use several output heads in the same network. For example, the *Actor Critic* agent combines two + heads - a policy head and a state-value head. + In addition, the output heads defines the loss function according to the head type. + + ​ +.. image:: /_static/img/network.png + :width: 400px + :align: center + +Keeping Network Copies in Sync +------------------------------ + +Most of the reinforcement learning agents include more than one copy of the neural network. +These copies serve as counterparts of the main network which are updated in different rates, +and are often synchronized either locally or between parallel workers. For easier synchronization of those copies, +a wrapper around these copies exposes a simplified API, which allows hiding these complexities from the agent. +In this wrapper, 3 types of networks can be defined: + +* **online network** - A mandatory network which is the main network the agent will use + +* **global network** - An optional network which is shared between workers in single-node multi-process distributed learning. + It is updated by all the workers directly, and holds the most up-to-date weights. + +* **target network** - An optional network which is local for each worker. It can be used in order to keep a copy of + the weights stable for a long period of time. This is used in different agents, like DQN for example, in order to + have stable targets for the online network while training it. + + +.. 
image:: /_static/img/distributed.png + :width: 600px + :align: center + + diff --git a/docs/_sources/features/algorithms.rst.txt b/docs/_sources/features/algorithms.rst.txt new file mode 100644 index 0000000..eb5d19f --- /dev/null +++ b/docs/_sources/features/algorithms.rst.txt @@ -0,0 +1,10 @@ +Algorithms +========== + +Coach supports many state-of-the-art reinforcement learning algorithms, which are separated into three main classes - +value optimization, policy optimization and imitation learning. +A detailed description of those algorithms may be found in the `agents <../components/agents/index.html>`_ section. + +.. image:: /_static/img/algorithms.png + :width: 600px + :align: center \ No newline at end of file diff --git a/docs/_sources/features/benchmarks.rst.txt b/docs/_sources/features/benchmarks.rst.txt new file mode 100644 index 0000000..292c849 --- /dev/null +++ b/docs/_sources/features/benchmarks.rst.txt @@ -0,0 +1,22 @@ +Benchmarks +========== + +Reinforcement learning is a developing field, and so far it has been particularly difficult to reproduce some of the +results published in the original papers. Some reasons for this are: + +* Reinforcement learning algorithms are notoriously known as having an unstable learning process. + The data the neural networks train on is dynamic, and depends on the random seed defined for the environment. + +* Reinforcement learning algorithms have many moving parts. For some environments and agents, there are many + "tricks" which are needed to get the exact behavior the paper authors had seen. Also, there are **a lot** of + hyper-parameters to set. + +In order for a reinforcement learning implementation to be useful for research or for data science, it must be +shown that it achieves the expected behavior. For this reason, we collected a set of benchmark results from most +of the algorithms implemented in Coach. The algorithms were tested on a subset of the same environments that were +used in the original papers, and with multiple seeds for each environment. +Additionally, Coach uses some strict testing mechanisms to try and make sure the results we show for these +benchmarks stay intact as Coach continues to develop. + +To see the benchmark results, please visit the +`following GitHub page `_. \ No newline at end of file diff --git a/docs/_sources/features/environments.rst.txt b/docs/_sources/features/environments.rst.txt new file mode 100644 index 0000000..e1d72ac --- /dev/null +++ b/docs/_sources/features/environments.rst.txt @@ -0,0 +1,31 @@ +Environments +============ + +Coach supports a large number of environments which can be solved using reinforcement learning. +To find detailed documentation of the environments API, see the `environments section <../components/environments/index.html>`_. +The supported environments are: + +* `DeepMind Control Suite `_ - a set of reinforcement learning environments + powered by the MuJoCo physics engine. + +* `Blizzard Starcraft II `_ - a popular strategy game which was wrapped with a + Python interface by DeepMind. + +* `ViZDoom `_ - a Doom-based AI research platform for reinforcement learning + from raw visual information. + +* `CARLA `_ - an open-source simulator for autonomous driving research. + +* `OpenAI Gym `_ - a library which consists of a set of environments, from games to robotics. + Additionally, it can be extended using the API defined by the authors. 
+ + In Coach, we support all the native environments in Gym, along with several extensions such as: + + * `Roboschool `_ - a set of environments powered by the PyBullet engine, + that offer a free alternative to MuJoCo. + + * `Gym Extensions `_ - a set of environments that extends Gym for + auxiliary tasks (multitask learning, transfer learning, inverse reinforcement learning, etc.) + + * `PyBullet `_ - a physics engine that + includes a set of robotics environments. diff --git a/docs/_sources/features/index.rst.txt b/docs/_sources/features/index.rst.txt new file mode 100644 index 0000000..3661755 --- /dev/null +++ b/docs/_sources/features/index.rst.txt @@ -0,0 +1,10 @@ +Features +======== + +.. toctree:: + :maxdepth: 1 + :caption: Features + + algorithms + environments + benchmarks \ No newline at end of file diff --git a/docs/_sources/index.rst.txt b/docs/_sources/index.rst.txt new file mode 100644 index 0000000..1543fba --- /dev/null +++ b/docs/_sources/index.rst.txt @@ -0,0 +1,72 @@ +.. Reinforcement Learning Coach documentation master file, created by + sphinx-quickstart on Sun Oct 28 15:35:09 2018. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + + +Reinforcement Learning Coach +============================ + +Coach is a python framework which models the interaction between an agent and an environment in a modular way. +With Coach, it is possible to model an agent by combining various building blocks, and training the agent on multiple environments. +The available environments allow testing the agent in different fields such as robotics, autonomous driving, games and more. +It exposes a set of easy-to-use APIs for experimenting with new RL algorithms, and allows simple integration of +new environments to solve. +Coach collects statistics from the training process and supports advanced visualization techniques for debugging the agent being trained. + +.. image:: _static/img/design.png + :width: 800px + +Blog posts from the Intel® AI website: + +* `Release 0.8.0 `_ (initial release) + +* `Release 0.9.0 `_ + +* `Release 0.10.0 `_ + +* `Release 0.11.0 `_ (current release) + +You can find more details in the `GitHub repository `_. + + +.. toctree:: + :maxdepth: 2 + :caption: Intro + :titlesonly: + + usage + features/index + selecting_an_algorithm + dashboard + + +.. toctree:: + :maxdepth: 1 + :caption: Design + + design/control_flow + design/network + +.. toctree:: + :maxdepth: 1 + :caption: Contributing + + contributing/add_agent + contributing/add_env + +.. toctree:: + :maxdepth: 1 + :caption: Components + + components/agents/index + components/architectures/index + components/environments/index + components/exploration_policies/index + components/filters/index + components/memories/index + components/core_types + components/spaces + components/additional_parameters + + diff --git a/docs/_sources/selecting_an_algorithm.rst.txt b/docs/_sources/selecting_an_algorithm.rst.txt new file mode 100644 index 0000000..c867191 --- /dev/null +++ b/docs/_sources/selecting_an_algorithm.rst.txt @@ -0,0 +1,270 @@ +Selecting an Algorithm +====================== + +As you probably already noticed, Coach has a lot of algorithms implemented into it: + +.. image:: /_static/img/algorithms.png + :width: 800px + :align: center + +**"ok that's prefect, but I am trying to build a solution for my application, how do I select the right algorithm?"** + +We collected some guidelines for how to choose the right algorithm for your application. 
+Answer the following questions to see which algorithms are the best fit for your task:
+
+* What type of actions does your task require - discrete actions or continuous actions?
+* Do you have expert demonstrations for your task?
+* Can you collect new data for your task dynamically?
+* Do you have a simulator for your task?
+
+Below is a short summary of each algorithm implemented in Coach, to help narrow down the choice:
+
+* **DQN** - Learns action values for discrete actions, and allows learning from a replay buffer with old experiences.
+
+* **Rainbow** - Combines multiple recent innovations on top of DQN for discrete controls, and achieves much better results on known benchmarks.
+
+* **HAC** - Works only for continuous actions, and uses a hierarchy of agents to simplify the learning.
+
+* **DDQN** - An improvement over DQN, which learns more accurate action values, and therefore achieves better results on known benchmarks.
+
+* **DFP** - Works only for discrete actions, by learning to predict the future values of a set of measurements from the environment, and then using a goal vector to weight the importance of each of the measurements.
+
+* **MMC** - A simple modification to DQN, which instead of learning action values only by bootstrapping the current action value prediction, mixes in the total discounted return as well. This helps learn the correct action values faster, and is particularly useful for environments with delayed rewards.
+
+* **PAL** - An improvement over DQN, which tries to deal with the approximation errors present in reinforcement learning by increasing the gap between the value of the best action and the second best action.
+
+* **NAF** - A variant of Q learning for continuous control.
+
+* **NEC** - Uses a memory to "memorize" its experience and learn much faster by querying the memory on newly seen states.
+
+* **QR DQN** - Uses quantile regression to learn a distribution over the action values instead of only their mean. This boosts performance on known benchmarks.
+
+* **Bootstrapped DQN** - Uses an ensemble of DQN networks, where each network learns from a different subset of the experience in order to improve exploration.
+
+* **N-Step Q Learning** - A variant of Q learning that uses bootstrapping of N steps ahead, instead of 1 step. Doing this makes the algorithm on-policy, and it therefore requires multiple workers training in parallel in order to work well.
+
+* **Categorical DQN** - Learns a distribution over the action values instead of only their mean. This boosts performance on known benchmarks, but requires knowing the range of possible accumulated rewards beforehand.
+
+* **Policy Gradient** - Based on the REINFORCE algorithm, it learns a probability distribution over the actions. This is the simplest algorithm available in Coach, but also achieves the worst results.
+
+* **Actor Critic (A3C / A2C)** - Combines REINFORCE with a learned baseline (critic) to improve the stability of learning. It also introduced the parallel learning of multiple workers to speed up data collection and improve the learning stability and speed, both for discrete and continuous action spaces.
+
+* **DDPG** - An actor critic scheme for continuous action spaces which assumes that the policy is deterministic, and is therefore able to use a replay buffer in order to improve sample efficiency.
+
+* **PPO** - An actor critic scheme which uses bounded updates to the policy in order to make the learning process very stable.
+
+* **Clipped PPO** - A simplification of PPO that reduces the code complexity while achieving similar results.
+
+* **BC** - The simplest form of imitation learning. Uses supervised learning on a dataset of expert demonstrations in order to imitate the expert behavior.
+
+* **CIL** - A variant of behavioral cloning, where the learned policy is split into several skills (such as turning left or right in an intersection), and each skill is learned separately from the human demonstrations.
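+Once an algorithm has been chosen, trying it out usually amounts to picking a matching preset and running it from the command line, for example (the preset name here is illustrative - the presets available may differ between Coach versions):
+
+.. code-block:: python
+
+    coach -p CartPole_ClippedPPO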
+ + +1. Does your environment have a discrete or continuous action space? +-------------------------------------------------------------------- + +Some reinforcement learning algorithms work only for discrete action spaces, where the agent needs to select +one out of several possible actions. Other algorithms work only for continuous action spaces, where there are +infinite possible actions, but there is some spatial relationship between the actions. And there are some algorithms +that can be applied in both cases. The available algorithms highly depend on the task at hand. + + +2. Is collecting more samples from your environment painful? +------------------------------------------------------------ + +Reinforcement learning algorithm are notoriously known for the amount of samples they need for training. +Typically, on-policy algorithms are much less sample efficient compared to off-policy algorithms. But there are +other algorithmic features that allow improving the sample efficiency even more, like using a DND in NEC, or using +Hindsight Experience Replay. It is hard to say which algorithm is the most sample efficient, but we can at least say +which ones are not sample efficient. + + +3. Do you have a simulator that can be parallelized across multiple processes or nodes? +--------------------------------------------------------------------------------------- + +Parallelizing training across multiple workers which are located on the same node or on different nodes is a technique +that has been introduced in recent years and achieved a lot of success in improving the results of multiple algorithms. +As part of this, there are some algorithms that don't work well without being parallelized with multiple workers +working in parallel, which requires having a simulator for each worker. + + +4. Do you have human demonstrations for solving the task? +--------------------------------------------------------- + +If human demonstrations are available for a task, most of the time it would be better to use those instead of training +using regular reinforcement learning from scratch. To use human demonstrations we have implemented several tools and +algorithms for imitation learning in Coach. diff --git a/docs/_sources/test.rst.txt b/docs/_sources/test.rst.txt new file mode 100644 index 0000000..51c4298 --- /dev/null +++ b/docs/_sources/test.rst.txt @@ -0,0 +1,8 @@ +test +---- + +.. important:: Its a note! in markdown! + +.. autoclass:: rl_coach.agents.dqn_agent.DQNAgent + :members: + :inherited-members: \ No newline at end of file diff --git a/docs/_sources/usage.rst.txt b/docs/_sources/usage.rst.txt new file mode 100644 index 0000000..d9eeba9 --- /dev/null +++ b/docs/_sources/usage.rst.txt @@ -0,0 +1,158 @@ +Usage +===== + +One of the mechanism Coach uses for running experiments is the **Preset** mechanism. +As its name implies, a preset defines a set of predefined experiment parameters. +This allows defining a *complex* agent-environment interaction, with multiple parameters, and later running it through +a very *simple* command line. + +The preset includes all the components that are used in the experiment, such as the agent internal components and +the environment to use. +It additionally defines general parameters for the experiment itself, such as the training schedule, +visualization parameters, and testing parameters. + +Training an Agent +----------------- + +Single-threaded Algorithms +++++++++++++++++++++++++++ + +This is the most common case. 
Just choose a preset using the :code:`-p` flag and press enter. +To list the available presets, use the :code:`-l` flag. + +*Example:* + +.. code-block:: python + + coach -p CartPole_DQN + +Multi-threaded Algorithms ++++++++++++++++++++++++++ + +Multi-threaded algorithms are very common these days. +They typically achieve the best results, and scale gracefully with the number of threads. +In Coach, running such algorithms is done by selecting a suitable preset, and choosing the number of threads to run using the :code:`-n` flag. + +*Example:* + +.. code-block:: python + + coach -p CartPole_A3C -n 8 + +Evaluating an Agent +------------------- + +There are several options for evaluating an agent during the training: + +* For multi-threaded runs, an evaluation agent will constantly run in the background and evaluate the model during the training. + +* For single-threaded runs, it is possible to define an evaluation period through the preset. This will run several episodes of evaluation once in a while. + +Additionally, it is possible to save checkpoints of the agent's networks and then run only in evaluation mode. +Saving checkpoints can be done by specifying the number of seconds between storing checkpoints using the :code:`-s` flag. +The checkpoints will be saved into the experiment directory. +Loading a model for evaluation can be done by specifying the :code:`-crd` flag with the experiment directory, and the :code:`--evaluate` flag to disable training. + +*Example:* + +.. code-block:: python + + coach -p CartPole_DQN -s 60 + coach -p CartPole_DQN --evaluate -crd CHECKPOINT_RESTORE_DIR + +Playing with the Environment as a Human +--------------------------------------- + +Interacting with the environment as a human can be useful for understanding its difficulties and for collecting data for imitation learning. +In Coach, this can be easily done by selecting a preset that defines the environment to use, and specifying the :code:`--play` flag. +When the environment is loaded, the available keyboard buttons will be printed to the screen. +Pressing the escape key when finished will end the simulation and store the replay buffer in the experiment directory. + +*Example:* + +.. code-block:: python + + coach -et rl_coach.environments.gym_environment:Atari -lvl BreakoutDeterministic-v4 --play + +Learning Through Imitation Learning +----------------------------------- + +Learning through imitation of human behavior is a nice way to speed up the learning. +In Coach, this can be done in two steps - + +1. Create a dataset of demonstrations by playing with the environment as a human. + After this step, a pickle of the replay buffer containing your game play will be stored in the experiment directory. + The path to this replay buffer will be printed to the screen. + To do so, you should select an environment type and level through the command line, and specify the :code:`--play` flag. + + *Example:* + +.. code-block:: python + + coach -et rl_coach.environments.doom_environment:DoomEnvironmentParameters -lvl Basic --play + + +2. Next, use an imitation learning preset and set the replay buffer path accordingly. + The path can be set either from the command line or from the preset itself. + + *Example:* + +.. code-block:: python + + coach -p Doom_Basic_BC -cp='agent.load_memory_from_file_path=\"/replay_buffer.p\"' + + +Visualizations +-------------- + +Rendering the Environment ++++++++++++++++++++++++++ + +Rendering the environment can be done by using the :code:`-r` flag. 
+When working with multi-threaded algorithms, the rendered image will be representing the game play of the evaluation worker. +When working with single-threaded algorithms, the rendered image will be representing the single worker which can be either training or evaluating. +Keep in mind that rendering the environment in single-threaded algorithms may slow the training to some extent. +When playing with the environment using the :code:`--play` flag, the environment will be rendered automatically without the need for specifying the :code:`-r` flag. + +*Example:* + +.. code-block:: python + + coach -p Breakout_DQN -r + +Dumping GIFs +++++++++++++ + +Coach allows storing GIFs of the agent game play. +To dump GIF files, use the :code:`-dg` flag. +The files are dumped after every evaluation episode, and are saved into the experiment directory, under a gifs sub-directory. + +*Example:* + +.. code-block:: python + + coach -p Breakout_A3C -n 4 -dg + +Switching Between Deep Learning Frameworks +------------------------------------------ + +Coach uses TensorFlow as its main backend framework, but it also supports MXNet. +MXNet is optional, and by default, TensorFlow will be used. +If MXNet was installed, it is possible to switch to MXNet using the :code:`-f` flag. + +*Example:* + +.. code-block:: python + + coach -p Doom_Basic_DQN -f mxnet + +Additional Flags +---------------- + +There are several convenient flags which are important to know about. +The most up to date description can be found by using the :code:`-h` flag. + +.. argparse:: + :module: rl_coach.coach + :func: create_argument_parser + :prog: coach \ No newline at end of file diff --git a/docs/_static/ajax-loader.gif b/docs/_static/ajax-loader.gif new file mode 100644 index 0000000..61faf8c Binary files /dev/null and b/docs/_static/ajax-loader.gif differ diff --git a/docs/_static/basic.css b/docs/_static/basic.css new file mode 100644 index 0000000..104f076 --- /dev/null +++ b/docs/_static/basic.css @@ -0,0 +1,676 @@ +/* + * basic.css + * ~~~~~~~~~ + * + * Sphinx stylesheet -- basic theme. + * + * :copyright: Copyright 2007-2018 by the Sphinx team, see AUTHORS. + * :license: BSD, see LICENSE for details. 
+ * + */ + +/* -- main layout ----------------------------------------------------------- */ + +div.clearer { + clear: both; +} + +/* -- relbar ---------------------------------------------------------------- */ + +div.related { + width: 100%; + font-size: 90%; +} + +div.related h3 { + display: none; +} + +div.related ul { + margin: 0; + padding: 0 0 0 10px; + list-style: none; +} + +div.related li { + display: inline; +} + +div.related li.right { + float: right; + margin-right: 5px; +} + +/* -- sidebar --------------------------------------------------------------- */ + +div.sphinxsidebarwrapper { + padding: 10px 5px 0 10px; +} + +div.sphinxsidebar { + float: left; + width: 230px; + margin-left: -100%; + font-size: 90%; + word-wrap: break-word; + overflow-wrap : break-word; +} + +div.sphinxsidebar ul { + list-style: none; +} + +div.sphinxsidebar ul ul, +div.sphinxsidebar ul.want-points { + margin-left: 20px; + list-style: square; +} + +div.sphinxsidebar ul ul { + margin-top: 0; + margin-bottom: 0; +} + +div.sphinxsidebar form { + margin-top: 10px; +} + +div.sphinxsidebar input { + border: 1px solid #98dbcc; + font-family: sans-serif; + font-size: 1em; +} + +div.sphinxsidebar #searchbox form.search { + overflow: hidden; +} + +div.sphinxsidebar #searchbox input[type="text"] { + float: left; + width: 80%; + padding: 0.25em; + box-sizing: border-box; +} + +div.sphinxsidebar #searchbox input[type="submit"] { + float: left; + width: 20%; + border-left: none; + padding: 0.25em; + box-sizing: border-box; +} + + +img { + border: 0; + max-width: 100%; +} + +/* -- search page ----------------------------------------------------------- */ + +ul.search { + margin: 10px 0 0 20px; + padding: 0; +} + +ul.search li { + padding: 5px 0 5px 20px; + background-image: url(file.png); + background-repeat: no-repeat; + background-position: 0 7px; +} + +ul.search li a { + font-weight: bold; +} + +ul.search li div.context { + color: #888; + margin: 2px 0 0 30px; + text-align: left; +} + +ul.keywordmatches li.goodmatch a { + font-weight: bold; +} + +/* -- index page ------------------------------------------------------------ */ + +table.contentstable { + width: 90%; + margin-left: auto; + margin-right: auto; +} + +table.contentstable p.biglink { + line-height: 150%; +} + +a.biglink { + font-size: 1.3em; +} + +span.linkdescr { + font-style: italic; + padding-top: 5px; + font-size: 90%; +} + +/* -- general index --------------------------------------------------------- */ + +table.indextable { + width: 100%; +} + +table.indextable td { + text-align: left; + vertical-align: top; +} + +table.indextable ul { + margin-top: 0; + margin-bottom: 0; + list-style-type: none; +} + +table.indextable > tbody > tr > td > ul { + padding-left: 0em; +} + +table.indextable tr.pcap { + height: 10px; +} + +table.indextable tr.cap { + margin-top: 10px; + background-color: #f2f2f2; +} + +img.toggler { + margin-right: 3px; + margin-top: 3px; + cursor: pointer; +} + +div.modindex-jumpbox { + border-top: 1px solid #ddd; + border-bottom: 1px solid #ddd; + margin: 1em 0 1em 0; + padding: 0.4em; +} + +div.genindex-jumpbox { + border-top: 1px solid #ddd; + border-bottom: 1px solid #ddd; + margin: 1em 0 1em 0; + padding: 0.4em; +} + +/* -- domain module index --------------------------------------------------- */ + +table.modindextable td { + padding: 2px; + border-collapse: collapse; +} + +/* -- general body styles --------------------------------------------------- */ + +div.body { + min-width: 450px; + max-width: 800px; +} + +div.body p, 
div.body dd, div.body li, div.body blockquote { + -moz-hyphens: auto; + -ms-hyphens: auto; + -webkit-hyphens: auto; + hyphens: auto; +} + +a.headerlink { + visibility: hidden; +} + +h1:hover > a.headerlink, +h2:hover > a.headerlink, +h3:hover > a.headerlink, +h4:hover > a.headerlink, +h5:hover > a.headerlink, +h6:hover > a.headerlink, +dt:hover > a.headerlink, +caption:hover > a.headerlink, +p.caption:hover > a.headerlink, +div.code-block-caption:hover > a.headerlink { + visibility: visible; +} + +div.body p.caption { + text-align: inherit; +} + +div.body td { + text-align: left; +} + +.first { + margin-top: 0 !important; +} + +p.rubric { + margin-top: 30px; + font-weight: bold; +} + +img.align-left, .figure.align-left, object.align-left { + clear: left; + float: left; + margin-right: 1em; +} + +img.align-right, .figure.align-right, object.align-right { + clear: right; + float: right; + margin-left: 1em; +} + +img.align-center, .figure.align-center, object.align-center { + display: block; + margin-left: auto; + margin-right: auto; +} + +.align-left { + text-align: left; +} + +.align-center { + text-align: center; +} + +.align-right { + text-align: right; +} + +/* -- sidebars -------------------------------------------------------------- */ + +div.sidebar { + margin: 0 0 0.5em 1em; + border: 1px solid #ddb; + padding: 7px 7px 0 7px; + background-color: #ffe; + width: 40%; + float: right; +} + +p.sidebar-title { + font-weight: bold; +} + +/* -- topics ---------------------------------------------------------------- */ + +div.topic { + border: 1px solid #ccc; + padding: 7px 7px 0 7px; + margin: 10px 0 10px 0; +} + +p.topic-title { + font-size: 1.1em; + font-weight: bold; + margin-top: 10px; +} + +/* -- admonitions ----------------------------------------------------------- */ + +div.admonition { + margin-top: 10px; + margin-bottom: 10px; + padding: 7px; +} + +div.admonition dt { + font-weight: bold; +} + +div.admonition dl { + margin-bottom: 0; +} + +p.admonition-title { + margin: 0px 10px 5px 0px; + font-weight: bold; +} + +div.body p.centered { + text-align: center; + margin-top: 25px; +} + +/* -- tables ---------------------------------------------------------------- */ + +table.docutils { + border: 0; + border-collapse: collapse; +} + +table.align-center { + margin-left: auto; + margin-right: auto; +} + +table caption span.caption-number { + font-style: italic; +} + +table caption span.caption-text { +} + +table.docutils td, table.docutils th { + padding: 1px 8px 1px 5px; + border-top: 0; + border-left: 0; + border-right: 0; + border-bottom: 1px solid #aaa; +} + +table.footnote td, table.footnote th { + border: 0 !important; +} + +th { + text-align: left; + padding-right: 5px; +} + +table.citation { + border-left: solid 1px gray; + margin-left: 1px; +} + +table.citation td { + border-bottom: none; +} + +/* -- figures --------------------------------------------------------------- */ + +div.figure { + margin: 0.5em; + padding: 0.5em; +} + +div.figure p.caption { + padding: 0.3em; +} + +div.figure p.caption span.caption-number { + font-style: italic; +} + +div.figure p.caption span.caption-text { +} + +/* -- field list styles ----------------------------------------------------- */ + +table.field-list td, table.field-list th { + border: 0 !important; +} + +.field-list ul { + margin: 0; + padding-left: 1em; +} + +.field-list p { + margin: 0; +} + +.field-name { + -moz-hyphens: manual; + -ms-hyphens: manual; + -webkit-hyphens: manual; + hyphens: manual; +} + +/* -- hlist styles 
---------------------------------------------------------- */ + +table.hlist td { + vertical-align: top; +} + + +/* -- other body styles ----------------------------------------------------- */ + +ol.arabic { + list-style: decimal; +} + +ol.loweralpha { + list-style: lower-alpha; +} + +ol.upperalpha { + list-style: upper-alpha; +} + +ol.lowerroman { + list-style: lower-roman; +} + +ol.upperroman { + list-style: upper-roman; +} + +dl { + margin-bottom: 15px; +} + +dd p { + margin-top: 0px; +} + +dd ul, dd table { + margin-bottom: 10px; +} + +dd { + margin-top: 3px; + margin-bottom: 10px; + margin-left: 30px; +} + +dt:target, span.highlighted { + background-color: #fbe54e; +} + +rect.highlighted { + fill: #fbe54e; +} + +dl.glossary dt { + font-weight: bold; + font-size: 1.1em; +} + +.optional { + font-size: 1.3em; +} + +.sig-paren { + font-size: larger; +} + +.versionmodified { + font-style: italic; +} + +.system-message { + background-color: #fda; + padding: 5px; + border: 3px solid red; +} + +.footnote:target { + background-color: #ffa; +} + +.line-block { + display: block; + margin-top: 1em; + margin-bottom: 1em; +} + +.line-block .line-block { + margin-top: 0; + margin-bottom: 0; + margin-left: 1.5em; +} + +.guilabel, .menuselection { + font-family: sans-serif; +} + +.accelerator { + text-decoration: underline; +} + +.classifier { + font-style: oblique; +} + +abbr, acronym { + border-bottom: dotted 1px; + cursor: help; +} + +/* -- code displays --------------------------------------------------------- */ + +pre { + overflow: auto; + overflow-y: hidden; /* fixes display issues on Chrome browsers */ +} + +span.pre { + -moz-hyphens: none; + -ms-hyphens: none; + -webkit-hyphens: none; + hyphens: none; +} + +td.linenos pre { + padding: 5px 0px; + border: 0; + background-color: transparent; + color: #aaa; +} + +table.highlighttable { + margin-left: 0.5em; +} + +table.highlighttable td { + padding: 0 0.5em 0 0.5em; +} + +div.code-block-caption { + padding: 2px 5px; + font-size: small; +} + +div.code-block-caption code { + background-color: transparent; +} + +div.code-block-caption + div > div.highlight > pre { + margin-top: 0; +} + +div.code-block-caption span.caption-number { + padding: 0.1em 0.3em; + font-style: italic; +} + +div.code-block-caption span.caption-text { +} + +div.literal-block-wrapper { + padding: 1em 1em 0; +} + +div.literal-block-wrapper div.highlight { + margin: 0; +} + +code.descname { + background-color: transparent; + font-weight: bold; + font-size: 1.2em; +} + +code.descclassname { + background-color: transparent; +} + +code.xref, a code { + background-color: transparent; + font-weight: bold; +} + +h1 code, h2 code, h3 code, h4 code, h5 code, h6 code { + background-color: transparent; +} + +.viewcode-link { + float: right; +} + +.viewcode-back { + float: right; + font-family: sans-serif; +} + +div.viewcode-block:target { + margin: -1px -10px; + padding: 0 10px; +} + +/* -- math display ---------------------------------------------------------- */ + +img.math { + vertical-align: middle; +} + +div.body div.math p { + text-align: center; +} + +span.eqno { + float: right; +} + +span.eqno a.headerlink { + position: relative; + left: 0px; + z-index: 1; +} + +div.math:hover a.headerlink { + visibility: visible; +} + +/* -- printout stylesheet --------------------------------------------------- */ + +@media print { + div.document, + div.documentwrapper, + div.bodywrapper { + margin: 0 !important; + width: 100%; + } + + div.sphinxsidebar, + div.related, + div.footer, + #top-link 
{ + display: none; + } +} \ No newline at end of file diff --git a/docs/_static/comment-bright.png b/docs/_static/comment-bright.png new file mode 100644 index 0000000..15e27ed Binary files /dev/null and b/docs/_static/comment-bright.png differ diff --git a/docs/_static/comment-close.png b/docs/_static/comment-close.png new file mode 100644 index 0000000..4d91bcf Binary files /dev/null and b/docs/_static/comment-close.png differ diff --git a/docs/_static/comment.png b/docs/_static/comment.png new file mode 100644 index 0000000..dfbc0cb Binary files /dev/null and b/docs/_static/comment.png differ diff --git a/docs/_static/css/badge_only.css b/docs/_static/css/badge_only.css new file mode 100644 index 0000000..323730a --- /dev/null +++ b/docs/_static/css/badge_only.css @@ -0,0 +1 @@ +.fa:before{-webkit-font-smoothing:antialiased}.clearfix{*zoom:1}.clearfix:before,.clearfix:after{display:table;content:""}.clearfix:after{clear:both}@font-face{font-family:FontAwesome;font-weight:normal;font-style:normal;src:url("../fonts/fontawesome-webfont.eot");src:url("../fonts/fontawesome-webfont.eot?#iefix") format("embedded-opentype"),url("../fonts/fontawesome-webfont.woff") format("woff"),url("../fonts/fontawesome-webfont.ttf") format("truetype"),url("../fonts/fontawesome-webfont.svg#FontAwesome") format("svg")}.fa:before{display:inline-block;font-family:FontAwesome;font-style:normal;font-weight:normal;line-height:1;text-decoration:inherit}a .fa{display:inline-block;text-decoration:inherit}li .fa{display:inline-block}li .fa-large:before,li .fa-large:before{width:1.875em}ul.fas{list-style-type:none;margin-left:2em;text-indent:-0.8em}ul.fas li .fa{width:.8em}ul.fas li .fa-large:before,ul.fas li .fa-large:before{vertical-align:baseline}.fa-book:before{content:""}.icon-book:before{content:""}.fa-caret-down:before{content:""}.icon-caret-down:before{content:""}.fa-caret-up:before{content:""}.icon-caret-up:before{content:""}.fa-caret-left:before{content:""}.icon-caret-left:before{content:""}.fa-caret-right:before{content:""}.icon-caret-right:before{content:""}.rst-versions{position:fixed;bottom:0;left:0;width:300px;color:#fcfcfc;background:#1f1d1d;font-family:"Lato","proxima-nova","Helvetica Neue",Arial,sans-serif;z-index:400}.rst-versions a{color:#2980B9;text-decoration:none}.rst-versions .rst-badge-small{display:none}.rst-versions .rst-current-version{padding:12px;background-color:#272525;display:block;text-align:right;font-size:90%;cursor:pointer;color:#27AE60;*zoom:1}.rst-versions .rst-current-version:before,.rst-versions .rst-current-version:after{display:table;content:""}.rst-versions .rst-current-version:after{clear:both}.rst-versions .rst-current-version .fa{color:#fcfcfc}.rst-versions .rst-current-version .fa-book{float:left}.rst-versions .rst-current-version .icon-book{float:left}.rst-versions .rst-current-version.rst-out-of-date{background-color:#E74C3C;color:#fff}.rst-versions .rst-current-version.rst-active-old-version{background-color:#F1C40F;color:#000}.rst-versions.shift-up{height:auto;max-height:100%}.rst-versions.shift-up .rst-other-versions{display:block}.rst-versions .rst-other-versions{font-size:90%;padding:12px;color:gray;display:none}.rst-versions .rst-other-versions hr{display:block;height:1px;border:0;margin:20px 0;padding:0;border-top:solid 1px #413d3d}.rst-versions .rst-other-versions dd{display:inline-block;margin:0}.rst-versions .rst-other-versions dd 
a{display:inline-block;padding:6px;color:#fcfcfc}.rst-versions.rst-badge{width:auto;bottom:20px;right:20px;left:auto;border:none;max-width:300px}.rst-versions.rst-badge .icon-book{float:none}.rst-versions.rst-badge .fa-book{float:none}.rst-versions.rst-badge.shift-up .rst-current-version{text-align:right}.rst-versions.rst-badge.shift-up .rst-current-version .fa-book{float:left}.rst-versions.rst-badge.shift-up .rst-current-version .icon-book{float:left}.rst-versions.rst-badge .rst-current-version{width:auto;height:30px;line-height:30px;padding:0 6px;display:block;text-align:center}@media screen and (max-width: 768px){.rst-versions{width:85%;display:none}.rst-versions.shift{display:block}} diff --git a/docs/_static/css/custom.css b/docs/_static/css/custom.css new file mode 100644 index 0000000..6ab0757 --- /dev/null +++ b/docs/_static/css/custom.css @@ -0,0 +1,61 @@ +/* Docs background */ +.wy-side-nav-search{ + background-color: #043c74; +} + +/* Mobile version */ +.wy-nav-top{ + background-color: #043c74; +} + + +.green { + color: green; +} + +.red { + color: red; +} + +.blue { + color: blue; +} + +.yellow { + color: yellow; +} + +.badge { + border: 2px; + border-style: solid; + border-color: #6C8EBF; + border-radius: 5px; + padding: 3px 15px 3px 15px; + margin: 5px; + display: inline-block; + font-weight: bold; + font-size: 16px; + background: #DAE8FC; +} + +.badge:hover { + cursor: pointer; +} + +.badge > a { + color: black; +} + +.bordered-container { + border: 0px; + border-style: solid; + border-radius: 8px; + padding: 15px; + margin-bottom: 20px; + background: #f2f2f2; +} + +.questionnaire { + font-size: 1.2em; + line-height: 1.5em; +} \ No newline at end of file diff --git a/docs/_static/css/theme.css b/docs/_static/css/theme.css new file mode 100644 index 0000000..b19dbfe --- /dev/null +++ b/docs/_static/css/theme.css @@ -0,0 +1,6 @@ +/* sphinx_rtd_theme version 0.4.2 | MIT license */ +/* Built 20181005 13:10 */ +*{-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box}article,aside,details,figcaption,figure,footer,header,hgroup,nav,section{display:block}audio,canvas,video{display:inline-block;*display:inline;*zoom:1}audio:not([controls]){display:none}[hidden]{display:none}*{-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box}html{font-size:100%;-webkit-text-size-adjust:100%;-ms-text-size-adjust:100%}body{margin:0}a:hover,a:active{outline:0}abbr[title]{border-bottom:1px dotted}b,strong{font-weight:bold}blockquote{margin:0}dfn{font-style:italic}ins{background:#ff9;color:#000;text-decoration:none}mark{background:#ff0;color:#000;font-style:italic;font-weight:bold}pre,code,.rst-content tt,.rst-content code,kbd,samp{font-family:monospace,serif;_font-family:"courier 
new",monospace;font-size:1em}pre{white-space:pre}q{quotes:none}q:before,q:after{content:"";content:none}small{font-size:85%}sub,sup{font-size:75%;line-height:0;position:relative;vertical-align:baseline}sup{top:-0.5em}sub{bottom:-0.25em}ul,ol,dl{margin:0;padding:0;list-style:none;list-style-image:none}li{list-style:none}dd{margin:0}img{border:0;-ms-interpolation-mode:bicubic;vertical-align:middle;max-width:100%}svg:not(:root){overflow:hidden}figure{margin:0}form{margin:0}fieldset{border:0;margin:0;padding:0}label{cursor:pointer}legend{border:0;*margin-left:-7px;padding:0;white-space:normal}button,input,select,textarea{font-size:100%;margin:0;vertical-align:baseline;*vertical-align:middle}button,input{line-height:normal}button,input[type="button"],input[type="reset"],input[type="submit"]{cursor:pointer;-webkit-appearance:button;*overflow:visible}button[disabled],input[disabled]{cursor:default}input[type="checkbox"],input[type="radio"]{box-sizing:border-box;padding:0;*width:13px;*height:13px}input[type="search"]{-webkit-appearance:textfield;-moz-box-sizing:content-box;-webkit-box-sizing:content-box;box-sizing:content-box}input[type="search"]::-webkit-search-decoration,input[type="search"]::-webkit-search-cancel-button{-webkit-appearance:none}button::-moz-focus-inner,input::-moz-focus-inner{border:0;padding:0}textarea{overflow:auto;vertical-align:top;resize:vertical}table{border-collapse:collapse;border-spacing:0}td{vertical-align:top}.chromeframe{margin:.2em 0;background:#ccc;color:#000;padding:.2em 0}.ir{display:block;border:0;text-indent:-999em;overflow:hidden;background-color:transparent;background-repeat:no-repeat;text-align:left;direction:ltr;*line-height:0}.ir br{display:none}.hidden{display:none !important;visibility:hidden}.visuallyhidden{border:0;clip:rect(0 0 0 0);height:1px;margin:-1px;overflow:hidden;padding:0;position:absolute;width:1px}.visuallyhidden.focusable:active,.visuallyhidden.focusable:focus{clip:auto;height:auto;margin:0;overflow:visible;position:static;width:auto}.invisible{visibility:hidden}.relative{position:relative}big,small{font-size:100%}@media print{html,body,section{background:none !important}*{box-shadow:none !important;text-shadow:none !important;filter:none !important;-ms-filter:none !important}a,a:visited{text-decoration:underline}.ir a:after,a[href^="javascript:"]:after,a[href^="#"]:after{content:""}pre,blockquote{page-break-inside:avoid}thead{display:table-header-group}tr,img{page-break-inside:avoid}img{max-width:100% !important}@page{margin:.5cm}p,h2,.rst-content .toctree-wrapper p.caption,h3{orphans:3;widows:3}h2,.rst-content .toctree-wrapper p.caption,h3{page-break-after:avoid}}.fa:before,.wy-menu-vertical li span.toctree-expand:before,.wy-menu-vertical li.on a span.toctree-expand:before,.wy-menu-vertical li.current>a span.toctree-expand:before,.rst-content .admonition-title:before,.rst-content h1 .headerlink:before,.rst-content h2 .headerlink:before,.rst-content h3 .headerlink:before,.rst-content h4 .headerlink:before,.rst-content h5 .headerlink:before,.rst-content h6 .headerlink:before,.rst-content dl dt .headerlink:before,.rst-content p.caption .headerlink:before,.rst-content table>caption .headerlink:before,.rst-content tt.download span:first-child:before,.rst-content code.download span:first-child:before,.icon:before,.wy-dropdown .caret:before,.wy-inline-validate.wy-inline-validate-success .wy-input-context:before,.wy-inline-validate.wy-inline-validate-danger .wy-input-context:before,.wy-inline-validate.wy-inline-validate-warning 
.wy-input-context:before,.wy-inline-validate.wy-inline-validate-info .wy-input-context:before,.wy-alert,.rst-content .note,.rst-content .attention,.rst-content .caution,.rst-content .danger,.rst-content .error,.rst-content .hint,.rst-content .important,.rst-content .tip,.rst-content .warning,.rst-content .seealso,.rst-content .admonition-todo,.rst-content .admonition,.btn,input[type="text"],input[type="password"],input[type="email"],input[type="url"],input[type="date"],input[type="month"],input[type="time"],input[type="datetime"],input[type="datetime-local"],input[type="week"],input[type="number"],input[type="search"],input[type="tel"],input[type="color"],select,textarea,.wy-menu-vertical li.on a,.wy-menu-vertical li.current>a,.wy-side-nav-search>a,.wy-side-nav-search .wy-dropdown>a,.wy-nav-top a{-webkit-font-smoothing:antialiased}.clearfix{*zoom:1}.clearfix:before,.clearfix:after{display:table;content:""}.clearfix:after{clear:both}/*! + * Font Awesome 4.7.0 by @davegandy - http://fontawesome.io - @fontawesome + * License - http://fontawesome.io/license (Font: SIL OFL 1.1, CSS: MIT License) + */@font-face{font-family:'FontAwesome';src:url("../fonts/fontawesome-webfont.eot?v=4.7.0");src:url("../fonts/fontawesome-webfont.eot?#iefix&v=4.7.0") format("embedded-opentype"),url("../fonts/fontawesome-webfont.woff2?v=4.7.0") format("woff2"),url("../fonts/fontawesome-webfont.woff?v=4.7.0") format("woff"),url("../fonts/fontawesome-webfont.ttf?v=4.7.0") format("truetype"),url("../fonts/fontawesome-webfont.svg?v=4.7.0#fontawesomeregular") format("svg");font-weight:normal;font-style:normal}.fa,.wy-menu-vertical li span.toctree-expand,.wy-menu-vertical li.on a span.toctree-expand,.wy-menu-vertical li.current>a span.toctree-expand,.rst-content .admonition-title,.rst-content h1 .headerlink,.rst-content h2 .headerlink,.rst-content h3 .headerlink,.rst-content h4 .headerlink,.rst-content h5 .headerlink,.rst-content h6 .headerlink,.rst-content dl dt .headerlink,.rst-content p.caption .headerlink,.rst-content table>caption .headerlink,.rst-content tt.download span:first-child,.rst-content code.download span:first-child,.icon{display:inline-block;font:normal normal normal 14px/1 FontAwesome;font-size:inherit;text-rendering:auto;-webkit-font-smoothing:antialiased;-moz-osx-font-smoothing:grayscale}.fa-lg{font-size:1.3333333333em;line-height:.75em;vertical-align:-15%}.fa-2x{font-size:2em}.fa-3x{font-size:3em}.fa-4x{font-size:4em}.fa-5x{font-size:5em}.fa-fw{width:1.2857142857em;text-align:center}.fa-ul{padding-left:0;margin-left:2.1428571429em;list-style-type:none}.fa-ul>li{position:relative}.fa-li{position:absolute;left:-2.1428571429em;width:2.1428571429em;top:.1428571429em;text-align:center}.fa-li.fa-lg{left:-1.8571428571em}.fa-border{padding:.2em .25em .15em;border:solid 0.08em #eee;border-radius:.1em}.fa-pull-left{float:left}.fa-pull-right{float:right}.fa.fa-pull-left,.wy-menu-vertical li span.fa-pull-left.toctree-expand,.wy-menu-vertical li.on a span.fa-pull-left.toctree-expand,.wy-menu-vertical li.current>a span.fa-pull-left.toctree-expand,.rst-content .fa-pull-left.admonition-title,.rst-content h1 .fa-pull-left.headerlink,.rst-content h2 .fa-pull-left.headerlink,.rst-content h3 .fa-pull-left.headerlink,.rst-content h4 .fa-pull-left.headerlink,.rst-content h5 .fa-pull-left.headerlink,.rst-content h6 .fa-pull-left.headerlink,.rst-content dl dt .fa-pull-left.headerlink,.rst-content p.caption .fa-pull-left.headerlink,.rst-content table>caption .fa-pull-left.headerlink,.rst-content tt.download 
span.fa-pull-left:first-child,.rst-content code.download span.fa-pull-left:first-child,.fa-pull-left.icon{margin-right:.3em}.fa.fa-pull-right,.wy-menu-vertical li span.fa-pull-right.toctree-expand,.wy-menu-vertical li.on a span.fa-pull-right.toctree-expand,.wy-menu-vertical li.current>a span.fa-pull-right.toctree-expand,.rst-content .fa-pull-right.admonition-title,.rst-content h1 .fa-pull-right.headerlink,.rst-content h2 .fa-pull-right.headerlink,.rst-content h3 .fa-pull-right.headerlink,.rst-content h4 .fa-pull-right.headerlink,.rst-content h5 .fa-pull-right.headerlink,.rst-content h6 .fa-pull-right.headerlink,.rst-content dl dt .fa-pull-right.headerlink,.rst-content p.caption .fa-pull-right.headerlink,.rst-content table>caption .fa-pull-right.headerlink,.rst-content tt.download span.fa-pull-right:first-child,.rst-content code.download span.fa-pull-right:first-child,.fa-pull-right.icon{margin-left:.3em}.pull-right{float:right}.pull-left{float:left}.fa.pull-left,.wy-menu-vertical li span.pull-left.toctree-expand,.wy-menu-vertical li.on a span.pull-left.toctree-expand,.wy-menu-vertical li.current>a span.pull-left.toctree-expand,.rst-content .pull-left.admonition-title,.rst-content h1 .pull-left.headerlink,.rst-content h2 .pull-left.headerlink,.rst-content h3 .pull-left.headerlink,.rst-content h4 .pull-left.headerlink,.rst-content h5 .pull-left.headerlink,.rst-content h6 .pull-left.headerlink,.rst-content dl dt .pull-left.headerlink,.rst-content p.caption .pull-left.headerlink,.rst-content table>caption .pull-left.headerlink,.rst-content tt.download span.pull-left:first-child,.rst-content code.download span.pull-left:first-child,.pull-left.icon{margin-right:.3em}.fa.pull-right,.wy-menu-vertical li span.pull-right.toctree-expand,.wy-menu-vertical li.on a span.pull-right.toctree-expand,.wy-menu-vertical li.current>a span.pull-right.toctree-expand,.rst-content .pull-right.admonition-title,.rst-content h1 .pull-right.headerlink,.rst-content h2 .pull-right.headerlink,.rst-content h3 .pull-right.headerlink,.rst-content h4 .pull-right.headerlink,.rst-content h5 .pull-right.headerlink,.rst-content h6 .pull-right.headerlink,.rst-content dl dt .pull-right.headerlink,.rst-content p.caption .pull-right.headerlink,.rst-content table>caption .pull-right.headerlink,.rst-content tt.download span.pull-right:first-child,.rst-content code.download span.pull-right:first-child,.pull-right.icon{margin-left:.3em}.fa-spin{-webkit-animation:fa-spin 2s infinite linear;animation:fa-spin 2s infinite linear}.fa-pulse{-webkit-animation:fa-spin 1s infinite steps(8);animation:fa-spin 1s infinite steps(8)}@-webkit-keyframes fa-spin{0%{-webkit-transform:rotate(0deg);transform:rotate(0deg)}100%{-webkit-transform:rotate(359deg);transform:rotate(359deg)}}@keyframes fa-spin{0%{-webkit-transform:rotate(0deg);transform:rotate(0deg)}100%{-webkit-transform:rotate(359deg);transform:rotate(359deg)}}.fa-rotate-90{-ms-filter:"progid:DXImageTransform.Microsoft.BasicImage(rotation=1)";-webkit-transform:rotate(90deg);-ms-transform:rotate(90deg);transform:rotate(90deg)}.fa-rotate-180{-ms-filter:"progid:DXImageTransform.Microsoft.BasicImage(rotation=2)";-webkit-transform:rotate(180deg);-ms-transform:rotate(180deg);transform:rotate(180deg)}.fa-rotate-270{-ms-filter:"progid:DXImageTransform.Microsoft.BasicImage(rotation=3)";-webkit-transform:rotate(270deg);-ms-transform:rotate(270deg);transform:rotate(270deg)}.fa-flip-horizontal{-ms-filter:"progid:DXImageTransform.Microsoft.BasicImage(rotation=0, mirror=1)";-webkit-transform:scale(-1, 
1);-ms-transform:scale(-1, 1);transform:scale(-1, 1)}.fa-flip-vertical{-ms-filter:"progid:DXImageTransform.Microsoft.BasicImage(rotation=2, mirror=1)";-webkit-transform:scale(1, -1);-ms-transform:scale(1, -1);transform:scale(1, -1)}:root .fa-rotate-90,:root .fa-rotate-180,:root .fa-rotate-270,:root .fa-flip-horizontal,:root .fa-flip-vertical{filter:none}.fa-stack{position:relative;display:inline-block;width:2em;height:2em;line-height:2em;vertical-align:middle}.fa-stack-1x,.fa-stack-2x{position:absolute;left:0;width:100%;text-align:center}.fa-stack-1x{line-height:inherit}.fa-stack-2x{font-size:2em}.fa-inverse{color:#fff}.fa-glass:before{content:""}.fa-music:before{content:""}.fa-search:before,.icon-search:before{content:""}.fa-envelope-o:before{content:""}.fa-heart:before{content:""}.fa-star:before{content:""}.fa-star-o:before{content:""}.fa-user:before{content:""}.fa-film:before{content:""}.fa-th-large:before{content:""}.fa-th:before{content:""}.fa-th-list:before{content:""}.fa-check:before{content:""}.fa-remove:before,.fa-close:before,.fa-times:before{content:""}.fa-search-plus:before{content:""}.fa-search-minus:before{content:""}.fa-power-off:before{content:""}.fa-signal:before{content:""}.fa-gear:before,.fa-cog:before{content:""}.fa-trash-o:before{content:""}.fa-home:before,.icon-home:before{content:""}.fa-file-o:before{content:""}.fa-clock-o:before{content:""}.fa-road:before{content:""}.fa-download:before,.rst-content tt.download span:first-child:before,.rst-content code.download span:first-child:before{content:""}.fa-arrow-circle-o-down:before{content:""}.fa-arrow-circle-o-up:before{content:""}.fa-inbox:before{content:""}.fa-play-circle-o:before{content:""}.fa-rotate-right:before,.fa-repeat:before{content:""}.fa-refresh:before{content:""}.fa-list-alt:before{content:""}.fa-lock:before{content:""}.fa-flag:before{content:""}.fa-headphones:before{content:""}.fa-volume-off:before{content:""}.fa-volume-down:before{content:""}.fa-volume-up:before{content:""}.fa-qrcode:before{content:""}.fa-barcode:before{content:""}.fa-tag:before{content:""}.fa-tags:before{content:""}.fa-book:before,.icon-book:before{content:""}.fa-bookmark:before{content:""}.fa-print:before{content:""}.fa-camera:before{content:""}.fa-font:before{content:""}.fa-bold:before{content:""}.fa-italic:before{content:""}.fa-text-height:before{content:""}.fa-text-width:before{content:""}.fa-align-left:before{content:""}.fa-align-center:before{content:""}.fa-align-right:before{content:""}.fa-align-justify:before{content:""}.fa-list:before{content:""}.fa-dedent:before,.fa-outdent:before{content:""}.fa-indent:before{content:""}.fa-video-camera:before{content:""}.fa-photo:before,.fa-image:before,.fa-picture-o:before{content:""}.fa-pencil:before{content:""}.fa-map-marker:before{content:""}.fa-adjust:before{content:""}.fa-tint:before{content:""}.fa-edit:before,.fa-pencil-square-o:before{content:""}.fa-share-square-o:before{content:""}.fa-check-square-o:before{content:""}.fa-arrows:before{content:""}.fa-step-backward:before{content:""}.fa-fast-backward:before{content:""}.fa-backward:before{content:""}.fa-play:before{content:""}.fa-pause:before{content:""}.fa-stop:before{content:""}.fa-forward:before{content:""}.fa-fast-forward:before{content:""}.fa-step-forward:before{content:""}.fa-eject:before{content:""}.fa-chevron-left:before{content:""}.fa-chevron-right:before{content:""}.fa-plus-circle:before{content:""}.fa-minus-circle:before{content:""}.fa-times-circle:
before,.wy-inline-validate.wy-inline-validate-danger .wy-input-context:before{content:""}.fa-check-circle:before,.wy-inline-validate.wy-inline-validate-success .wy-input-context:before{content:""}.fa-question-circle:before{content:""}.fa-info-circle:before{content:""}.fa-crosshairs:before{content:""}.fa-times-circle-o:before{content:""}.fa-check-circle-o:before{content:""}.fa-ban:before{content:""}.fa-arrow-left:before{content:""}.fa-arrow-right:before{content:""}.fa-arrow-up:before{content:""}.fa-arrow-down:before{content:""}.fa-mail-forward:before,.fa-share:before{content:""}.fa-expand:before{content:""}.fa-compress:before{content:""}.fa-plus:before{content:""}.fa-minus:before{content:""}.fa-asterisk:before{content:""}.fa-exclamation-circle:before,.wy-inline-validate.wy-inline-validate-warning .wy-input-context:before,.wy-inline-validate.wy-inline-validate-info .wy-input-context:before,.rst-content .admonition-title:before{content:""}.fa-gift:before{content:""}.fa-leaf:before{content:""}.fa-fire:before,.icon-fire:before{content:""}.fa-eye:before{content:""}.fa-eye-slash:before{content:""}.fa-warning:before,.fa-exclamation-triangle:before{content:""}.fa-plane:before{content:""}.fa-calendar:before{content:""}.fa-random:before{content:""}.fa-comment:before{content:""}.fa-magnet:before{content:""}.fa-chevron-up:before{content:""}.fa-chevron-down:before{content:""}.fa-retweet:before{content:""}.fa-shopping-cart:before{content:""}.fa-folder:before{content:""}.fa-folder-open:before{content:""}.fa-arrows-v:before{content:""}.fa-arrows-h:before{content:""}.fa-bar-chart-o:before,.fa-bar-chart:before{content:""}.fa-twitter-square:before{content:""}.fa-facebook-square:before{content:""}.fa-camera-retro:before{content:""}.fa-key:before{content:""}.fa-gears:before,.fa-cogs:before{content:""}.fa-comments:before{content:""}.fa-thumbs-o-up:before{content:""}.fa-thumbs-o-down:before{content:""}.fa-star-half:before{content:""}.fa-heart-o:before{content:""}.fa-sign-out:before{content:""}.fa-linkedin-square:before{content:""}.fa-thumb-tack:before{content:""}.fa-external-link:before{content:""}.fa-sign-in:before{content:""}.fa-trophy:before{content:""}.fa-github-square:before{content:""}.fa-upload:before{content:""}.fa-lemon-o:before{content:""}.fa-phone:before{content:""}.fa-square-o:before{content:""}.fa-bookmark-o:before{content:""}.fa-phone-square:before{content:""}.fa-twitter:before{content:""}.fa-facebook-f:before,.fa-facebook:before{content:""}.fa-github:before,.icon-github:before{content:""}.fa-unlock:before{content:""}.fa-credit-card:before{content:""}.fa-feed:before,.fa-rss:before{content:""}.fa-hdd-o:before{content:""}.fa-bullhorn:before{content:""}.fa-bell:before{content:""}.fa-certificate:before{content:""}.fa-hand-o-right:before{content:""}.fa-hand-o-left:before{content:""}.fa-hand-o-up:before{content:""}.fa-hand-o-down:before{content:""}.fa-arrow-circle-left:before,.icon-circle-arrow-left:before{content:""}.fa-arrow-circle-right:before,.icon-circle-arrow-right:before{content:""}.fa-arrow-circle-up:before{content:""}.fa-arrow-circle-down:before{content:""}.fa-globe:before{content:""}.fa-wrench:before{content:""}.fa-tasks:before{content:""}.fa-filter:before{content:""}.fa-briefcase:before{content:""}.fa-arrows-alt:before{content:""}.fa-group:before,.fa-users:before{content:""}.fa-chain:before,.fa-link:before,.icon-link:before{content:""}.fa-cloud:before{content:""}.fa-flask:before{content:""}.fa-cut:before,.f
a-scissors:before{content:""}.fa-copy:before,.fa-files-o:before{content:""}.fa-paperclip:before{content:""}.fa-save:before,.fa-floppy-o:before{content:""}.fa-square:before{content:""}.fa-navicon:before,.fa-reorder:before,.fa-bars:before{content:""}.fa-list-ul:before{content:""}.fa-list-ol:before{content:""}.fa-strikethrough:before{content:""}.fa-underline:before{content:""}.fa-table:before{content:""}.fa-magic:before{content:""}.fa-truck:before{content:""}.fa-pinterest:before{content:""}.fa-pinterest-square:before{content:""}.fa-google-plus-square:before{content:""}.fa-google-plus:before{content:""}.fa-money:before{content:""}.fa-caret-down:before,.wy-dropdown .caret:before,.icon-caret-down:before{content:""}.fa-caret-up:before{content:""}.fa-caret-left:before{content:""}.fa-caret-right:before{content:""}.fa-columns:before{content:""}.fa-unsorted:before,.fa-sort:before{content:""}.fa-sort-down:before,.fa-sort-desc:before{content:""}.fa-sort-up:before,.fa-sort-asc:before{content:""}.fa-envelope:before{content:""}.fa-linkedin:before{content:""}.fa-rotate-left:before,.fa-undo:before{content:""}.fa-legal:before,.fa-gavel:before{content:""}.fa-dashboard:before,.fa-tachometer:before{content:""}.fa-comment-o:before{content:""}.fa-comments-o:before{content:""}.fa-flash:before,.fa-bolt:before{content:""}.fa-sitemap:before{content:""}.fa-umbrella:before{content:""}.fa-paste:before,.fa-clipboard:before{content:""}.fa-lightbulb-o:before{content:""}.fa-exchange:before{content:""}.fa-cloud-download:before{content:""}.fa-cloud-upload:before{content:""}.fa-user-md:before{content:""}.fa-stethoscope:before{content:""}.fa-suitcase:before{content:""}.fa-bell-o:before{content:""}.fa-coffee:before{content:""}.fa-cutlery:before{content:""}.fa-file-text-o:before{content:""}.fa-building-o:before{content:""}.fa-hospital-o:before{content:""}.fa-ambulance:before{content:""}.fa-medkit:before{content:""}.fa-fighter-jet:before{content:""}.fa-beer:before{content:""}.fa-h-square:before{content:""}.fa-plus-square:before{content:""}.fa-angle-double-left:before{content:""}.fa-angle-double-right:before{content:""}.fa-angle-double-up:before{content:""}.fa-angle-double-down:before{content:""}.fa-angle-left:before{content:""}.fa-angle-right:before{content:""}.fa-angle-up:before{content:""}.fa-angle-down:before{content:""}.fa-desktop:before{content:""}.fa-laptop:before{content:""}.fa-tablet:before{content:""}.fa-mobile-phone:before,.fa-mobile:before{content:""}.fa-circle-o:before{content:""}.fa-quote-left:before{content:""}.fa-quote-right:before{content:""}.fa-spinner:before{content:""}.fa-circle:before{content:""}.fa-mail-reply:before,.fa-reply:before{content:""}.fa-github-alt:before{content:""}.fa-folder-o:before{content:""}.fa-folder-open-o:before{content:""}.fa-smile-o:before{content:""}.fa-frown-o:before{content:""}.fa-meh-o:before{content:""}.fa-gamepad:before{content:""}.fa-keyboard-o:before{content:""}.fa-flag-o:before{content:""}.fa-flag-checkered:before{content:""}.fa-terminal:before{content:""}.fa-code:before{content:""}.fa-mail-reply-all:before,.fa-reply-all:before{content:""}.fa-star-half-empty:before,.fa-star-half-full:before,.fa-star-half-o:before{content:""}.fa-location-arrow:before{content:""}.fa-crop:before{content:""}.fa-code-fork:before{content:""}.fa-unlink:before,.fa-chain-broken:before{content:""}.fa-question:before{content:""}.fa-info:before{content:""}.fa-exclamation:before{content:""}.fa-superscript:before{conte
nt:""}.fa-subscript:before{content:""}.fa-eraser:before{content:""}.fa-puzzle-piece:before{content:""}.fa-microphone:before{content:""}.fa-microphone-slash:before{content:""}.fa-shield:before{content:""}.fa-calendar-o:before{content:""}.fa-fire-extinguisher:before{content:""}.fa-rocket:before{content:""}.fa-maxcdn:before{content:""}.fa-chevron-circle-left:before{content:""}.fa-chevron-circle-right:before{content:""}.fa-chevron-circle-up:before{content:""}.fa-chevron-circle-down:before{content:""}.fa-html5:before{content:""}.fa-css3:before{content:""}.fa-anchor:before{content:""}.fa-unlock-alt:before{content:""}.fa-bullseye:before{content:""}.fa-ellipsis-h:before{content:""}.fa-ellipsis-v:before{content:""}.fa-rss-square:before{content:""}.fa-play-circle:before{content:""}.fa-ticket:before{content:""}.fa-minus-square:before{content:""}.fa-minus-square-o:before,.wy-menu-vertical li.on a span.toctree-expand:before,.wy-menu-vertical li.current>a span.toctree-expand:before{content:""}.fa-level-up:before{content:""}.fa-level-down:before{content:""}.fa-check-square:before{content:""}.fa-pencil-square:before{content:""}.fa-external-link-square:before{content:""}.fa-share-square:before{content:""}.fa-compass:before{content:""}.fa-toggle-down:before,.fa-caret-square-o-down:before{content:""}.fa-toggle-up:before,.fa-caret-square-o-up:before{content:""}.fa-toggle-right:before,.fa-caret-square-o-right:before{content:""}.fa-euro:before,.fa-eur:before{content:""}.fa-gbp:before{content:""}.fa-dollar:before,.fa-usd:before{content:""}.fa-rupee:before,.fa-inr:before{content:""}.fa-cny:before,.fa-rmb:before,.fa-yen:before,.fa-jpy:before{content:""}.fa-ruble:before,.fa-rouble:before,.fa-rub:before{content:""}.fa-won:before,.fa-krw:before{content:""}.fa-bitcoin:before,.fa-btc:before{content:""}.fa-file:before{content:""}.fa-file-text:before{content:""}.fa-sort-alpha-asc:before{content:""}.fa-sort-alpha-desc:before{content:""}.fa-sort-amount-asc:before{content:""}.fa-sort-amount-desc:before{content:""}.fa-sort-numeric-asc:before{content:""}.fa-sort-numeric-desc:before{content:""}.fa-thumbs-up:before{content:""}.fa-thumbs-down:before{content:""}.fa-youtube-square:before{content:""}.fa-youtube:before{content:""}.fa-xing:before{content:""}.fa-xing-square:before{content:""}.fa-youtube-play:before{content:""}.fa-dropbox:before{content:""}.fa-stack-overflow:before{content:""}.fa-instagram:before{content:""}.fa-flickr:before{content:""}.fa-adn:before{content:""}.fa-bitbucket:before,.icon-bitbucket:before{content:""}.fa-bitbucket-square:before{content:""}.fa-tumblr:before{content:""}.fa-tumblr-square:before{content:""}.fa-long-arrow-down:before{content:""}.fa-long-arrow-up:before{content:""}.fa-long-arrow-left:before{content:""}.fa-long-arrow-right:before{content:""}.fa-apple:before{content:""}.fa-windows:before{content:""}.fa-android:before{content:""}.fa-linux:before{content:""}.fa-dribbble:before{content:""}.fa-skype:before{content:""}.fa-foursquare:before{content:""}.fa-trello:before{content:""}.fa-female:before{content:""}.fa-male:before{content:""}.fa-gittip:before,.fa-gratipay:before{content:""}.fa-sun-o:before{content:""}.fa-moon-o:before{content:""}.fa-archive:before{content:""}.fa-bug:before{content:""}.fa-vk:before{content:""}.fa-weibo:before{content:""}.fa-renren:before{content:""}.fa-pagelines:before{content:""}.fa-stack-exchange:before{content:""}.fa-arrow-circle-o-right:before{content:""}.fa-arrow-circle-o-left:be
fore{content:""}.fa-toggle-left:before,.fa-caret-square-o-left:before{content:""}.fa-dot-circle-o:before{content:""}.fa-wheelchair:before{content:""}.fa-vimeo-square:before{content:""}.fa-turkish-lira:before,.fa-try:before{content:""}.fa-plus-square-o:before,.wy-menu-vertical li span.toctree-expand:before{content:""}.fa-space-shuttle:before{content:""}.fa-slack:before{content:""}.fa-envelope-square:before{content:""}.fa-wordpress:before{content:""}.fa-openid:before{content:""}.fa-institution:before,.fa-bank:before,.fa-university:before{content:""}.fa-mortar-board:before,.fa-graduation-cap:before{content:""}.fa-yahoo:before{content:""}.fa-google:before{content:""}.fa-reddit:before{content:""}.fa-reddit-square:before{content:""}.fa-stumbleupon-circle:before{content:""}.fa-stumbleupon:before{content:""}.fa-delicious:before{content:""}.fa-digg:before{content:""}.fa-pied-piper-pp:before{content:""}.fa-pied-piper-alt:before{content:""}.fa-drupal:before{content:""}.fa-joomla:before{content:""}.fa-language:before{content:""}.fa-fax:before{content:""}.fa-building:before{content:""}.fa-child:before{content:""}.fa-paw:before{content:""}.fa-spoon:before{content:""}.fa-cube:before{content:""}.fa-cubes:before{content:""}.fa-behance:before{content:""}.fa-behance-square:before{content:""}.fa-steam:before{content:""}.fa-steam-square:before{content:""}.fa-recycle:before{content:""}.fa-automobile:before,.fa-car:before{content:""}.fa-cab:before,.fa-taxi:before{content:""}.fa-tree:before{content:""}.fa-spotify:before{content:""}.fa-deviantart:before{content:""}.fa-soundcloud:before{content:""}.fa-database:before{content:""}.fa-file-pdf-o:before{content:""}.fa-file-word-o:before{content:""}.fa-file-excel-o:before{content:""}.fa-file-powerpoint-o:before{content:""}.fa-file-photo-o:before,.fa-file-picture-o:before,.fa-file-image-o:before{content:""}.fa-file-zip-o:before,.fa-file-archive-o:before{content:""}.fa-file-sound-o:before,.fa-file-audio-o:before{content:""}.fa-file-movie-o:before,.fa-file-video-o:before{content:""}.fa-file-code-o:before{content:""}.fa-vine:before{content:""}.fa-codepen:before{content:""}.fa-jsfiddle:before{content:""}.fa-life-bouy:before,.fa-life-buoy:before,.fa-life-saver:before,.fa-support:before,.fa-life-ring:before{content:""}.fa-circle-o-notch:before{content:""}.fa-ra:before,.fa-resistance:before,.fa-rebel:before{content:""}.fa-ge:before,.fa-empire:before{content:""}.fa-git-square:before{content:""}.fa-git:before{content:""}.fa-y-combinator-square:before,.fa-yc-square:before,.fa-hacker-news:before{content:""}.fa-tencent-weibo:before{content:""}.fa-qq:before{content:""}.fa-wechat:before,.fa-weixin:before{content:""}.fa-send:before,.fa-paper-plane:before{content:""}.fa-send-o:before,.fa-paper-plane-o:before{content:""}.fa-history:before{content:""}.fa-circle-thin:before{content:""}.fa-header:before{content:""}.fa-paragraph:before{content:""}.fa-sliders:before{content:""}.fa-share-alt:before{content:""}.fa-share-alt-square:before{content:""}.fa-bomb:before{content:""}.fa-soccer-ball-o:before,.fa-futbol-o:before{content:""}.fa-tty:before{content:""}.fa-binoculars:before{content:""}.fa-plug:before{content:""}.fa-slideshare:before{content:""}.fa-twitch:before{content:""}.fa-yelp:before{content:""}.fa-newspaper-o:before{content:""}.fa-wifi:before{content:""}.fa-calculator:before{content:""}.fa-paypal:before{content:""}.fa-google-wallet:before{content:""}.fa-cc-visa:before{content:""}.fa-cc-mastercard:b
efore{content:""}.fa-cc-discover:before{content:""}.fa-cc-amex:before{content:""}.fa-cc-paypal:before{content:""}.fa-cc-stripe:before{content:""}.fa-bell-slash:before{content:""}.fa-bell-slash-o:before{content:""}.fa-trash:before{content:""}.fa-copyright:before{content:""}.fa-at:before{content:""}.fa-eyedropper:before{content:""}.fa-paint-brush:before{content:""}.fa-birthday-cake:before{content:""}.fa-area-chart:before{content:""}.fa-pie-chart:before{content:""}.fa-line-chart:before{content:""}.fa-lastfm:before{content:""}.fa-lastfm-square:before{content:""}.fa-toggle-off:before{content:""}.fa-toggle-on:before{content:""}.fa-bicycle:before{content:""}.fa-bus:before{content:""}.fa-ioxhost:before{content:""}.fa-angellist:before{content:""}.fa-cc:before{content:""}.fa-shekel:before,.fa-sheqel:before,.fa-ils:before{content:""}.fa-meanpath:before{content:""}.fa-buysellads:before{content:""}.fa-connectdevelop:before{content:""}.fa-dashcube:before{content:""}.fa-forumbee:before{content:""}.fa-leanpub:before{content:""}.fa-sellsy:before{content:""}.fa-shirtsinbulk:before{content:""}.fa-simplybuilt:before{content:""}.fa-skyatlas:before{content:""}.fa-cart-plus:before{content:""}.fa-cart-arrow-down:before{content:""}.fa-diamond:before{content:""}.fa-ship:before{content:""}.fa-user-secret:before{content:""}.fa-motorcycle:before{content:""}.fa-street-view:before{content:""}.fa-heartbeat:before{content:""}.fa-venus:before{content:""}.fa-mars:before{content:""}.fa-mercury:before{content:""}.fa-intersex:before,.fa-transgender:before{content:""}.fa-transgender-alt:before{content:""}.fa-venus-double:before{content:""}.fa-mars-double:before{content:""}.fa-venus-mars:before{content:""}.fa-mars-stroke:before{content:""}.fa-mars-stroke-v:before{content:""}.fa-mars-stroke-h:before{content:""}.fa-neuter:before{content:""}.fa-genderless:before{content:""}.fa-facebook-official:before{content:""}.fa-pinterest-p:before{content:""}.fa-whatsapp:before{content:""}.fa-server:before{content:""}.fa-user-plus:before{content:""}.fa-user-times:before{content:""}.fa-hotel:before,.fa-bed:before{content:""}.fa-viacoin:before{content:""}.fa-train:before{content:""}.fa-subway:before{content:""}.fa-medium:before{content:""}.fa-yc:before,.fa-y-combinator:before{content:""}.fa-optin-monster:before{content:""}.fa-opencart:before{content:""}.fa-expeditedssl:before{content:""}.fa-battery-4:before,.fa-battery:before,.fa-battery-full:before{content:""}.fa-battery-3:before,.fa-battery-three-quarters:before{content:""}.fa-battery-2:before,.fa-battery-half:before{content:""}.fa-battery-1:before,.fa-battery-quarter:before{content:""}.fa-battery-0:before,.fa-battery-empty:before{content:""}.fa-mouse-pointer:before{content:""}.fa-i-cursor:before{content:""}.fa-object-group:before{content:""}.fa-object-ungroup:before{content:""}.fa-sticky-note:before{content:""}.fa-sticky-note-o:before{content:""}.fa-cc-jcb:before{content:""}.fa-cc-diners-club:before{content:""}.fa-clone:before{content:""}.fa-balance-scale:before{content:""}.fa-hourglass-o:before{content:""}.fa-hourglass-1:before,.fa-hourglass-start:before{content:""}.fa-hourglass-2:before,.fa-hourglass-half:before{content:""}.fa-hourglass-3:before,.fa-hourglass-end:before{content:""}.fa-hourglass:before{content:""}.fa-hand-grab-o:before,.fa-hand-rock-o:before{content:""}.fa-hand-stop-o:before,.fa-hand-paper-o:before{content:""}.fa-hand-scissors-o:before{content:""}.fa-hand-lizard-o:before{content:""}
.fa-hand-spock-o:before{content:""}.fa-hand-pointer-o:before{content:""}.fa-hand-peace-o:before{content:""}.fa-trademark:before{content:""}.fa-registered:before{content:""}.fa-creative-commons:before{content:""}.fa-gg:before{content:""}.fa-gg-circle:before{content:""}.fa-tripadvisor:before{content:""}.fa-odnoklassniki:before{content:""}.fa-odnoklassniki-square:before{content:""}.fa-get-pocket:before{content:""}.fa-wikipedia-w:before{content:""}.fa-safari:before{content:""}.fa-chrome:before{content:""}.fa-firefox:before{content:""}.fa-opera:before{content:""}.fa-internet-explorer:before{content:""}.fa-tv:before,.fa-television:before{content:""}.fa-contao:before{content:""}.fa-500px:before{content:""}.fa-amazon:before{content:""}.fa-calendar-plus-o:before{content:""}.fa-calendar-minus-o:before{content:""}.fa-calendar-times-o:before{content:""}.fa-calendar-check-o:before{content:""}.fa-industry:before{content:""}.fa-map-pin:before{content:""}.fa-map-signs:before{content:""}.fa-map-o:before{content:""}.fa-map:before{content:""}.fa-commenting:before{content:""}.fa-commenting-o:before{content:""}.fa-houzz:before{content:""}.fa-vimeo:before{content:""}.fa-black-tie:before{content:""}.fa-fonticons:before{content:""}.fa-reddit-alien:before{content:""}.fa-edge:before{content:""}.fa-credit-card-alt:before{content:""}.fa-codiepie:before{content:""}.fa-modx:before{content:""}.fa-fort-awesome:before{content:""}.fa-usb:before{content:""}.fa-product-hunt:before{content:""}.fa-mixcloud:before{content:""}.fa-scribd:before{content:""}.fa-pause-circle:before{content:""}.fa-pause-circle-o:before{content:""}.fa-stop-circle:before{content:""}.fa-stop-circle-o:before{content:""}.fa-shopping-bag:before{content:""}.fa-shopping-basket:before{content:""}.fa-hashtag:before{content:""}.fa-bluetooth:before{content:""}.fa-bluetooth-b:before{content:""}.fa-percent:before{content:""}.fa-gitlab:before,.icon-gitlab:before{content:""}.fa-wpbeginner:before{content:""}.fa-wpforms:before{content:""}.fa-envira:before{content:""}.fa-universal-access:before{content:""}.fa-wheelchair-alt:before{content:""}.fa-question-circle-o:before{content:""}.fa-blind:before{content:""}.fa-audio-description:before{content:""}.fa-volume-control-phone:before{content:""}.fa-braille:before{content:""}.fa-assistive-listening-systems:before{content:""}.fa-asl-interpreting:before,.fa-american-sign-language-interpreting:before{content:""}.fa-deafness:before,.fa-hard-of-hearing:before,.fa-deaf:before{content:""}.fa-glide:before{content:""}.fa-glide-g:before{content:""}.fa-signing:before,.fa-sign-language:before{content:""}.fa-low-vision:before{content:""}.fa-viadeo:before{content:""}.fa-viadeo-square:before{content:""}.fa-snapchat:before{content:""}.fa-snapchat-ghost:before{content:""}.fa-snapchat-square:before{content:""}.fa-pied-piper:before{content:""}.fa-first-order:before{content:""}.fa-yoast:before{content:""}.fa-themeisle:before{content:""}.fa-google-plus-circle:before,.fa-google-plus-official:before{content:""}.fa-fa:before,.fa-font-awesome:before{content:""}.fa-handshake-o:before{content:""}.fa-envelope-open:before{content:""}.fa-envelope-open-o:before{content:""}.fa-linode:before{content:""}.fa-address-book:before{content:""}.fa-address-book-o:before{content:""}.fa-vcard:before,.fa-address-card:before{content:""}.fa-vcard-o:before,.fa-address-card-o:before{content:""}.fa-user-circle:before{content:""}.fa-user-circle-o:before{content:""}.fa-user-o:befor
e{content:""}.fa-id-badge:before{content:""}.fa-drivers-license:before,.fa-id-card:before{content:""}.fa-drivers-license-o:before,.fa-id-card-o:before{content:""}.fa-quora:before{content:""}.fa-free-code-camp:before{content:""}.fa-telegram:before{content:""}.fa-thermometer-4:before,.fa-thermometer:before,.fa-thermometer-full:before{content:""}.fa-thermometer-3:before,.fa-thermometer-three-quarters:before{content:""}.fa-thermometer-2:before,.fa-thermometer-half:before{content:""}.fa-thermometer-1:before,.fa-thermometer-quarter:before{content:""}.fa-thermometer-0:before,.fa-thermometer-empty:before{content:""}.fa-shower:before{content:""}.fa-bathtub:before,.fa-s15:before,.fa-bath:before{content:""}.fa-podcast:before{content:""}.fa-window-maximize:before{content:""}.fa-window-minimize:before{content:""}.fa-window-restore:before{content:""}.fa-times-rectangle:before,.fa-window-close:before{content:""}.fa-times-rectangle-o:before,.fa-window-close-o:before{content:""}.fa-bandcamp:before{content:""}.fa-grav:before{content:""}.fa-etsy:before{content:""}.fa-imdb:before{content:""}.fa-ravelry:before{content:""}.fa-eercast:before{content:""}.fa-microchip:before{content:""}.fa-snowflake-o:before{content:""}.fa-superpowers:before{content:""}.fa-wpexplorer:before{content:""}.fa-meetup:before{content:""}.sr-only{position:absolute;width:1px;height:1px;padding:0;margin:-1px;overflow:hidden;clip:rect(0, 0, 0, 0);border:0}.sr-only-focusable:active,.sr-only-focusable:focus{position:static;width:auto;height:auto;margin:0;overflow:visible;clip:auto}.fa,.wy-menu-vertical li span.toctree-expand,.wy-menu-vertical li.on a span.toctree-expand,.wy-menu-vertical li.current>a span.toctree-expand,.rst-content .admonition-title,.rst-content h1 .headerlink,.rst-content h2 .headerlink,.rst-content h3 .headerlink,.rst-content h4 .headerlink,.rst-content h5 .headerlink,.rst-content h6 .headerlink,.rst-content dl dt .headerlink,.rst-content p.caption .headerlink,.rst-content table>caption .headerlink,.rst-content tt.download span:first-child,.rst-content code.download span:first-child,.icon,.wy-dropdown .caret,.wy-inline-validate.wy-inline-validate-success .wy-input-context,.wy-inline-validate.wy-inline-validate-danger .wy-input-context,.wy-inline-validate.wy-inline-validate-warning .wy-input-context,.wy-inline-validate.wy-inline-validate-info .wy-input-context{font-family:inherit}.fa:before,.wy-menu-vertical li span.toctree-expand:before,.wy-menu-vertical li.on a span.toctree-expand:before,.wy-menu-vertical li.current>a span.toctree-expand:before,.rst-content .admonition-title:before,.rst-content h1 .headerlink:before,.rst-content h2 .headerlink:before,.rst-content h3 .headerlink:before,.rst-content h4 .headerlink:before,.rst-content h5 .headerlink:before,.rst-content h6 .headerlink:before,.rst-content dl dt .headerlink:before,.rst-content p.caption .headerlink:before,.rst-content table>caption .headerlink:before,.rst-content tt.download span:first-child:before,.rst-content code.download span:first-child:before,.icon:before,.wy-dropdown .caret:before,.wy-inline-validate.wy-inline-validate-success .wy-input-context:before,.wy-inline-validate.wy-inline-validate-danger .wy-input-context:before,.wy-inline-validate.wy-inline-validate-warning .wy-input-context:before,.wy-inline-validate.wy-inline-validate-info .wy-input-context:before{font-family:"FontAwesome";display:inline-block;font-style:normal;font-weight:normal;line-height:1;text-decoration:inherit}a .fa,a .wy-menu-vertical li 
span.toctree-expand,.wy-menu-vertical li a span.toctree-expand,.wy-menu-vertical li.on a span.toctree-expand,.wy-menu-vertical li.current>a span.toctree-expand,a .rst-content .admonition-title,.rst-content a .admonition-title,a .rst-content h1 .headerlink,.rst-content h1 a .headerlink,a .rst-content h2 .headerlink,.rst-content h2 a .headerlink,a .rst-content h3 .headerlink,.rst-content h3 a .headerlink,a .rst-content h4 .headerlink,.rst-content h4 a .headerlink,a .rst-content h5 .headerlink,.rst-content h5 a .headerlink,a .rst-content h6 .headerlink,.rst-content h6 a .headerlink,a .rst-content dl dt .headerlink,.rst-content dl dt a .headerlink,a .rst-content p.caption .headerlink,.rst-content p.caption a .headerlink,a .rst-content table>caption .headerlink,.rst-content table>caption a .headerlink,a .rst-content tt.download span:first-child,.rst-content tt.download a span:first-child,a .rst-content code.download span:first-child,.rst-content code.download a span:first-child,a .icon{display:inline-block;text-decoration:inherit}.btn .fa,.btn .wy-menu-vertical li span.toctree-expand,.wy-menu-vertical li .btn span.toctree-expand,.btn .wy-menu-vertical li.on a span.toctree-expand,.wy-menu-vertical li.on a .btn span.toctree-expand,.btn .wy-menu-vertical li.current>a span.toctree-expand,.wy-menu-vertical li.current>a .btn span.toctree-expand,.btn .rst-content .admonition-title,.rst-content .btn .admonition-title,.btn .rst-content h1 .headerlink,.rst-content h1 .btn .headerlink,.btn .rst-content h2 .headerlink,.rst-content h2 .btn .headerlink,.btn .rst-content h3 .headerlink,.rst-content h3 .btn .headerlink,.btn .rst-content h4 .headerlink,.rst-content h4 .btn .headerlink,.btn .rst-content h5 .headerlink,.rst-content h5 .btn .headerlink,.btn .rst-content h6 .headerlink,.rst-content h6 .btn .headerlink,.btn .rst-content dl dt .headerlink,.rst-content dl dt .btn .headerlink,.btn .rst-content p.caption .headerlink,.rst-content p.caption .btn .headerlink,.btn .rst-content table>caption .headerlink,.rst-content table>caption .btn .headerlink,.btn .rst-content tt.download span:first-child,.rst-content tt.download .btn span:first-child,.btn .rst-content code.download span:first-child,.rst-content code.download .btn span:first-child,.btn .icon,.nav .fa,.nav .wy-menu-vertical li span.toctree-expand,.wy-menu-vertical li .nav span.toctree-expand,.nav .wy-menu-vertical li.on a span.toctree-expand,.wy-menu-vertical li.on a .nav span.toctree-expand,.nav .wy-menu-vertical li.current>a span.toctree-expand,.wy-menu-vertical li.current>a .nav span.toctree-expand,.nav .rst-content .admonition-title,.rst-content .nav .admonition-title,.nav .rst-content h1 .headerlink,.rst-content h1 .nav .headerlink,.nav .rst-content h2 .headerlink,.rst-content h2 .nav .headerlink,.nav .rst-content h3 .headerlink,.rst-content h3 .nav .headerlink,.nav .rst-content h4 .headerlink,.rst-content h4 .nav .headerlink,.nav .rst-content h5 .headerlink,.rst-content h5 .nav .headerlink,.nav .rst-content h6 .headerlink,.rst-content h6 .nav .headerlink,.nav .rst-content dl dt .headerlink,.rst-content dl dt .nav .headerlink,.nav .rst-content p.caption .headerlink,.rst-content p.caption .nav .headerlink,.nav .rst-content table>caption .headerlink,.rst-content table>caption .nav .headerlink,.nav .rst-content tt.download span:first-child,.rst-content tt.download .nav span:first-child,.nav .rst-content code.download span:first-child,.rst-content code.download .nav span:first-child,.nav .icon{display:inline}.btn .fa.fa-large,.btn .wy-menu-vertical li 
span.fa-large.toctree-expand,.wy-menu-vertical li .btn span.fa-large.toctree-expand,.btn .rst-content .fa-large.admonition-title,.rst-content .btn .fa-large.admonition-title,.btn .rst-content h1 .fa-large.headerlink,.rst-content h1 .btn .fa-large.headerlink,.btn .rst-content h2 .fa-large.headerlink,.rst-content h2 .btn .fa-large.headerlink,.btn .rst-content h3 .fa-large.headerlink,.rst-content h3 .btn .fa-large.headerlink,.btn .rst-content h4 .fa-large.headerlink,.rst-content h4 .btn .fa-large.headerlink,.btn .rst-content h5 .fa-large.headerlink,.rst-content h5 .btn .fa-large.headerlink,.btn .rst-content h6 .fa-large.headerlink,.rst-content h6 .btn .fa-large.headerlink,.btn .rst-content dl dt .fa-large.headerlink,.rst-content dl dt .btn .fa-large.headerlink,.btn .rst-content p.caption .fa-large.headerlink,.rst-content p.caption .btn .fa-large.headerlink,.btn .rst-content table>caption .fa-large.headerlink,.rst-content table>caption .btn .fa-large.headerlink,.btn .rst-content tt.download span.fa-large:first-child,.rst-content tt.download .btn span.fa-large:first-child,.btn .rst-content code.download span.fa-large:first-child,.rst-content code.download .btn span.fa-large:first-child,.btn .fa-large.icon,.nav .fa.fa-large,.nav .wy-menu-vertical li span.fa-large.toctree-expand,.wy-menu-vertical li .nav span.fa-large.toctree-expand,.nav .rst-content .fa-large.admonition-title,.rst-content .nav .fa-large.admonition-title,.nav .rst-content h1 .fa-large.headerlink,.rst-content h1 .nav .fa-large.headerlink,.nav .rst-content h2 .fa-large.headerlink,.rst-content h2 .nav .fa-large.headerlink,.nav .rst-content h3 .fa-large.headerlink,.rst-content h3 .nav .fa-large.headerlink,.nav .rst-content h4 .fa-large.headerlink,.rst-content h4 .nav .fa-large.headerlink,.nav .rst-content h5 .fa-large.headerlink,.rst-content h5 .nav .fa-large.headerlink,.nav .rst-content h6 .fa-large.headerlink,.rst-content h6 .nav .fa-large.headerlink,.nav .rst-content dl dt .fa-large.headerlink,.rst-content dl dt .nav .fa-large.headerlink,.nav .rst-content p.caption .fa-large.headerlink,.rst-content p.caption .nav .fa-large.headerlink,.nav .rst-content table>caption .fa-large.headerlink,.rst-content table>caption .nav .fa-large.headerlink,.nav .rst-content tt.download span.fa-large:first-child,.rst-content tt.download .nav span.fa-large:first-child,.nav .rst-content code.download span.fa-large:first-child,.rst-content code.download .nav span.fa-large:first-child,.nav .fa-large.icon{line-height:.9em}.btn .fa.fa-spin,.btn .wy-menu-vertical li span.fa-spin.toctree-expand,.wy-menu-vertical li .btn span.fa-spin.toctree-expand,.btn .rst-content .fa-spin.admonition-title,.rst-content .btn .fa-spin.admonition-title,.btn .rst-content h1 .fa-spin.headerlink,.rst-content h1 .btn .fa-spin.headerlink,.btn .rst-content h2 .fa-spin.headerlink,.rst-content h2 .btn .fa-spin.headerlink,.btn .rst-content h3 .fa-spin.headerlink,.rst-content h3 .btn .fa-spin.headerlink,.btn .rst-content h4 .fa-spin.headerlink,.rst-content h4 .btn .fa-spin.headerlink,.btn .rst-content h5 .fa-spin.headerlink,.rst-content h5 .btn .fa-spin.headerlink,.btn .rst-content h6 .fa-spin.headerlink,.rst-content h6 .btn .fa-spin.headerlink,.btn .rst-content dl dt .fa-spin.headerlink,.rst-content dl dt .btn .fa-spin.headerlink,.btn .rst-content p.caption .fa-spin.headerlink,.rst-content p.caption .btn .fa-spin.headerlink,.btn .rst-content table>caption .fa-spin.headerlink,.rst-content table>caption .btn .fa-spin.headerlink,.btn .rst-content tt.download 
span.fa-spin:first-child,.rst-content tt.download .btn span.fa-spin:first-child,.btn .rst-content code.download span.fa-spin:first-child,.rst-content code.download .btn span.fa-spin:first-child,.btn .fa-spin.icon,.nav .fa.fa-spin,.nav .wy-menu-vertical li span.fa-spin.toctree-expand,.wy-menu-vertical li .nav span.fa-spin.toctree-expand,.nav .rst-content .fa-spin.admonition-title,.rst-content .nav .fa-spin.admonition-title,.nav .rst-content h1 .fa-spin.headerlink,.rst-content h1 .nav .fa-spin.headerlink,.nav .rst-content h2 .fa-spin.headerlink,.rst-content h2 .nav .fa-spin.headerlink,.nav .rst-content h3 .fa-spin.headerlink,.rst-content h3 .nav .fa-spin.headerlink,.nav .rst-content h4 .fa-spin.headerlink,.rst-content h4 .nav .fa-spin.headerlink,.nav .rst-content h5 .fa-spin.headerlink,.rst-content h5 .nav .fa-spin.headerlink,.nav .rst-content h6 .fa-spin.headerlink,.rst-content h6 .nav .fa-spin.headerlink,.nav .rst-content dl dt .fa-spin.headerlink,.rst-content dl dt .nav .fa-spin.headerlink,.nav .rst-content p.caption .fa-spin.headerlink,.rst-content p.caption .nav .fa-spin.headerlink,.nav .rst-content table>caption .fa-spin.headerlink,.rst-content table>caption .nav .fa-spin.headerlink,.nav .rst-content tt.download span.fa-spin:first-child,.rst-content tt.download .nav span.fa-spin:first-child,.nav .rst-content code.download span.fa-spin:first-child,.rst-content code.download .nav span.fa-spin:first-child,.nav .fa-spin.icon{display:inline-block}.btn.fa:before,.wy-menu-vertical li span.btn.toctree-expand:before,.rst-content .btn.admonition-title:before,.rst-content h1 .btn.headerlink:before,.rst-content h2 .btn.headerlink:before,.rst-content h3 .btn.headerlink:before,.rst-content h4 .btn.headerlink:before,.rst-content h5 .btn.headerlink:before,.rst-content h6 .btn.headerlink:before,.rst-content dl dt .btn.headerlink:before,.rst-content p.caption .btn.headerlink:before,.rst-content table>caption .btn.headerlink:before,.rst-content tt.download span.btn:first-child:before,.rst-content code.download span.btn:first-child:before,.btn.icon:before{opacity:.5;-webkit-transition:opacity .05s ease-in;-moz-transition:opacity .05s ease-in;transition:opacity .05s ease-in}.btn.fa:hover:before,.wy-menu-vertical li span.btn.toctree-expand:hover:before,.rst-content .btn.admonition-title:hover:before,.rst-content h1 .btn.headerlink:hover:before,.rst-content h2 .btn.headerlink:hover:before,.rst-content h3 .btn.headerlink:hover:before,.rst-content h4 .btn.headerlink:hover:before,.rst-content h5 .btn.headerlink:hover:before,.rst-content h6 .btn.headerlink:hover:before,.rst-content dl dt .btn.headerlink:hover:before,.rst-content p.caption .btn.headerlink:hover:before,.rst-content table>caption .btn.headerlink:hover:before,.rst-content tt.download span.btn:first-child:hover:before,.rst-content code.download span.btn:first-child:hover:before,.btn.icon:hover:before{opacity:1}.btn-mini .fa:before,.btn-mini .wy-menu-vertical li span.toctree-expand:before,.wy-menu-vertical li .btn-mini span.toctree-expand:before,.btn-mini .rst-content .admonition-title:before,.rst-content .btn-mini .admonition-title:before,.btn-mini .rst-content h1 .headerlink:before,.rst-content h1 .btn-mini .headerlink:before,.btn-mini .rst-content h2 .headerlink:before,.rst-content h2 .btn-mini .headerlink:before,.btn-mini .rst-content h3 .headerlink:before,.rst-content h3 .btn-mini .headerlink:before,.btn-mini .rst-content h4 .headerlink:before,.rst-content h4 .btn-mini .headerlink:before,.btn-mini .rst-content h5 .headerlink:before,.rst-content 
h5 .btn-mini .headerlink:before,.btn-mini .rst-content h6 .headerlink:before,.rst-content h6 .btn-mini .headerlink:before,.btn-mini .rst-content dl dt .headerlink:before,.rst-content dl dt .btn-mini .headerlink:before,.btn-mini .rst-content p.caption .headerlink:before,.rst-content p.caption .btn-mini .headerlink:before,.btn-mini .rst-content table>caption .headerlink:before,.rst-content table>caption .btn-mini .headerlink:before,.btn-mini .rst-content tt.download span:first-child:before,.rst-content tt.download .btn-mini span:first-child:before,.btn-mini .rst-content code.download span:first-child:before,.rst-content code.download .btn-mini span:first-child:before,.btn-mini .icon:before{font-size:14px;vertical-align:-15%}.wy-alert,.rst-content .note,.rst-content .attention,.rst-content .caution,.rst-content .danger,.rst-content .error,.rst-content .hint,.rst-content .important,.rst-content .tip,.rst-content .warning,.rst-content .seealso,.rst-content .admonition-todo,.rst-content .admonition{padding:12px;line-height:24px;margin-bottom:24px;background:#e7f2fa}.wy-alert-title,.rst-content .admonition-title{color:#fff;font-weight:bold;display:block;color:#fff;background:#6ab0de;margin:-12px;padding:6px 12px;margin-bottom:12px}.wy-alert.wy-alert-danger,.rst-content .wy-alert-danger.note,.rst-content .wy-alert-danger.attention,.rst-content .wy-alert-danger.caution,.rst-content .danger,.rst-content .error,.rst-content .wy-alert-danger.hint,.rst-content .wy-alert-danger.important,.rst-content .wy-alert-danger.tip,.rst-content .wy-alert-danger.warning,.rst-content .wy-alert-danger.seealso,.rst-content .wy-alert-danger.admonition-todo,.rst-content .wy-alert-danger.admonition{background:#fdf3f2}.wy-alert.wy-alert-danger .wy-alert-title,.rst-content .wy-alert-danger.note .wy-alert-title,.rst-content .wy-alert-danger.attention .wy-alert-title,.rst-content .wy-alert-danger.caution .wy-alert-title,.rst-content .danger .wy-alert-title,.rst-content .error .wy-alert-title,.rst-content .wy-alert-danger.hint .wy-alert-title,.rst-content .wy-alert-danger.important .wy-alert-title,.rst-content .wy-alert-danger.tip .wy-alert-title,.rst-content .wy-alert-danger.warning .wy-alert-title,.rst-content .wy-alert-danger.seealso .wy-alert-title,.rst-content .wy-alert-danger.admonition-todo .wy-alert-title,.rst-content .wy-alert-danger.admonition .wy-alert-title,.wy-alert.wy-alert-danger .rst-content .admonition-title,.rst-content .wy-alert.wy-alert-danger .admonition-title,.rst-content .wy-alert-danger.note .admonition-title,.rst-content .wy-alert-danger.attention .admonition-title,.rst-content .wy-alert-danger.caution .admonition-title,.rst-content .danger .admonition-title,.rst-content .error .admonition-title,.rst-content .wy-alert-danger.hint .admonition-title,.rst-content .wy-alert-danger.important .admonition-title,.rst-content .wy-alert-danger.tip .admonition-title,.rst-content .wy-alert-danger.warning .admonition-title,.rst-content .wy-alert-danger.seealso .admonition-title,.rst-content .wy-alert-danger.admonition-todo .admonition-title,.rst-content .wy-alert-danger.admonition .admonition-title{background:#f29f97}.wy-alert.wy-alert-warning,.rst-content .wy-alert-warning.note,.rst-content .attention,.rst-content .caution,.rst-content .wy-alert-warning.danger,.rst-content .wy-alert-warning.error,.rst-content .wy-alert-warning.hint,.rst-content .wy-alert-warning.important,.rst-content .wy-alert-warning.tip,.rst-content .warning,.rst-content .wy-alert-warning.seealso,.rst-content .admonition-todo,.rst-content 
.wy-alert-warning.admonition{background:#ffedcc}.wy-alert.wy-alert-warning .wy-alert-title,.rst-content .wy-alert-warning.note .wy-alert-title,.rst-content .attention .wy-alert-title,.rst-content .caution .wy-alert-title,.rst-content .wy-alert-warning.danger .wy-alert-title,.rst-content .wy-alert-warning.error .wy-alert-title,.rst-content .wy-alert-warning.hint .wy-alert-title,.rst-content .wy-alert-warning.important .wy-alert-title,.rst-content .wy-alert-warning.tip .wy-alert-title,.rst-content .warning .wy-alert-title,.rst-content .wy-alert-warning.seealso .wy-alert-title,.rst-content .admonition-todo .wy-alert-title,.rst-content .wy-alert-warning.admonition .wy-alert-title,.wy-alert.wy-alert-warning .rst-content .admonition-title,.rst-content .wy-alert.wy-alert-warning .admonition-title,.rst-content .wy-alert-warning.note .admonition-title,.rst-content .attention .admonition-title,.rst-content .caution .admonition-title,.rst-content .wy-alert-warning.danger .admonition-title,.rst-content .wy-alert-warning.error .admonition-title,.rst-content .wy-alert-warning.hint .admonition-title,.rst-content .wy-alert-warning.important .admonition-title,.rst-content .wy-alert-warning.tip .admonition-title,.rst-content .warning .admonition-title,.rst-content .wy-alert-warning.seealso .admonition-title,.rst-content .admonition-todo .admonition-title,.rst-content .wy-alert-warning.admonition .admonition-title{background:#f0b37e}.wy-alert.wy-alert-info,.rst-content .note,.rst-content .wy-alert-info.attention,.rst-content .wy-alert-info.caution,.rst-content .wy-alert-info.danger,.rst-content .wy-alert-info.error,.rst-content .wy-alert-info.hint,.rst-content .wy-alert-info.important,.rst-content .wy-alert-info.tip,.rst-content .wy-alert-info.warning,.rst-content .seealso,.rst-content .wy-alert-info.admonition-todo,.rst-content .wy-alert-info.admonition{background:#e7f2fa}.wy-alert.wy-alert-info .wy-alert-title,.rst-content .note .wy-alert-title,.rst-content .wy-alert-info.attention .wy-alert-title,.rst-content .wy-alert-info.caution .wy-alert-title,.rst-content .wy-alert-info.danger .wy-alert-title,.rst-content .wy-alert-info.error .wy-alert-title,.rst-content .wy-alert-info.hint .wy-alert-title,.rst-content .wy-alert-info.important .wy-alert-title,.rst-content .wy-alert-info.tip .wy-alert-title,.rst-content .wy-alert-info.warning .wy-alert-title,.rst-content .seealso .wy-alert-title,.rst-content .wy-alert-info.admonition-todo .wy-alert-title,.rst-content .wy-alert-info.admonition .wy-alert-title,.wy-alert.wy-alert-info .rst-content .admonition-title,.rst-content .wy-alert.wy-alert-info .admonition-title,.rst-content .note .admonition-title,.rst-content .wy-alert-info.attention .admonition-title,.rst-content .wy-alert-info.caution .admonition-title,.rst-content .wy-alert-info.danger .admonition-title,.rst-content .wy-alert-info.error .admonition-title,.rst-content .wy-alert-info.hint .admonition-title,.rst-content .wy-alert-info.important .admonition-title,.rst-content .wy-alert-info.tip .admonition-title,.rst-content .wy-alert-info.warning .admonition-title,.rst-content .seealso .admonition-title,.rst-content .wy-alert-info.admonition-todo .admonition-title,.rst-content .wy-alert-info.admonition .admonition-title{background:#6ab0de}.wy-alert.wy-alert-success,.rst-content .wy-alert-success.note,.rst-content .wy-alert-success.attention,.rst-content .wy-alert-success.caution,.rst-content .wy-alert-success.danger,.rst-content .wy-alert-success.error,.rst-content .hint,.rst-content .important,.rst-content 
.tip,.rst-content .wy-alert-success.warning,.rst-content .wy-alert-success.seealso,.rst-content .wy-alert-success.admonition-todo,.rst-content .wy-alert-success.admonition{background:#dbfaf4}.wy-alert.wy-alert-success .wy-alert-title,.rst-content .wy-alert-success.note .wy-alert-title,.rst-content .wy-alert-success.attention .wy-alert-title,.rst-content .wy-alert-success.caution .wy-alert-title,.rst-content .wy-alert-success.danger .wy-alert-title,.rst-content .wy-alert-success.error .wy-alert-title,.rst-content .hint .wy-alert-title,.rst-content .important .wy-alert-title,.rst-content .tip .wy-alert-title,.rst-content .wy-alert-success.warning .wy-alert-title,.rst-content .wy-alert-success.seealso .wy-alert-title,.rst-content .wy-alert-success.admonition-todo .wy-alert-title,.rst-content .wy-alert-success.admonition .wy-alert-title,.wy-alert.wy-alert-success .rst-content .admonition-title,.rst-content .wy-alert.wy-alert-success .admonition-title,.rst-content .wy-alert-success.note .admonition-title,.rst-content .wy-alert-success.attention .admonition-title,.rst-content .wy-alert-success.caution .admonition-title,.rst-content .wy-alert-success.danger .admonition-title,.rst-content .wy-alert-success.error .admonition-title,.rst-content .hint .admonition-title,.rst-content .important .admonition-title,.rst-content .tip .admonition-title,.rst-content .wy-alert-success.warning .admonition-title,.rst-content .wy-alert-success.seealso .admonition-title,.rst-content .wy-alert-success.admonition-todo .admonition-title,.rst-content .wy-alert-success.admonition .admonition-title{background:#1abc9c}.wy-alert.wy-alert-neutral,.rst-content .wy-alert-neutral.note,.rst-content .wy-alert-neutral.attention,.rst-content .wy-alert-neutral.caution,.rst-content .wy-alert-neutral.danger,.rst-content .wy-alert-neutral.error,.rst-content .wy-alert-neutral.hint,.rst-content .wy-alert-neutral.important,.rst-content .wy-alert-neutral.tip,.rst-content .wy-alert-neutral.warning,.rst-content .wy-alert-neutral.seealso,.rst-content .wy-alert-neutral.admonition-todo,.rst-content .wy-alert-neutral.admonition{background:#f3f6f6}.wy-alert.wy-alert-neutral .wy-alert-title,.rst-content .wy-alert-neutral.note .wy-alert-title,.rst-content .wy-alert-neutral.attention .wy-alert-title,.rst-content .wy-alert-neutral.caution .wy-alert-title,.rst-content .wy-alert-neutral.danger .wy-alert-title,.rst-content .wy-alert-neutral.error .wy-alert-title,.rst-content .wy-alert-neutral.hint .wy-alert-title,.rst-content .wy-alert-neutral.important .wy-alert-title,.rst-content .wy-alert-neutral.tip .wy-alert-title,.rst-content .wy-alert-neutral.warning .wy-alert-title,.rst-content .wy-alert-neutral.seealso .wy-alert-title,.rst-content .wy-alert-neutral.admonition-todo .wy-alert-title,.rst-content .wy-alert-neutral.admonition .wy-alert-title,.wy-alert.wy-alert-neutral .rst-content .admonition-title,.rst-content .wy-alert.wy-alert-neutral .admonition-title,.rst-content .wy-alert-neutral.note .admonition-title,.rst-content .wy-alert-neutral.attention .admonition-title,.rst-content .wy-alert-neutral.caution .admonition-title,.rst-content .wy-alert-neutral.danger .admonition-title,.rst-content .wy-alert-neutral.error .admonition-title,.rst-content .wy-alert-neutral.hint .admonition-title,.rst-content .wy-alert-neutral.important .admonition-title,.rst-content .wy-alert-neutral.tip .admonition-title,.rst-content .wy-alert-neutral.warning .admonition-title,.rst-content .wy-alert-neutral.seealso .admonition-title,.rst-content 
.wy-alert-neutral.admonition-todo .admonition-title,.rst-content .wy-alert-neutral.admonition .admonition-title{color:#404040;background:#e1e4e5}.wy-alert.wy-alert-neutral a,.rst-content .wy-alert-neutral.note a,.rst-content .wy-alert-neutral.attention a,.rst-content .wy-alert-neutral.caution a,.rst-content .wy-alert-neutral.danger a,.rst-content .wy-alert-neutral.error a,.rst-content .wy-alert-neutral.hint a,.rst-content .wy-alert-neutral.important a,.rst-content .wy-alert-neutral.tip a,.rst-content .wy-alert-neutral.warning a,.rst-content .wy-alert-neutral.seealso a,.rst-content .wy-alert-neutral.admonition-todo a,.rst-content .wy-alert-neutral.admonition a{color:#2980B9}.wy-alert p:last-child,.rst-content .note p:last-child,.rst-content .attention p:last-child,.rst-content .caution p:last-child,.rst-content .danger p:last-child,.rst-content .error p:last-child,.rst-content .hint p:last-child,.rst-content .important p:last-child,.rst-content .tip p:last-child,.rst-content .warning p:last-child,.rst-content .seealso p:last-child,.rst-content .admonition-todo p:last-child,.rst-content .admonition p:last-child{margin-bottom:0}.wy-tray-container{position:fixed;bottom:0px;left:0;z-index:600}.wy-tray-container li{display:block;width:300px;background:transparent;color:#fff;text-align:center;box-shadow:0 5px 5px 0 rgba(0,0,0,0.1);padding:0 24px;min-width:20%;opacity:0;height:0;line-height:56px;overflow:hidden;-webkit-transition:all .3s ease-in;-moz-transition:all .3s ease-in;transition:all .3s ease-in}.wy-tray-container li.wy-tray-item-success{background:#27AE60}.wy-tray-container li.wy-tray-item-info{background:#2980B9}.wy-tray-container li.wy-tray-item-warning{background:#E67E22}.wy-tray-container li.wy-tray-item-danger{background:#E74C3C}.wy-tray-container li.on{opacity:1;height:56px}@media screen and (max-width: 768px){.wy-tray-container{bottom:auto;top:0;width:100%}.wy-tray-container li{width:100%}}button{font-size:100%;margin:0;vertical-align:baseline;*vertical-align:middle;cursor:pointer;line-height:normal;-webkit-appearance:button;*overflow:visible}button::-moz-focus-inner,input::-moz-focus-inner{border:0;padding:0}button[disabled]{cursor:default}.btn{display:inline-block;border-radius:2px;line-height:normal;white-space:nowrap;text-align:center;cursor:pointer;font-size:100%;padding:6px 12px 8px 12px;color:#fff;border:1px solid rgba(0,0,0,0.1);background-color:#27AE60;text-decoration:none;font-weight:normal;font-family:"Lato","proxima-nova","Helvetica Neue",Arial,sans-serif;box-shadow:0px 1px 2px -1px rgba(255,255,255,0.5) inset,0px -2px 0px 0px rgba(0,0,0,0.1) inset;outline-none:false;vertical-align:middle;*display:inline;zoom:1;-webkit-user-drag:none;-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none;-webkit-transition:all .1s linear;-moz-transition:all .1s linear;transition:all .1s linear}.btn-hover{background:#2e8ece;color:#fff}.btn:hover{background:#2cc36b;color:#fff}.btn:focus{background:#2cc36b;outline:0}.btn:active{box-shadow:0px -1px 0px 0px rgba(0,0,0,0.05) inset,0px 2px 0px 0px rgba(0,0,0,0.1) inset;padding:8px 12px 6px 12px}.btn:visited{color:#fff}.btn:disabled{background-image:none;filter:progid:DXImageTransform.Microsoft.gradient(enabled = false);filter:alpha(opacity=40);opacity:.4;cursor:not-allowed;box-shadow:none}.btn-disabled{background-image:none;filter:progid:DXImageTransform.Microsoft.gradient(enabled = 
false);filter:alpha(opacity=40);opacity:.4;cursor:not-allowed;box-shadow:none}.btn-disabled:hover,.btn-disabled:focus,.btn-disabled:active{background-image:none;filter:progid:DXImageTransform.Microsoft.gradient(enabled = false);filter:alpha(opacity=40);opacity:.4;cursor:not-allowed;box-shadow:none}.btn::-moz-focus-inner{padding:0;border:0}.btn-small{font-size:80%}.btn-info{background-color:#2980B9 !important}.btn-info:hover{background-color:#2e8ece !important}.btn-neutral{background-color:#f3f6f6 !important;color:#404040 !important}.btn-neutral:hover{background-color:#e5ebeb !important;color:#404040}.btn-neutral:visited{color:#404040 !important}.btn-success{background-color:#27AE60 !important}.btn-success:hover{background-color:#295 !important}.btn-danger{background-color:#E74C3C !important}.btn-danger:hover{background-color:#ea6153 !important}.btn-warning{background-color:#E67E22 !important}.btn-warning:hover{background-color:#e98b39 !important}.btn-invert{background-color:#222}.btn-invert:hover{background-color:#2f2f2f !important}.btn-link{background-color:transparent !important;color:#2980B9;box-shadow:none;border-color:transparent !important}.btn-link:hover{background-color:transparent !important;color:#409ad5 !important;box-shadow:none}.btn-link:active{background-color:transparent !important;color:#409ad5 !important;box-shadow:none}.btn-link:visited{color:#9B59B6}.wy-btn-group .btn,.wy-control .btn{vertical-align:middle}.wy-btn-group{margin-bottom:24px;*zoom:1}.wy-btn-group:before,.wy-btn-group:after{display:table;content:""}.wy-btn-group:after{clear:both}.wy-dropdown{position:relative;display:inline-block}.wy-dropdown-active .wy-dropdown-menu{display:block}.wy-dropdown-menu{position:absolute;left:0;display:none;float:left;top:100%;min-width:100%;background:#fcfcfc;z-index:100;border:solid 1px #cfd7dd;box-shadow:0 2px 2px 0 rgba(0,0,0,0.1);padding:12px}.wy-dropdown-menu>dd>a{display:block;clear:both;color:#404040;white-space:nowrap;font-size:90%;padding:0 12px;cursor:pointer}.wy-dropdown-menu>dd>a:hover{background:#2980B9;color:#fff}.wy-dropdown-menu>dd.divider{border-top:solid 1px #cfd7dd;margin:6px 0}.wy-dropdown-menu>dd.search{padding-bottom:12px}.wy-dropdown-menu>dd.search input[type="search"]{width:100%}.wy-dropdown-menu>dd.call-to-action{background:#e3e3e3;text-transform:uppercase;font-weight:500;font-size:80%}.wy-dropdown-menu>dd.call-to-action:hover{background:#e3e3e3}.wy-dropdown-menu>dd.call-to-action .btn{color:#fff}.wy-dropdown.wy-dropdown-up .wy-dropdown-menu{bottom:100%;top:auto;left:auto;right:0}.wy-dropdown.wy-dropdown-bubble .wy-dropdown-menu{background:#fcfcfc;margin-top:2px}.wy-dropdown.wy-dropdown-bubble .wy-dropdown-menu a{padding:6px 12px}.wy-dropdown.wy-dropdown-bubble .wy-dropdown-menu a:hover{background:#2980B9;color:#fff}.wy-dropdown.wy-dropdown-left .wy-dropdown-menu{right:0;left:auto;text-align:right}.wy-dropdown-arrow:before{content:" ";border-bottom:5px solid #f5f5f5;border-left:5px solid transparent;border-right:5px solid transparent;position:absolute;display:block;top:-4px;left:50%;margin-left:-3px}.wy-dropdown-arrow.wy-dropdown-arrow-left:before{left:11px}.wy-form-stacked select{display:block}.wy-form-aligned input,.wy-form-aligned textarea,.wy-form-aligned select,.wy-form-aligned .wy-help-inline,.wy-form-aligned label{display:inline-block;*display:inline;*zoom:1;vertical-align:middle}.wy-form-aligned .wy-control-group>label{display:inline-block;vertical-align:middle;width:10em;margin:6px 12px 0 0;float:left}.wy-form-aligned 
.wy-control{float:left}.wy-form-aligned .wy-control label{display:block}.wy-form-aligned .wy-control select{margin-top:6px}fieldset{border:0;margin:0;padding:0}legend{display:block;width:100%;border:0;padding:0;white-space:normal;margin-bottom:24px;font-size:150%;*margin-left:-7px}label{display:block;margin:0 0 .3125em 0;color:#333;font-size:90%}input,select,textarea{font-size:100%;margin:0;vertical-align:baseline;*vertical-align:middle}.wy-control-group{margin-bottom:24px;*zoom:1;max-width:68em;margin-left:auto;margin-right:auto;*zoom:1}.wy-control-group:before,.wy-control-group:after{display:table;content:""}.wy-control-group:after{clear:both}.wy-control-group:before,.wy-control-group:after{display:table;content:""}.wy-control-group:after{clear:both}.wy-control-group.wy-control-group-required>label:after{content:" *";color:#E74C3C}.wy-control-group .wy-form-full,.wy-control-group .wy-form-halves,.wy-control-group .wy-form-thirds{padding-bottom:12px}.wy-control-group .wy-form-full select,.wy-control-group .wy-form-halves select,.wy-control-group .wy-form-thirds select{width:100%}.wy-control-group .wy-form-full input[type="text"],.wy-control-group .wy-form-full input[type="password"],.wy-control-group .wy-form-full input[type="email"],.wy-control-group .wy-form-full input[type="url"],.wy-control-group .wy-form-full input[type="date"],.wy-control-group .wy-form-full input[type="month"],.wy-control-group .wy-form-full input[type="time"],.wy-control-group .wy-form-full input[type="datetime"],.wy-control-group .wy-form-full input[type="datetime-local"],.wy-control-group .wy-form-full input[type="week"],.wy-control-group .wy-form-full input[type="number"],.wy-control-group .wy-form-full input[type="search"],.wy-control-group .wy-form-full input[type="tel"],.wy-control-group .wy-form-full input[type="color"],.wy-control-group .wy-form-halves input[type="text"],.wy-control-group .wy-form-halves input[type="password"],.wy-control-group .wy-form-halves input[type="email"],.wy-control-group .wy-form-halves input[type="url"],.wy-control-group .wy-form-halves input[type="date"],.wy-control-group .wy-form-halves input[type="month"],.wy-control-group .wy-form-halves input[type="time"],.wy-control-group .wy-form-halves input[type="datetime"],.wy-control-group .wy-form-halves input[type="datetime-local"],.wy-control-group .wy-form-halves input[type="week"],.wy-control-group .wy-form-halves input[type="number"],.wy-control-group .wy-form-halves input[type="search"],.wy-control-group .wy-form-halves input[type="tel"],.wy-control-group .wy-form-halves input[type="color"],.wy-control-group .wy-form-thirds input[type="text"],.wy-control-group .wy-form-thirds input[type="password"],.wy-control-group .wy-form-thirds input[type="email"],.wy-control-group .wy-form-thirds input[type="url"],.wy-control-group .wy-form-thirds input[type="date"],.wy-control-group .wy-form-thirds input[type="month"],.wy-control-group .wy-form-thirds input[type="time"],.wy-control-group .wy-form-thirds input[type="datetime"],.wy-control-group .wy-form-thirds input[type="datetime-local"],.wy-control-group .wy-form-thirds input[type="week"],.wy-control-group .wy-form-thirds input[type="number"],.wy-control-group .wy-form-thirds input[type="search"],.wy-control-group .wy-form-thirds input[type="tel"],.wy-control-group .wy-form-thirds input[type="color"]{width:100%}.wy-control-group .wy-form-full{float:left;display:block;margin-right:2.3576515979%;width:100%;margin-right:0}.wy-control-group 
.wy-form-full:last-child{margin-right:0}.wy-control-group .wy-form-halves{float:left;display:block;margin-right:2.3576515979%;width:48.821174201%}.wy-control-group .wy-form-halves:last-child{margin-right:0}.wy-control-group .wy-form-halves:nth-of-type(2n){margin-right:0}.wy-control-group .wy-form-halves:nth-of-type(2n+1){clear:left}.wy-control-group .wy-form-thirds{float:left;display:block;margin-right:2.3576515979%;width:31.7615656014%}.wy-control-group .wy-form-thirds:last-child{margin-right:0}.wy-control-group .wy-form-thirds:nth-of-type(3n){margin-right:0}.wy-control-group .wy-form-thirds:nth-of-type(3n+1){clear:left}.wy-control-group.wy-control-group-no-input .wy-control{margin:6px 0 0 0;font-size:90%}.wy-control-no-input{display:inline-block;margin:6px 0 0 0;font-size:90%}.wy-control-group.fluid-input input[type="text"],.wy-control-group.fluid-input input[type="password"],.wy-control-group.fluid-input input[type="email"],.wy-control-group.fluid-input input[type="url"],.wy-control-group.fluid-input input[type="date"],.wy-control-group.fluid-input input[type="month"],.wy-control-group.fluid-input input[type="time"],.wy-control-group.fluid-input input[type="datetime"],.wy-control-group.fluid-input input[type="datetime-local"],.wy-control-group.fluid-input input[type="week"],.wy-control-group.fluid-input input[type="number"],.wy-control-group.fluid-input input[type="search"],.wy-control-group.fluid-input input[type="tel"],.wy-control-group.fluid-input input[type="color"]{width:100%}.wy-form-message-inline{display:inline-block;padding-left:.3em;color:#666;vertical-align:middle;font-size:90%}.wy-form-message{display:block;color:#999;font-size:70%;margin-top:.3125em;font-style:italic}.wy-form-message p{font-size:inherit;font-style:italic;margin-bottom:6px}.wy-form-message p:last-child{margin-bottom:0}input{line-height:normal}input[type="button"],input[type="reset"],input[type="submit"]{-webkit-appearance:button;cursor:pointer;font-family:"Lato","proxima-nova","Helvetica Neue",Arial,sans-serif;*overflow:visible}input[type="text"],input[type="password"],input[type="email"],input[type="url"],input[type="date"],input[type="month"],input[type="time"],input[type="datetime"],input[type="datetime-local"],input[type="week"],input[type="number"],input[type="search"],input[type="tel"],input[type="color"]{-webkit-appearance:none;padding:6px;display:inline-block;border:1px solid #ccc;font-size:80%;font-family:"Lato","proxima-nova","Helvetica Neue",Arial,sans-serif;box-shadow:inset 0 1px 3px #ddd;border-radius:0;-webkit-transition:border .3s linear;-moz-transition:border .3s linear;transition:border .3s linear}input[type="datetime-local"]{padding:.34375em .625em}input[disabled]{cursor:default}input[type="checkbox"],input[type="radio"]{-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box;padding:0;margin-right:.3125em;*height:13px;*width:13px}input[type="search"]{-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box}input[type="search"]::-webkit-search-cancel-button,input[type="search"]::-webkit-search-decoration{-webkit-appearance:none}input[type="text"]:focus,input[type="password"]:focus,input[type="email"]:focus,input[type="url"]:focus,input[type="date"]:focus,input[type="month"]:focus,input[type="time"]:focus,input[type="datetime"]:focus,input[type="datetime-local"]:focus,input[type="week"]:focus,input[type="number"]:focus,input[type="search"]:focus,input[type="tel"]:focus,input[type="color"]:focus{outline:0;outline:thin dotted 
\9;border-color:#333}input.no-focus:focus{border-color:#ccc !important}input[type="file"]:focus,input[type="radio"]:focus,input[type="checkbox"]:focus{outline:thin dotted #333;outline:1px auto #129FEA}input[type="text"][disabled],input[type="password"][disabled],input[type="email"][disabled],input[type="url"][disabled],input[type="date"][disabled],input[type="month"][disabled],input[type="time"][disabled],input[type="datetime"][disabled],input[type="datetime-local"][disabled],input[type="week"][disabled],input[type="number"][disabled],input[type="search"][disabled],input[type="tel"][disabled],input[type="color"][disabled]{cursor:not-allowed;background-color:#fafafa}input:focus:invalid,textarea:focus:invalid,select:focus:invalid{color:#E74C3C;border:1px solid #E74C3C}input:focus:invalid:focus,textarea:focus:invalid:focus,select:focus:invalid:focus{border-color:#E74C3C}input[type="file"]:focus:invalid:focus,input[type="radio"]:focus:invalid:focus,input[type="checkbox"]:focus:invalid:focus{outline-color:#E74C3C}input.wy-input-large{padding:12px;font-size:100%}textarea{overflow:auto;vertical-align:top;width:100%;font-family:"Lato","proxima-nova","Helvetica Neue",Arial,sans-serif}select,textarea{padding:.5em .625em;display:inline-block;border:1px solid #ccc;font-size:80%;box-shadow:inset 0 1px 3px #ddd;-webkit-transition:border .3s linear;-moz-transition:border .3s linear;transition:border .3s linear}select{border:1px solid #ccc;background-color:#fff}select[multiple]{height:auto}select:focus,textarea:focus{outline:0}select[disabled],textarea[disabled],input[readonly],select[readonly],textarea[readonly]{cursor:not-allowed;background-color:#fafafa}input[type="radio"][disabled],input[type="checkbox"][disabled]{cursor:not-allowed}.wy-checkbox,.wy-radio{margin:6px 0;color:#404040;display:block}.wy-checkbox input,.wy-radio input{vertical-align:baseline}.wy-form-message-inline{display:inline-block;*display:inline;*zoom:1;vertical-align:middle}.wy-input-prefix,.wy-input-suffix{white-space:nowrap;padding:6px}.wy-input-prefix .wy-input-context,.wy-input-suffix .wy-input-context{line-height:27px;padding:0 8px;display:inline-block;font-size:80%;background-color:#f3f6f6;border:solid 1px #ccc;color:#999}.wy-input-suffix .wy-input-context{border-left:0}.wy-input-prefix .wy-input-context{border-right:0}.wy-switch{position:relative;display:block;height:24px;margin-top:12px;cursor:pointer}.wy-switch:before{position:absolute;content:"";display:block;left:0;top:0;width:36px;height:12px;border-radius:4px;background:#ccc;-webkit-transition:all .2s ease-in-out;-moz-transition:all .2s ease-in-out;transition:all .2s ease-in-out}.wy-switch:after{position:absolute;content:"";display:block;width:18px;height:18px;border-radius:4px;background:#999;left:-3px;top:-3px;-webkit-transition:all .2s ease-in-out;-moz-transition:all .2s ease-in-out;transition:all .2s ease-in-out}.wy-switch span{position:absolute;left:48px;display:block;font-size:12px;color:#ccc;line-height:1}.wy-switch.active:before{background:#1e8449}.wy-switch.active:after{left:24px;background:#27AE60}.wy-switch.disabled{cursor:not-allowed;opacity:.8}.wy-control-group.wy-control-group-error .wy-form-message,.wy-control-group.wy-control-group-error>label{color:#E74C3C}.wy-control-group.wy-control-group-error input[type="text"],.wy-control-group.wy-control-group-error input[type="password"],.wy-control-group.wy-control-group-error input[type="email"],.wy-control-group.wy-control-group-error input[type="url"],.wy-control-group.wy-control-group-error 
input[type="date"],.wy-control-group.wy-control-group-error input[type="month"],.wy-control-group.wy-control-group-error input[type="time"],.wy-control-group.wy-control-group-error input[type="datetime"],.wy-control-group.wy-control-group-error input[type="datetime-local"],.wy-control-group.wy-control-group-error input[type="week"],.wy-control-group.wy-control-group-error input[type="number"],.wy-control-group.wy-control-group-error input[type="search"],.wy-control-group.wy-control-group-error input[type="tel"],.wy-control-group.wy-control-group-error input[type="color"]{border:solid 1px #E74C3C}.wy-control-group.wy-control-group-error textarea{border:solid 1px #E74C3C}.wy-inline-validate{white-space:nowrap}.wy-inline-validate .wy-input-context{padding:.5em .625em;display:inline-block;font-size:80%}.wy-inline-validate.wy-inline-validate-success .wy-input-context{color:#27AE60}.wy-inline-validate.wy-inline-validate-danger .wy-input-context{color:#E74C3C}.wy-inline-validate.wy-inline-validate-warning .wy-input-context{color:#E67E22}.wy-inline-validate.wy-inline-validate-info .wy-input-context{color:#2980B9}.rotate-90{-webkit-transform:rotate(90deg);-moz-transform:rotate(90deg);-ms-transform:rotate(90deg);-o-transform:rotate(90deg);transform:rotate(90deg)}.rotate-180{-webkit-transform:rotate(180deg);-moz-transform:rotate(180deg);-ms-transform:rotate(180deg);-o-transform:rotate(180deg);transform:rotate(180deg)}.rotate-270{-webkit-transform:rotate(270deg);-moz-transform:rotate(270deg);-ms-transform:rotate(270deg);-o-transform:rotate(270deg);transform:rotate(270deg)}.mirror{-webkit-transform:scaleX(-1);-moz-transform:scaleX(-1);-ms-transform:scaleX(-1);-o-transform:scaleX(-1);transform:scaleX(-1)}.mirror.rotate-90{-webkit-transform:scaleX(-1) rotate(90deg);-moz-transform:scaleX(-1) rotate(90deg);-ms-transform:scaleX(-1) rotate(90deg);-o-transform:scaleX(-1) rotate(90deg);transform:scaleX(-1) rotate(90deg)}.mirror.rotate-180{-webkit-transform:scaleX(-1) rotate(180deg);-moz-transform:scaleX(-1) rotate(180deg);-ms-transform:scaleX(-1) rotate(180deg);-o-transform:scaleX(-1) rotate(180deg);transform:scaleX(-1) rotate(180deg)}.mirror.rotate-270{-webkit-transform:scaleX(-1) rotate(270deg);-moz-transform:scaleX(-1) rotate(270deg);-ms-transform:scaleX(-1) rotate(270deg);-o-transform:scaleX(-1) rotate(270deg);transform:scaleX(-1) rotate(270deg)}@media only screen and (max-width: 480px){.wy-form button[type="submit"]{margin:.7em 0 0}.wy-form input[type="text"],.wy-form input[type="password"],.wy-form input[type="email"],.wy-form input[type="url"],.wy-form input[type="date"],.wy-form input[type="month"],.wy-form input[type="time"],.wy-form input[type="datetime"],.wy-form input[type="datetime-local"],.wy-form input[type="week"],.wy-form input[type="number"],.wy-form input[type="search"],.wy-form input[type="tel"],.wy-form input[type="color"]{margin-bottom:.3em;display:block}.wy-form label{margin-bottom:.3em;display:block}.wy-form input[type="password"],.wy-form input[type="email"],.wy-form input[type="url"],.wy-form input[type="date"],.wy-form input[type="month"],.wy-form input[type="time"],.wy-form input[type="datetime"],.wy-form input[type="datetime-local"],.wy-form input[type="week"],.wy-form input[type="number"],.wy-form input[type="search"],.wy-form input[type="tel"],.wy-form input[type="color"]{margin-bottom:0}.wy-form-aligned .wy-control-group label{margin-bottom:.3em;text-align:left;display:block;width:100%}.wy-form-aligned .wy-control{margin:1.5em 0 0 0}.wy-form 
.wy-help-inline,.wy-form-message-inline,.wy-form-message{display:block;font-size:80%;padding:6px 0}}@media screen and (max-width: 768px){.tablet-hide{display:none}}@media screen and (max-width: 480px){.mobile-hide{display:none}}.float-left{float:left}.float-right{float:right}.full-width{width:100%}.wy-table,.rst-content table.docutils,.rst-content table.field-list{border-collapse:collapse;border-spacing:0;empty-cells:show;margin-bottom:24px}.wy-table caption,.rst-content table.docutils caption,.rst-content table.field-list caption{color:#000;font:italic 85%/1 arial,sans-serif;padding:1em 0;text-align:center}.wy-table td,.rst-content table.docutils td,.rst-content table.field-list td,.wy-table th,.rst-content table.docutils th,.rst-content table.field-list th{font-size:90%;margin:0;overflow:visible;padding:8px 16px}.wy-table td:first-child,.rst-content table.docutils td:first-child,.rst-content table.field-list td:first-child,.wy-table th:first-child,.rst-content table.docutils th:first-child,.rst-content table.field-list th:first-child{border-left-width:0}.wy-table thead,.rst-content table.docutils thead,.rst-content table.field-list thead{color:#000;text-align:left;vertical-align:bottom;white-space:nowrap}.wy-table thead th,.rst-content table.docutils thead th,.rst-content table.field-list thead th{font-weight:bold;border-bottom:solid 2px #e1e4e5}.wy-table td,.rst-content table.docutils td,.rst-content table.field-list td{background-color:transparent;vertical-align:middle}.wy-table td p,.rst-content table.docutils td p,.rst-content table.field-list td p{line-height:18px}.wy-table td p:last-child,.rst-content table.docutils td p:last-child,.rst-content table.field-list td p:last-child{margin-bottom:0}.wy-table .wy-table-cell-min,.rst-content table.docutils .wy-table-cell-min,.rst-content table.field-list .wy-table-cell-min{width:1%;padding-right:0}.wy-table .wy-table-cell-min input[type=checkbox],.rst-content table.docutils .wy-table-cell-min input[type=checkbox],.rst-content table.field-list .wy-table-cell-min input[type=checkbox],.wy-table .wy-table-cell-min input[type=checkbox],.rst-content table.docutils .wy-table-cell-min input[type=checkbox],.rst-content table.field-list .wy-table-cell-min input[type=checkbox]{margin:0}.wy-table-secondary{color:gray;font-size:90%}.wy-table-tertiary{color:gray;font-size:80%}.wy-table-odd td,.wy-table-striped tr:nth-child(2n-1) td,.rst-content table.docutils:not(.field-list) tr:nth-child(2n-1) td{background-color:#f3f6f6}.wy-table-backed{background-color:#f3f6f6}.wy-table-bordered-all,.rst-content table.docutils{border:1px solid #e1e4e5}.wy-table-bordered-all td,.rst-content table.docutils td{border-bottom:1px solid #e1e4e5;border-left:1px solid #e1e4e5}.wy-table-bordered-all tbody>tr:last-child td,.rst-content table.docutils tbody>tr:last-child td{border-bottom-width:0}.wy-table-bordered{border:1px solid #e1e4e5}.wy-table-bordered-rows td{border-bottom:1px solid #e1e4e5}.wy-table-bordered-rows tbody>tr:last-child td{border-bottom-width:0}.wy-table-horizontal tbody>tr:last-child td{border-bottom-width:0}.wy-table-horizontal td,.wy-table-horizontal th{border-width:0 0 1px 0;border-bottom:1px solid #e1e4e5}.wy-table-horizontal tbody>tr:last-child td{border-bottom-width:0}.wy-table-responsive{margin-bottom:24px;max-width:100%;overflow:auto}.wy-table-responsive table{margin-bottom:0 !important}.wy-table-responsive table td,.wy-table-responsive table 
th{white-space:nowrap}a{color:#2980B9;text-decoration:none;cursor:pointer}a:hover{color:#3091d1}a:visited{color:#9B59B6}html{height:100%;overflow-x:hidden}body{font-family:"Lato","proxima-nova","Helvetica Neue",Arial,sans-serif;font-weight:normal;color:#404040;min-height:100%;overflow-x:hidden;background:#edf0f2}.wy-text-left{text-align:left}.wy-text-center{text-align:center}.wy-text-right{text-align:right}.wy-text-large{font-size:120%}.wy-text-normal{font-size:100%}.wy-text-small,small{font-size:80%}.wy-text-strike{text-decoration:line-through}.wy-text-warning{color:#E67E22 !important}a.wy-text-warning:hover{color:#eb9950 !important}.wy-text-info{color:#2980B9 !important}a.wy-text-info:hover{color:#409ad5 !important}.wy-text-success{color:#27AE60 !important}a.wy-text-success:hover{color:#36d278 !important}.wy-text-danger{color:#E74C3C !important}a.wy-text-danger:hover{color:#ed7669 !important}.wy-text-neutral{color:#404040 !important}a.wy-text-neutral:hover{color:#595959 !important}h1,h2,.rst-content .toctree-wrapper p.caption,h3,h4,h5,h6,legend{margin-top:0;font-weight:700;font-family:"Roboto Slab","ff-tisa-web-pro","Georgia",Arial,sans-serif}p{line-height:24px;margin:0;font-size:16px;margin-bottom:24px}h1{font-size:175%}h2,.rst-content .toctree-wrapper p.caption{font-size:150%}h3{font-size:125%}h4{font-size:115%}h5{font-size:110%}h6{font-size:100%}hr{display:block;height:1px;border:0;border-top:1px solid #e1e4e5;margin:24px 0;padding:0}code,.rst-content tt,.rst-content code{white-space:nowrap;max-width:100%;background:#fff;border:solid 1px #e1e4e5;font-size:75%;padding:0 5px;font-family:SFMono-Regular,Menlo,Monaco,Consolas,"Liberation Mono","Courier New",Courier,monospace;color:#E74C3C;overflow-x:auto}code.code-large,.rst-content tt.code-large{font-size:90%}.wy-plain-list-disc,.rst-content .section ul,.rst-content .toctree-wrapper ul,article ul{list-style:disc;line-height:24px;margin-bottom:24px}.wy-plain-list-disc li,.rst-content .section ul li,.rst-content .toctree-wrapper ul li,article ul li{list-style:disc;margin-left:24px}.wy-plain-list-disc li p:last-child,.rst-content .section ul li p:last-child,.rst-content .toctree-wrapper ul li p:last-child,article ul li p:last-child{margin-bottom:0}.wy-plain-list-disc li ul,.rst-content .section ul li ul,.rst-content .toctree-wrapper ul li ul,article ul li ul{margin-bottom:0}.wy-plain-list-disc li li,.rst-content .section ul li li,.rst-content .toctree-wrapper ul li li,article ul li li{list-style:circle}.wy-plain-list-disc li li li,.rst-content .section ul li li li,.rst-content .toctree-wrapper ul li li li,article ul li li li{list-style:square}.wy-plain-list-disc li ol li,.rst-content .section ul li ol li,.rst-content .toctree-wrapper ul li ol li,article ul li ol li{list-style:decimal}.wy-plain-list-decimal,.rst-content .section ol,.rst-content ol.arabic,article ol{list-style:decimal;line-height:24px;margin-bottom:24px}.wy-plain-list-decimal li,.rst-content .section ol li,.rst-content ol.arabic li,article ol li{list-style:decimal;margin-left:24px}.wy-plain-list-decimal li p:last-child,.rst-content .section ol li p:last-child,.rst-content ol.arabic li p:last-child,article ol li p:last-child{margin-bottom:0}.wy-plain-list-decimal li ul,.rst-content .section ol li ul,.rst-content ol.arabic li ul,article ol li ul{margin-bottom:0}.wy-plain-list-decimal li ul li,.rst-content .section ol li ul li,.rst-content ol.arabic li ul li,article ol li ul 
li{list-style:disc}.wy-breadcrumbs{*zoom:1}.wy-breadcrumbs:before,.wy-breadcrumbs:after{display:table;content:""}.wy-breadcrumbs:after{clear:both}.wy-breadcrumbs li{display:inline-block}.wy-breadcrumbs li.wy-breadcrumbs-aside{float:right}.wy-breadcrumbs li a{display:inline-block;padding:5px}.wy-breadcrumbs li a:first-child{padding-left:0}.wy-breadcrumbs li code,.wy-breadcrumbs li .rst-content tt,.rst-content .wy-breadcrumbs li tt{padding:5px;border:none;background:none}.wy-breadcrumbs li code.literal,.wy-breadcrumbs li .rst-content tt.literal,.rst-content .wy-breadcrumbs li tt.literal{color:#404040}.wy-breadcrumbs-extra{margin-bottom:0;color:#b3b3b3;font-size:80%;display:inline-block}@media screen and (max-width: 480px){.wy-breadcrumbs-extra{display:none}.wy-breadcrumbs li.wy-breadcrumbs-aside{display:none}}@media print{.wy-breadcrumbs li.wy-breadcrumbs-aside{display:none}}html{font-size:16px}.wy-affix{position:fixed;top:1.618em}.wy-menu a:hover{text-decoration:none}.wy-menu-horiz{*zoom:1}.wy-menu-horiz:before,.wy-menu-horiz:after{display:table;content:""}.wy-menu-horiz:after{clear:both}.wy-menu-horiz ul,.wy-menu-horiz li{display:inline-block}.wy-menu-horiz li:hover{background:rgba(255,255,255,0.1)}.wy-menu-horiz li.divide-left{border-left:solid 1px #404040}.wy-menu-horiz li.divide-right{border-right:solid 1px #404040}.wy-menu-horiz a{height:32px;display:inline-block;line-height:32px;padding:0 16px}.wy-menu-vertical{width:300px}.wy-menu-vertical header,.wy-menu-vertical p.caption{height:32px;display:inline-block;line-height:32px;padding:0 1.618em;margin-bottom:0;display:block;font-weight:bold;text-transform:uppercase;font-size:80%;white-space:nowrap}.wy-menu-vertical ul{margin-bottom:0}.wy-menu-vertical li.divide-top{border-top:solid 1px #404040}.wy-menu-vertical li.divide-bottom{border-bottom:solid 1px #404040}.wy-menu-vertical li.current{background:#e3e3e3}.wy-menu-vertical li.current a{color:gray;border-right:solid 1px #c9c9c9;padding:.4045em 2.427em}.wy-menu-vertical li.current a:hover{background:#d6d6d6}.wy-menu-vertical li code,.wy-menu-vertical li .rst-content tt,.rst-content .wy-menu-vertical li tt{border:none;background:inherit;color:inherit;padding-left:0;padding-right:0}.wy-menu-vertical li span.toctree-expand{display:block;float:left;margin-left:-1.2em;font-size:.8em;line-height:1.6em;color:#4d4d4d}.wy-menu-vertical li.on a,.wy-menu-vertical li.current>a{color:#404040;padding:.4045em 1.618em;font-weight:bold;position:relative;background:#fcfcfc;border:none;padding-left:1.618em -4px}.wy-menu-vertical li.on a:hover,.wy-menu-vertical li.current>a:hover{background:#fcfcfc}.wy-menu-vertical li.on a:hover span.toctree-expand,.wy-menu-vertical li.current>a:hover span.toctree-expand{color:gray}.wy-menu-vertical li.on a span.toctree-expand,.wy-menu-vertical li.current>a span.toctree-expand{display:block;font-size:.8em;line-height:1.6em;color:#333}.wy-menu-vertical li.toctree-l1.current>a{border-bottom:solid 1px #c9c9c9;border-top:solid 1px #c9c9c9}.wy-menu-vertical li.toctree-l2 a,.wy-menu-vertical li.toctree-l3 a,.wy-menu-vertical li.toctree-l4 a{color:#404040}.wy-menu-vertical li.toctree-l1.current li.toctree-l2>ul,.wy-menu-vertical li.toctree-l2.current li.toctree-l3>ul{display:none}.wy-menu-vertical li.toctree-l1.current li.toctree-l2.current>ul,.wy-menu-vertical li.toctree-l2.current li.toctree-l3.current>ul{display:block}.wy-menu-vertical li.toctree-l2.current>a{background:#c9c9c9;padding:.4045em 2.427em}.wy-menu-vertical li.toctree-l2.current 
li.toctree-l3>a{display:block;background:#c9c9c9;padding:.4045em 4.045em}.wy-menu-vertical li.toctree-l2 a:hover span.toctree-expand{color:gray}.wy-menu-vertical li.toctree-l2 span.toctree-expand{color:#a3a3a3}.wy-menu-vertical li.toctree-l3{font-size:.9em}.wy-menu-vertical li.toctree-l3.current>a{background:#bdbdbd;padding:.4045em 4.045em}.wy-menu-vertical li.toctree-l3.current li.toctree-l4>a{display:block;background:#bdbdbd;padding:.4045em 5.663em}.wy-menu-vertical li.toctree-l3 a:hover span.toctree-expand{color:gray}.wy-menu-vertical li.toctree-l3 span.toctree-expand{color:#969696}.wy-menu-vertical li.toctree-l4{font-size:.9em}.wy-menu-vertical li.current ul{display:block}.wy-menu-vertical li ul{margin-bottom:0;display:none}.wy-menu-vertical li ul li a{margin-bottom:0;color:#d9d9d9;font-weight:normal}.wy-menu-vertical a{display:inline-block;line-height:18px;padding:.4045em 1.618em;display:block;position:relative;font-size:90%;color:#d9d9d9}.wy-menu-vertical a:hover{background-color:#4e4a4a;cursor:pointer}.wy-menu-vertical a:hover span.toctree-expand{color:#d9d9d9}.wy-menu-vertical a:active{background-color:#2980B9;cursor:pointer;color:#fff}.wy-menu-vertical a:active span.toctree-expand{color:#fff}.wy-side-nav-search{display:block;width:300px;padding:.809em;margin-bottom:.809em;z-index:200;background-color:#2980B9;text-align:center;padding:.809em;display:block;color:#fcfcfc;margin-bottom:.809em}.wy-side-nav-search input[type=text]{width:100%;border-radius:50px;padding:6px 12px;border-color:#2472a4}.wy-side-nav-search img{display:block;margin:auto auto .809em auto;height:45px;width:45px;background-color:#2980B9;padding:5px;border-radius:100%}.wy-side-nav-search>a,.wy-side-nav-search .wy-dropdown>a{color:#fcfcfc;font-size:100%;font-weight:bold;display:inline-block;padding:4px 6px;margin-bottom:.809em}.wy-side-nav-search>a:hover,.wy-side-nav-search .wy-dropdown>a:hover{background:rgba(255,255,255,0.1)}.wy-side-nav-search>a img.logo,.wy-side-nav-search .wy-dropdown>a img.logo{display:block;margin:0 auto;height:auto;width:auto;border-radius:0;max-width:100%;background:transparent}.wy-side-nav-search>a.icon img.logo,.wy-side-nav-search .wy-dropdown>a.icon img.logo{margin-top:.85em}.wy-side-nav-search>div.version{margin-top:-.4045em;margin-bottom:.809em;font-weight:normal;color:rgba(255,255,255,0.3)}.wy-nav .wy-menu-vertical header{color:#2980B9}.wy-nav .wy-menu-vertical a{color:#b3b3b3}.wy-nav .wy-menu-vertical a:hover{background-color:#2980B9;color:#fff}[data-menu-wrap]{-webkit-transition:all .2s ease-in;-moz-transition:all .2s ease-in;transition:all .2s ease-in;position:absolute;opacity:1;width:100%;opacity:0}[data-menu-wrap].move-center{left:0;right:auto;opacity:1}[data-menu-wrap].move-left{right:auto;left:-100%;opacity:0}[data-menu-wrap].move-right{right:-100%;left:auto;opacity:0}.wy-body-for-nav{background:#fcfcfc}.wy-grid-for-nav{position:absolute;width:100%;height:100%}.wy-nav-side{position:fixed;top:0;bottom:0;left:0;padding-bottom:2em;width:300px;overflow-x:hidden;overflow-y:hidden;min-height:100%;color:#9b9b9b;background:#343131;z-index:200}.wy-side-scroll{width:320px;position:relative;overflow-x:hidden;overflow-y:scroll;height:100%}.wy-nav-top{display:none;background:#2980B9;color:#fff;padding:.4045em .809em;position:relative;line-height:50px;text-align:center;font-size:100%;*zoom:1}.wy-nav-top:before,.wy-nav-top:after{display:table;content:""}.wy-nav-top:after{clear:both}.wy-nav-top a{color:#fff;font-weight:bold}.wy-nav-top 
img{margin-right:12px;height:45px;width:45px;background-color:#2980B9;padding:5px;border-radius:100%}.wy-nav-top i{font-size:30px;float:left;cursor:pointer;padding-top:inherit}.wy-nav-content-wrap{margin-left:300px;background:#fcfcfc;min-height:100%}.wy-nav-content{padding:1.618em 3.236em;height:100%;max-width:800px;margin:auto}.wy-body-mask{position:fixed;width:100%;height:100%;background:rgba(0,0,0,0.2);display:none;z-index:499}.wy-body-mask.on{display:block}footer{color:gray}footer p{margin-bottom:12px}footer span.commit code,footer span.commit .rst-content tt,.rst-content footer span.commit tt{padding:0px;font-family:SFMono-Regular,Menlo,Monaco,Consolas,"Liberation Mono","Courier New",Courier,monospace;font-size:1em;background:none;border:none;color:gray}.rst-footer-buttons{*zoom:1}.rst-footer-buttons:before,.rst-footer-buttons:after{width:100%}.rst-footer-buttons:before,.rst-footer-buttons:after{display:table;content:""}.rst-footer-buttons:after{clear:both}.rst-breadcrumbs-buttons{margin-top:12px;*zoom:1}.rst-breadcrumbs-buttons:before,.rst-breadcrumbs-buttons:after{display:table;content:""}.rst-breadcrumbs-buttons:after{clear:both}#search-results .search li{margin-bottom:24px;border-bottom:solid 1px #e1e4e5;padding-bottom:24px}#search-results .search li:first-child{border-top:solid 1px #e1e4e5;padding-top:24px}#search-results .search li a{font-size:120%;margin-bottom:12px;display:inline-block}#search-results .context{color:gray;font-size:90%}@media screen and (max-width: 768px){.wy-body-for-nav{background:#fcfcfc}.wy-nav-top{display:block}.wy-nav-side{left:-300px}.wy-nav-side.shift{width:85%;left:0}.wy-side-scroll{width:auto}.wy-side-nav-search{width:auto}.wy-menu.wy-menu-vertical{width:auto}.wy-nav-content-wrap{margin-left:0}.wy-nav-content-wrap .wy-nav-content{padding:1.618em}.wy-nav-content-wrap.shift{position:fixed;min-width:100%;left:85%;top:0;height:100%;overflow:hidden}}@media screen and (min-width: 1100px){.wy-nav-content-wrap{background:rgba(0,0,0,0.05)}.wy-nav-content{margin:0;background:#fcfcfc}}@media print{.rst-versions,footer,.wy-nav-side{display:none}.wy-nav-content-wrap{margin-left:0}}.rst-versions{position:fixed;bottom:0;left:0;width:300px;color:#fcfcfc;background:#1f1d1d;font-family:"Lato","proxima-nova","Helvetica Neue",Arial,sans-serif;z-index:400}.rst-versions a{color:#2980B9;text-decoration:none}.rst-versions .rst-badge-small{display:none}.rst-versions .rst-current-version{padding:12px;background-color:#272525;display:block;text-align:right;font-size:90%;cursor:pointer;color:#27AE60;*zoom:1}.rst-versions .rst-current-version:before,.rst-versions .rst-current-version:after{display:table;content:""}.rst-versions .rst-current-version:after{clear:both}.rst-versions .rst-current-version .fa,.rst-versions .rst-current-version .wy-menu-vertical li span.toctree-expand,.wy-menu-vertical li .rst-versions .rst-current-version span.toctree-expand,.rst-versions .rst-current-version .rst-content .admonition-title,.rst-content .rst-versions .rst-current-version .admonition-title,.rst-versions .rst-current-version .rst-content h1 .headerlink,.rst-content h1 .rst-versions .rst-current-version .headerlink,.rst-versions .rst-current-version .rst-content h2 .headerlink,.rst-content h2 .rst-versions .rst-current-version .headerlink,.rst-versions .rst-current-version .rst-content h3 .headerlink,.rst-content h3 .rst-versions .rst-current-version .headerlink,.rst-versions .rst-current-version .rst-content h4 .headerlink,.rst-content h4 .rst-versions .rst-current-version 
.headerlink,.rst-versions .rst-current-version .rst-content h5 .headerlink,.rst-content h5 .rst-versions .rst-current-version .headerlink,.rst-versions .rst-current-version .rst-content h6 .headerlink,.rst-content h6 .rst-versions .rst-current-version .headerlink,.rst-versions .rst-current-version .rst-content dl dt .headerlink,.rst-content dl dt .rst-versions .rst-current-version .headerlink,.rst-versions .rst-current-version .rst-content p.caption .headerlink,.rst-content p.caption .rst-versions .rst-current-version .headerlink,.rst-versions .rst-current-version .rst-content table>caption .headerlink,.rst-content table>caption .rst-versions .rst-current-version .headerlink,.rst-versions .rst-current-version .rst-content tt.download span:first-child,.rst-content tt.download .rst-versions .rst-current-version span:first-child,.rst-versions .rst-current-version .rst-content code.download span:first-child,.rst-content code.download .rst-versions .rst-current-version span:first-child,.rst-versions .rst-current-version .icon{color:#fcfcfc}.rst-versions .rst-current-version .fa-book,.rst-versions .rst-current-version .icon-book{float:left}.rst-versions .rst-current-version .icon-book{float:left}.rst-versions .rst-current-version.rst-out-of-date{background-color:#E74C3C;color:#fff}.rst-versions .rst-current-version.rst-active-old-version{background-color:#F1C40F;color:#000}.rst-versions.shift-up{height:auto;max-height:100%}.rst-versions.shift-up .rst-other-versions{display:block}.rst-versions .rst-other-versions{font-size:90%;padding:12px;color:gray;display:none}.rst-versions .rst-other-versions hr{display:block;height:1px;border:0;margin:20px 0;padding:0;border-top:solid 1px #413d3d}.rst-versions .rst-other-versions dd{display:inline-block;margin:0}.rst-versions .rst-other-versions dd a{display:inline-block;padding:6px;color:#fcfcfc}.rst-versions.rst-badge{width:auto;bottom:20px;right:20px;left:auto;border:none;max-width:300px}.rst-versions.rst-badge .icon-book{float:none}.rst-versions.rst-badge .fa-book,.rst-versions.rst-badge .icon-book{float:none}.rst-versions.rst-badge.shift-up .rst-current-version{text-align:right}.rst-versions.rst-badge.shift-up .rst-current-version .fa-book,.rst-versions.rst-badge.shift-up .rst-current-version .icon-book{float:left}.rst-versions.rst-badge.shift-up .rst-current-version .icon-book{float:left}.rst-versions.rst-badge .rst-current-version{width:auto;height:30px;line-height:30px;padding:0 6px;display:block;text-align:center}@media screen and (max-width: 768px){.rst-versions{width:85%;display:none}.rst-versions.shift{display:block}}.rst-content img{max-width:100%;height:auto}.rst-content div.figure{margin-bottom:24px}.rst-content div.figure p.caption{font-style:italic}.rst-content div.figure p:last-child.caption{margin-bottom:0px}.rst-content div.figure.align-center{text-align:center}.rst-content .section>img,.rst-content .section>a>img{margin-bottom:24px}.rst-content abbr[title]{text-decoration:none}.rst-content.style-external-links a.reference.external:after{font-family:FontAwesome;content:"";color:#b3b3b3;vertical-align:super;font-size:60%;margin:0 .2em}.rst-content blockquote{margin-left:24px;line-height:24px;margin-bottom:24px}.rst-content pre.literal-block{white-space:pre;margin:0;padding:12px 12px;font-family:SFMono-Regular,Menlo,Monaco,Consolas,"Liberation Mono","Courier New",Courier,monospace;display:block;overflow:auto}.rst-content pre.literal-block,.rst-content div[class^='highlight']{border:1px solid #e1e4e5;overflow-x:auto;margin:1px 0 24px 
0}.rst-content pre.literal-block div[class^='highlight'],.rst-content div[class^='highlight'] div[class^='highlight']{padding:0px;border:none;margin:0}.rst-content div[class^='highlight'] td.code{width:100%}.rst-content .linenodiv pre{border-right:solid 1px #e6e9ea;margin:0;padding:12px 12px;font-family:SFMono-Regular,Menlo,Monaco,Consolas,"Liberation Mono","Courier New",Courier,monospace;user-select:none;pointer-events:none}.rst-content div[class^='highlight'] pre{white-space:pre;margin:0;padding:12px 12px;display:block;overflow:auto}.rst-content div[class^='highlight'] pre .hll{display:block;margin:0 -12px;padding:0 12px}.rst-content pre.literal-block,.rst-content div[class^='highlight'] pre,.rst-content .linenodiv pre{font-family:SFMono-Regular,Menlo,Monaco,Consolas,"Liberation Mono","Courier New",Courier,monospace;font-size:12px;line-height:1.4}@media print{.rst-content .codeblock,.rst-content div[class^='highlight'],.rst-content div[class^='highlight'] pre{white-space:pre-wrap}}.rst-content .note .last,.rst-content .attention .last,.rst-content .caution .last,.rst-content .danger .last,.rst-content .error .last,.rst-content .hint .last,.rst-content .important .last,.rst-content .tip .last,.rst-content .warning .last,.rst-content .seealso .last,.rst-content .admonition-todo .last,.rst-content .admonition .last{margin-bottom:0}.rst-content .admonition-title:before{margin-right:4px}.rst-content .admonition table{border-color:rgba(0,0,0,0.1)}.rst-content .admonition table td,.rst-content .admonition table th{background:transparent !important;border-color:rgba(0,0,0,0.1) !important}.rst-content .section ol.loweralpha,.rst-content .section ol.loweralpha li{list-style:lower-alpha}.rst-content .section ol.upperalpha,.rst-content .section ol.upperalpha li{list-style:upper-alpha}.rst-content .section ol p,.rst-content .section ul p{margin-bottom:12px}.rst-content .section ol p:last-child,.rst-content .section ul p:last-child{margin-bottom:24px}.rst-content .line-block{margin-left:0px;margin-bottom:24px;line-height:24px}.rst-content .line-block .line-block{margin-left:24px;margin-bottom:0px}.rst-content .topic-title{font-weight:bold;margin-bottom:12px}.rst-content .toc-backref{color:#404040}.rst-content .align-right{float:right;margin:0px 0px 24px 24px}.rst-content .align-left{float:left;margin:0px 24px 24px 0px}.rst-content .align-center{margin:auto}.rst-content .align-center:not(table){display:block}.rst-content h1 .headerlink,.rst-content h2 .headerlink,.rst-content .toctree-wrapper p.caption .headerlink,.rst-content h3 .headerlink,.rst-content h4 .headerlink,.rst-content h5 .headerlink,.rst-content h6 .headerlink,.rst-content dl dt .headerlink,.rst-content p.caption .headerlink,.rst-content table>caption .headerlink{visibility:hidden;font-size:14px}.rst-content h1 .headerlink:after,.rst-content h2 .headerlink:after,.rst-content .toctree-wrapper p.caption .headerlink:after,.rst-content h3 .headerlink:after,.rst-content h4 .headerlink:after,.rst-content h5 .headerlink:after,.rst-content h6 .headerlink:after,.rst-content dl dt .headerlink:after,.rst-content p.caption .headerlink:after,.rst-content table>caption .headerlink:after{content:"";font-family:FontAwesome}.rst-content h1:hover .headerlink:after,.rst-content h2:hover .headerlink:after,.rst-content .toctree-wrapper p.caption:hover .headerlink:after,.rst-content h3:hover .headerlink:after,.rst-content h4:hover .headerlink:after,.rst-content h5:hover .headerlink:after,.rst-content h6:hover .headerlink:after,.rst-content dl dt:hover 
.headerlink:after,.rst-content p.caption:hover .headerlink:after,.rst-content table>caption:hover .headerlink:after{visibility:visible}.rst-content table>caption .headerlink:after{font-size:12px}.rst-content .centered{text-align:center}.rst-content .sidebar{float:right;width:40%;display:block;margin:0 0 24px 24px;padding:24px;background:#f3f6f6;border:solid 1px #e1e4e5}.rst-content .sidebar p,.rst-content .sidebar ul,.rst-content .sidebar dl{font-size:90%}.rst-content .sidebar .last{margin-bottom:0}.rst-content .sidebar .sidebar-title{display:block;font-family:"Roboto Slab","ff-tisa-web-pro","Georgia",Arial,sans-serif;font-weight:bold;background:#e1e4e5;padding:6px 12px;margin:-24px;margin-bottom:24px;font-size:100%}.rst-content .highlighted{background:#F1C40F;display:inline-block;font-weight:bold;padding:0 6px}.rst-content .footnote-reference,.rst-content .citation-reference{vertical-align:baseline;position:relative;top:-0.4em;line-height:0;font-size:90%}.rst-content table.docutils.citation,.rst-content table.docutils.footnote{background:none;border:none;color:gray}.rst-content table.docutils.citation td,.rst-content table.docutils.citation tr,.rst-content table.docutils.footnote td,.rst-content table.docutils.footnote tr{border:none;background-color:transparent !important;white-space:normal}.rst-content table.docutils.citation td.label,.rst-content table.docutils.footnote td.label{padding-left:0;padding-right:0;vertical-align:top}.rst-content table.docutils.citation tt,.rst-content table.docutils.citation code,.rst-content table.docutils.footnote tt,.rst-content table.docutils.footnote code{color:#555}.rst-content .wy-table-responsive.citation,.rst-content .wy-table-responsive.footnote{margin-bottom:0}.rst-content .wy-table-responsive.citation+:not(.citation),.rst-content .wy-table-responsive.footnote+:not(.footnote){margin-top:24px}.rst-content .wy-table-responsive.citation:last-child,.rst-content .wy-table-responsive.footnote:last-child{margin-bottom:24px}.rst-content table.docutils th{border-color:#e1e4e5}.rst-content table.docutils td .last,.rst-content table.docutils td .last :last-child{margin-bottom:0}.rst-content table.field-list{border:none}.rst-content table.field-list td{border:none}.rst-content table.field-list td>strong{display:inline-block}.rst-content table.field-list .field-name{padding-right:10px;text-align:left;white-space:nowrap}.rst-content table.field-list .field-body{text-align:left}.rst-content tt,.rst-content tt,.rst-content code{color:#000;font-family:SFMono-Regular,Menlo,Monaco,Consolas,"Liberation Mono","Courier New",Courier,monospace;padding:2px 5px}.rst-content tt big,.rst-content tt em,.rst-content tt big,.rst-content code big,.rst-content tt em,.rst-content code em{font-size:100% !important;line-height:normal}.rst-content tt.literal,.rst-content tt.literal,.rst-content code.literal{color:#E74C3C}.rst-content tt.xref,a .rst-content tt,.rst-content tt.xref,.rst-content code.xref,a .rst-content tt,a .rst-content code{font-weight:bold;color:#404040}.rst-content pre,.rst-content kbd,.rst-content samp{font-family:SFMono-Regular,Menlo,Monaco,Consolas,"Liberation Mono","Courier New",Courier,monospace}.rst-content a tt,.rst-content a tt,.rst-content a code{color:#2980B9}.rst-content dl{margin-bottom:24px}.rst-content dl dt{font-weight:bold;margin-bottom:12px}.rst-content dl p,.rst-content dl table,.rst-content dl ul,.rst-content dl ol{margin-bottom:12px !important}.rst-content dl dd{margin:0 0 12px 24px;line-height:24px}.rst-content 
dl:not(.docutils){margin-bottom:24px}.rst-content dl:not(.docutils) dt{display:table;margin:6px 0;font-size:90%;line-height:normal;background:#e7f2fa;color:#2980B9;border-top:solid 3px #6ab0de;padding:6px;position:relative}.rst-content dl:not(.docutils) dt:before{color:#6ab0de}.rst-content dl:not(.docutils) dt .headerlink{color:#404040;font-size:100% !important}.rst-content dl:not(.docutils) dl dt{margin-bottom:6px;border:none;border-left:solid 3px #ccc;background:#f0f0f0;color:#555}.rst-content dl:not(.docutils) dl dt .headerlink{color:#404040;font-size:100% !important}.rst-content dl:not(.docutils) dt:first-child{margin-top:0}.rst-content dl:not(.docutils) tt,.rst-content dl:not(.docutils) tt,.rst-content dl:not(.docutils) code{font-weight:bold}.rst-content dl:not(.docutils) tt.descname,.rst-content dl:not(.docutils) tt.descclassname,.rst-content dl:not(.docutils) tt.descname,.rst-content dl:not(.docutils) code.descname,.rst-content dl:not(.docutils) tt.descclassname,.rst-content dl:not(.docutils) code.descclassname{background-color:transparent;border:none;padding:0;font-size:100% !important}.rst-content dl:not(.docutils) tt.descname,.rst-content dl:not(.docutils) tt.descname,.rst-content dl:not(.docutils) code.descname{font-weight:bold}.rst-content dl:not(.docutils) .optional{display:inline-block;padding:0 4px;color:#000;font-weight:bold}.rst-content dl:not(.docutils) .property{display:inline-block;padding-right:8px}.rst-content .viewcode-link,.rst-content .viewcode-back{display:inline-block;color:#27AE60;font-size:80%;padding-left:24px}.rst-content .viewcode-back{display:block;float:right}.rst-content p.rubric{margin-bottom:12px;font-weight:bold}.rst-content tt.download,.rst-content code.download{background:inherit;padding:inherit;font-weight:normal;font-family:inherit;font-size:inherit;color:inherit;border:inherit;white-space:inherit}.rst-content tt.download span:first-child,.rst-content code.download span:first-child{-webkit-font-smoothing:subpixel-antialiased}.rst-content tt.download span:first-child:before,.rst-content code.download span:first-child:before{margin-right:4px}.rst-content .guilabel{border:1px solid #7fbbe3;background:#e7f2fa;font-size:80%;font-weight:700;border-radius:4px;padding:2.4px 6px;margin:auto 2px}.rst-content .versionmodified{font-style:italic}@media screen and (max-width: 480px){.rst-content .sidebar{width:100%}}span[id*='MathJax-Span']{color:#404040}.math{text-align:center}@font-face{font-family:"Lato";src:url("../fonts/Lato/lato-regular.eot");src:url("../fonts/Lato/lato-regular.eot?#iefix") format("embedded-opentype"),url("../fonts/Lato/lato-regular.woff2") format("woff2"),url("../fonts/Lato/lato-regular.woff") format("woff"),url("../fonts/Lato/lato-regular.ttf") format("truetype");font-weight:400;font-style:normal}@font-face{font-family:"Lato";src:url("../fonts/Lato/lato-bold.eot");src:url("../fonts/Lato/lato-bold.eot?#iefix") format("embedded-opentype"),url("../fonts/Lato/lato-bold.woff2") format("woff2"),url("../fonts/Lato/lato-bold.woff") format("woff"),url("../fonts/Lato/lato-bold.ttf") format("truetype");font-weight:700;font-style:normal}@font-face{font-family:"Lato";src:url("../fonts/Lato/lato-bolditalic.eot");src:url("../fonts/Lato/lato-bolditalic.eot?#iefix") format("embedded-opentype"),url("../fonts/Lato/lato-bolditalic.woff2") format("woff2"),url("../fonts/Lato/lato-bolditalic.woff") format("woff"),url("../fonts/Lato/lato-bolditalic.ttf") 
format("truetype");font-weight:700;font-style:italic}@font-face{font-family:"Lato";src:url("../fonts/Lato/lato-italic.eot");src:url("../fonts/Lato/lato-italic.eot?#iefix") format("embedded-opentype"),url("../fonts/Lato/lato-italic.woff2") format("woff2"),url("../fonts/Lato/lato-italic.woff") format("woff"),url("../fonts/Lato/lato-italic.ttf") format("truetype");font-weight:400;font-style:italic}@font-face{font-family:"Roboto Slab";font-style:normal;font-weight:400;src:url("../fonts/RobotoSlab/roboto-slab.eot");src:url("../fonts/RobotoSlab/roboto-slab-v7-regular.eot?#iefix") format("embedded-opentype"),url("../fonts/RobotoSlab/roboto-slab-v7-regular.woff2") format("woff2"),url("../fonts/RobotoSlab/roboto-slab-v7-regular.woff") format("woff"),url("../fonts/RobotoSlab/roboto-slab-v7-regular.ttf") format("truetype")}@font-face{font-family:"Roboto Slab";font-style:normal;font-weight:700;src:url("../fonts/RobotoSlab/roboto-slab-v7-bold.eot");src:url("../fonts/RobotoSlab/roboto-slab-v7-bold.eot?#iefix") format("embedded-opentype"),url("../fonts/RobotoSlab/roboto-slab-v7-bold.woff2") format("woff2"),url("../fonts/RobotoSlab/roboto-slab-v7-bold.woff") format("woff"),url("../fonts/RobotoSlab/roboto-slab-v7-bold.ttf") format("truetype")} diff --git a/docs/_static/dark_logo.png b/docs/_static/dark_logo.png new file mode 100644 index 0000000..7e38ee6 Binary files /dev/null and b/docs/_static/dark_logo.png differ diff --git a/docs/_static/doctools.js b/docs/_static/doctools.js new file mode 100644 index 0000000..ffadbec --- /dev/null +++ b/docs/_static/doctools.js @@ -0,0 +1,315 @@ +/* + * doctools.js + * ~~~~~~~~~~~ + * + * Sphinx JavaScript utilities for all documentation. + * + * :copyright: Copyright 2007-2018 by the Sphinx team, see AUTHORS. + * :license: BSD, see LICENSE for details. + * + */ + +/** + * select a different prefix for underscore + */ +$u = _.noConflict(); + +/** + * make the code below compatible with browsers without + * an installed firebug like debugger +if (!window.console || !console.firebug) { + var names = ["log", "debug", "info", "warn", "error", "assert", "dir", + "dirxml", "group", "groupEnd", "time", "timeEnd", "count", "trace", + "profile", "profileEnd"]; + window.console = {}; + for (var i = 0; i < names.length; ++i) + window.console[names[i]] = function() {}; +} + */ + +/** + * small helper function to urldecode strings + */ +jQuery.urldecode = function(x) { + return decodeURIComponent(x).replace(/\+/g, ' '); +}; + +/** + * small helper function to urlencode strings + */ +jQuery.urlencode = encodeURIComponent; + +/** + * This function returns the parsed url parameters of the + * current request. Multiple values per key are supported, + * it will always return arrays of strings for the value parts. + */ +jQuery.getQueryParameters = function(s) { + if (typeof s === 'undefined') + s = document.location.search; + var parts = s.substr(s.indexOf('?') + 1).split('&'); + var result = {}; + for (var i = 0; i < parts.length; i++) { + var tmp = parts[i].split('=', 2); + var key = jQuery.urldecode(tmp[0]); + var value = jQuery.urldecode(tmp[1]); + if (key in result) + result[key].push(value); + else + result[key] = [value]; + } + return result; +}; + +/** + * highlight a given string on a jquery object by wrapping it in + * span elements with the given class name. 
+ */ +jQuery.fn.highlightText = function(text, className) { + function highlight(node, addItems) { + if (node.nodeType === 3) { + var val = node.nodeValue; + var pos = val.toLowerCase().indexOf(text); + if (pos >= 0 && + !jQuery(node.parentNode).hasClass(className) && + !jQuery(node.parentNode).hasClass("nohighlight")) { + var span; + var isInSVG = jQuery(node).closest("body, svg, foreignObject").is("svg"); + if (isInSVG) { + span = document.createElementNS("http://www.w3.org/2000/svg", "tspan"); + } else { + span = document.createElement("span"); + span.className = className; + } + span.appendChild(document.createTextNode(val.substr(pos, text.length))); + node.parentNode.insertBefore(span, node.parentNode.insertBefore( + document.createTextNode(val.substr(pos + text.length)), + node.nextSibling)); + node.nodeValue = val.substr(0, pos); + if (isInSVG) { + var bbox = span.getBBox(); + var rect = document.createElementNS("http://www.w3.org/2000/svg", "rect"); + rect.x.baseVal.value = bbox.x; + rect.y.baseVal.value = bbox.y; + rect.width.baseVal.value = bbox.width; + rect.height.baseVal.value = bbox.height; + rect.setAttribute('class', className); + var parentOfText = node.parentNode.parentNode; + addItems.push({ + "parent": node.parentNode, + "target": rect}); + } + } + } + else if (!jQuery(node).is("button, select, textarea")) { + jQuery.each(node.childNodes, function() { + highlight(this, addItems); + }); + } + } + var addItems = []; + var result = this.each(function() { + highlight(this, addItems); + }); + for (var i = 0; i < addItems.length; ++i) { + jQuery(addItems[i].parent).before(addItems[i].target); + } + return result; +}; + +/* + * backward compatibility for jQuery.browser + * This will be supported until firefox bug is fixed. + */ +if (!jQuery.browser) { + jQuery.uaMatch = function(ua) { + ua = ua.toLowerCase(); + + var match = /(chrome)[ \/]([\w.]+)/.exec(ua) || + /(webkit)[ \/]([\w.]+)/.exec(ua) || + /(opera)(?:.*version|)[ \/]([\w.]+)/.exec(ua) || + /(msie) ([\w.]+)/.exec(ua) || + ua.indexOf("compatible") < 0 && /(mozilla)(?:.*? rv:([\w.]+)|)/.exec(ua) || + []; + + return { + browser: match[ 1 ] || "", + version: match[ 2 ] || "0" + }; + }; + jQuery.browser = {}; + jQuery.browser[jQuery.uaMatch(navigator.userAgent).browser] = true; +} + +/** + * Small JavaScript module for the documentation. + */ +var Documentation = { + + init : function() { + this.fixFirefoxAnchorBug(); + this.highlightSearchWords(); + this.initIndexTable(); + if (DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS) { + this.initOnKeyListeners(); + } + }, + + /** + * i18n support + */ + TRANSLATIONS : {}, + PLURAL_EXPR : function(n) { return n === 1 ? 0 : 1; }, + LOCALE : 'unknown', + + // gettext and ngettext don't access this so that the functions + // can safely bound to a different name (_ = Documentation.gettext) + gettext : function(string) { + var translated = Documentation.TRANSLATIONS[string]; + if (typeof translated === 'undefined') + return string; + return (typeof translated === 'string') ? translated : translated[0]; + }, + + ngettext : function(singular, plural, n) { + var translated = Documentation.TRANSLATIONS[singular]; + if (typeof translated === 'undefined') + return (n == 1) ? 
singular : plural; + return translated[Documentation.PLURALEXPR(n)]; + }, + + addTranslations : function(catalog) { + for (var key in catalog.messages) + this.TRANSLATIONS[key] = catalog.messages[key]; + this.PLURAL_EXPR = new Function('n', 'return +(' + catalog.plural_expr + ')'); + this.LOCALE = catalog.locale; + }, + + /** + * add context elements like header anchor links + */ + addContextElements : function() { + $('div[id] > :header:first').each(function() { + $('\u00B6'). + attr('href', '#' + this.id). + attr('title', _('Permalink to this headline')). + appendTo(this); + }); + $('dt[id]').each(function() { + $('\u00B6'). + attr('href', '#' + this.id). + attr('title', _('Permalink to this definition')). + appendTo(this); + }); + }, + + /** + * workaround a firefox stupidity + * see: https://bugzilla.mozilla.org/show_bug.cgi?id=645075 + */ + fixFirefoxAnchorBug : function() { + if (document.location.hash && $.browser.mozilla) + window.setTimeout(function() { + document.location.href += ''; + }, 10); + }, + + /** + * highlight the search words provided in the url in the text + */ + highlightSearchWords : function() { + var params = $.getQueryParameters(); + var terms = (params.highlight) ? params.highlight[0].split(/\s+/) : []; + if (terms.length) { + var body = $('div.body'); + if (!body.length) { + body = $('body'); + } + window.setTimeout(function() { + $.each(terms, function() { + body.highlightText(this.toLowerCase(), 'highlighted'); + }); + }, 10); + $('') + .appendTo($('#searchbox')); + } + }, + + /** + * init the domain index toggle buttons + */ + initIndexTable : function() { + var togglers = $('img.toggler').click(function() { + var src = $(this).attr('src'); + var idnum = $(this).attr('id').substr(7); + $('tr.cg-' + idnum).toggle(); + if (src.substr(-9) === 'minus.png') + $(this).attr('src', src.substr(0, src.length-9) + 'plus.png'); + else + $(this).attr('src', src.substr(0, src.length-8) + 'minus.png'); + }).css('display', ''); + if (DOCUMENTATION_OPTIONS.COLLAPSE_INDEX) { + togglers.click(); + } + }, + + /** + * helper function to hide the search marks again + */ + hideSearchWords : function() { + $('#searchbox .highlight-link').fadeOut(300); + $('span.highlighted').removeClass('highlighted'); + }, + + /** + * make the url absolute + */ + makeURL : function(relativeURL) { + return DOCUMENTATION_OPTIONS.URL_ROOT + '/' + relativeURL; + }, + + /** + * get the current relative url + */ + getCurrentURL : function() { + var path = document.location.pathname; + var parts = path.split(/\//); + $.each(DOCUMENTATION_OPTIONS.URL_ROOT.split(/\//), function() { + if (this === '..') + parts.pop(); + }); + var url = parts.join('/'); + return path.substring(url.lastIndexOf('/') + 1, path.length - 1); + }, + + initOnKeyListeners: function() { + $(document).keyup(function(event) { + var activeElementType = document.activeElement.tagName; + // don't navigate when in search box or textarea + if (activeElementType !== 'TEXTAREA' && activeElementType !== 'INPUT' && activeElementType !== 'SELECT') { + switch (event.keyCode) { + case 37: // left + var prevHref = $('link[rel="prev"]').prop('href'); + if (prevHref) { + window.location.href = prevHref; + return false; + } + case 39: // right + var nextHref = $('link[rel="next"]').prop('href'); + if (nextHref) { + window.location.href = nextHref; + return false; + } + } + } + }); + } +}; + +// quick alias for translations +_ = Documentation.gettext; + +$(document).ready(function() { + Documentation.init(); +}); diff --git 
a/docs/_static/documentation_options.js b/docs/_static/documentation_options.js new file mode 100644 index 0000000..4e5aeec --- /dev/null +++ b/docs/_static/documentation_options.js @@ -0,0 +1,296 @@ +var DOCUMENTATION_OPTIONS = { + URL_ROOT: document.getElementById("documentation_options").getAttribute('data-url_root'), + VERSION: '0.11.0', + LANGUAGE: 'None', + COLLAPSE_INDEX: false, + FILE_SUFFIX: '.html', + HAS_SOURCE: true, + SOURCELINK_SUFFIX: '.txt', + NAVIGATION_WITH_KEYS: false, + SEARCH_LANGUAGE_STOP_WORDS: ["a","and","are","as","at","be","but","by","for","if","in","into","is","it","near","no","not","of","on","or","such","that","the","their","then","there","these","they","this","to","was","will","with"] +}; + + + +/* Non-minified version JS is _stemmer.js if file is provided */ +/** + * Porter Stemmer + */ +var Stemmer = function() { + + var step2list = { + ational: 'ate', + tional: 'tion', + enci: 'ence', + anci: 'ance', + izer: 'ize', + bli: 'ble', + alli: 'al', + entli: 'ent', + eli: 'e', + ousli: 'ous', + ization: 'ize', + ation: 'ate', + ator: 'ate', + alism: 'al', + iveness: 'ive', + fulness: 'ful', + ousness: 'ous', + aliti: 'al', + iviti: 'ive', + biliti: 'ble', + logi: 'log' + }; + + var step3list = { + icate: 'ic', + ative: '', + alize: 'al', + iciti: 'ic', + ical: 'ic', + ful: '', + ness: '' + }; + + var c = "[^aeiou]"; // consonant + var v = "[aeiouy]"; // vowel + var C = c + "[^aeiouy]*"; // consonant sequence + var V = v + "[aeiou]*"; // vowel sequence + + var mgr0 = "^(" + C + ")?" + V + C; // [C]VC... is m>0 + var meq1 = "^(" + C + ")?" + V + C + "(" + V + ")?$"; // [C]VC[V] is m=1 + var mgr1 = "^(" + C + ")?" + V + C + V + C; // [C]VCVC... is m>1 + var s_v = "^(" + C + ")?" + v; // vowel in stem + + this.stemWord = function (w) { + var stem; + var suffix; + var firstch; + var origword = w; + + if (w.length < 3) + return w; + + var re; + var re2; + var re3; + var re4; + + firstch = w.substr(0,1); + if (firstch == "y") + w = firstch.toUpperCase() + w.substr(1); + + // Step 1a + re = /^(.+?)(ss|i)es$/; + re2 = /^(.+?)([^s])s$/; + + if (re.test(w)) + w = w.replace(re,"$1$2"); + else if (re2.test(w)) + w = w.replace(re2,"$1$2"); + + // Step 1b + re = /^(.+?)eed$/; + re2 = /^(.+?)(ed|ing)$/; + if (re.test(w)) { + var fp = re.exec(w); + re = new RegExp(mgr0); + if (re.test(fp[1])) { + re = /.$/; + w = w.replace(re,""); + } + } + else if (re2.test(w)) { + var fp = re2.exec(w); + stem = fp[1]; + re2 = new RegExp(s_v); + if (re2.test(stem)) { + w = stem; + re2 = /(at|bl|iz)$/; + re3 = new RegExp("([^aeiouylsz])\\1$"); + re4 = new RegExp("^" + C + v + "[^aeiouwxy]$"); + if (re2.test(w)) + w = w + "e"; + else if (re3.test(w)) { + re = /.$/; + w = w.replace(re,""); + } + else if (re4.test(w)) + w = w + "e"; + } + } + + // Step 1c + re = /^(.+?)y$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + re = new RegExp(s_v); + if (re.test(stem)) + w = stem + "i"; + } + + // Step 2 + re = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + suffix = fp[2]; + re = new RegExp(mgr0); + if (re.test(stem)) + w = stem + step2list[suffix]; + } + + // Step 3 + re = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + suffix = fp[2]; + re = new RegExp(mgr0); + if (re.test(stem)) + w = stem + step3list[suffix]; + } + + // Step 4 + re = 
/^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/; + re2 = /^(.+?)(s|t)(ion)$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + re = new RegExp(mgr1); + if (re.test(stem)) + w = stem; + } + else if (re2.test(w)) { + var fp = re2.exec(w); + stem = fp[1] + fp[2]; + re2 = new RegExp(mgr1); + if (re2.test(stem)) + w = stem; + } + + // Step 5 + re = /^(.+?)e$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + re = new RegExp(mgr1); + re2 = new RegExp(meq1); + re3 = new RegExp("^" + C + v + "[^aeiouwxy]$"); + if (re.test(stem) || (re2.test(stem) && !(re3.test(stem)))) + w = stem; + } + re = /ll$/; + re2 = new RegExp(mgr1); + if (re.test(w) && re2.test(w)) { + re = /.$/; + w = w.replace(re,""); + } + + // and turn initial Y back to y + if (firstch == "y") + w = firstch.toLowerCase() + w.substr(1); + return w; + } +} + + + + + +var splitChars = (function() { + var result = {}; + var singles = [96, 180, 187, 191, 215, 247, 749, 885, 903, 907, 909, 930, 1014, 1648, + 1748, 1809, 2416, 2473, 2481, 2526, 2601, 2609, 2612, 2615, 2653, 2702, + 2706, 2729, 2737, 2740, 2857, 2865, 2868, 2910, 2928, 2948, 2961, 2971, + 2973, 3085, 3089, 3113, 3124, 3213, 3217, 3241, 3252, 3295, 3341, 3345, + 3369, 3506, 3516, 3633, 3715, 3721, 3736, 3744, 3748, 3750, 3756, 3761, + 3781, 3912, 4239, 4347, 4681, 4695, 4697, 4745, 4785, 4799, 4801, 4823, + 4881, 5760, 5901, 5997, 6313, 7405, 8024, 8026, 8028, 8030, 8117, 8125, + 8133, 8181, 8468, 8485, 8487, 8489, 8494, 8527, 11311, 11359, 11687, 11695, + 11703, 11711, 11719, 11727, 11735, 12448, 12539, 43010, 43014, 43019, 43587, + 43696, 43713, 64286, 64297, 64311, 64317, 64319, 64322, 64325, 65141]; + var i, j, start, end; + for (i = 0; i < singles.length; i++) { + result[singles[i]] = true; + } + var ranges = [[0, 47], [58, 64], [91, 94], [123, 169], [171, 177], [182, 184], [706, 709], + [722, 735], [741, 747], [751, 879], [888, 889], [894, 901], [1154, 1161], + [1318, 1328], [1367, 1368], [1370, 1376], [1416, 1487], [1515, 1519], [1523, 1568], + [1611, 1631], [1642, 1645], [1750, 1764], [1767, 1773], [1789, 1790], [1792, 1807], + [1840, 1868], [1958, 1968], [1970, 1983], [2027, 2035], [2038, 2041], [2043, 2047], + [2070, 2073], [2075, 2083], [2085, 2087], [2089, 2307], [2362, 2364], [2366, 2383], + [2385, 2391], [2402, 2405], [2419, 2424], [2432, 2436], [2445, 2446], [2449, 2450], + [2483, 2485], [2490, 2492], [2494, 2509], [2511, 2523], [2530, 2533], [2546, 2547], + [2554, 2564], [2571, 2574], [2577, 2578], [2618, 2648], [2655, 2661], [2672, 2673], + [2677, 2692], [2746, 2748], [2750, 2767], [2769, 2783], [2786, 2789], [2800, 2820], + [2829, 2830], [2833, 2834], [2874, 2876], [2878, 2907], [2914, 2917], [2930, 2946], + [2955, 2957], [2966, 2968], [2976, 2978], [2981, 2983], [2987, 2989], [3002, 3023], + [3025, 3045], [3059, 3076], [3130, 3132], [3134, 3159], [3162, 3167], [3170, 3173], + [3184, 3191], [3199, 3204], [3258, 3260], [3262, 3293], [3298, 3301], [3312, 3332], + [3386, 3388], [3390, 3423], [3426, 3429], [3446, 3449], [3456, 3460], [3479, 3481], + [3518, 3519], [3527, 3584], [3636, 3647], [3655, 3663], [3674, 3712], [3717, 3718], + [3723, 3724], [3726, 3731], [3752, 3753], [3764, 3772], [3774, 3775], [3783, 3791], + [3802, 3803], [3806, 3839], [3841, 3871], [3892, 3903], [3949, 3975], [3980, 4095], + [4139, 4158], [4170, 4175], [4182, 4185], [4190, 4192], [4194, 4196], [4199, 4205], + [4209, 4212], [4226, 4237], [4250, 4255], [4294, 4303], [4349, 4351], [4686, 4687], + [4702, 4703], [4750, 4751], 
[4790, 4791], [4806, 4807], [4886, 4887], [4955, 4968], + [4989, 4991], [5008, 5023], [5109, 5120], [5741, 5742], [5787, 5791], [5867, 5869], + [5873, 5887], [5906, 5919], [5938, 5951], [5970, 5983], [6001, 6015], [6068, 6102], + [6104, 6107], [6109, 6111], [6122, 6127], [6138, 6159], [6170, 6175], [6264, 6271], + [6315, 6319], [6390, 6399], [6429, 6469], [6510, 6511], [6517, 6527], [6572, 6592], + [6600, 6607], [6619, 6655], [6679, 6687], [6741, 6783], [6794, 6799], [6810, 6822], + [6824, 6916], [6964, 6980], [6988, 6991], [7002, 7042], [7073, 7085], [7098, 7167], + [7204, 7231], [7242, 7244], [7294, 7400], [7410, 7423], [7616, 7679], [7958, 7959], + [7966, 7967], [8006, 8007], [8014, 8015], [8062, 8063], [8127, 8129], [8141, 8143], + [8148, 8149], [8156, 8159], [8173, 8177], [8189, 8303], [8306, 8307], [8314, 8318], + [8330, 8335], [8341, 8449], [8451, 8454], [8456, 8457], [8470, 8472], [8478, 8483], + [8506, 8507], [8512, 8516], [8522, 8525], [8586, 9311], [9372, 9449], [9472, 10101], + [10132, 11263], [11493, 11498], [11503, 11516], [11518, 11519], [11558, 11567], + [11622, 11630], [11632, 11647], [11671, 11679], [11743, 11822], [11824, 12292], + [12296, 12320], [12330, 12336], [12342, 12343], [12349, 12352], [12439, 12444], + [12544, 12548], [12590, 12592], [12687, 12689], [12694, 12703], [12728, 12783], + [12800, 12831], [12842, 12880], [12896, 12927], [12938, 12976], [12992, 13311], + [19894, 19967], [40908, 40959], [42125, 42191], [42238, 42239], [42509, 42511], + [42540, 42559], [42592, 42593], [42607, 42622], [42648, 42655], [42736, 42774], + [42784, 42785], [42889, 42890], [42893, 43002], [43043, 43055], [43062, 43071], + [43124, 43137], [43188, 43215], [43226, 43249], [43256, 43258], [43260, 43263], + [43302, 43311], [43335, 43359], [43389, 43395], [43443, 43470], [43482, 43519], + [43561, 43583], [43596, 43599], [43610, 43615], [43639, 43641], [43643, 43647], + [43698, 43700], [43703, 43704], [43710, 43711], [43715, 43738], [43742, 43967], + [44003, 44015], [44026, 44031], [55204, 55215], [55239, 55242], [55292, 55295], + [57344, 63743], [64046, 64047], [64110, 64111], [64218, 64255], [64263, 64274], + [64280, 64284], [64434, 64466], [64830, 64847], [64912, 64913], [64968, 65007], + [65020, 65135], [65277, 65295], [65306, 65312], [65339, 65344], [65371, 65381], + [65471, 65473], [65480, 65481], [65488, 65489], [65496, 65497]]; + for (i = 0; i < ranges.length; i++) { + start = ranges[i][0]; + end = ranges[i][1]; + for (j = start; j <= end; j++) { + result[j] = true; + } + } + return result; +})(); + +function splitQuery(query) { + var result = []; + var start = -1; + for (var i = 0; i < query.length; i++) { + if (splitChars[query.charCodeAt(i)]) { + if (start !== -1) { + result.push(query.slice(start, i)); + start = -1; + } + } else if (start === -1) { + start = i; + } + } + if (start !== -1) { + result.push(query.slice(start)); + } + return result; +} + + diff --git a/docs/_static/down-pressed.png b/docs/_static/down-pressed.png new file mode 100644 index 0000000..5756c8c Binary files /dev/null and b/docs/_static/down-pressed.png differ diff --git a/docs/_static/down.png b/docs/_static/down.png new file mode 100644 index 0000000..1b3bdad Binary files /dev/null and b/docs/_static/down.png differ diff --git a/docs/_static/file.png b/docs/_static/file.png new file mode 100644 index 0000000..a858a41 Binary files /dev/null and b/docs/_static/file.png differ diff --git a/docs/_static/fonts/Inconsolata-Bold.ttf b/docs/_static/fonts/Inconsolata-Bold.ttf new file mode 100644 index 
0000000..809c1f5 Binary files /dev/null and b/docs/_static/fonts/Inconsolata-Bold.ttf differ diff --git a/docs/_static/fonts/Inconsolata-Regular.ttf b/docs/_static/fonts/Inconsolata-Regular.ttf new file mode 100644 index 0000000..fc981ce Binary files /dev/null and b/docs/_static/fonts/Inconsolata-Regular.ttf differ diff --git a/docs/_static/fonts/Inconsolata.ttf b/docs/_static/fonts/Inconsolata.ttf new file mode 100644 index 0000000..4b8a36d Binary files /dev/null and b/docs/_static/fonts/Inconsolata.ttf differ diff --git a/docs/_static/fonts/Lato-Bold.ttf b/docs/_static/fonts/Lato-Bold.ttf new file mode 100644 index 0000000..1d23c70 Binary files /dev/null and b/docs/_static/fonts/Lato-Bold.ttf differ diff --git a/docs/_static/fonts/Lato-Regular.ttf b/docs/_static/fonts/Lato-Regular.ttf new file mode 100644 index 0000000..0f3d0f8 Binary files /dev/null and b/docs/_static/fonts/Lato-Regular.ttf differ diff --git a/docs/_static/fonts/Lato/lato-bold.eot b/docs/_static/fonts/Lato/lato-bold.eot new file mode 100644 index 0000000..3361183 Binary files /dev/null and b/docs/_static/fonts/Lato/lato-bold.eot differ diff --git a/docs/_static/fonts/Lato/lato-bold.ttf b/docs/_static/fonts/Lato/lato-bold.ttf new file mode 100644 index 0000000..29f691d Binary files /dev/null and b/docs/_static/fonts/Lato/lato-bold.ttf differ diff --git a/docs/_static/fonts/Lato/lato-bold.woff b/docs/_static/fonts/Lato/lato-bold.woff new file mode 100644 index 0000000..c6dff51 Binary files /dev/null and b/docs/_static/fonts/Lato/lato-bold.woff differ diff --git a/docs/_static/fonts/Lato/lato-bold.woff2 b/docs/_static/fonts/Lato/lato-bold.woff2 new file mode 100644 index 0000000..bb19504 Binary files /dev/null and b/docs/_static/fonts/Lato/lato-bold.woff2 differ diff --git a/docs/_static/fonts/Lato/lato-bolditalic.eot b/docs/_static/fonts/Lato/lato-bolditalic.eot new file mode 100644 index 0000000..3d41549 Binary files /dev/null and b/docs/_static/fonts/Lato/lato-bolditalic.eot differ diff --git a/docs/_static/fonts/Lato/lato-bolditalic.ttf b/docs/_static/fonts/Lato/lato-bolditalic.ttf new file mode 100644 index 0000000..f402040 Binary files /dev/null and b/docs/_static/fonts/Lato/lato-bolditalic.ttf differ diff --git a/docs/_static/fonts/Lato/lato-bolditalic.woff b/docs/_static/fonts/Lato/lato-bolditalic.woff new file mode 100644 index 0000000..88ad05b Binary files /dev/null and b/docs/_static/fonts/Lato/lato-bolditalic.woff differ diff --git a/docs/_static/fonts/Lato/lato-bolditalic.woff2 b/docs/_static/fonts/Lato/lato-bolditalic.woff2 new file mode 100644 index 0000000..c4e3d80 Binary files /dev/null and b/docs/_static/fonts/Lato/lato-bolditalic.woff2 differ diff --git a/docs/_static/fonts/Lato/lato-italic.eot b/docs/_static/fonts/Lato/lato-italic.eot new file mode 100644 index 0000000..3f82642 Binary files /dev/null and b/docs/_static/fonts/Lato/lato-italic.eot differ diff --git a/docs/_static/fonts/Lato/lato-italic.ttf b/docs/_static/fonts/Lato/lato-italic.ttf new file mode 100644 index 0000000..b4bfc9b Binary files /dev/null and b/docs/_static/fonts/Lato/lato-italic.ttf differ diff --git a/docs/_static/fonts/Lato/lato-italic.woff b/docs/_static/fonts/Lato/lato-italic.woff new file mode 100644 index 0000000..76114bc Binary files /dev/null and b/docs/_static/fonts/Lato/lato-italic.woff differ diff --git a/docs/_static/fonts/Lato/lato-italic.woff2 b/docs/_static/fonts/Lato/lato-italic.woff2 new file mode 100644 index 0000000..3404f37 Binary files /dev/null and b/docs/_static/fonts/Lato/lato-italic.woff2 differ diff 
--git a/docs/_static/fonts/Lato/lato-regular.eot b/docs/_static/fonts/Lato/lato-regular.eot new file mode 100644 index 0000000..11e3f2a Binary files /dev/null and b/docs/_static/fonts/Lato/lato-regular.eot differ diff --git a/docs/_static/fonts/Lato/lato-regular.ttf b/docs/_static/fonts/Lato/lato-regular.ttf new file mode 100644 index 0000000..74decd9 Binary files /dev/null and b/docs/_static/fonts/Lato/lato-regular.ttf differ diff --git a/docs/_static/fonts/Lato/lato-regular.woff b/docs/_static/fonts/Lato/lato-regular.woff new file mode 100644 index 0000000..ae1307f Binary files /dev/null and b/docs/_static/fonts/Lato/lato-regular.woff differ diff --git a/docs/_static/fonts/Lato/lato-regular.woff2 b/docs/_static/fonts/Lato/lato-regular.woff2 new file mode 100644 index 0000000..3bf9843 Binary files /dev/null and b/docs/_static/fonts/Lato/lato-regular.woff2 differ diff --git a/docs/_static/fonts/RobotoSlab-Bold.ttf b/docs/_static/fonts/RobotoSlab-Bold.ttf new file mode 100644 index 0000000..df5d1df Binary files /dev/null and b/docs/_static/fonts/RobotoSlab-Bold.ttf differ diff --git a/docs/_static/fonts/RobotoSlab-Regular.ttf b/docs/_static/fonts/RobotoSlab-Regular.ttf new file mode 100644 index 0000000..eb52a79 Binary files /dev/null and b/docs/_static/fonts/RobotoSlab-Regular.ttf differ diff --git a/docs/_static/fonts/RobotoSlab/roboto-slab-v7-bold.eot b/docs/_static/fonts/RobotoSlab/roboto-slab-v7-bold.eot new file mode 100644 index 0000000..79dc8ef Binary files /dev/null and b/docs/_static/fonts/RobotoSlab/roboto-slab-v7-bold.eot differ diff --git a/docs/_static/fonts/RobotoSlab/roboto-slab-v7-bold.ttf b/docs/_static/fonts/RobotoSlab/roboto-slab-v7-bold.ttf new file mode 100644 index 0000000..df5d1df Binary files /dev/null and b/docs/_static/fonts/RobotoSlab/roboto-slab-v7-bold.ttf differ diff --git a/docs/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff b/docs/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff new file mode 100644 index 0000000..6cb6000 Binary files /dev/null and b/docs/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff differ diff --git a/docs/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff2 b/docs/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff2 new file mode 100644 index 0000000..7059e23 Binary files /dev/null and b/docs/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff2 differ diff --git a/docs/_static/fonts/RobotoSlab/roboto-slab-v7-regular.eot b/docs/_static/fonts/RobotoSlab/roboto-slab-v7-regular.eot new file mode 100644 index 0000000..2f7ca78 Binary files /dev/null and b/docs/_static/fonts/RobotoSlab/roboto-slab-v7-regular.eot differ diff --git a/docs/_static/fonts/RobotoSlab/roboto-slab-v7-regular.ttf b/docs/_static/fonts/RobotoSlab/roboto-slab-v7-regular.ttf new file mode 100644 index 0000000..eb52a79 Binary files /dev/null and b/docs/_static/fonts/RobotoSlab/roboto-slab-v7-regular.ttf differ diff --git a/docs/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff b/docs/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff new file mode 100644 index 0000000..f815f63 Binary files /dev/null and b/docs/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff differ diff --git a/docs/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff2 b/docs/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff2 new file mode 100644 index 0000000..f2c76e5 Binary files /dev/null and b/docs/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff2 differ diff --git a/docs/_static/fonts/fontawesome-webfont.eot b/docs/_static/fonts/fontawesome-webfont.eot new file mode 100644 index 
0000000..e9f60ca Binary files /dev/null and b/docs/_static/fonts/fontawesome-webfont.eot differ diff --git a/docs/_static/fonts/fontawesome-webfont.svg b/docs/_static/fonts/fontawesome-webfont.svg new file mode 100644 index 0000000..855c845 --- /dev/null +++ b/docs/_static/fonts/fontawesome-webfont.svg @@ -0,0 +1,2671 @@ +Created by FontForge 20120731 at Mon Oct 24 17:37:40 2016 + By ,,, +Copyright Dave Gandy 2016. All rights reserved. + [SVG markup and glyph definitions not shown; the XML tags were stripped from this view] diff --git a/docs/_static/fonts/fontawesome-webfont.ttf b/docs/_static/fonts/fontawesome-webfont.ttf new file mode 100644 index 0000000..35acda2 Binary files /dev/null and b/docs/_static/fonts/fontawesome-webfont.ttf differ diff --git a/docs/_static/fonts/fontawesome-webfont.woff b/docs/_static/fonts/fontawesome-webfont.woff new file mode 100644 index 0000000..400014a Binary files /dev/null and b/docs/_static/fonts/fontawesome-webfont.woff differ diff --git a/docs/_static/fonts/fontawesome-webfont.woff2 b/docs/_static/fonts/fontawesome-webfont.woff2 new file mode 100644 index 0000000..4d13fc6 Binary files /dev/null and b/docs/_static/fonts/fontawesome-webfont.woff2 differ diff --git a/docs/_static/jquery-3.2.1.js b/docs/_static/jquery-3.2.1.js new file mode 100644 index 0000000..d2d8ca4 --- /dev/null +++ b/docs/_static/jquery-3.2.1.js @@ -0,0 +1,10253 @@ +/*! + * jQuery JavaScript Library v3.2.1 + * https://jquery.com/ + * + * Includes Sizzle.js + * https://sizzlejs.com/ + * + * Copyright JS Foundation and other contributors + * Released under the MIT license + * https://jquery.org/license + * + * Date: 2017-03-20T18:59Z + */ +( function( global, factory ) { + + "use strict"; + + if ( typeof module === "object" && typeof module.exports === "object" ) { + + // For CommonJS and CommonJS-like environments where a proper `window` + // is present, execute the factory and get jQuery. + // For environments that do not have a `window` with a `document` + // (such as Node.js), expose a factory as module.exports. + // This accentuates the need for the creation of a real `window`. + // e.g.
var jQuery = require("jquery")(window); + // See ticket #14549 for more info. + module.exports = global.document ? + factory( global, true ) : + function( w ) { + if ( !w.document ) { + throw new Error( "jQuery requires a window with a document" ); + } + return factory( w ); + }; + } else { + factory( global ); + } + +// Pass this if window is not defined yet +} )( typeof window !== "undefined" ? window : this, function( window, noGlobal ) { + +// Edge <= 12 - 13+, Firefox <=18 - 45+, IE 10 - 11, Safari 5.1 - 9+, iOS 6 - 9.1 +// throw exceptions when non-strict code (e.g., ASP.NET 4.5) accesses strict mode +// arguments.callee.caller (trac-13335). But as of jQuery 3.0 (2016), strict mode should be common +// enough that all such attempts are guarded in a try block. +"use strict"; + +var arr = []; + +var document = window.document; + +var getProto = Object.getPrototypeOf; + +var slice = arr.slice; + +var concat = arr.concat; + +var push = arr.push; + +var indexOf = arr.indexOf; + +var class2type = {}; + +var toString = class2type.toString; + +var hasOwn = class2type.hasOwnProperty; + +var fnToString = hasOwn.toString; + +var ObjectFunctionString = fnToString.call( Object ); + +var support = {}; + + + + function DOMEval( code, doc ) { + doc = doc || document; + + var script = doc.createElement( "script" ); + + script.text = code; + doc.head.appendChild( script ).parentNode.removeChild( script ); + } +/* global Symbol */ +// Defining this global in .eslintrc.json would create a danger of using the global +// unguarded in another place, it seems safer to define global only for this module + + + +var + version = "3.2.1", + + // Define a local copy of jQuery + jQuery = function( selector, context ) { + + // The jQuery object is actually just the init constructor 'enhanced' + // Need init if jQuery is called (just allow error to be thrown if not included) + return new jQuery.fn.init( selector, context ); + }, + + // Support: Android <=4.0 only + // Make sure we trim BOM and NBSP + rtrim = /^[\s\uFEFF\xA0]+|[\s\uFEFF\xA0]+$/g, + + // Matches dashed string for camelizing + rmsPrefix = /^-ms-/, + rdashAlpha = /-([a-z])/g, + + // Used by jQuery.camelCase as callback to replace() + fcamelCase = function( all, letter ) { + return letter.toUpperCase(); + }; + +jQuery.fn = jQuery.prototype = { + + // The current version of jQuery being used + jquery: version, + + constructor: jQuery, + + // The default length of a jQuery object is 0 + length: 0, + + toArray: function() { + return slice.call( this ); + }, + + // Get the Nth element in the matched element set OR + // Get the whole matched element set as a clean array + get: function( num ) { + + // Return all the elements in a clean array + if ( num == null ) { + return slice.call( this ); + } + + // Return just the one element from the set + return num < 0 ? this[ num + this.length ] : this[ num ]; + }, + + // Take an array of elements and push it onto the stack + // (returning the new matched element set) + pushStack: function( elems ) { + + // Build a new jQuery matched element set + var ret = jQuery.merge( this.constructor(), elems ); + + // Add the old object onto the stack (as a reference) + ret.prevObject = this; + + // Return the newly-formed element set + return ret; + }, + + // Execute a callback for every element in the matched set. 
+ each: function( callback ) { + return jQuery.each( this, callback ); + }, + + map: function( callback ) { + return this.pushStack( jQuery.map( this, function( elem, i ) { + return callback.call( elem, i, elem ); + } ) ); + }, + + slice: function() { + return this.pushStack( slice.apply( this, arguments ) ); + }, + + first: function() { + return this.eq( 0 ); + }, + + last: function() { + return this.eq( -1 ); + }, + + eq: function( i ) { + var len = this.length, + j = +i + ( i < 0 ? len : 0 ); + return this.pushStack( j >= 0 && j < len ? [ this[ j ] ] : [] ); + }, + + end: function() { + return this.prevObject || this.constructor(); + }, + + // For internal use only. + // Behaves like an Array's method, not like a jQuery method. + push: push, + sort: arr.sort, + splice: arr.splice +}; + +jQuery.extend = jQuery.fn.extend = function() { + var options, name, src, copy, copyIsArray, clone, + target = arguments[ 0 ] || {}, + i = 1, + length = arguments.length, + deep = false; + + // Handle a deep copy situation + if ( typeof target === "boolean" ) { + deep = target; + + // Skip the boolean and the target + target = arguments[ i ] || {}; + i++; + } + + // Handle case when target is a string or something (possible in deep copy) + if ( typeof target !== "object" && !jQuery.isFunction( target ) ) { + target = {}; + } + + // Extend jQuery itself if only one argument is passed + if ( i === length ) { + target = this; + i--; + } + + for ( ; i < length; i++ ) { + + // Only deal with non-null/undefined values + if ( ( options = arguments[ i ] ) != null ) { + + // Extend the base object + for ( name in options ) { + src = target[ name ]; + copy = options[ name ]; + + // Prevent never-ending loop + if ( target === copy ) { + continue; + } + + // Recurse if we're merging plain objects or arrays + if ( deep && copy && ( jQuery.isPlainObject( copy ) || + ( copyIsArray = Array.isArray( copy ) ) ) ) { + + if ( copyIsArray ) { + copyIsArray = false; + clone = src && Array.isArray( src ) ? src : []; + + } else { + clone = src && jQuery.isPlainObject( src ) ? 
src : {}; + } + + // Never move original objects, clone them + target[ name ] = jQuery.extend( deep, clone, copy ); + + // Don't bring in undefined values + } else if ( copy !== undefined ) { + target[ name ] = copy; + } + } + } + } + + // Return the modified object + return target; +}; + +jQuery.extend( { + + // Unique for each copy of jQuery on the page + expando: "jQuery" + ( version + Math.random() ).replace( /\D/g, "" ), + + // Assume jQuery is ready without the ready module + isReady: true, + + error: function( msg ) { + throw new Error( msg ); + }, + + noop: function() {}, + + isFunction: function( obj ) { + return jQuery.type( obj ) === "function"; + }, + + isWindow: function( obj ) { + return obj != null && obj === obj.window; + }, + + isNumeric: function( obj ) { + + // As of jQuery 3.0, isNumeric is limited to + // strings and numbers (primitives or objects) + // that can be coerced to finite numbers (gh-2662) + var type = jQuery.type( obj ); + return ( type === "number" || type === "string" ) && + + // parseFloat NaNs numeric-cast false positives ("") + // ...but misinterprets leading-number strings, particularly hex literals ("0x...") + // subtraction forces infinities to NaN + !isNaN( obj - parseFloat( obj ) ); + }, + + isPlainObject: function( obj ) { + var proto, Ctor; + + // Detect obvious negatives + // Use toString instead of jQuery.type to catch host objects + if ( !obj || toString.call( obj ) !== "[object Object]" ) { + return false; + } + + proto = getProto( obj ); + + // Objects with no prototype (e.g., `Object.create( null )`) are plain + if ( !proto ) { + return true; + } + + // Objects with prototype are plain iff they were constructed by a global Object function + Ctor = hasOwn.call( proto, "constructor" ) && proto.constructor; + return typeof Ctor === "function" && fnToString.call( Ctor ) === ObjectFunctionString; + }, + + isEmptyObject: function( obj ) { + + /* eslint-disable no-unused-vars */ + // See https://github.com/eslint/eslint/issues/6125 + var name; + + for ( name in obj ) { + return false; + } + return true; + }, + + type: function( obj ) { + if ( obj == null ) { + return obj + ""; + } + + // Support: Android <=2.3 only (functionish RegExp) + return typeof obj === "object" || typeof obj === "function" ? + class2type[ toString.call( obj ) ] || "object" : + typeof obj; + }, + + // Evaluates a script in a global context + globalEval: function( code ) { + DOMEval( code ); + }, + + // Convert dashed to camelCase; used by the css and data modules + // Support: IE <=9 - 11, Edge 12 - 13 + // Microsoft forgot to hump their vendor prefix (#9572) + camelCase: function( string ) { + return string.replace( rmsPrefix, "ms-" ).replace( rdashAlpha, fcamelCase ); + }, + + each: function( obj, callback ) { + var length, i = 0; + + if ( isArrayLike( obj ) ) { + length = obj.length; + for ( ; i < length; i++ ) { + if ( callback.call( obj[ i ], i, obj[ i ] ) === false ) { + break; + } + } + } else { + for ( i in obj ) { + if ( callback.call( obj[ i ], i, obj[ i ] ) === false ) { + break; + } + } + } + + return obj; + }, + + // Support: Android <=4.0 only + trim: function( text ) { + return text == null ? + "" : + ( text + "" ).replace( rtrim, "" ); + }, + + // results is for internal usage only + makeArray: function( arr, results ) { + var ret = results || []; + + if ( arr != null ) { + if ( isArrayLike( Object( arr ) ) ) { + jQuery.merge( ret, + typeof arr === "string" ? 
+ [ arr ] : arr + ); + } else { + push.call( ret, arr ); + } + } + + return ret; + }, + + inArray: function( elem, arr, i ) { + return arr == null ? -1 : indexOf.call( arr, elem, i ); + }, + + // Support: Android <=4.0 only, PhantomJS 1 only + // push.apply(_, arraylike) throws on ancient WebKit + merge: function( first, second ) { + var len = +second.length, + j = 0, + i = first.length; + + for ( ; j < len; j++ ) { + first[ i++ ] = second[ j ]; + } + + first.length = i; + + return first; + }, + + grep: function( elems, callback, invert ) { + var callbackInverse, + matches = [], + i = 0, + length = elems.length, + callbackExpect = !invert; + + // Go through the array, only saving the items + // that pass the validator function + for ( ; i < length; i++ ) { + callbackInverse = !callback( elems[ i ], i ); + if ( callbackInverse !== callbackExpect ) { + matches.push( elems[ i ] ); + } + } + + return matches; + }, + + // arg is for internal usage only + map: function( elems, callback, arg ) { + var length, value, + i = 0, + ret = []; + + // Go through the array, translating each of the items to their new values + if ( isArrayLike( elems ) ) { + length = elems.length; + for ( ; i < length; i++ ) { + value = callback( elems[ i ], i, arg ); + + if ( value != null ) { + ret.push( value ); + } + } + + // Go through every key on the object, + } else { + for ( i in elems ) { + value = callback( elems[ i ], i, arg ); + + if ( value != null ) { + ret.push( value ); + } + } + } + + // Flatten any nested arrays + return concat.apply( [], ret ); + }, + + // A global GUID counter for objects + guid: 1, + + // Bind a function to a context, optionally partially applying any + // arguments. + proxy: function( fn, context ) { + var tmp, args, proxy; + + if ( typeof context === "string" ) { + tmp = fn[ context ]; + context = fn; + fn = tmp; + } + + // Quick check to determine if target is callable, in the spec + // this throws a TypeError, but we will just return undefined. + if ( !jQuery.isFunction( fn ) ) { + return undefined; + } + + // Simulated bind + args = slice.call( arguments, 2 ); + proxy = function() { + return fn.apply( context || this, args.concat( slice.call( arguments ) ) ); + }; + + // Set the guid of unique handler to the same of original handler, so it can be removed + proxy.guid = fn.guid = fn.guid || jQuery.guid++; + + return proxy; + }, + + now: Date.now, + + // jQuery.support is not used in Core but other projects attach their + // properties to it so it needs to exist. + support: support +} ); + +if ( typeof Symbol === "function" ) { + jQuery.fn[ Symbol.iterator ] = arr[ Symbol.iterator ]; +} + +// Populate the class2type map +jQuery.each( "Boolean Number String Function Array Date RegExp Object Error Symbol".split( " " ), +function( i, name ) { + class2type[ "[object " + name + "]" ] = name.toLowerCase(); +} ); + +function isArrayLike( obj ) { + + // Support: real iOS 8.2 only (not reproducible in simulator) + // `in` check used to prevent JIT error (gh-2145) + // hasOwn isn't used here due to false negatives + // regarding Nodelist length in IE + var length = !!obj && "length" in obj && obj.length, + type = jQuery.type( obj ); + + if ( type === "function" || jQuery.isWindow( obj ) ) { + return false; + } + + return type === "array" || length === 0 || + typeof length === "number" && length > 0 && ( length - 1 ) in obj; +} +var Sizzle = +/*! 
+ * Sizzle CSS Selector Engine v2.3.3 + * https://sizzlejs.com/ + * + * Copyright jQuery Foundation and other contributors + * Released under the MIT license + * http://jquery.org/license + * + * Date: 2016-08-08 + */ +(function( window ) { + +var i, + support, + Expr, + getText, + isXML, + tokenize, + compile, + select, + outermostContext, + sortInput, + hasDuplicate, + + // Local document vars + setDocument, + document, + docElem, + documentIsHTML, + rbuggyQSA, + rbuggyMatches, + matches, + contains, + + // Instance-specific data + expando = "sizzle" + 1 * new Date(), + preferredDoc = window.document, + dirruns = 0, + done = 0, + classCache = createCache(), + tokenCache = createCache(), + compilerCache = createCache(), + sortOrder = function( a, b ) { + if ( a === b ) { + hasDuplicate = true; + } + return 0; + }, + + // Instance methods + hasOwn = ({}).hasOwnProperty, + arr = [], + pop = arr.pop, + push_native = arr.push, + push = arr.push, + slice = arr.slice, + // Use a stripped-down indexOf as it's faster than native + // https://jsperf.com/thor-indexof-vs-for/5 + indexOf = function( list, elem ) { + var i = 0, + len = list.length; + for ( ; i < len; i++ ) { + if ( list[i] === elem ) { + return i; + } + } + return -1; + }, + + booleans = "checked|selected|async|autofocus|autoplay|controls|defer|disabled|hidden|ismap|loop|multiple|open|readonly|required|scoped", + + // Regular expressions + + // http://www.w3.org/TR/css3-selectors/#whitespace + whitespace = "[\\x20\\t\\r\\n\\f]", + + // http://www.w3.org/TR/CSS21/syndata.html#value-def-identifier + identifier = "(?:\\\\.|[\\w-]|[^\0-\\xa0])+", + + // Attribute selectors: http://www.w3.org/TR/selectors/#attribute-selectors + attributes = "\\[" + whitespace + "*(" + identifier + ")(?:" + whitespace + + // Operator (capture 2) + "*([*^$|!~]?=)" + whitespace + + // "Attribute values must be CSS identifiers [capture 5] or strings [capture 3 or capture 4]" + "*(?:'((?:\\\\.|[^\\\\'])*)'|\"((?:\\\\.|[^\\\\\"])*)\"|(" + identifier + "))|)" + whitespace + + "*\\]", + + pseudos = ":(" + identifier + ")(?:\\((" + + // To reduce the number of selectors needing tokenize in the preFilter, prefer arguments: + // 1. quoted (capture 3; capture 4 or capture 5) + "('((?:\\\\.|[^\\\\'])*)'|\"((?:\\\\.|[^\\\\\"])*)\")|" + + // 2. simple (capture 6) + "((?:\\\\.|[^\\\\()[\\]]|" + attributes + ")*)|" + + // 3. 
anything else (capture 2) + ".*" + + ")\\)|)", + + // Leading and non-escaped trailing whitespace, capturing some non-whitespace characters preceding the latter + rwhitespace = new RegExp( whitespace + "+", "g" ), + rtrim = new RegExp( "^" + whitespace + "+|((?:^|[^\\\\])(?:\\\\.)*)" + whitespace + "+$", "g" ), + + rcomma = new RegExp( "^" + whitespace + "*," + whitespace + "*" ), + rcombinators = new RegExp( "^" + whitespace + "*([>+~]|" + whitespace + ")" + whitespace + "*" ), + + rattributeQuotes = new RegExp( "=" + whitespace + "*([^\\]'\"]*?)" + whitespace + "*\\]", "g" ), + + rpseudo = new RegExp( pseudos ), + ridentifier = new RegExp( "^" + identifier + "$" ), + + matchExpr = { + "ID": new RegExp( "^#(" + identifier + ")" ), + "CLASS": new RegExp( "^\\.(" + identifier + ")" ), + "TAG": new RegExp( "^(" + identifier + "|[*])" ), + "ATTR": new RegExp( "^" + attributes ), + "PSEUDO": new RegExp( "^" + pseudos ), + "CHILD": new RegExp( "^:(only|first|last|nth|nth-last)-(child|of-type)(?:\\(" + whitespace + + "*(even|odd|(([+-]|)(\\d*)n|)" + whitespace + "*(?:([+-]|)" + whitespace + + "*(\\d+)|))" + whitespace + "*\\)|)", "i" ), + "bool": new RegExp( "^(?:" + booleans + ")$", "i" ), + // For use in libraries implementing .is() + // We use this for POS matching in `select` + "needsContext": new RegExp( "^" + whitespace + "*[>+~]|:(even|odd|eq|gt|lt|nth|first|last)(?:\\(" + + whitespace + "*((?:-\\d)?\\d*)" + whitespace + "*\\)|)(?=[^-]|$)", "i" ) + }, + + rinputs = /^(?:input|select|textarea|button)$/i, + rheader = /^h\d$/i, + + rnative = /^[^{]+\{\s*\[native \w/, + + // Easily-parseable/retrievable ID or TAG or CLASS selectors + rquickExpr = /^(?:#([\w-]+)|(\w+)|\.([\w-]+))$/, + + rsibling = /[+~]/, + + // CSS escapes + // http://www.w3.org/TR/CSS21/syndata.html#escaped-characters + runescape = new RegExp( "\\\\([\\da-f]{1,6}" + whitespace + "?|(" + whitespace + ")|.)", "ig" ), + funescape = function( _, escaped, escapedWhitespace ) { + var high = "0x" + escaped - 0x10000; + // NaN means non-codepoint + // Support: Firefox<24 + // Workaround erroneous numeric interpretation of +"0x" + return high !== high || escapedWhitespace ? + escaped : + high < 0 ? 
+ // BMP codepoint + String.fromCharCode( high + 0x10000 ) : + // Supplemental Plane codepoint (surrogate pair) + String.fromCharCode( high >> 10 | 0xD800, high & 0x3FF | 0xDC00 ); + }, + + // CSS string/identifier serialization + // https://drafts.csswg.org/cssom/#common-serializing-idioms + rcssescape = /([\0-\x1f\x7f]|^-?\d)|^-$|[^\0-\x1f\x7f-\uFFFF\w-]/g, + fcssescape = function( ch, asCodePoint ) { + if ( asCodePoint ) { + + // U+0000 NULL becomes U+FFFD REPLACEMENT CHARACTER + if ( ch === "\0" ) { + return "\uFFFD"; + } + + // Control characters and (dependent upon position) numbers get escaped as code points + return ch.slice( 0, -1 ) + "\\" + ch.charCodeAt( ch.length - 1 ).toString( 16 ) + " "; + } + + // Other potentially-special ASCII characters get backslash-escaped + return "\\" + ch; + }, + + // Used for iframes + // See setDocument() + // Removing the function wrapper causes a "Permission Denied" + // error in IE + unloadHandler = function() { + setDocument(); + }, + + disabledAncestor = addCombinator( + function( elem ) { + return elem.disabled === true && ("form" in elem || "label" in elem); + }, + { dir: "parentNode", next: "legend" } + ); + +// Optimize for push.apply( _, NodeList ) +try { + push.apply( + (arr = slice.call( preferredDoc.childNodes )), + preferredDoc.childNodes + ); + // Support: Android<4.0 + // Detect silently failing push.apply + arr[ preferredDoc.childNodes.length ].nodeType; +} catch ( e ) { + push = { apply: arr.length ? + + // Leverage slice if possible + function( target, els ) { + push_native.apply( target, slice.call(els) ); + } : + + // Support: IE<9 + // Otherwise append directly + function( target, els ) { + var j = target.length, + i = 0; + // Can't trust NodeList.length + while ( (target[j++] = els[i++]) ) {} + target.length = j - 1; + } + }; +} + +function Sizzle( selector, context, results, seed ) { + var m, i, elem, nid, match, groups, newSelector, + newContext = context && context.ownerDocument, + + // nodeType defaults to 9, since context defaults to document + nodeType = context ? context.nodeType : 9; + + results = results || []; + + // Return early from calls with invalid selector or context + if ( typeof selector !== "string" || !selector || + nodeType !== 1 && nodeType !== 9 && nodeType !== 11 ) { + + return results; + } + + // Try to shortcut find operations (as opposed to filters) in HTML documents + if ( !seed ) { + + if ( ( context ? 
context.ownerDocument || context : preferredDoc ) !== document ) { + setDocument( context ); + } + context = context || document; + + if ( documentIsHTML ) { + + // If the selector is sufficiently simple, try using a "get*By*" DOM method + // (excepting DocumentFragment context, where the methods don't exist) + if ( nodeType !== 11 && (match = rquickExpr.exec( selector )) ) { + + // ID selector + if ( (m = match[1]) ) { + + // Document context + if ( nodeType === 9 ) { + if ( (elem = context.getElementById( m )) ) { + + // Support: IE, Opera, Webkit + // TODO: identify versions + // getElementById can match elements by name instead of ID + if ( elem.id === m ) { + results.push( elem ); + return results; + } + } else { + return results; + } + + // Element context + } else { + + // Support: IE, Opera, Webkit + // TODO: identify versions + // getElementById can match elements by name instead of ID + if ( newContext && (elem = newContext.getElementById( m )) && + contains( context, elem ) && + elem.id === m ) { + + results.push( elem ); + return results; + } + } + + // Type selector + } else if ( match[2] ) { + push.apply( results, context.getElementsByTagName( selector ) ); + return results; + + // Class selector + } else if ( (m = match[3]) && support.getElementsByClassName && + context.getElementsByClassName ) { + + push.apply( results, context.getElementsByClassName( m ) ); + return results; + } + } + + // Take advantage of querySelectorAll + if ( support.qsa && + !compilerCache[ selector + " " ] && + (!rbuggyQSA || !rbuggyQSA.test( selector )) ) { + + if ( nodeType !== 1 ) { + newContext = context; + newSelector = selector; + + // qSA looks outside Element context, which is not what we want + // Thanks to Andrew Dupont for this workaround technique + // Support: IE <=8 + // Exclude object elements + } else if ( context.nodeName.toLowerCase() !== "object" ) { + + // Capture the context ID, setting it first if necessary + if ( (nid = context.getAttribute( "id" )) ) { + nid = nid.replace( rcssescape, fcssescape ); + } else { + context.setAttribute( "id", (nid = expando) ); + } + + // Prefix every selector in the list + groups = tokenize( selector ); + i = groups.length; + while ( i-- ) { + groups[i] = "#" + nid + " " + toSelector( groups[i] ); + } + newSelector = groups.join( "," ); + + // Expand context for sibling selectors + newContext = rsibling.test( selector ) && testContext( context.parentNode ) || + context; + } + + if ( newSelector ) { + try { + push.apply( results, + newContext.querySelectorAll( newSelector ) + ); + return results; + } catch ( qsaError ) { + } finally { + if ( nid === expando ) { + context.removeAttribute( "id" ); + } + } + } + } + } + } + + // All others + return select( selector.replace( rtrim, "$1" ), context, results, seed ); +} + +/** + * Create key-value caches of limited size + * @returns {function(string, object)} Returns the Object data after storing it on itself with + * property name the (space-suffixed) string and (if the cache is larger than Expr.cacheLength) + * deleting the oldest entry + */ +function createCache() { + var keys = []; + + function cache( key, value ) { + // Use (key + " ") to avoid collision with native prototype properties (see Issue #157) + if ( keys.push( key + " " ) > Expr.cacheLength ) { + // Only keep the most recent entries + delete cache[ keys.shift() ]; + } + return (cache[ key + " " ] = value); + } + return cache; +} + +/** + * Mark a function for special use by Sizzle + * @param {Function} fn The function to mark + */ 
+function markFunction( fn ) { + fn[ expando ] = true; + return fn; +} + +/** + * Support testing using an element + * @param {Function} fn Passed the created element and returns a boolean result + */ +function assert( fn ) { + var el = document.createElement("fieldset"); + + try { + return !!fn( el ); + } catch (e) { + return false; + } finally { + // Remove from its parent by default + if ( el.parentNode ) { + el.parentNode.removeChild( el ); + } + // release memory in IE + el = null; + } +} + +/** + * Adds the same handler for all of the specified attrs + * @param {String} attrs Pipe-separated list of attributes + * @param {Function} handler The method that will be applied + */ +function addHandle( attrs, handler ) { + var arr = attrs.split("|"), + i = arr.length; + + while ( i-- ) { + Expr.attrHandle[ arr[i] ] = handler; + } +} + +/** + * Checks document order of two siblings + * @param {Element} a + * @param {Element} b + * @returns {Number} Returns less than 0 if a precedes b, greater than 0 if a follows b + */ +function siblingCheck( a, b ) { + var cur = b && a, + diff = cur && a.nodeType === 1 && b.nodeType === 1 && + a.sourceIndex - b.sourceIndex; + + // Use IE sourceIndex if available on both nodes + if ( diff ) { + return diff; + } + + // Check if b follows a + if ( cur ) { + while ( (cur = cur.nextSibling) ) { + if ( cur === b ) { + return -1; + } + } + } + + return a ? 1 : -1; +} + +/** + * Returns a function to use in pseudos for input types + * @param {String} type + */ +function createInputPseudo( type ) { + return function( elem ) { + var name = elem.nodeName.toLowerCase(); + return name === "input" && elem.type === type; + }; +} + +/** + * Returns a function to use in pseudos for buttons + * @param {String} type + */ +function createButtonPseudo( type ) { + return function( elem ) { + var name = elem.nodeName.toLowerCase(); + return (name === "input" || name === "button") && elem.type === type; + }; +} + +/** + * Returns a function to use in pseudos for :enabled/:disabled + * @param {Boolean} disabled true for :disabled; false for :enabled + */ +function createDisabledPseudo( disabled ) { + + // Known :disabled false positives: fieldset[disabled] > legend:nth-of-type(n+2) :can-disable + return function( elem ) { + + // Only certain elements can match :enabled or :disabled + // https://html.spec.whatwg.org/multipage/scripting.html#selector-enabled + // https://html.spec.whatwg.org/multipage/scripting.html#selector-disabled + if ( "form" in elem ) { + + // Check for inherited disabledness on relevant non-disabled elements: + // * listed form-associated elements in a disabled fieldset + // https://html.spec.whatwg.org/multipage/forms.html#category-listed + // https://html.spec.whatwg.org/multipage/forms.html#concept-fe-disabled + // * option elements in a disabled optgroup + // https://html.spec.whatwg.org/multipage/forms.html#concept-option-disabled + // All such elements have a "form" property. 
+ if ( elem.parentNode && elem.disabled === false ) { + + // Option elements defer to a parent optgroup if present + if ( "label" in elem ) { + if ( "label" in elem.parentNode ) { + return elem.parentNode.disabled === disabled; + } else { + return elem.disabled === disabled; + } + } + + // Support: IE 6 - 11 + // Use the isDisabled shortcut property to check for disabled fieldset ancestors + return elem.isDisabled === disabled || + + // Where there is no isDisabled, check manually + /* jshint -W018 */ + elem.isDisabled !== !disabled && + disabledAncestor( elem ) === disabled; + } + + return elem.disabled === disabled; + + // Try to winnow out elements that can't be disabled before trusting the disabled property. + // Some victims get caught in our net (label, legend, menu, track), but it shouldn't + // even exist on them, let alone have a boolean value. + } else if ( "label" in elem ) { + return elem.disabled === disabled; + } + + // Remaining elements are neither :enabled nor :disabled + return false; + }; +} + +/** + * Returns a function to use in pseudos for positionals + * @param {Function} fn + */ +function createPositionalPseudo( fn ) { + return markFunction(function( argument ) { + argument = +argument; + return markFunction(function( seed, matches ) { + var j, + matchIndexes = fn( [], seed.length, argument ), + i = matchIndexes.length; + + // Match elements found at the specified indexes + while ( i-- ) { + if ( seed[ (j = matchIndexes[i]) ] ) { + seed[j] = !(matches[j] = seed[j]); + } + } + }); + }); +} + +/** + * Checks a node for validity as a Sizzle context + * @param {Element|Object=} context + * @returns {Element|Object|Boolean} The input node if acceptable, otherwise a falsy value + */ +function testContext( context ) { + return context && typeof context.getElementsByTagName !== "undefined" && context; +} + +// Expose support vars for convenience +support = Sizzle.support = {}; + +/** + * Detects XML nodes + * @param {Element|Object} elem An element or a document + * @returns {Boolean} True iff elem is a non-HTML XML node + */ +isXML = Sizzle.isXML = function( elem ) { + // documentElement is verified for cases where it doesn't yet exist + // (such as loading iframes in IE - #4833) + var documentElement = elem && (elem.ownerDocument || elem).documentElement; + return documentElement ? documentElement.nodeName !== "HTML" : false; +}; + +/** + * Sets document-related variables once based on the current document + * @param {Element|Object} [doc] An element or document object to use to set the document + * @returns {Object} Returns the current document + */ +setDocument = Sizzle.setDocument = function( node ) { + var hasCompare, subWindow, + doc = node ? 
node.ownerDocument || node : preferredDoc; + + // Return early if doc is invalid or already selected + if ( doc === document || doc.nodeType !== 9 || !doc.documentElement ) { + return document; + } + + // Update global variables + document = doc; + docElem = document.documentElement; + documentIsHTML = !isXML( document ); + + // Support: IE 9-11, Edge + // Accessing iframe documents after unload throws "permission denied" errors (jQuery #13936) + if ( preferredDoc !== document && + (subWindow = document.defaultView) && subWindow.top !== subWindow ) { + + // Support: IE 11, Edge + if ( subWindow.addEventListener ) { + subWindow.addEventListener( "unload", unloadHandler, false ); + + // Support: IE 9 - 10 only + } else if ( subWindow.attachEvent ) { + subWindow.attachEvent( "onunload", unloadHandler ); + } + } + + /* Attributes + ---------------------------------------------------------------------- */ + + // Support: IE<8 + // Verify that getAttribute really returns attributes and not properties + // (excepting IE8 booleans) + support.attributes = assert(function( el ) { + el.className = "i"; + return !el.getAttribute("className"); + }); + + /* getElement(s)By* + ---------------------------------------------------------------------- */ + + // Check if getElementsByTagName("*") returns only elements + support.getElementsByTagName = assert(function( el ) { + el.appendChild( document.createComment("") ); + return !el.getElementsByTagName("*").length; + }); + + // Support: IE<9 + support.getElementsByClassName = rnative.test( document.getElementsByClassName ); + + // Support: IE<10 + // Check if getElementById returns elements by name + // The broken getElementById methods don't pick up programmatically-set names, + // so use a roundabout getElementsByName test + support.getById = assert(function( el ) { + docElem.appendChild( el ).id = expando; + return !document.getElementsByName || !document.getElementsByName( expando ).length; + }); + + // ID filter and find + if ( support.getById ) { + Expr.filter["ID"] = function( id ) { + var attrId = id.replace( runescape, funescape ); + return function( elem ) { + return elem.getAttribute("id") === attrId; + }; + }; + Expr.find["ID"] = function( id, context ) { + if ( typeof context.getElementById !== "undefined" && documentIsHTML ) { + var elem = context.getElementById( id ); + return elem ? [ elem ] : []; + } + }; + } else { + Expr.filter["ID"] = function( id ) { + var attrId = id.replace( runescape, funescape ); + return function( elem ) { + var node = typeof elem.getAttributeNode !== "undefined" && + elem.getAttributeNode("id"); + return node && node.value === attrId; + }; + }; + + // Support: IE 6 - 7 only + // getElementById is not reliable as a find shortcut + Expr.find["ID"] = function( id, context ) { + if ( typeof context.getElementById !== "undefined" && documentIsHTML ) { + var node, i, elems, + elem = context.getElementById( id ); + + if ( elem ) { + + // Verify the id attribute + node = elem.getAttributeNode("id"); + if ( node && node.value === id ) { + return [ elem ]; + } + + // Fall back on getElementsByName + elems = context.getElementsByName( id ); + i = 0; + while ( (elem = elems[i++]) ) { + node = elem.getAttributeNode("id"); + if ( node && node.value === id ) { + return [ elem ]; + } + } + } + + return []; + } + }; + } + + // Tag + Expr.find["TAG"] = support.getElementsByTagName ? 
+ function( tag, context ) { + if ( typeof context.getElementsByTagName !== "undefined" ) { + return context.getElementsByTagName( tag ); + + // DocumentFragment nodes don't have gEBTN + } else if ( support.qsa ) { + return context.querySelectorAll( tag ); + } + } : + + function( tag, context ) { + var elem, + tmp = [], + i = 0, + // By happy coincidence, a (broken) gEBTN appears on DocumentFragment nodes too + results = context.getElementsByTagName( tag ); + + // Filter out possible comments + if ( tag === "*" ) { + while ( (elem = results[i++]) ) { + if ( elem.nodeType === 1 ) { + tmp.push( elem ); + } + } + + return tmp; + } + return results; + }; + + // Class + Expr.find["CLASS"] = support.getElementsByClassName && function( className, context ) { + if ( typeof context.getElementsByClassName !== "undefined" && documentIsHTML ) { + return context.getElementsByClassName( className ); + } + }; + + /* QSA/matchesSelector + ---------------------------------------------------------------------- */ + + // QSA and matchesSelector support + + // matchesSelector(:active) reports false when true (IE9/Opera 11.5) + rbuggyMatches = []; + + // qSa(:focus) reports false when true (Chrome 21) + // We allow this because of a bug in IE8/9 that throws an error + // whenever `document.activeElement` is accessed on an iframe + // So, we allow :focus to pass through QSA all the time to avoid the IE error + // See https://bugs.jquery.com/ticket/13378 + rbuggyQSA = []; + + if ( (support.qsa = rnative.test( document.querySelectorAll )) ) { + // Build QSA regex + // Regex strategy adopted from Diego Perini + assert(function( el ) { + // Select is set to empty string on purpose + // This is to test IE's treatment of not explicitly + // setting a boolean content attribute, + // since its presence should be enough + // https://bugs.jquery.com/ticket/12359 + docElem.appendChild( el ).innerHTML = "" + + ""; + + // Support: IE8, Opera 11-12.16 + // Nothing should be selected when empty strings follow ^= or $= or *= + // The test attribute must be unknown in Opera but "safe" for WinRT + // https://msdn.microsoft.com/en-us/library/ie/hh465388.aspx#attribute_section + if ( el.querySelectorAll("[msallowcapture^='']").length ) { + rbuggyQSA.push( "[*^$]=" + whitespace + "*(?:''|\"\")" ); + } + + // Support: IE8 + // Boolean attributes and "value" are not treated correctly + if ( !el.querySelectorAll("[selected]").length ) { + rbuggyQSA.push( "\\[" + whitespace + "*(?:value|" + booleans + ")" ); + } + + // Support: Chrome<29, Android<4.4, Safari<7.0+, iOS<7.0+, PhantomJS<1.9.8+ + if ( !el.querySelectorAll( "[id~=" + expando + "-]" ).length ) { + rbuggyQSA.push("~="); + } + + // Webkit/Opera - :checked should return selected option elements + // http://www.w3.org/TR/2011/REC-css3-selectors-20110929/#checked + // IE8 throws error here and will not see later tests + if ( !el.querySelectorAll(":checked").length ) { + rbuggyQSA.push(":checked"); + } + + // Support: Safari 8+, iOS 8+ + // https://bugs.webkit.org/show_bug.cgi?id=136851 + // In-page `selector#id sibling-combinator selector` fails + if ( !el.querySelectorAll( "a#" + expando + "+*" ).length ) { + rbuggyQSA.push(".#.+[+~]"); + } + }); + + assert(function( el ) { + el.innerHTML = "" + + ""; + + // Support: Windows 8 Native Apps + // The type and name attributes are restricted during .innerHTML assignment + var input = document.createElement("input"); + input.setAttribute( "type", "hidden" ); + el.appendChild( input ).setAttribute( "name", "D" ); + + // Support: IE8 + 
// Enforce case-sensitivity of name attribute + if ( el.querySelectorAll("[name=d]").length ) { + rbuggyQSA.push( "name" + whitespace + "*[*^$|!~]?=" ); + } + + // FF 3.5 - :enabled/:disabled and hidden elements (hidden elements are still enabled) + // IE8 throws error here and will not see later tests + if ( el.querySelectorAll(":enabled").length !== 2 ) { + rbuggyQSA.push( ":enabled", ":disabled" ); + } + + // Support: IE9-11+ + // IE's :disabled selector does not pick up the children of disabled fieldsets + docElem.appendChild( el ).disabled = true; + if ( el.querySelectorAll(":disabled").length !== 2 ) { + rbuggyQSA.push( ":enabled", ":disabled" ); + } + + // Opera 10-11 does not throw on post-comma invalid pseudos + el.querySelectorAll("*,:x"); + rbuggyQSA.push(",.*:"); + }); + } + + if ( (support.matchesSelector = rnative.test( (matches = docElem.matches || + docElem.webkitMatchesSelector || + docElem.mozMatchesSelector || + docElem.oMatchesSelector || + docElem.msMatchesSelector) )) ) { + + assert(function( el ) { + // Check to see if it's possible to do matchesSelector + // on a disconnected node (IE 9) + support.disconnectedMatch = matches.call( el, "*" ); + + // This should fail with an exception + // Gecko does not error, returns false instead + matches.call( el, "[s!='']:x" ); + rbuggyMatches.push( "!=", pseudos ); + }); + } + + rbuggyQSA = rbuggyQSA.length && new RegExp( rbuggyQSA.join("|") ); + rbuggyMatches = rbuggyMatches.length && new RegExp( rbuggyMatches.join("|") ); + + /* Contains + ---------------------------------------------------------------------- */ + hasCompare = rnative.test( docElem.compareDocumentPosition ); + + // Element contains another + // Purposefully self-exclusive + // As in, an element does not contain itself + contains = hasCompare || rnative.test( docElem.contains ) ? + function( a, b ) { + var adown = a.nodeType === 9 ? a.documentElement : a, + bup = b && b.parentNode; + return a === bup || !!( bup && bup.nodeType === 1 && ( + adown.contains ? + adown.contains( bup ) : + a.compareDocumentPosition && a.compareDocumentPosition( bup ) & 16 + )); + } : + function( a, b ) { + if ( b ) { + while ( (b = b.parentNode) ) { + if ( b === a ) { + return true; + } + } + } + return false; + }; + + /* Sorting + ---------------------------------------------------------------------- */ + + // Document order sorting + sortOrder = hasCompare ? + function( a, b ) { + + // Flag for duplicate removal + if ( a === b ) { + hasDuplicate = true; + return 0; + } + + // Sort on method existence if only one input has compareDocumentPosition + var compare = !a.compareDocumentPosition - !b.compareDocumentPosition; + if ( compare ) { + return compare; + } + + // Calculate position if both inputs belong to the same document + compare = ( a.ownerDocument || a ) === ( b.ownerDocument || b ) ? + a.compareDocumentPosition( b ) : + + // Otherwise we know they are disconnected + 1; + + // Disconnected nodes + if ( compare & 1 || + (!support.sortDetached && b.compareDocumentPosition( a ) === compare) ) { + + // Choose the first element that is related to our preferred document + if ( a === document || a.ownerDocument === preferredDoc && contains(preferredDoc, a) ) { + return -1; + } + if ( b === document || b.ownerDocument === preferredDoc && contains(preferredDoc, b) ) { + return 1; + } + + // Maintain original order + return sortInput ? + ( indexOf( sortInput, a ) - indexOf( sortInput, b ) ) : + 0; + } + + return compare & 4 ? 
-1 : 1; + } : + function( a, b ) { + // Exit early if the nodes are identical + if ( a === b ) { + hasDuplicate = true; + return 0; + } + + var cur, + i = 0, + aup = a.parentNode, + bup = b.parentNode, + ap = [ a ], + bp = [ b ]; + + // Parentless nodes are either documents or disconnected + if ( !aup || !bup ) { + return a === document ? -1 : + b === document ? 1 : + aup ? -1 : + bup ? 1 : + sortInput ? + ( indexOf( sortInput, a ) - indexOf( sortInput, b ) ) : + 0; + + // If the nodes are siblings, we can do a quick check + } else if ( aup === bup ) { + return siblingCheck( a, b ); + } + + // Otherwise we need full lists of their ancestors for comparison + cur = a; + while ( (cur = cur.parentNode) ) { + ap.unshift( cur ); + } + cur = b; + while ( (cur = cur.parentNode) ) { + bp.unshift( cur ); + } + + // Walk down the tree looking for a discrepancy + while ( ap[i] === bp[i] ) { + i++; + } + + return i ? + // Do a sibling check if the nodes have a common ancestor + siblingCheck( ap[i], bp[i] ) : + + // Otherwise nodes in our document sort first + ap[i] === preferredDoc ? -1 : + bp[i] === preferredDoc ? 1 : + 0; + }; + + return document; +}; + +Sizzle.matches = function( expr, elements ) { + return Sizzle( expr, null, null, elements ); +}; + +Sizzle.matchesSelector = function( elem, expr ) { + // Set document vars if needed + if ( ( elem.ownerDocument || elem ) !== document ) { + setDocument( elem ); + } + + // Make sure that attribute selectors are quoted + expr = expr.replace( rattributeQuotes, "='$1']" ); + + if ( support.matchesSelector && documentIsHTML && + !compilerCache[ expr + " " ] && + ( !rbuggyMatches || !rbuggyMatches.test( expr ) ) && + ( !rbuggyQSA || !rbuggyQSA.test( expr ) ) ) { + + try { + var ret = matches.call( elem, expr ); + + // IE 9's matchesSelector returns false on disconnected nodes + if ( ret || support.disconnectedMatch || + // As well, disconnected nodes are said to be in a document + // fragment in IE 9 + elem.document && elem.document.nodeType !== 11 ) { + return ret; + } + } catch (e) {} + } + + return Sizzle( expr, document, null, [ elem ] ).length > 0; +}; + +Sizzle.contains = function( context, elem ) { + // Set document vars if needed + if ( ( context.ownerDocument || context ) !== document ) { + setDocument( context ); + } + return contains( context, elem ); +}; + +Sizzle.attr = function( elem, name ) { + // Set document vars if needed + if ( ( elem.ownerDocument || elem ) !== document ) { + setDocument( elem ); + } + + var fn = Expr.attrHandle[ name.toLowerCase() ], + // Don't get fooled by Object.prototype properties (jQuery #13807) + val = fn && hasOwn.call( Expr.attrHandle, name.toLowerCase() ) ? + fn( elem, name, !documentIsHTML ) : + undefined; + + return val !== undefined ? + val : + support.attributes || !documentIsHTML ? + elem.getAttribute( name ) : + (val = elem.getAttributeNode(name)) && val.specified ? 
+ val.value : + null; +}; + +Sizzle.escape = function( sel ) { + return (sel + "").replace( rcssescape, fcssescape ); +}; + +Sizzle.error = function( msg ) { + throw new Error( "Syntax error, unrecognized expression: " + msg ); +}; + +/** + * Document sorting and removing duplicates + * @param {ArrayLike} results + */ +Sizzle.uniqueSort = function( results ) { + var elem, + duplicates = [], + j = 0, + i = 0; + + // Unless we *know* we can detect duplicates, assume their presence + hasDuplicate = !support.detectDuplicates; + sortInput = !support.sortStable && results.slice( 0 ); + results.sort( sortOrder ); + + if ( hasDuplicate ) { + while ( (elem = results[i++]) ) { + if ( elem === results[ i ] ) { + j = duplicates.push( i ); + } + } + while ( j-- ) { + results.splice( duplicates[ j ], 1 ); + } + } + + // Clear input after sorting to release objects + // See https://github.com/jquery/sizzle/pull/225 + sortInput = null; + + return results; +}; + +/** + * Utility function for retrieving the text value of an array of DOM nodes + * @param {Array|Element} elem + */ +getText = Sizzle.getText = function( elem ) { + var node, + ret = "", + i = 0, + nodeType = elem.nodeType; + + if ( !nodeType ) { + // If no nodeType, this is expected to be an array + while ( (node = elem[i++]) ) { + // Do not traverse comment nodes + ret += getText( node ); + } + } else if ( nodeType === 1 || nodeType === 9 || nodeType === 11 ) { + // Use textContent for elements + // innerText usage removed for consistency of new lines (jQuery #11153) + if ( typeof elem.textContent === "string" ) { + return elem.textContent; + } else { + // Traverse its children + for ( elem = elem.firstChild; elem; elem = elem.nextSibling ) { + ret += getText( elem ); + } + } + } else if ( nodeType === 3 || nodeType === 4 ) { + return elem.nodeValue; + } + // Do not include comment or processing instruction nodes + + return ret; +}; + +Expr = Sizzle.selectors = { + + // Can be adjusted by the user + cacheLength: 50, + + createPseudo: markFunction, + + match: matchExpr, + + attrHandle: {}, + + find: {}, + + relative: { + ">": { dir: "parentNode", first: true }, + " ": { dir: "parentNode" }, + "+": { dir: "previousSibling", first: true }, + "~": { dir: "previousSibling" } + }, + + preFilter: { + "ATTR": function( match ) { + match[1] = match[1].replace( runescape, funescape ); + + // Move the given value to match[3] whether quoted or unquoted + match[3] = ( match[3] || match[4] || match[5] || "" ).replace( runescape, funescape ); + + if ( match[2] === "~=" ) { + match[3] = " " + match[3] + " "; + } + + return match.slice( 0, 4 ); + }, + + "CHILD": function( match ) { + /* matches from matchExpr["CHILD"] + 1 type (only|nth|...) + 2 what (child|of-type) + 3 argument (even|odd|\d*|\d*n([+-]\d+)?|...) + 4 xn-component of xn+y argument ([+-]?\d*n|) + 5 sign of xn-component + 6 x of xn-component + 7 sign of y-component + 8 y of y-component + */ + match[1] = match[1].toLowerCase(); + + if ( match[1].slice( 0, 3 ) === "nth" ) { + // nth-* requires argument + if ( !match[3] ) { + Sizzle.error( match[0] ); + } + + // numeric x and y parameters for Expr.filter.CHILD + // remember that false/true cast respectively to 0/1 + match[4] = +( match[4] ? 
match[5] + (match[6] || 1) : 2 * ( match[3] === "even" || match[3] === "odd" ) ); + match[5] = +( ( match[7] + match[8] ) || match[3] === "odd" ); + + // other types prohibit arguments + } else if ( match[3] ) { + Sizzle.error( match[0] ); + } + + return match; + }, + + "PSEUDO": function( match ) { + var excess, + unquoted = !match[6] && match[2]; + + if ( matchExpr["CHILD"].test( match[0] ) ) { + return null; + } + + // Accept quoted arguments as-is + if ( match[3] ) { + match[2] = match[4] || match[5] || ""; + + // Strip excess characters from unquoted arguments + } else if ( unquoted && rpseudo.test( unquoted ) && + // Get excess from tokenize (recursively) + (excess = tokenize( unquoted, true )) && + // advance to the next closing parenthesis + (excess = unquoted.indexOf( ")", unquoted.length - excess ) - unquoted.length) ) { + + // excess is a negative index + match[0] = match[0].slice( 0, excess ); + match[2] = unquoted.slice( 0, excess ); + } + + // Return only captures needed by the pseudo filter method (type and argument) + return match.slice( 0, 3 ); + } + }, + + filter: { + + "TAG": function( nodeNameSelector ) { + var nodeName = nodeNameSelector.replace( runescape, funescape ).toLowerCase(); + return nodeNameSelector === "*" ? + function() { return true; } : + function( elem ) { + return elem.nodeName && elem.nodeName.toLowerCase() === nodeName; + }; + }, + + "CLASS": function( className ) { + var pattern = classCache[ className + " " ]; + + return pattern || + (pattern = new RegExp( "(^|" + whitespace + ")" + className + "(" + whitespace + "|$)" )) && + classCache( className, function( elem ) { + return pattern.test( typeof elem.className === "string" && elem.className || typeof elem.getAttribute !== "undefined" && elem.getAttribute("class") || "" ); + }); + }, + + "ATTR": function( name, operator, check ) { + return function( elem ) { + var result = Sizzle.attr( elem, name ); + + if ( result == null ) { + return operator === "!="; + } + if ( !operator ) { + return true; + } + + result += ""; + + return operator === "=" ? result === check : + operator === "!=" ? result !== check : + operator === "^=" ? check && result.indexOf( check ) === 0 : + operator === "*=" ? check && result.indexOf( check ) > -1 : + operator === "$=" ? check && result.slice( -check.length ) === check : + operator === "~=" ? ( " " + result.replace( rwhitespace, " " ) + " " ).indexOf( check ) > -1 : + operator === "|=" ? result === check || result.slice( 0, check.length + 1 ) === check + "-" : + false; + }; + }, + + "CHILD": function( type, what, argument, first, last ) { + var simple = type.slice( 0, 3 ) !== "nth", + forward = type.slice( -4 ) !== "last", + ofType = what === "of-type"; + + return first === 1 && last === 0 ? + + // Shortcut for :nth-*(n) + function( elem ) { + return !!elem.parentNode; + } : + + function( elem, context, xml ) { + var cache, uniqueCache, outerCache, node, nodeIndex, start, + dir = simple !== forward ? "nextSibling" : "previousSibling", + parent = elem.parentNode, + name = ofType && elem.nodeName.toLowerCase(), + useCache = !xml && !ofType, + diff = false; + + if ( parent ) { + + // :(first|last|only)-(child|of-type) + if ( simple ) { + while ( dir ) { + node = elem; + while ( (node = node[ dir ]) ) { + if ( ofType ? 
+ node.nodeName.toLowerCase() === name : + node.nodeType === 1 ) { + + return false; + } + } + // Reverse direction for :only-* (if we haven't yet done so) + start = dir = type === "only" && !start && "nextSibling"; + } + return true; + } + + start = [ forward ? parent.firstChild : parent.lastChild ]; + + // non-xml :nth-child(...) stores cache data on `parent` + if ( forward && useCache ) { + + // Seek `elem` from a previously-cached index + + // ...in a gzip-friendly way + node = parent; + outerCache = node[ expando ] || (node[ expando ] = {}); + + // Support: IE <9 only + // Defend against cloned attroperties (jQuery gh-1709) + uniqueCache = outerCache[ node.uniqueID ] || + (outerCache[ node.uniqueID ] = {}); + + cache = uniqueCache[ type ] || []; + nodeIndex = cache[ 0 ] === dirruns && cache[ 1 ]; + diff = nodeIndex && cache[ 2 ]; + node = nodeIndex && parent.childNodes[ nodeIndex ]; + + while ( (node = ++nodeIndex && node && node[ dir ] || + + // Fallback to seeking `elem` from the start + (diff = nodeIndex = 0) || start.pop()) ) { + + // When found, cache indexes on `parent` and break + if ( node.nodeType === 1 && ++diff && node === elem ) { + uniqueCache[ type ] = [ dirruns, nodeIndex, diff ]; + break; + } + } + + } else { + // Use previously-cached element index if available + if ( useCache ) { + // ...in a gzip-friendly way + node = elem; + outerCache = node[ expando ] || (node[ expando ] = {}); + + // Support: IE <9 only + // Defend against cloned attroperties (jQuery gh-1709) + uniqueCache = outerCache[ node.uniqueID ] || + (outerCache[ node.uniqueID ] = {}); + + cache = uniqueCache[ type ] || []; + nodeIndex = cache[ 0 ] === dirruns && cache[ 1 ]; + diff = nodeIndex; + } + + // xml :nth-child(...) + // or :nth-last-child(...) or :nth(-last)?-of-type(...) + if ( diff === false ) { + // Use the same loop as above to seek `elem` from the start + while ( (node = ++nodeIndex && node && node[ dir ] || + (diff = nodeIndex = 0) || start.pop()) ) { + + if ( ( ofType ? + node.nodeName.toLowerCase() === name : + node.nodeType === 1 ) && + ++diff ) { + + // Cache the index of each encountered element + if ( useCache ) { + outerCache = node[ expando ] || (node[ expando ] = {}); + + // Support: IE <9 only + // Defend against cloned attroperties (jQuery gh-1709) + uniqueCache = outerCache[ node.uniqueID ] || + (outerCache[ node.uniqueID ] = {}); + + uniqueCache[ type ] = [ dirruns, diff ]; + } + + if ( node === elem ) { + break; + } + } + } + } + } + + // Incorporate the offset, then check against cycle size + diff -= last; + return diff === first || ( diff % first === 0 && diff / first >= 0 ); + } + }; + }, + + "PSEUDO": function( pseudo, argument ) { + // pseudo-class names are case-insensitive + // http://www.w3.org/TR/selectors/#pseudo-classes + // Prioritize by case sensitivity in case custom pseudos are added with uppercase letters + // Remember that setFilters inherits from pseudos + var args, + fn = Expr.pseudos[ pseudo ] || Expr.setFilters[ pseudo.toLowerCase() ] || + Sizzle.error( "unsupported pseudo: " + pseudo ); + + // The user may use createPseudo to indicate that + // arguments are needed to create the filter function + // just as Sizzle does + if ( fn[ expando ] ) { + return fn( argument ); + } + + // But maintain support for old signatures + if ( fn.length > 1 ) { + args = [ pseudo, pseudo, "", argument ]; + return Expr.setFilters.hasOwnProperty( pseudo.toLowerCase() ) ? 
+ markFunction(function( seed, matches ) { + var idx, + matched = fn( seed, argument ), + i = matched.length; + while ( i-- ) { + idx = indexOf( seed, matched[i] ); + seed[ idx ] = !( matches[ idx ] = matched[i] ); + } + }) : + function( elem ) { + return fn( elem, 0, args ); + }; + } + + return fn; + } + }, + + pseudos: { + // Potentially complex pseudos + "not": markFunction(function( selector ) { + // Trim the selector passed to compile + // to avoid treating leading and trailing + // spaces as combinators + var input = [], + results = [], + matcher = compile( selector.replace( rtrim, "$1" ) ); + + return matcher[ expando ] ? + markFunction(function( seed, matches, context, xml ) { + var elem, + unmatched = matcher( seed, null, xml, [] ), + i = seed.length; + + // Match elements unmatched by `matcher` + while ( i-- ) { + if ( (elem = unmatched[i]) ) { + seed[i] = !(matches[i] = elem); + } + } + }) : + function( elem, context, xml ) { + input[0] = elem; + matcher( input, null, xml, results ); + // Don't keep the element (issue #299) + input[0] = null; + return !results.pop(); + }; + }), + + "has": markFunction(function( selector ) { + return function( elem ) { + return Sizzle( selector, elem ).length > 0; + }; + }), + + "contains": markFunction(function( text ) { + text = text.replace( runescape, funescape ); + return function( elem ) { + return ( elem.textContent || elem.innerText || getText( elem ) ).indexOf( text ) > -1; + }; + }), + + // "Whether an element is represented by a :lang() selector + // is based solely on the element's language value + // being equal to the identifier C, + // or beginning with the identifier C immediately followed by "-". + // The matching of C against the element's language value is performed case-insensitively. + // The identifier C does not have to be a valid language name." + // http://www.w3.org/TR/selectors/#lang-pseudo + "lang": markFunction( function( lang ) { + // lang value must be a valid identifier + if ( !ridentifier.test(lang || "") ) { + Sizzle.error( "unsupported lang: " + lang ); + } + lang = lang.replace( runescape, funescape ).toLowerCase(); + return function( elem ) { + var elemLang; + do { + if ( (elemLang = documentIsHTML ? 
+ elem.lang : + elem.getAttribute("xml:lang") || elem.getAttribute("lang")) ) { + + elemLang = elemLang.toLowerCase(); + return elemLang === lang || elemLang.indexOf( lang + "-" ) === 0; + } + } while ( (elem = elem.parentNode) && elem.nodeType === 1 ); + return false; + }; + }), + + // Miscellaneous + "target": function( elem ) { + var hash = window.location && window.location.hash; + return hash && hash.slice( 1 ) === elem.id; + }, + + "root": function( elem ) { + return elem === docElem; + }, + + "focus": function( elem ) { + return elem === document.activeElement && (!document.hasFocus || document.hasFocus()) && !!(elem.type || elem.href || ~elem.tabIndex); + }, + + // Boolean properties + "enabled": createDisabledPseudo( false ), + "disabled": createDisabledPseudo( true ), + + "checked": function( elem ) { + // In CSS3, :checked should return both checked and selected elements + // http://www.w3.org/TR/2011/REC-css3-selectors-20110929/#checked + var nodeName = elem.nodeName.toLowerCase(); + return (nodeName === "input" && !!elem.checked) || (nodeName === "option" && !!elem.selected); + }, + + "selected": function( elem ) { + // Accessing this property makes selected-by-default + // options in Safari work properly + if ( elem.parentNode ) { + elem.parentNode.selectedIndex; + } + + return elem.selected === true; + }, + + // Contents + "empty": function( elem ) { + // http://www.w3.org/TR/selectors/#empty-pseudo + // :empty is negated by element (1) or content nodes (text: 3; cdata: 4; entity ref: 5), + // but not by others (comment: 8; processing instruction: 7; etc.) + // nodeType < 6 works because attributes (2) do not appear as children + for ( elem = elem.firstChild; elem; elem = elem.nextSibling ) { + if ( elem.nodeType < 6 ) { + return false; + } + } + return true; + }, + + "parent": function( elem ) { + return !Expr.pseudos["empty"]( elem ); + }, + + // Element/input types + "header": function( elem ) { + return rheader.test( elem.nodeName ); + }, + + "input": function( elem ) { + return rinputs.test( elem.nodeName ); + }, + + "button": function( elem ) { + var name = elem.nodeName.toLowerCase(); + return name === "input" && elem.type === "button" || name === "button"; + }, + + "text": function( elem ) { + var attr; + return elem.nodeName.toLowerCase() === "input" && + elem.type === "text" && + + // Support: IE<8 + // New HTML5 attribute values (e.g., "search") appear with elem.type === "text" + ( (attr = elem.getAttribute("type")) == null || attr.toLowerCase() === "text" ); + }, + + // Position-in-collection + "first": createPositionalPseudo(function() { + return [ 0 ]; + }), + + "last": createPositionalPseudo(function( matchIndexes, length ) { + return [ length - 1 ]; + }), + + "eq": createPositionalPseudo(function( matchIndexes, length, argument ) { + return [ argument < 0 ? argument + length : argument ]; + }), + + "even": createPositionalPseudo(function( matchIndexes, length ) { + var i = 0; + for ( ; i < length; i += 2 ) { + matchIndexes.push( i ); + } + return matchIndexes; + }), + + "odd": createPositionalPseudo(function( matchIndexes, length ) { + var i = 1; + for ( ; i < length; i += 2 ) { + matchIndexes.push( i ); + } + return matchIndexes; + }), + + "lt": createPositionalPseudo(function( matchIndexes, length, argument ) { + var i = argument < 0 ? argument + length : argument; + for ( ; --i >= 0; ) { + matchIndexes.push( i ); + } + return matchIndexes; + }), + + "gt": createPositionalPseudo(function( matchIndexes, length, argument ) { + var i = argument < 0 ? 
argument + length : argument; + for ( ; ++i < length; ) { + matchIndexes.push( i ); + } + return matchIndexes; + }) + } +}; + +Expr.pseudos["nth"] = Expr.pseudos["eq"]; + +// Add button/input type pseudos +for ( i in { radio: true, checkbox: true, file: true, password: true, image: true } ) { + Expr.pseudos[ i ] = createInputPseudo( i ); +} +for ( i in { submit: true, reset: true } ) { + Expr.pseudos[ i ] = createButtonPseudo( i ); +} + +// Easy API for creating new setFilters +function setFilters() {} +setFilters.prototype = Expr.filters = Expr.pseudos; +Expr.setFilters = new setFilters(); + +tokenize = Sizzle.tokenize = function( selector, parseOnly ) { + var matched, match, tokens, type, + soFar, groups, preFilters, + cached = tokenCache[ selector + " " ]; + + if ( cached ) { + return parseOnly ? 0 : cached.slice( 0 ); + } + + soFar = selector; + groups = []; + preFilters = Expr.preFilter; + + while ( soFar ) { + + // Comma and first run + if ( !matched || (match = rcomma.exec( soFar )) ) { + if ( match ) { + // Don't consume trailing commas as valid + soFar = soFar.slice( match[0].length ) || soFar; + } + groups.push( (tokens = []) ); + } + + matched = false; + + // Combinators + if ( (match = rcombinators.exec( soFar )) ) { + matched = match.shift(); + tokens.push({ + value: matched, + // Cast descendant combinators to space + type: match[0].replace( rtrim, " " ) + }); + soFar = soFar.slice( matched.length ); + } + + // Filters + for ( type in Expr.filter ) { + if ( (match = matchExpr[ type ].exec( soFar )) && (!preFilters[ type ] || + (match = preFilters[ type ]( match ))) ) { + matched = match.shift(); + tokens.push({ + value: matched, + type: type, + matches: match + }); + soFar = soFar.slice( matched.length ); + } + } + + if ( !matched ) { + break; + } + } + + // Return the length of the invalid excess + // if we're just parsing + // Otherwise, throw an error or return tokens + return parseOnly ? + soFar.length : + soFar ? + Sizzle.error( selector ) : + // Cache the tokens + tokenCache( selector, groups ).slice( 0 ); +}; + +function toSelector( tokens ) { + var i = 0, + len = tokens.length, + selector = ""; + for ( ; i < len; i++ ) { + selector += tokens[i].value; + } + return selector; +} + +function addCombinator( matcher, combinator, base ) { + var dir = combinator.dir, + skip = combinator.next, + key = skip || dir, + checkNonElements = base && key === "parentNode", + doneName = done++; + + return combinator.first ? 
+ // Check against closest ancestor/preceding element + function( elem, context, xml ) { + while ( (elem = elem[ dir ]) ) { + if ( elem.nodeType === 1 || checkNonElements ) { + return matcher( elem, context, xml ); + } + } + return false; + } : + + // Check against all ancestor/preceding elements + function( elem, context, xml ) { + var oldCache, uniqueCache, outerCache, + newCache = [ dirruns, doneName ]; + + // We can't set arbitrary data on XML nodes, so they don't benefit from combinator caching + if ( xml ) { + while ( (elem = elem[ dir ]) ) { + if ( elem.nodeType === 1 || checkNonElements ) { + if ( matcher( elem, context, xml ) ) { + return true; + } + } + } + } else { + while ( (elem = elem[ dir ]) ) { + if ( elem.nodeType === 1 || checkNonElements ) { + outerCache = elem[ expando ] || (elem[ expando ] = {}); + + // Support: IE <9 only + // Defend against cloned attroperties (jQuery gh-1709) + uniqueCache = outerCache[ elem.uniqueID ] || (outerCache[ elem.uniqueID ] = {}); + + if ( skip && skip === elem.nodeName.toLowerCase() ) { + elem = elem[ dir ] || elem; + } else if ( (oldCache = uniqueCache[ key ]) && + oldCache[ 0 ] === dirruns && oldCache[ 1 ] === doneName ) { + + // Assign to newCache so results back-propagate to previous elements + return (newCache[ 2 ] = oldCache[ 2 ]); + } else { + // Reuse newcache so results back-propagate to previous elements + uniqueCache[ key ] = newCache; + + // A match means we're done; a fail means we have to keep checking + if ( (newCache[ 2 ] = matcher( elem, context, xml )) ) { + return true; + } + } + } + } + } + return false; + }; +} + +function elementMatcher( matchers ) { + return matchers.length > 1 ? + function( elem, context, xml ) { + var i = matchers.length; + while ( i-- ) { + if ( !matchers[i]( elem, context, xml ) ) { + return false; + } + } + return true; + } : + matchers[0]; +} + +function multipleContexts( selector, contexts, results ) { + var i = 0, + len = contexts.length; + for ( ; i < len; i++ ) { + Sizzle( selector, contexts[i], results ); + } + return results; +} + +function condense( unmatched, map, filter, context, xml ) { + var elem, + newUnmatched = [], + i = 0, + len = unmatched.length, + mapped = map != null; + + for ( ; i < len; i++ ) { + if ( (elem = unmatched[i]) ) { + if ( !filter || filter( elem, context, xml ) ) { + newUnmatched.push( elem ); + if ( mapped ) { + map.push( i ); + } + } + } + } + + return newUnmatched; +} + +function setMatcher( preFilter, selector, matcher, postFilter, postFinder, postSelector ) { + if ( postFilter && !postFilter[ expando ] ) { + postFilter = setMatcher( postFilter ); + } + if ( postFinder && !postFinder[ expando ] ) { + postFinder = setMatcher( postFinder, postSelector ); + } + return markFunction(function( seed, results, context, xml ) { + var temp, i, elem, + preMap = [], + postMap = [], + preexisting = results.length, + + // Get initial elements from seed or context + elems = seed || multipleContexts( selector || "*", context.nodeType ? [ context ] : context, [] ), + + // Prefilter to get matcher input, preserving a map for seed-results synchronization + matcherIn = preFilter && ( seed || !selector ) ? + condense( elems, preMap, preFilter, context, xml ) : + elems, + + matcherOut = matcher ? + // If we have a postFinder, or filtered seed, or non-seed postFilter or preexisting results, + postFinder || ( seed ? preFilter : preexisting || postFilter ) ? 
+ + // ...intermediate processing is necessary + [] : + + // ...otherwise use results directly + results : + matcherIn; + + // Find primary matches + if ( matcher ) { + matcher( matcherIn, matcherOut, context, xml ); + } + + // Apply postFilter + if ( postFilter ) { + temp = condense( matcherOut, postMap ); + postFilter( temp, [], context, xml ); + + // Un-match failing elements by moving them back to matcherIn + i = temp.length; + while ( i-- ) { + if ( (elem = temp[i]) ) { + matcherOut[ postMap[i] ] = !(matcherIn[ postMap[i] ] = elem); + } + } + } + + if ( seed ) { + if ( postFinder || preFilter ) { + if ( postFinder ) { + // Get the final matcherOut by condensing this intermediate into postFinder contexts + temp = []; + i = matcherOut.length; + while ( i-- ) { + if ( (elem = matcherOut[i]) ) { + // Restore matcherIn since elem is not yet a final match + temp.push( (matcherIn[i] = elem) ); + } + } + postFinder( null, (matcherOut = []), temp, xml ); + } + + // Move matched elements from seed to results to keep them synchronized + i = matcherOut.length; + while ( i-- ) { + if ( (elem = matcherOut[i]) && + (temp = postFinder ? indexOf( seed, elem ) : preMap[i]) > -1 ) { + + seed[temp] = !(results[temp] = elem); + } + } + } + + // Add elements to results, through postFinder if defined + } else { + matcherOut = condense( + matcherOut === results ? + matcherOut.splice( preexisting, matcherOut.length ) : + matcherOut + ); + if ( postFinder ) { + postFinder( null, results, matcherOut, xml ); + } else { + push.apply( results, matcherOut ); + } + } + }); +} + +function matcherFromTokens( tokens ) { + var checkContext, matcher, j, + len = tokens.length, + leadingRelative = Expr.relative[ tokens[0].type ], + implicitRelative = leadingRelative || Expr.relative[" "], + i = leadingRelative ? 1 : 0, + + // The foundational matcher ensures that elements are reachable from top-level context(s) + matchContext = addCombinator( function( elem ) { + return elem === checkContext; + }, implicitRelative, true ), + matchAnyContext = addCombinator( function( elem ) { + return indexOf( checkContext, elem ) > -1; + }, implicitRelative, true ), + matchers = [ function( elem, context, xml ) { + var ret = ( !leadingRelative && ( xml || context !== outermostContext ) ) || ( + (checkContext = context).nodeType ? + matchContext( elem, context, xml ) : + matchAnyContext( elem, context, xml ) ); + // Avoid hanging onto element (issue #299) + checkContext = null; + return ret; + } ]; + + for ( ; i < len; i++ ) { + if ( (matcher = Expr.relative[ tokens[i].type ]) ) { + matchers = [ addCombinator(elementMatcher( matchers ), matcher) ]; + } else { + matcher = Expr.filter[ tokens[i].type ].apply( null, tokens[i].matches ); + + // Return special upon seeing a positional matcher + if ( matcher[ expando ] ) { + // Find the next relative operator (if any) for proper handling + j = ++i; + for ( ; j < len; j++ ) { + if ( Expr.relative[ tokens[j].type ] ) { + break; + } + } + return setMatcher( + i > 1 && elementMatcher( matchers ), + i > 1 && toSelector( + // If the preceding token was a descendant combinator, insert an implicit any-element `*` + tokens.slice( 0, i - 1 ).concat({ value: tokens[ i - 2 ].type === " " ? 
"*" : "" }) + ).replace( rtrim, "$1" ), + matcher, + i < j && matcherFromTokens( tokens.slice( i, j ) ), + j < len && matcherFromTokens( (tokens = tokens.slice( j )) ), + j < len && toSelector( tokens ) + ); + } + matchers.push( matcher ); + } + } + + return elementMatcher( matchers ); +} + +function matcherFromGroupMatchers( elementMatchers, setMatchers ) { + var bySet = setMatchers.length > 0, + byElement = elementMatchers.length > 0, + superMatcher = function( seed, context, xml, results, outermost ) { + var elem, j, matcher, + matchedCount = 0, + i = "0", + unmatched = seed && [], + setMatched = [], + contextBackup = outermostContext, + // We must always have either seed elements or outermost context + elems = seed || byElement && Expr.find["TAG"]( "*", outermost ), + // Use integer dirruns iff this is the outermost matcher + dirrunsUnique = (dirruns += contextBackup == null ? 1 : Math.random() || 0.1), + len = elems.length; + + if ( outermost ) { + outermostContext = context === document || context || outermost; + } + + // Add elements passing elementMatchers directly to results + // Support: IE<9, Safari + // Tolerate NodeList properties (IE: "length"; Safari: ) matching elements by id + for ( ; i !== len && (elem = elems[i]) != null; i++ ) { + if ( byElement && elem ) { + j = 0; + if ( !context && elem.ownerDocument !== document ) { + setDocument( elem ); + xml = !documentIsHTML; + } + while ( (matcher = elementMatchers[j++]) ) { + if ( matcher( elem, context || document, xml) ) { + results.push( elem ); + break; + } + } + if ( outermost ) { + dirruns = dirrunsUnique; + } + } + + // Track unmatched elements for set filters + if ( bySet ) { + // They will have gone through all possible matchers + if ( (elem = !matcher && elem) ) { + matchedCount--; + } + + // Lengthen the array for every element, matched or not + if ( seed ) { + unmatched.push( elem ); + } + } + } + + // `i` is now the count of elements visited above, and adding it to `matchedCount` + // makes the latter nonnegative. + matchedCount += i; + + // Apply set filters to unmatched elements + // NOTE: This can be skipped if there are no unmatched elements (i.e., `matchedCount` + // equals `i`), unless we didn't visit _any_ elements in the above loop because we have + // no element matchers and no seed. + // Incrementing an initially-string "0" `i` allows `i` to remain a string only in that + // case, which will result in a "00" `matchedCount` that differs from `i` but is also + // numerically zero. + if ( bySet && i !== matchedCount ) { + j = 0; + while ( (matcher = setMatchers[j++]) ) { + matcher( unmatched, setMatched, context, xml ); + } + + if ( seed ) { + // Reintegrate element matches to eliminate the need for sorting + if ( matchedCount > 0 ) { + while ( i-- ) { + if ( !(unmatched[i] || setMatched[i]) ) { + setMatched[i] = pop.call( results ); + } + } + } + + // Discard index placeholder values to get only actual matches + setMatched = condense( setMatched ); + } + + // Add matches to results + push.apply( results, setMatched ); + + // Seedless set matches succeeding multiple successful matchers stipulate sorting + if ( outermost && !seed && setMatched.length > 0 && + ( matchedCount + setMatchers.length ) > 1 ) { + + Sizzle.uniqueSort( results ); + } + } + + // Override manipulation of globals by nested matchers + if ( outermost ) { + dirruns = dirrunsUnique; + outermostContext = contextBackup; + } + + return unmatched; + }; + + return bySet ? 
+ markFunction( superMatcher ) : + superMatcher; +} + +compile = Sizzle.compile = function( selector, match /* Internal Use Only */ ) { + var i, + setMatchers = [], + elementMatchers = [], + cached = compilerCache[ selector + " " ]; + + if ( !cached ) { + // Generate a function of recursive functions that can be used to check each element + if ( !match ) { + match = tokenize( selector ); + } + i = match.length; + while ( i-- ) { + cached = matcherFromTokens( match[i] ); + if ( cached[ expando ] ) { + setMatchers.push( cached ); + } else { + elementMatchers.push( cached ); + } + } + + // Cache the compiled function + cached = compilerCache( selector, matcherFromGroupMatchers( elementMatchers, setMatchers ) ); + + // Save selector and tokenization + cached.selector = selector; + } + return cached; +}; + +/** + * A low-level selection function that works with Sizzle's compiled + * selector functions + * @param {String|Function} selector A selector or a pre-compiled + * selector function built with Sizzle.compile + * @param {Element} context + * @param {Array} [results] + * @param {Array} [seed] A set of elements to match against + */ +select = Sizzle.select = function( selector, context, results, seed ) { + var i, tokens, token, type, find, + compiled = typeof selector === "function" && selector, + match = !seed && tokenize( (selector = compiled.selector || selector) ); + + results = results || []; + + // Try to minimize operations if there is only one selector in the list and no seed + // (the latter of which guarantees us context) + if ( match.length === 1 ) { + + // Reduce context if the leading compound selector is an ID + tokens = match[0] = match[0].slice( 0 ); + if ( tokens.length > 2 && (token = tokens[0]).type === "ID" && + context.nodeType === 9 && documentIsHTML && Expr.relative[ tokens[1].type ] ) { + + context = ( Expr.find["ID"]( token.matches[0].replace(runescape, funescape), context ) || [] )[0]; + if ( !context ) { + return results; + + // Precompiled matchers will still verify ancestry, so step up a level + } else if ( compiled ) { + context = context.parentNode; + } + + selector = selector.slice( tokens.shift().value.length ); + } + + // Fetch a seed set for right-to-left matching + i = matchExpr["needsContext"].test( selector ) ? 
0 : tokens.length;
+		while ( i-- ) {
+			token = tokens[i];
+
+			// Abort if we hit a combinator
+			if ( Expr.relative[ (type = token.type) ] ) {
+				break;
+			}
+			if ( (find = Expr.find[ type ]) ) {
+				// Search, expanding context for leading sibling combinators
+				if ( (seed = find(
+					token.matches[0].replace( runescape, funescape ),
+					rsibling.test( tokens[0].type ) && testContext( context.parentNode ) || context
+				)) ) {
+
+					// If seed is empty or no tokens remain, we can return early
+					tokens.splice( i, 1 );
+					selector = seed.length && toSelector( tokens );
+					if ( !selector ) {
+						push.apply( results, seed );
+						return results;
+					}
+
+					break;
+				}
+			}
+		}
+	}
+
+	// Compile and execute a filtering function if one is not provided
+	// Provide `match` to avoid retokenization if we modified the selector above
+	( compiled || compile( selector, match ) )(
+		seed,
+		context,
+		!documentIsHTML,
+		results,
+		!context || rsibling.test( selector ) && testContext( context.parentNode ) || context
+	);
+	return results;
+};
+
+// One-time assignments
+
+// Sort stability
+support.sortStable = expando.split("").sort( sortOrder ).join("") === expando;
+
+// Support: Chrome 14-35+
+// Always assume duplicates if they aren't passed to the comparison function
+support.detectDuplicates = !!hasDuplicate;
+
+// Initialize against the default document
+setDocument();
+
+// Support: Webkit<537.32 - Safari 6.0.3/Chrome 25 (fixed in Chrome 27)
+// Detached nodes confoundingly follow *each other*
+support.sortDetached = assert(function( el ) {
+	// Should return 1, but returns 4 (following)
+	return el.compareDocumentPosition( document.createElement("fieldset") ) & 1;
+});
+
+// Support: IE<8
+// Prevent attribute/property "interpolation"
+// https://msdn.microsoft.com/en-us/library/ms536429%28VS.85%29.aspx
+if ( !assert(function( el ) {
+	el.innerHTML = "<a href='#'></a>";
+	return el.firstChild.getAttribute("href") === "#" ;
+}) ) {
+	addHandle( "type|href|height|width", function( elem, name, isXML ) {
+		if ( !isXML ) {
+			return elem.getAttribute( name, name.toLowerCase() === "type" ? 1 : 2 );
+		}
+	});
+}
+
+// Support: IE<9
+// Use defaultValue in place of getAttribute("value")
+if ( !support.attributes || !assert(function( el ) {
+	el.innerHTML = "<input/>";
+	el.firstChild.setAttribute( "value", "" );
+	return el.firstChild.getAttribute( "value" ) === "";
+}) ) {
+	addHandle( "value", function( elem, name, isXML ) {
+		if ( !isXML && elem.nodeName.toLowerCase() === "input" ) {
+			return elem.defaultValue;
+		}
+	});
+}
+
+// Support: IE<9
+// Use getAttributeNode to fetch booleans when getAttribute lies
+if ( !assert(function( el ) {
+	return el.getAttribute("disabled") == null;
+}) ) {
+	addHandle( booleans, function( elem, name, isXML ) {
+		var val;
+		if ( !isXML ) {
+			return elem[ name ] === true ? name.toLowerCase() :
+				(val = elem.getAttributeNode( name )) && val.specified ?
+ val.value : + null; + } + }); +} + +return Sizzle; + +})( window ); + + + +jQuery.find = Sizzle; +jQuery.expr = Sizzle.selectors; + +// Deprecated +jQuery.expr[ ":" ] = jQuery.expr.pseudos; +jQuery.uniqueSort = jQuery.unique = Sizzle.uniqueSort; +jQuery.text = Sizzle.getText; +jQuery.isXMLDoc = Sizzle.isXML; +jQuery.contains = Sizzle.contains; +jQuery.escapeSelector = Sizzle.escape; + + + + +var dir = function( elem, dir, until ) { + var matched = [], + truncate = until !== undefined; + + while ( ( elem = elem[ dir ] ) && elem.nodeType !== 9 ) { + if ( elem.nodeType === 1 ) { + if ( truncate && jQuery( elem ).is( until ) ) { + break; + } + matched.push( elem ); + } + } + return matched; +}; + + +var siblings = function( n, elem ) { + var matched = []; + + for ( ; n; n = n.nextSibling ) { + if ( n.nodeType === 1 && n !== elem ) { + matched.push( n ); + } + } + + return matched; +}; + + +var rneedsContext = jQuery.expr.match.needsContext; + + + +function nodeName( elem, name ) { + + return elem.nodeName && elem.nodeName.toLowerCase() === name.toLowerCase(); + +}; +var rsingleTag = ( /^<([a-z][^\/\0>:\x20\t\r\n\f]*)[\x20\t\r\n\f]*\/?>(?:<\/\1>|)$/i ); + + + +var risSimple = /^.[^:#\[\.,]*$/; + +// Implement the identical functionality for filter and not +function winnow( elements, qualifier, not ) { + if ( jQuery.isFunction( qualifier ) ) { + return jQuery.grep( elements, function( elem, i ) { + return !!qualifier.call( elem, i, elem ) !== not; + } ); + } + + // Single element + if ( qualifier.nodeType ) { + return jQuery.grep( elements, function( elem ) { + return ( elem === qualifier ) !== not; + } ); + } + + // Arraylike of elements (jQuery, arguments, Array) + if ( typeof qualifier !== "string" ) { + return jQuery.grep( elements, function( elem ) { + return ( indexOf.call( qualifier, elem ) > -1 ) !== not; + } ); + } + + // Simple selector that can be filtered directly, removing non-Elements + if ( risSimple.test( qualifier ) ) { + return jQuery.filter( qualifier, elements, not ); + } + + // Complex selector, compare the two sets, removing non-Elements + qualifier = jQuery.filter( qualifier, elements ); + return jQuery.grep( elements, function( elem ) { + return ( indexOf.call( qualifier, elem ) > -1 ) !== not && elem.nodeType === 1; + } ); +} + +jQuery.filter = function( expr, elems, not ) { + var elem = elems[ 0 ]; + + if ( not ) { + expr = ":not(" + expr + ")"; + } + + if ( elems.length === 1 && elem.nodeType === 1 ) { + return jQuery.find.matchesSelector( elem, expr ) ? [ elem ] : []; + } + + return jQuery.find.matches( expr, jQuery.grep( elems, function( elem ) { + return elem.nodeType === 1; + } ) ); +}; + +jQuery.fn.extend( { + find: function( selector ) { + var i, ret, + len = this.length, + self = this; + + if ( typeof selector !== "string" ) { + return this.pushStack( jQuery( selector ).filter( function() { + for ( i = 0; i < len; i++ ) { + if ( jQuery.contains( self[ i ], this ) ) { + return true; + } + } + } ) ); + } + + ret = this.pushStack( [] ); + + for ( i = 0; i < len; i++ ) { + jQuery.find( selector, self[ i ], ret ); + } + + return len > 1 ? 
jQuery.uniqueSort( ret ) : ret; + }, + filter: function( selector ) { + return this.pushStack( winnow( this, selector || [], false ) ); + }, + not: function( selector ) { + return this.pushStack( winnow( this, selector || [], true ) ); + }, + is: function( selector ) { + return !!winnow( + this, + + // If this is a positional/relative selector, check membership in the returned set + // so $("p:first").is("p:last") won't return true for a doc with two "p". + typeof selector === "string" && rneedsContext.test( selector ) ? + jQuery( selector ) : + selector || [], + false + ).length; + } +} ); + + +// Initialize a jQuery object + + +// A central reference to the root jQuery(document) +var rootjQuery, + + // A simple way to check for HTML strings + // Prioritize #id over to avoid XSS via location.hash (#9521) + // Strict HTML recognition (#11290: must start with <) + // Shortcut simple #id case for speed + rquickExpr = /^(?:\s*(<[\w\W]+>)[^>]*|#([\w-]+))$/, + + init = jQuery.fn.init = function( selector, context, root ) { + var match, elem; + + // HANDLE: $(""), $(null), $(undefined), $(false) + if ( !selector ) { + return this; + } + + // Method init() accepts an alternate rootjQuery + // so migrate can support jQuery.sub (gh-2101) + root = root || rootjQuery; + + // Handle HTML strings + if ( typeof selector === "string" ) { + if ( selector[ 0 ] === "<" && + selector[ selector.length - 1 ] === ">" && + selector.length >= 3 ) { + + // Assume that strings that start and end with <> are HTML and skip the regex check + match = [ null, selector, null ]; + + } else { + match = rquickExpr.exec( selector ); + } + + // Match html or make sure no context is specified for #id + if ( match && ( match[ 1 ] || !context ) ) { + + // HANDLE: $(html) -> $(array) + if ( match[ 1 ] ) { + context = context instanceof jQuery ? context[ 0 ] : context; + + // Option to run scripts is true for back-compat + // Intentionally let the error be thrown if parseHTML is not present + jQuery.merge( this, jQuery.parseHTML( + match[ 1 ], + context && context.nodeType ? context.ownerDocument || context : document, + true + ) ); + + // HANDLE: $(html, props) + if ( rsingleTag.test( match[ 1 ] ) && jQuery.isPlainObject( context ) ) { + for ( match in context ) { + + // Properties of context are called as methods if possible + if ( jQuery.isFunction( this[ match ] ) ) { + this[ match ]( context[ match ] ); + + // ...and otherwise set as attributes + } else { + this.attr( match, context[ match ] ); + } + } + } + + return this; + + // HANDLE: $(#id) + } else { + elem = document.getElementById( match[ 2 ] ); + + if ( elem ) { + + // Inject the element directly into the jQuery object + this[ 0 ] = elem; + this.length = 1; + } + return this; + } + + // HANDLE: $(expr, $(...)) + } else if ( !context || context.jquery ) { + return ( context || root ).find( selector ); + + // HANDLE: $(expr, context) + // (which is just equivalent to: $(context).find(expr) + } else { + return this.constructor( context ).find( selector ); + } + + // HANDLE: $(DOMElement) + } else if ( selector.nodeType ) { + this[ 0 ] = selector; + this.length = 1; + return this; + + // HANDLE: $(function) + // Shortcut for document ready + } else if ( jQuery.isFunction( selector ) ) { + return root.ready !== undefined ? 
+ root.ready( selector ) : + + // Execute immediately if ready is not present + selector( jQuery ); + } + + return jQuery.makeArray( selector, this ); + }; + +// Give the init function the jQuery prototype for later instantiation +init.prototype = jQuery.fn; + +// Initialize central reference +rootjQuery = jQuery( document ); + + +var rparentsprev = /^(?:parents|prev(?:Until|All))/, + + // Methods guaranteed to produce a unique set when starting from a unique set + guaranteedUnique = { + children: true, + contents: true, + next: true, + prev: true + }; + +jQuery.fn.extend( { + has: function( target ) { + var targets = jQuery( target, this ), + l = targets.length; + + return this.filter( function() { + var i = 0; + for ( ; i < l; i++ ) { + if ( jQuery.contains( this, targets[ i ] ) ) { + return true; + } + } + } ); + }, + + closest: function( selectors, context ) { + var cur, + i = 0, + l = this.length, + matched = [], + targets = typeof selectors !== "string" && jQuery( selectors ); + + // Positional selectors never match, since there's no _selection_ context + if ( !rneedsContext.test( selectors ) ) { + for ( ; i < l; i++ ) { + for ( cur = this[ i ]; cur && cur !== context; cur = cur.parentNode ) { + + // Always skip document fragments + if ( cur.nodeType < 11 && ( targets ? + targets.index( cur ) > -1 : + + // Don't pass non-elements to Sizzle + cur.nodeType === 1 && + jQuery.find.matchesSelector( cur, selectors ) ) ) { + + matched.push( cur ); + break; + } + } + } + } + + return this.pushStack( matched.length > 1 ? jQuery.uniqueSort( matched ) : matched ); + }, + + // Determine the position of an element within the set + index: function( elem ) { + + // No argument, return index in parent + if ( !elem ) { + return ( this[ 0 ] && this[ 0 ].parentNode ) ? this.first().prevAll().length : -1; + } + + // Index in selector + if ( typeof elem === "string" ) { + return indexOf.call( jQuery( elem ), this[ 0 ] ); + } + + // Locate the position of the desired element + return indexOf.call( this, + + // If it receives a jQuery object, the first element is used + elem.jquery ? elem[ 0 ] : elem + ); + }, + + add: function( selector, context ) { + return this.pushStack( + jQuery.uniqueSort( + jQuery.merge( this.get(), jQuery( selector, context ) ) + ) + ); + }, + + addBack: function( selector ) { + return this.add( selector == null ? + this.prevObject : this.prevObject.filter( selector ) + ); + } +} ); + +function sibling( cur, dir ) { + while ( ( cur = cur[ dir ] ) && cur.nodeType !== 1 ) {} + return cur; +} + +jQuery.each( { + parent: function( elem ) { + var parent = elem.parentNode; + return parent && parent.nodeType !== 11 ? 
parent : null; + }, + parents: function( elem ) { + return dir( elem, "parentNode" ); + }, + parentsUntil: function( elem, i, until ) { + return dir( elem, "parentNode", until ); + }, + next: function( elem ) { + return sibling( elem, "nextSibling" ); + }, + prev: function( elem ) { + return sibling( elem, "previousSibling" ); + }, + nextAll: function( elem ) { + return dir( elem, "nextSibling" ); + }, + prevAll: function( elem ) { + return dir( elem, "previousSibling" ); + }, + nextUntil: function( elem, i, until ) { + return dir( elem, "nextSibling", until ); + }, + prevUntil: function( elem, i, until ) { + return dir( elem, "previousSibling", until ); + }, + siblings: function( elem ) { + return siblings( ( elem.parentNode || {} ).firstChild, elem ); + }, + children: function( elem ) { + return siblings( elem.firstChild ); + }, + contents: function( elem ) { + if ( nodeName( elem, "iframe" ) ) { + return elem.contentDocument; + } + + // Support: IE 9 - 11 only, iOS 7 only, Android Browser <=4.3 only + // Treat the template element as a regular one in browsers that + // don't support it. + if ( nodeName( elem, "template" ) ) { + elem = elem.content || elem; + } + + return jQuery.merge( [], elem.childNodes ); + } +}, function( name, fn ) { + jQuery.fn[ name ] = function( until, selector ) { + var matched = jQuery.map( this, fn, until ); + + if ( name.slice( -5 ) !== "Until" ) { + selector = until; + } + + if ( selector && typeof selector === "string" ) { + matched = jQuery.filter( selector, matched ); + } + + if ( this.length > 1 ) { + + // Remove duplicates + if ( !guaranteedUnique[ name ] ) { + jQuery.uniqueSort( matched ); + } + + // Reverse order for parents* and prev-derivatives + if ( rparentsprev.test( name ) ) { + matched.reverse(); + } + } + + return this.pushStack( matched ); + }; +} ); +var rnothtmlwhite = ( /[^\x20\t\r\n\f]+/g ); + + + +// Convert String-formatted options into Object-formatted ones +function createOptions( options ) { + var object = {}; + jQuery.each( options.match( rnothtmlwhite ) || [], function( _, flag ) { + object[ flag ] = true; + } ); + return object; +} + +/* + * Create a callback list using the following parameters: + * + * options: an optional list of space-separated options that will change how + * the callback list behaves or a more traditional option object + * + * By default a callback list will act like an event callback list and can be + * "fired" multiple times. + * + * Possible options: + * + * once: will ensure the callback list can only be fired once (like a Deferred) + * + * memory: will keep track of previous values and will call any callback added + * after the list has been fired right away with the latest "memorized" + * values (like a Deferred) + * + * unique: will ensure a callback can only be added once (no duplicate in the list) + * + * stopOnFalse: interrupt callings when a callback returns false + * + */ +jQuery.Callbacks = function( options ) { + + // Convert options from String-formatted to Object-formatted if needed + // (we check in cache first) + options = typeof options === "string" ? 
+ createOptions( options ) : + jQuery.extend( {}, options ); + + var // Flag to know if list is currently firing + firing, + + // Last fire value for non-forgettable lists + memory, + + // Flag to know if list was already fired + fired, + + // Flag to prevent firing + locked, + + // Actual callback list + list = [], + + // Queue of execution data for repeatable lists + queue = [], + + // Index of currently firing callback (modified by add/remove as needed) + firingIndex = -1, + + // Fire callbacks + fire = function() { + + // Enforce single-firing + locked = locked || options.once; + + // Execute callbacks for all pending executions, + // respecting firingIndex overrides and runtime changes + fired = firing = true; + for ( ; queue.length; firingIndex = -1 ) { + memory = queue.shift(); + while ( ++firingIndex < list.length ) { + + // Run callback and check for early termination + if ( list[ firingIndex ].apply( memory[ 0 ], memory[ 1 ] ) === false && + options.stopOnFalse ) { + + // Jump to end and forget the data so .add doesn't re-fire + firingIndex = list.length; + memory = false; + } + } + } + + // Forget the data if we're done with it + if ( !options.memory ) { + memory = false; + } + + firing = false; + + // Clean up if we're done firing for good + if ( locked ) { + + // Keep an empty list if we have data for future add calls + if ( memory ) { + list = []; + + // Otherwise, this object is spent + } else { + list = ""; + } + } + }, + + // Actual Callbacks object + self = { + + // Add a callback or a collection of callbacks to the list + add: function() { + if ( list ) { + + // If we have memory from a past run, we should fire after adding + if ( memory && !firing ) { + firingIndex = list.length - 1; + queue.push( memory ); + } + + ( function add( args ) { + jQuery.each( args, function( _, arg ) { + if ( jQuery.isFunction( arg ) ) { + if ( !options.unique || !self.has( arg ) ) { + list.push( arg ); + } + } else if ( arg && arg.length && jQuery.type( arg ) !== "string" ) { + + // Inspect recursively + add( arg ); + } + } ); + } )( arguments ); + + if ( memory && !firing ) { + fire(); + } + } + return this; + }, + + // Remove a callback from the list + remove: function() { + jQuery.each( arguments, function( _, arg ) { + var index; + while ( ( index = jQuery.inArray( arg, list, index ) ) > -1 ) { + list.splice( index, 1 ); + + // Handle firing indexes + if ( index <= firingIndex ) { + firingIndex--; + } + } + } ); + return this; + }, + + // Check if a given callback is in the list. + // If no argument is given, return whether or not list has callbacks attached. + has: function( fn ) { + return fn ? + jQuery.inArray( fn, list ) > -1 : + list.length > 0; + }, + + // Remove all callbacks from the list + empty: function() { + if ( list ) { + list = []; + } + return this; + }, + + // Disable .fire and .add + // Abort any current/pending executions + // Clear all callbacks and values + disable: function() { + locked = queue = []; + list = memory = ""; + return this; + }, + disabled: function() { + return !list; + }, + + // Disable .fire + // Also disable .add unless we have memory (since it would have no effect) + // Abort any pending executions + lock: function() { + locked = queue = []; + if ( !memory && !firing ) { + list = memory = ""; + } + return this; + }, + locked: function() { + return !!locked; + }, + + // Call all callbacks with the given context and arguments + fireWith: function( context, args ) { + if ( !locked ) { + args = args || []; + args = [ context, args.slice ? 
args.slice() : args ]; + queue.push( args ); + if ( !firing ) { + fire(); + } + } + return this; + }, + + // Call all the callbacks with the given arguments + fire: function() { + self.fireWith( this, arguments ); + return this; + }, + + // To know if the callbacks have already been called at least once + fired: function() { + return !!fired; + } + }; + + return self; +}; + + +function Identity( v ) { + return v; +} +function Thrower( ex ) { + throw ex; +} + +function adoptValue( value, resolve, reject, noValue ) { + var method; + + try { + + // Check for promise aspect first to privilege synchronous behavior + if ( value && jQuery.isFunction( ( method = value.promise ) ) ) { + method.call( value ).done( resolve ).fail( reject ); + + // Other thenables + } else if ( value && jQuery.isFunction( ( method = value.then ) ) ) { + method.call( value, resolve, reject ); + + // Other non-thenables + } else { + + // Control `resolve` arguments by letting Array#slice cast boolean `noValue` to integer: + // * false: [ value ].slice( 0 ) => resolve( value ) + // * true: [ value ].slice( 1 ) => resolve() + resolve.apply( undefined, [ value ].slice( noValue ) ); + } + + // For Promises/A+, convert exceptions into rejections + // Since jQuery.when doesn't unwrap thenables, we can skip the extra checks appearing in + // Deferred#then to conditionally suppress rejection. + } catch ( value ) { + + // Support: Android 4.0 only + // Strict mode functions invoked without .call/.apply get global-object context + reject.apply( undefined, [ value ] ); + } +} + +jQuery.extend( { + + Deferred: function( func ) { + var tuples = [ + + // action, add listener, callbacks, + // ... .then handlers, argument index, [final state] + [ "notify", "progress", jQuery.Callbacks( "memory" ), + jQuery.Callbacks( "memory" ), 2 ], + [ "resolve", "done", jQuery.Callbacks( "once memory" ), + jQuery.Callbacks( "once memory" ), 0, "resolved" ], + [ "reject", "fail", jQuery.Callbacks( "once memory" ), + jQuery.Callbacks( "once memory" ), 1, "rejected" ] + ], + state = "pending", + promise = { + state: function() { + return state; + }, + always: function() { + deferred.done( arguments ).fail( arguments ); + return this; + }, + "catch": function( fn ) { + return promise.then( null, fn ); + }, + + // Keep pipe for back-compat + pipe: function( /* fnDone, fnFail, fnProgress */ ) { + var fns = arguments; + + return jQuery.Deferred( function( newDefer ) { + jQuery.each( tuples, function( i, tuple ) { + + // Map tuples (progress, done, fail) to arguments (done, fail, progress) + var fn = jQuery.isFunction( fns[ tuple[ 4 ] ] ) && fns[ tuple[ 4 ] ]; + + // deferred.progress(function() { bind to newDefer or newDefer.notify }) + // deferred.done(function() { bind to newDefer or newDefer.resolve }) + // deferred.fail(function() { bind to newDefer or newDefer.reject }) + deferred[ tuple[ 1 ] ]( function() { + var returned = fn && fn.apply( this, arguments ); + if ( returned && jQuery.isFunction( returned.promise ) ) { + returned.promise() + .progress( newDefer.notify ) + .done( newDefer.resolve ) + .fail( newDefer.reject ); + } else { + newDefer[ tuple[ 0 ] + "With" ]( + this, + fn ? 
[ returned ] : arguments + ); + } + } ); + } ); + fns = null; + } ).promise(); + }, + then: function( onFulfilled, onRejected, onProgress ) { + var maxDepth = 0; + function resolve( depth, deferred, handler, special ) { + return function() { + var that = this, + args = arguments, + mightThrow = function() { + var returned, then; + + // Support: Promises/A+ section 2.3.3.3.3 + // https://promisesaplus.com/#point-59 + // Ignore double-resolution attempts + if ( depth < maxDepth ) { + return; + } + + returned = handler.apply( that, args ); + + // Support: Promises/A+ section 2.3.1 + // https://promisesaplus.com/#point-48 + if ( returned === deferred.promise() ) { + throw new TypeError( "Thenable self-resolution" ); + } + + // Support: Promises/A+ sections 2.3.3.1, 3.5 + // https://promisesaplus.com/#point-54 + // https://promisesaplus.com/#point-75 + // Retrieve `then` only once + then = returned && + + // Support: Promises/A+ section 2.3.4 + // https://promisesaplus.com/#point-64 + // Only check objects and functions for thenability + ( typeof returned === "object" || + typeof returned === "function" ) && + returned.then; + + // Handle a returned thenable + if ( jQuery.isFunction( then ) ) { + + // Special processors (notify) just wait for resolution + if ( special ) { + then.call( + returned, + resolve( maxDepth, deferred, Identity, special ), + resolve( maxDepth, deferred, Thrower, special ) + ); + + // Normal processors (resolve) also hook into progress + } else { + + // ...and disregard older resolution values + maxDepth++; + + then.call( + returned, + resolve( maxDepth, deferred, Identity, special ), + resolve( maxDepth, deferred, Thrower, special ), + resolve( maxDepth, deferred, Identity, + deferred.notifyWith ) + ); + } + + // Handle all other returned values + } else { + + // Only substitute handlers pass on context + // and multiple values (non-spec behavior) + if ( handler !== Identity ) { + that = undefined; + args = [ returned ]; + } + + // Process the value(s) + // Default process is resolve + ( special || deferred.resolveWith )( that, args ); + } + }, + + // Only normal processors (resolve) catch and reject exceptions + process = special ? + mightThrow : + function() { + try { + mightThrow(); + } catch ( e ) { + + if ( jQuery.Deferred.exceptionHook ) { + jQuery.Deferred.exceptionHook( e, + process.stackTrace ); + } + + // Support: Promises/A+ section 2.3.3.3.4.1 + // https://promisesaplus.com/#point-61 + // Ignore post-resolution exceptions + if ( depth + 1 >= maxDepth ) { + + // Only substitute handlers pass on context + // and multiple values (non-spec behavior) + if ( handler !== Thrower ) { + that = undefined; + args = [ e ]; + } + + deferred.rejectWith( that, args ); + } + } + }; + + // Support: Promises/A+ section 2.3.3.3.1 + // https://promisesaplus.com/#point-57 + // Re-resolve promises immediately to dodge false rejection from + // subsequent errors + if ( depth ) { + process(); + } else { + + // Call an optional hook to record the stack, in case of exception + // since it's otherwise lost when execution goes async + if ( jQuery.Deferred.getStackHook ) { + process.stackTrace = jQuery.Deferred.getStackHook(); + } + window.setTimeout( process ); + } + }; + } + + return jQuery.Deferred( function( newDefer ) { + + // progress_handlers.add( ... ) + tuples[ 0 ][ 3 ].add( + resolve( + 0, + newDefer, + jQuery.isFunction( onProgress ) ? + onProgress : + Identity, + newDefer.notifyWith + ) + ); + + // fulfilled_handlers.add( ... 
) + tuples[ 1 ][ 3 ].add( + resolve( + 0, + newDefer, + jQuery.isFunction( onFulfilled ) ? + onFulfilled : + Identity + ) + ); + + // rejected_handlers.add( ... ) + tuples[ 2 ][ 3 ].add( + resolve( + 0, + newDefer, + jQuery.isFunction( onRejected ) ? + onRejected : + Thrower + ) + ); + } ).promise(); + }, + + // Get a promise for this deferred + // If obj is provided, the promise aspect is added to the object + promise: function( obj ) { + return obj != null ? jQuery.extend( obj, promise ) : promise; + } + }, + deferred = {}; + + // Add list-specific methods + jQuery.each( tuples, function( i, tuple ) { + var list = tuple[ 2 ], + stateString = tuple[ 5 ]; + + // promise.progress = list.add + // promise.done = list.add + // promise.fail = list.add + promise[ tuple[ 1 ] ] = list.add; + + // Handle state + if ( stateString ) { + list.add( + function() { + + // state = "resolved" (i.e., fulfilled) + // state = "rejected" + state = stateString; + }, + + // rejected_callbacks.disable + // fulfilled_callbacks.disable + tuples[ 3 - i ][ 2 ].disable, + + // progress_callbacks.lock + tuples[ 0 ][ 2 ].lock + ); + } + + // progress_handlers.fire + // fulfilled_handlers.fire + // rejected_handlers.fire + list.add( tuple[ 3 ].fire ); + + // deferred.notify = function() { deferred.notifyWith(...) } + // deferred.resolve = function() { deferred.resolveWith(...) } + // deferred.reject = function() { deferred.rejectWith(...) } + deferred[ tuple[ 0 ] ] = function() { + deferred[ tuple[ 0 ] + "With" ]( this === deferred ? undefined : this, arguments ); + return this; + }; + + // deferred.notifyWith = list.fireWith + // deferred.resolveWith = list.fireWith + // deferred.rejectWith = list.fireWith + deferred[ tuple[ 0 ] + "With" ] = list.fireWith; + } ); + + // Make the deferred a promise + promise.promise( deferred ); + + // Call given func if any + if ( func ) { + func.call( deferred, deferred ); + } + + // All done! + return deferred; + }, + + // Deferred helper + when: function( singleValue ) { + var + + // count of uncompleted subordinates + remaining = arguments.length, + + // count of unprocessed arguments + i = remaining, + + // subordinate fulfillment data + resolveContexts = Array( i ), + resolveValues = slice.call( arguments ), + + // the master Deferred + master = jQuery.Deferred(), + + // subordinate callback factory + updateFunc = function( i ) { + return function( value ) { + resolveContexts[ i ] = this; + resolveValues[ i ] = arguments.length > 1 ? slice.call( arguments ) : value; + if ( !( --remaining ) ) { + master.resolveWith( resolveContexts, resolveValues ); + } + }; + }; + + // Single- and empty arguments are adopted like Promise.resolve + if ( remaining <= 1 ) { + adoptValue( singleValue, master.done( updateFunc( i ) ).resolve, master.reject, + !remaining ); + + // Use .then() to unwrap secondary thenables (cf. gh-3000) + if ( master.state() === "pending" || + jQuery.isFunction( resolveValues[ i ] && resolveValues[ i ].then ) ) { + + return master.then(); + } + } + + // Multiple arguments are aggregated like Promise.all array elements + while ( i-- ) { + adoptValue( resolveValues[ i ], updateFunc( i ), master.reject ); + } + + return master.promise(); + } +} ); + + +// These usually indicate a programmer mistake during development, +// warn about them ASAP rather than swallowing them by default. 
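+// Minimal usage sketch of the Callbacks/Deferred helpers defined above
+// (illustrative only; the identifiers below are examples and are not part
+// of this file):
+//
+//     var announcements = jQuery.Callbacks( "once memory" );
+//     announcements.fire( "ready" );                // remembered via "memory"
+//     announcements.add( function( msg ) {          // runs immediately with "ready"
+//         window.console.log( msg );
+//     } );
+//
+//     var task = jQuery.Deferred();
+//     jQuery.when( task, jQuery.Deferred().resolve( 1 ) )
+//         .done( function( a, b ) {                 // a === 0, b === 1
+//             window.console.log( a + b );
+//         } );
+//     task.resolve( 0 );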
+var rerrorNames = /^(Eval|Internal|Range|Reference|Syntax|Type|URI)Error$/; + +jQuery.Deferred.exceptionHook = function( error, stack ) { + + // Support: IE 8 - 9 only + // Console exists when dev tools are open, which can happen at any time + if ( window.console && window.console.warn && error && rerrorNames.test( error.name ) ) { + window.console.warn( "jQuery.Deferred exception: " + error.message, error.stack, stack ); + } +}; + + + + +jQuery.readyException = function( error ) { + window.setTimeout( function() { + throw error; + } ); +}; + + + + +// The deferred used on DOM ready +var readyList = jQuery.Deferred(); + +jQuery.fn.ready = function( fn ) { + + readyList + .then( fn ) + + // Wrap jQuery.readyException in a function so that the lookup + // happens at the time of error handling instead of callback + // registration. + .catch( function( error ) { + jQuery.readyException( error ); + } ); + + return this; +}; + +jQuery.extend( { + + // Is the DOM ready to be used? Set to true once it occurs. + isReady: false, + + // A counter to track how many items to wait for before + // the ready event fires. See #6781 + readyWait: 1, + + // Handle when the DOM is ready + ready: function( wait ) { + + // Abort if there are pending holds or we're already ready + if ( wait === true ? --jQuery.readyWait : jQuery.isReady ) { + return; + } + + // Remember that the DOM is ready + jQuery.isReady = true; + + // If a normal DOM Ready event fired, decrement, and wait if need be + if ( wait !== true && --jQuery.readyWait > 0 ) { + return; + } + + // If there are functions bound, to execute + readyList.resolveWith( document, [ jQuery ] ); + } +} ); + +jQuery.ready.then = readyList.then; + +// The ready event handler and self cleanup method +function completed() { + document.removeEventListener( "DOMContentLoaded", completed ); + window.removeEventListener( "load", completed ); + jQuery.ready(); +} + +// Catch cases where $(document).ready() is called +// after the browser event has already occurred. +// Support: IE <=9 - 10 only +// Older IE sometimes signals "interactive" too soon +if ( document.readyState === "complete" || + ( document.readyState !== "loading" && !document.documentElement.doScroll ) ) { + + // Handle it asynchronously to allow scripts the opportunity to delay ready + window.setTimeout( jQuery.ready ); + +} else { + + // Use the handy event callback + document.addEventListener( "DOMContentLoaded", completed ); + + // A fallback to window.onload, that will always work + window.addEventListener( "load", completed ); +} + + + + +// Multifunctional method to get and set values of a collection +// The value/s can optionally be executed if it's a function +var access = function( elems, fn, key, value, chainable, emptyGet, raw ) { + var i = 0, + len = elems.length, + bulk = key == null; + + // Sets many values + if ( jQuery.type( key ) === "object" ) { + chainable = true; + for ( i in key ) { + access( elems, fn, i, key[ i ], true, emptyGet, raw ); + } + + // Sets one value + } else if ( value !== undefined ) { + chainable = true; + + if ( !jQuery.isFunction( value ) ) { + raw = true; + } + + if ( bulk ) { + + // Bulk operations run against the entire set + if ( raw ) { + fn.call( elems, value ); + fn = null; + + // ...except when executing function values + } else { + bulk = fn; + fn = function( elem, key, value ) { + return bulk.call( jQuery( elem ), value ); + }; + } + } + + if ( fn ) { + for ( ; i < len; i++ ) { + fn( + elems[ i ], key, raw ? 
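+ // Raw values are passed straight through; function values are invoked per
+ // element with the index and the current value (read back through the getter form of fn)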
+ value : + value.call( elems[ i ], i, fn( elems[ i ], key ) ) + ); + } + } + } + + if ( chainable ) { + return elems; + } + + // Gets + if ( bulk ) { + return fn.call( elems ); + } + + return len ? fn( elems[ 0 ], key ) : emptyGet; +}; +var acceptData = function( owner ) { + + // Accepts only: + // - Node + // - Node.ELEMENT_NODE + // - Node.DOCUMENT_NODE + // - Object + // - Any + return owner.nodeType === 1 || owner.nodeType === 9 || !( +owner.nodeType ); +}; + + + + +function Data() { + this.expando = jQuery.expando + Data.uid++; +} + +Data.uid = 1; + +Data.prototype = { + + cache: function( owner ) { + + // Check if the owner object already has a cache + var value = owner[ this.expando ]; + + // If not, create one + if ( !value ) { + value = {}; + + // We can accept data for non-element nodes in modern browsers, + // but we should not, see #8335. + // Always return an empty object. + if ( acceptData( owner ) ) { + + // If it is a node unlikely to be stringify-ed or looped over + // use plain assignment + if ( owner.nodeType ) { + owner[ this.expando ] = value; + + // Otherwise secure it in a non-enumerable property + // configurable must be true to allow the property to be + // deleted when data is removed + } else { + Object.defineProperty( owner, this.expando, { + value: value, + configurable: true + } ); + } + } + } + + return value; + }, + set: function( owner, data, value ) { + var prop, + cache = this.cache( owner ); + + // Handle: [ owner, key, value ] args + // Always use camelCase key (gh-2257) + if ( typeof data === "string" ) { + cache[ jQuery.camelCase( data ) ] = value; + + // Handle: [ owner, { properties } ] args + } else { + + // Copy the properties one-by-one to the cache object + for ( prop in data ) { + cache[ jQuery.camelCase( prop ) ] = data[ prop ]; + } + } + return cache; + }, + get: function( owner, key ) { + return key === undefined ? + this.cache( owner ) : + + // Always use camelCase key (gh-2257) + owner[ this.expando ] && owner[ this.expando ][ jQuery.camelCase( key ) ]; + }, + access: function( owner, key, value ) { + + // In cases where either: + // + // 1. No key was specified + // 2. A string key was specified, but no value provided + // + // Take the "read" path and allow the get method to determine + // which value to return, respectively either: + // + // 1. The entire cache object + // 2. The data stored at the key + // + if ( key === undefined || + ( ( key && typeof key === "string" ) && value === undefined ) ) { + + return this.get( owner, key ); + } + + // When the key is not a string, or both a key and value + // are specified, set or extend (existing objects) with either: + // + // 1. An object of properties + // 2. A key and value + // + this.set( owner, key, value ); + + // Since the "set" path can have two possible entry points + // return the expected data based on which path was taken[*] + return value !== undefined ? value : key; + }, + remove: function( owner, key ) { + var i, + cache = owner[ this.expando ]; + + if ( cache === undefined ) { + return; + } + + if ( key !== undefined ) { + + // Support array or space separated string of keys + if ( Array.isArray( key ) ) { + + // If key is an array of keys... + // We always set camelCase keys, so remove that. + key = key.map( jQuery.camelCase ); + } else { + key = jQuery.camelCase( key ); + + // If a key with the spaces exists, use it. + // Otherwise, create an array by matching non-whitespace + key = key in cache ? 
+ [ key ] : + ( key.match( rnothtmlwhite ) || [] ); + } + + i = key.length; + + while ( i-- ) { + delete cache[ key[ i ] ]; + } + } + + // Remove the expando if there's no more data + if ( key === undefined || jQuery.isEmptyObject( cache ) ) { + + // Support: Chrome <=35 - 45 + // Webkit & Blink performance suffers when deleting properties + // from DOM nodes, so set to undefined instead + // https://bugs.chromium.org/p/chromium/issues/detail?id=378607 (bug restricted) + if ( owner.nodeType ) { + owner[ this.expando ] = undefined; + } else { + delete owner[ this.expando ]; + } + } + }, + hasData: function( owner ) { + var cache = owner[ this.expando ]; + return cache !== undefined && !jQuery.isEmptyObject( cache ); + } +}; +var dataPriv = new Data(); + +var dataUser = new Data(); + + + +// Implementation Summary +// +// 1. Enforce API surface and semantic compatibility with 1.9.x branch +// 2. Improve the module's maintainability by reducing the storage +// paths to a single mechanism. +// 3. Use the same single mechanism to support "private" and "user" data. +// 4. _Never_ expose "private" data to user code (TODO: Drop _data, _removeData) +// 5. Avoid exposing implementation details on user objects (eg. expando properties) +// 6. Provide a clear path for implementation upgrade to WeakMap in 2014 + +var rbrace = /^(?:\{[\w\W]*\}|\[[\w\W]*\])$/, + rmultiDash = /[A-Z]/g; + +function getData( data ) { + if ( data === "true" ) { + return true; + } + + if ( data === "false" ) { + return false; + } + + if ( data === "null" ) { + return null; + } + + // Only convert to a number if it doesn't change the string + if ( data === +data + "" ) { + return +data; + } + + if ( rbrace.test( data ) ) { + return JSON.parse( data ); + } + + return data; +} + +function dataAttr( elem, key, data ) { + var name; + + // If nothing was found internally, try to fetch any + // data from the HTML5 data-* attribute + if ( data === undefined && elem.nodeType === 1 ) { + name = "data-" + key.replace( rmultiDash, "-$&" ).toLowerCase(); + data = elem.getAttribute( name ); + + if ( typeof data === "string" ) { + try { + data = getData( data ); + } catch ( e ) {} + + // Make sure we set the data so it isn't changed later + dataUser.set( elem, key, data ); + } else { + data = undefined; + } + } + return data; +} + +jQuery.extend( { + hasData: function( elem ) { + return dataUser.hasData( elem ) || dataPriv.hasData( elem ); + }, + + data: function( elem, name, data ) { + return dataUser.access( elem, name, data ); + }, + + removeData: function( elem, name ) { + dataUser.remove( elem, name ); + }, + + // TODO: Now that all calls to _data and _removeData have been replaced + // with direct calls to dataPriv methods, these can be deprecated. 
+ _data: function( elem, name, data ) { + return dataPriv.access( elem, name, data ); + }, + + _removeData: function( elem, name ) { + dataPriv.remove( elem, name ); + } +} ); + +jQuery.fn.extend( { + data: function( key, value ) { + var i, name, data, + elem = this[ 0 ], + attrs = elem && elem.attributes; + + // Gets all values + if ( key === undefined ) { + if ( this.length ) { + data = dataUser.get( elem ); + + if ( elem.nodeType === 1 && !dataPriv.get( elem, "hasDataAttrs" ) ) { + i = attrs.length; + while ( i-- ) { + + // Support: IE 11 only + // The attrs elements can be null (#14894) + if ( attrs[ i ] ) { + name = attrs[ i ].name; + if ( name.indexOf( "data-" ) === 0 ) { + name = jQuery.camelCase( name.slice( 5 ) ); + dataAttr( elem, name, data[ name ] ); + } + } + } + dataPriv.set( elem, "hasDataAttrs", true ); + } + } + + return data; + } + + // Sets multiple values + if ( typeof key === "object" ) { + return this.each( function() { + dataUser.set( this, key ); + } ); + } + + return access( this, function( value ) { + var data; + + // The calling jQuery object (element matches) is not empty + // (and therefore has an element appears at this[ 0 ]) and the + // `value` parameter was not undefined. An empty jQuery object + // will result in `undefined` for elem = this[ 0 ] which will + // throw an exception if an attempt to read a data cache is made. + if ( elem && value === undefined ) { + + // Attempt to get data from the cache + // The key will always be camelCased in Data + data = dataUser.get( elem, key ); + if ( data !== undefined ) { + return data; + } + + // Attempt to "discover" the data in + // HTML5 custom data-* attrs + data = dataAttr( elem, key ); + if ( data !== undefined ) { + return data; + } + + // We tried really hard, but the data doesn't exist. + return; + } + + // Set the data... 
+ this.each( function() { + + // We always store the camelCased key + dataUser.set( this, key, value ); + } ); + }, null, value, arguments.length > 1, null, true ); + }, + + removeData: function( key ) { + return this.each( function() { + dataUser.remove( this, key ); + } ); + } +} ); + + +jQuery.extend( { + queue: function( elem, type, data ) { + var queue; + + if ( elem ) { + type = ( type || "fx" ) + "queue"; + queue = dataPriv.get( elem, type ); + + // Speed up dequeue by getting out quickly if this is just a lookup + if ( data ) { + if ( !queue || Array.isArray( data ) ) { + queue = dataPriv.access( elem, type, jQuery.makeArray( data ) ); + } else { + queue.push( data ); + } + } + return queue || []; + } + }, + + dequeue: function( elem, type ) { + type = type || "fx"; + + var queue = jQuery.queue( elem, type ), + startLength = queue.length, + fn = queue.shift(), + hooks = jQuery._queueHooks( elem, type ), + next = function() { + jQuery.dequeue( elem, type ); + }; + + // If the fx queue is dequeued, always remove the progress sentinel + if ( fn === "inprogress" ) { + fn = queue.shift(); + startLength--; + } + + if ( fn ) { + + // Add a progress sentinel to prevent the fx queue from being + // automatically dequeued + if ( type === "fx" ) { + queue.unshift( "inprogress" ); + } + + // Clear up the last queue stop function + delete hooks.stop; + fn.call( elem, next, hooks ); + } + + if ( !startLength && hooks ) { + hooks.empty.fire(); + } + }, + + // Not public - generate a queueHooks object, or return the current one + _queueHooks: function( elem, type ) { + var key = type + "queueHooks"; + return dataPriv.get( elem, key ) || dataPriv.access( elem, key, { + empty: jQuery.Callbacks( "once memory" ).add( function() { + dataPriv.remove( elem, [ type + "queue", key ] ); + } ) + } ); + } +} ); + +jQuery.fn.extend( { + queue: function( type, data ) { + var setter = 2; + + if ( typeof type !== "string" ) { + data = type; + type = "fx"; + setter--; + } + + if ( arguments.length < setter ) { + return jQuery.queue( this[ 0 ], type ); + } + + return data === undefined ? 
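+ // The getter case returned above; an explicitly undefined data value is a
+ // no-op, anything else is queued on each element below (and, for the default
+ // "fx" queue, dequeued immediately if nothing is in progress)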
+ this : + this.each( function() { + var queue = jQuery.queue( this, type, data ); + + // Ensure a hooks for this queue + jQuery._queueHooks( this, type ); + + if ( type === "fx" && queue[ 0 ] !== "inprogress" ) { + jQuery.dequeue( this, type ); + } + } ); + }, + dequeue: function( type ) { + return this.each( function() { + jQuery.dequeue( this, type ); + } ); + }, + clearQueue: function( type ) { + return this.queue( type || "fx", [] ); + }, + + // Get a promise resolved when queues of a certain type + // are emptied (fx is the type by default) + promise: function( type, obj ) { + var tmp, + count = 1, + defer = jQuery.Deferred(), + elements = this, + i = this.length, + resolve = function() { + if ( !( --count ) ) { + defer.resolveWith( elements, [ elements ] ); + } + }; + + if ( typeof type !== "string" ) { + obj = type; + type = undefined; + } + type = type || "fx"; + + while ( i-- ) { + tmp = dataPriv.get( elements[ i ], type + "queueHooks" ); + if ( tmp && tmp.empty ) { + count++; + tmp.empty.add( resolve ); + } + } + resolve(); + return defer.promise( obj ); + } +} ); +var pnum = ( /[+-]?(?:\d*\.|)\d+(?:[eE][+-]?\d+|)/ ).source; + +var rcssNum = new RegExp( "^(?:([+-])=|)(" + pnum + ")([a-z%]*)$", "i" ); + + +var cssExpand = [ "Top", "Right", "Bottom", "Left" ]; + +var isHiddenWithinTree = function( elem, el ) { + + // isHiddenWithinTree might be called from jQuery#filter function; + // in that case, element will be second argument + elem = el || elem; + + // Inline style trumps all + return elem.style.display === "none" || + elem.style.display === "" && + + // Otherwise, check computed style + // Support: Firefox <=43 - 45 + // Disconnected elements can have computed display: none, so first confirm that elem is + // in the document. + jQuery.contains( elem.ownerDocument, elem ) && + + jQuery.css( elem, "display" ) === "none"; + }; + +var swap = function( elem, options, callback, args ) { + var ret, name, + old = {}; + + // Remember the old values, and insert the new ones + for ( name in options ) { + old[ name ] = elem.style[ name ]; + elem.style[ name ] = options[ name ]; + } + + ret = callback.apply( elem, args || [] ); + + // Revert the old values + for ( name in options ) { + elem.style[ name ] = old[ name ]; + } + + return ret; +}; + + + + +function adjustCSS( elem, prop, valueParts, tween ) { + var adjusted, + scale = 1, + maxIterations = 20, + currentValue = tween ? + function() { + return tween.cur(); + } : + function() { + return jQuery.css( elem, prop, "" ); + }, + initial = currentValue(), + unit = valueParts && valueParts[ 3 ] || ( jQuery.cssNumber[ prop ] ? "" : "px" ), + + // Starting value computation is required for potential unit mismatches + initialInUnit = ( jQuery.cssNumber[ prop ] || unit !== "px" && +initial ) && + rcssNum.exec( jQuery.css( elem, prop ) ); + + if ( initialInUnit && initialInUnit[ 3 ] !== unit ) { + + // Trust units reported by jQuery.css + unit = unit || initialInUnit[ 3 ]; + + // Make sure we update the tween properties later on + valueParts = valueParts || []; + + // Iteratively approximate from a nonzero starting point + initialInUnit = +initial || 1; + + do { + + // If previous iteration zeroed out, double until we get *something*. 
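+ // (Sketch of the loop: to animate width to "20em" from a computed "160px",
+ // each pass applies the current em guess, remeasures in px and rescales the
+ // guess until the px/initial ratio stops changing.)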
+ // Use string for doubling so we don't accidentally see scale as unchanged below + scale = scale || ".5"; + + // Adjust and apply + initialInUnit = initialInUnit / scale; + jQuery.style( elem, prop, initialInUnit + unit ); + + // Update scale, tolerating zero or NaN from tween.cur() + // Break the loop if scale is unchanged or perfect, or if we've just had enough. + } while ( + scale !== ( scale = currentValue() / initial ) && scale !== 1 && --maxIterations + ); + } + + if ( valueParts ) { + initialInUnit = +initialInUnit || +initial || 0; + + // Apply relative offset (+=/-=) if specified + adjusted = valueParts[ 1 ] ? + initialInUnit + ( valueParts[ 1 ] + 1 ) * valueParts[ 2 ] : + +valueParts[ 2 ]; + if ( tween ) { + tween.unit = unit; + tween.start = initialInUnit; + tween.end = adjusted; + } + } + return adjusted; +} + + +var defaultDisplayMap = {}; + +function getDefaultDisplay( elem ) { + var temp, + doc = elem.ownerDocument, + nodeName = elem.nodeName, + display = defaultDisplayMap[ nodeName ]; + + if ( display ) { + return display; + } + + temp = doc.body.appendChild( doc.createElement( nodeName ) ); + display = jQuery.css( temp, "display" ); + + temp.parentNode.removeChild( temp ); + + if ( display === "none" ) { + display = "block"; + } + defaultDisplayMap[ nodeName ] = display; + + return display; +} + +function showHide( elements, show ) { + var display, elem, + values = [], + index = 0, + length = elements.length; + + // Determine new display value for elements that need to change + for ( ; index < length; index++ ) { + elem = elements[ index ]; + if ( !elem.style ) { + continue; + } + + display = elem.style.display; + if ( show ) { + + // Since we force visibility upon cascade-hidden elements, an immediate (and slow) + // check is required in this first loop unless we have a nonempty display value (either + // inline or about-to-be-restored) + if ( display === "none" ) { + values[ index ] = dataPriv.get( elem, "display" ) || null; + if ( !values[ index ] ) { + elem.style.display = ""; + } + } + if ( elem.style.display === "" && isHiddenWithinTree( elem ) ) { + values[ index ] = getDefaultDisplay( elem ); + } + } else { + if ( display !== "none" ) { + values[ index ] = "none"; + + // Remember what we're overwriting + dataPriv.set( elem, "display", display ); + } + } + } + + // Set the display of the elements in a second loop to avoid constant reflow + for ( index = 0; index < length; index++ ) { + if ( values[ index ] != null ) { + elements[ index ].style.display = values[ index ]; + } + } + + return elements; +} + +jQuery.fn.extend( { + show: function() { + return showHide( this, true ); + }, + hide: function() { + return showHide( this ); + }, + toggle: function( state ) { + if ( typeof state === "boolean" ) { + return state ? this.show() : this.hide(); + } + + return this.each( function() { + if ( isHiddenWithinTree( this ) ) { + jQuery( this ).show(); + } else { + jQuery( this ).hide(); + } + } ); + } +} ); +var rcheckableType = ( /^(?:checkbox|radio)$/i ); + +var rtagName = ( /<([a-z][^\/\0>\x20\t\r\n\f]+)/i ); + +var rscriptType = ( /^$|\/(?:java|ecma)script/i ); + + + +// We have to close these tags to support XHTML (#13200) +var wrapMap = { + + // Support: IE <=9 only + option: [ 1, "" ], + + // XHTML parsers do not magically insert elements in the + // same way that tag soup parsers do. So we cannot shorten + // this by omitting or other required elements. + thead: [ 1, "", "
" ], + col: [ 2, "", "
" ], + tr: [ 2, "", "
" ], + td: [ 3, "", "
" ], + + _default: [ 0, "", "" ] +}; + +// Support: IE <=9 only +wrapMap.optgroup = wrapMap.option; + +wrapMap.tbody = wrapMap.tfoot = wrapMap.colgroup = wrapMap.caption = wrapMap.thead; +wrapMap.th = wrapMap.td; + + +function getAll( context, tag ) { + + // Support: IE <=9 - 11 only + // Use typeof to avoid zero-argument method invocation on host objects (#15151) + var ret; + + if ( typeof context.getElementsByTagName !== "undefined" ) { + ret = context.getElementsByTagName( tag || "*" ); + + } else if ( typeof context.querySelectorAll !== "undefined" ) { + ret = context.querySelectorAll( tag || "*" ); + + } else { + ret = []; + } + + if ( tag === undefined || tag && nodeName( context, tag ) ) { + return jQuery.merge( [ context ], ret ); + } + + return ret; +} + + +// Mark scripts as having already been evaluated +function setGlobalEval( elems, refElements ) { + var i = 0, + l = elems.length; + + for ( ; i < l; i++ ) { + dataPriv.set( + elems[ i ], + "globalEval", + !refElements || dataPriv.get( refElements[ i ], "globalEval" ) + ); + } +} + + +var rhtml = /<|&#?\w+;/; + +function buildFragment( elems, context, scripts, selection, ignored ) { + var elem, tmp, tag, wrap, contains, j, + fragment = context.createDocumentFragment(), + nodes = [], + i = 0, + l = elems.length; + + for ( ; i < l; i++ ) { + elem = elems[ i ]; + + if ( elem || elem === 0 ) { + + // Add nodes directly + if ( jQuery.type( elem ) === "object" ) { + + // Support: Android <=4.0 only, PhantomJS 1 only + // push.apply(_, arraylike) throws on ancient WebKit + jQuery.merge( nodes, elem.nodeType ? [ elem ] : elem ); + + // Convert non-html into a text node + } else if ( !rhtml.test( elem ) ) { + nodes.push( context.createTextNode( elem ) ); + + // Convert html into DOM nodes + } else { + tmp = tmp || fragment.appendChild( context.createElement( "div" ) ); + + // Deserialize a standard representation + tag = ( rtagName.exec( elem ) || [ "", "" ] )[ 1 ].toLowerCase(); + wrap = wrapMap[ tag ] || wrapMap._default; + tmp.innerHTML = wrap[ 1 ] + jQuery.htmlPrefilter( elem ) + wrap[ 2 ]; + + // Descend through wrappers to the right content + j = wrap[ 0 ]; + while ( j-- ) { + tmp = tmp.lastChild; + } + + // Support: Android <=4.0 only, PhantomJS 1 only + // push.apply(_, arraylike) throws on ancient WebKit + jQuery.merge( nodes, tmp.childNodes ); + + // Remember the top-level container + tmp = fragment.firstChild; + + // Ensure the created nodes are orphaned (#12392) + tmp.textContent = ""; + } + } + } + + // Remove wrapper from fragment + fragment.textContent = ""; + + i = 0; + while ( ( elem = nodes[ i++ ] ) ) { + + // Skip elements already in the context collection (trac-4087) + if ( selection && jQuery.inArray( elem, selection ) > -1 ) { + if ( ignored ) { + ignored.push( elem ); + } + continue; + } + + contains = jQuery.contains( elem.ownerDocument, elem ); + + // Append to fragment + tmp = getAll( fragment.appendChild( elem ), "script" ); + + // Preserve script evaluation history + if ( contains ) { + setGlobalEval( tmp ); + } + + // Capture executables + if ( scripts ) { + j = 0; + while ( ( elem = tmp[ j++ ] ) ) { + if ( rscriptType.test( elem.type || "" ) ) { + scripts.push( elem ); + } + } + } + } + + return fragment; +} + + +( function() { + var fragment = document.createDocumentFragment(), + div = fragment.appendChild( document.createElement( "div" ) ), + input = document.createElement( "input" ); + + // Support: Android 4.0 - 4.3 only + // Check state lost if the name is set (#11217) + // Support: Windows Web Apps 
(WWA) + // `name` and `type` must use .setAttribute for WWA (#14901) + input.setAttribute( "type", "radio" ); + input.setAttribute( "checked", "checked" ); + input.setAttribute( "name", "t" ); + + div.appendChild( input ); + + // Support: Android <=4.1 only + // Older WebKit doesn't clone checked state correctly in fragments + support.checkClone = div.cloneNode( true ).cloneNode( true ).lastChild.checked; + + // Support: IE <=11 only + // Make sure textarea (and checkbox) defaultValue is properly cloned + div.innerHTML = ""; + support.noCloneChecked = !!div.cloneNode( true ).lastChild.defaultValue; +} )(); +var documentElement = document.documentElement; + + + +var + rkeyEvent = /^key/, + rmouseEvent = /^(?:mouse|pointer|contextmenu|drag|drop)|click/, + rtypenamespace = /^([^.]*)(?:\.(.+)|)/; + +function returnTrue() { + return true; +} + +function returnFalse() { + return false; +} + +// Support: IE <=9 only +// See #13393 for more info +function safeActiveElement() { + try { + return document.activeElement; + } catch ( err ) { } +} + +function on( elem, types, selector, data, fn, one ) { + var origFn, type; + + // Types can be a map of types/handlers + if ( typeof types === "object" ) { + + // ( types-Object, selector, data ) + if ( typeof selector !== "string" ) { + + // ( types-Object, data ) + data = data || selector; + selector = undefined; + } + for ( type in types ) { + on( elem, type, selector, data, types[ type ], one ); + } + return elem; + } + + if ( data == null && fn == null ) { + + // ( types, fn ) + fn = selector; + data = selector = undefined; + } else if ( fn == null ) { + if ( typeof selector === "string" ) { + + // ( types, selector, fn ) + fn = data; + data = undefined; + } else { + + // ( types, data, fn ) + fn = data; + data = selector; + selector = undefined; + } + } + if ( fn === false ) { + fn = returnFalse; + } else if ( !fn ) { + return elem; + } + + if ( one === 1 ) { + origFn = fn; + fn = function( event ) { + + // Can use an empty set, since event contains the info + jQuery().off( event ); + return origFn.apply( this, arguments ); + }; + + // Use same guid so caller can remove using origFn + fn.guid = origFn.guid || ( origFn.guid = jQuery.guid++ ); + } + return elem.each( function() { + jQuery.event.add( this, types, fn, data, selector ); + } ); +} + +/* + * Helper functions for managing events -- not part of the public interface. + * Props to Dean Edwards' addEvent library for many of the ideas. 
+ */ +jQuery.event = { + + global: {}, + + add: function( elem, types, handler, data, selector ) { + + var handleObjIn, eventHandle, tmp, + events, t, handleObj, + special, handlers, type, namespaces, origType, + elemData = dataPriv.get( elem ); + + // Don't attach events to noData or text/comment nodes (but allow plain objects) + if ( !elemData ) { + return; + } + + // Caller can pass in an object of custom data in lieu of the handler + if ( handler.handler ) { + handleObjIn = handler; + handler = handleObjIn.handler; + selector = handleObjIn.selector; + } + + // Ensure that invalid selectors throw exceptions at attach time + // Evaluate against documentElement in case elem is a non-element node (e.g., document) + if ( selector ) { + jQuery.find.matchesSelector( documentElement, selector ); + } + + // Make sure that the handler has a unique ID, used to find/remove it later + if ( !handler.guid ) { + handler.guid = jQuery.guid++; + } + + // Init the element's event structure and main handler, if this is the first + if ( !( events = elemData.events ) ) { + events = elemData.events = {}; + } + if ( !( eventHandle = elemData.handle ) ) { + eventHandle = elemData.handle = function( e ) { + + // Discard the second event of a jQuery.event.trigger() and + // when an event is called after a page has unloaded + return typeof jQuery !== "undefined" && jQuery.event.triggered !== e.type ? + jQuery.event.dispatch.apply( elem, arguments ) : undefined; + }; + } + + // Handle multiple events separated by a space + types = ( types || "" ).match( rnothtmlwhite ) || [ "" ]; + t = types.length; + while ( t-- ) { + tmp = rtypenamespace.exec( types[ t ] ) || []; + type = origType = tmp[ 1 ]; + namespaces = ( tmp[ 2 ] || "" ).split( "." ).sort(); + + // There *must* be a type, no attaching namespace-only handlers + if ( !type ) { + continue; + } + + // If event changes its type, use the special event handlers for the changed type + special = jQuery.event.special[ type ] || {}; + + // If selector defined, determine special event api type, otherwise given type + type = ( selector ? special.delegateType : special.bindType ) || type; + + // Update special based on newly reset type + special = jQuery.event.special[ type ] || {}; + + // handleObj is passed to all event handlers + handleObj = jQuery.extend( { + type: type, + origType: origType, + data: data, + handler: handler, + guid: handler.guid, + selector: selector, + needsContext: selector && jQuery.expr.match.needsContext.test( selector ), + namespace: namespaces.join( "." 
) + }, handleObjIn ); + + // Init the event handler queue if we're the first + if ( !( handlers = events[ type ] ) ) { + handlers = events[ type ] = []; + handlers.delegateCount = 0; + + // Only use addEventListener if the special events handler returns false + if ( !special.setup || + special.setup.call( elem, data, namespaces, eventHandle ) === false ) { + + if ( elem.addEventListener ) { + elem.addEventListener( type, eventHandle ); + } + } + } + + if ( special.add ) { + special.add.call( elem, handleObj ); + + if ( !handleObj.handler.guid ) { + handleObj.handler.guid = handler.guid; + } + } + + // Add to the element's handler list, delegates in front + if ( selector ) { + handlers.splice( handlers.delegateCount++, 0, handleObj ); + } else { + handlers.push( handleObj ); + } + + // Keep track of which events have ever been used, for event optimization + jQuery.event.global[ type ] = true; + } + + }, + + // Detach an event or set of events from an element + remove: function( elem, types, handler, selector, mappedTypes ) { + + var j, origCount, tmp, + events, t, handleObj, + special, handlers, type, namespaces, origType, + elemData = dataPriv.hasData( elem ) && dataPriv.get( elem ); + + if ( !elemData || !( events = elemData.events ) ) { + return; + } + + // Once for each type.namespace in types; type may be omitted + types = ( types || "" ).match( rnothtmlwhite ) || [ "" ]; + t = types.length; + while ( t-- ) { + tmp = rtypenamespace.exec( types[ t ] ) || []; + type = origType = tmp[ 1 ]; + namespaces = ( tmp[ 2 ] || "" ).split( "." ).sort(); + + // Unbind all events (on this namespace, if provided) for the element + if ( !type ) { + for ( type in events ) { + jQuery.event.remove( elem, type + types[ t ], handler, selector, true ); + } + continue; + } + + special = jQuery.event.special[ type ] || {}; + type = ( selector ? 
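+ // Delegated handlers were stored under the special delegate type
+ // (e.g. "focus" handlers delegate as "focusin"), so look them up the same way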
special.delegateType : special.bindType ) || type; + handlers = events[ type ] || []; + tmp = tmp[ 2 ] && + new RegExp( "(^|\\.)" + namespaces.join( "\\.(?:.*\\.|)" ) + "(\\.|$)" ); + + // Remove matching events + origCount = j = handlers.length; + while ( j-- ) { + handleObj = handlers[ j ]; + + if ( ( mappedTypes || origType === handleObj.origType ) && + ( !handler || handler.guid === handleObj.guid ) && + ( !tmp || tmp.test( handleObj.namespace ) ) && + ( !selector || selector === handleObj.selector || + selector === "**" && handleObj.selector ) ) { + handlers.splice( j, 1 ); + + if ( handleObj.selector ) { + handlers.delegateCount--; + } + if ( special.remove ) { + special.remove.call( elem, handleObj ); + } + } + } + + // Remove generic event handler if we removed something and no more handlers exist + // (avoids potential for endless recursion during removal of special event handlers) + if ( origCount && !handlers.length ) { + if ( !special.teardown || + special.teardown.call( elem, namespaces, elemData.handle ) === false ) { + + jQuery.removeEvent( elem, type, elemData.handle ); + } + + delete events[ type ]; + } + } + + // Remove data and the expando if it's no longer used + if ( jQuery.isEmptyObject( events ) ) { + dataPriv.remove( elem, "handle events" ); + } + }, + + dispatch: function( nativeEvent ) { + + // Make a writable jQuery.Event from the native event object + var event = jQuery.event.fix( nativeEvent ); + + var i, j, ret, matched, handleObj, handlerQueue, + args = new Array( arguments.length ), + handlers = ( dataPriv.get( this, "events" ) || {} )[ event.type ] || [], + special = jQuery.event.special[ event.type ] || {}; + + // Use the fix-ed jQuery.Event rather than the (read-only) native event + args[ 0 ] = event; + + for ( i = 1; i < arguments.length; i++ ) { + args[ i ] = arguments[ i ]; + } + + event.delegateTarget = this; + + // Call the preDispatch hook for the mapped type, and let it bail if desired + if ( special.preDispatch && special.preDispatch.call( this, event ) === false ) { + return; + } + + // Determine handlers + handlerQueue = jQuery.event.handlers.call( this, event, handlers ); + + // Run delegates first; they may want to stop propagation beneath us + i = 0; + while ( ( matched = handlerQueue[ i++ ] ) && !event.isPropagationStopped() ) { + event.currentTarget = matched.elem; + + j = 0; + while ( ( handleObj = matched.handlers[ j++ ] ) && + !event.isImmediatePropagationStopped() ) { + + // Triggered event must either 1) have no namespace, or 2) have namespace(s) + // a subset or equal to those in the bound event (both can have no namespace). 
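+ // e.g. an event triggered as "click.menu" reaches handlers bound as
+ // "click.menu" or "click.menu.extra", but skips ones bound as plain
+ // "click" or as "click.other"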
+ if ( !event.rnamespace || event.rnamespace.test( handleObj.namespace ) ) { + + event.handleObj = handleObj; + event.data = handleObj.data; + + ret = ( ( jQuery.event.special[ handleObj.origType ] || {} ).handle || + handleObj.handler ).apply( matched.elem, args ); + + if ( ret !== undefined ) { + if ( ( event.result = ret ) === false ) { + event.preventDefault(); + event.stopPropagation(); + } + } + } + } + } + + // Call the postDispatch hook for the mapped type + if ( special.postDispatch ) { + special.postDispatch.call( this, event ); + } + + return event.result; + }, + + handlers: function( event, handlers ) { + var i, handleObj, sel, matchedHandlers, matchedSelectors, + handlerQueue = [], + delegateCount = handlers.delegateCount, + cur = event.target; + + // Find delegate handlers + if ( delegateCount && + + // Support: IE <=9 + // Black-hole SVG instance trees (trac-13180) + cur.nodeType && + + // Support: Firefox <=42 + // Suppress spec-violating clicks indicating a non-primary pointer button (trac-3861) + // https://www.w3.org/TR/DOM-Level-3-Events/#event-type-click + // Support: IE 11 only + // ...but not arrow key "clicks" of radio inputs, which can have `button` -1 (gh-2343) + !( event.type === "click" && event.button >= 1 ) ) { + + for ( ; cur !== this; cur = cur.parentNode || this ) { + + // Don't check non-elements (#13208) + // Don't process clicks on disabled elements (#6911, #8165, #11382, #11764) + if ( cur.nodeType === 1 && !( event.type === "click" && cur.disabled === true ) ) { + matchedHandlers = []; + matchedSelectors = {}; + for ( i = 0; i < delegateCount; i++ ) { + handleObj = handlers[ i ]; + + // Don't conflict with Object.prototype properties (#13203) + sel = handleObj.selector + " "; + + if ( matchedSelectors[ sel ] === undefined ) { + matchedSelectors[ sel ] = handleObj.needsContext ? + jQuery( sel, this ).index( cur ) > -1 : + jQuery.find( sel, this, null, [ cur ] ).length; + } + if ( matchedSelectors[ sel ] ) { + matchedHandlers.push( handleObj ); + } + } + if ( matchedHandlers.length ) { + handlerQueue.push( { elem: cur, handlers: matchedHandlers } ); + } + } + } + } + + // Add the remaining (directly-bound) handlers + cur = this; + if ( delegateCount < handlers.length ) { + handlerQueue.push( { elem: cur, handlers: handlers.slice( delegateCount ) } ); + } + + return handlerQueue; + }, + + addProp: function( name, hook ) { + Object.defineProperty( jQuery.Event.prototype, name, { + enumerable: true, + configurable: true, + + get: jQuery.isFunction( hook ) ? + function() { + if ( this.originalEvent ) { + return hook( this.originalEvent ); + } + } : + function() { + if ( this.originalEvent ) { + return this.originalEvent[ name ]; + } + }, + + set: function( value ) { + Object.defineProperty( this, name, { + enumerable: true, + configurable: true, + writable: true, + value: value + } ); + } + } ); + }, + + fix: function( originalEvent ) { + return originalEvent[ jQuery.expando ] ? 
+ originalEvent : + new jQuery.Event( originalEvent ); + }, + + special: { + load: { + + // Prevent triggered image.load events from bubbling to window.load + noBubble: true + }, + focus: { + + // Fire native event if possible so blur/focus sequence is correct + trigger: function() { + if ( this !== safeActiveElement() && this.focus ) { + this.focus(); + return false; + } + }, + delegateType: "focusin" + }, + blur: { + trigger: function() { + if ( this === safeActiveElement() && this.blur ) { + this.blur(); + return false; + } + }, + delegateType: "focusout" + }, + click: { + + // For checkbox, fire native event so checked state will be right + trigger: function() { + if ( this.type === "checkbox" && this.click && nodeName( this, "input" ) ) { + this.click(); + return false; + } + }, + + // For cross-browser consistency, don't fire native .click() on links + _default: function( event ) { + return nodeName( event.target, "a" ); + } + }, + + beforeunload: { + postDispatch: function( event ) { + + // Support: Firefox 20+ + // Firefox doesn't alert if the returnValue field is not set. + if ( event.result !== undefined && event.originalEvent ) { + event.originalEvent.returnValue = event.result; + } + } + } + } +}; + +jQuery.removeEvent = function( elem, type, handle ) { + + // This "if" is needed for plain objects + if ( elem.removeEventListener ) { + elem.removeEventListener( type, handle ); + } +}; + +jQuery.Event = function( src, props ) { + + // Allow instantiation without the 'new' keyword + if ( !( this instanceof jQuery.Event ) ) { + return new jQuery.Event( src, props ); + } + + // Event object + if ( src && src.type ) { + this.originalEvent = src; + this.type = src.type; + + // Events bubbling up the document may have been marked as prevented + // by a handler lower down the tree; reflect the correct value. + this.isDefaultPrevented = src.defaultPrevented || + src.defaultPrevented === undefined && + + // Support: Android <=2.3 only + src.returnValue === false ? + returnTrue : + returnFalse; + + // Create target properties + // Support: Safari <=6 - 7 only + // Target should not be a text node (#504, #13143) + this.target = ( src.target && src.target.nodeType === 3 ) ? 
+ src.target.parentNode : + src.target; + + this.currentTarget = src.currentTarget; + this.relatedTarget = src.relatedTarget; + + // Event type + } else { + this.type = src; + } + + // Put explicitly provided properties onto the event object + if ( props ) { + jQuery.extend( this, props ); + } + + // Create a timestamp if incoming event doesn't have one + this.timeStamp = src && src.timeStamp || jQuery.now(); + + // Mark it as fixed + this[ jQuery.expando ] = true; +}; + +// jQuery.Event is based on DOM3 Events as specified by the ECMAScript Language Binding +// https://www.w3.org/TR/2003/WD-DOM-Level-3-Events-20030331/ecma-script-binding.html +jQuery.Event.prototype = { + constructor: jQuery.Event, + isDefaultPrevented: returnFalse, + isPropagationStopped: returnFalse, + isImmediatePropagationStopped: returnFalse, + isSimulated: false, + + preventDefault: function() { + var e = this.originalEvent; + + this.isDefaultPrevented = returnTrue; + + if ( e && !this.isSimulated ) { + e.preventDefault(); + } + }, + stopPropagation: function() { + var e = this.originalEvent; + + this.isPropagationStopped = returnTrue; + + if ( e && !this.isSimulated ) { + e.stopPropagation(); + } + }, + stopImmediatePropagation: function() { + var e = this.originalEvent; + + this.isImmediatePropagationStopped = returnTrue; + + if ( e && !this.isSimulated ) { + e.stopImmediatePropagation(); + } + + this.stopPropagation(); + } +}; + +// Includes all common event props including KeyEvent and MouseEvent specific props +jQuery.each( { + altKey: true, + bubbles: true, + cancelable: true, + changedTouches: true, + ctrlKey: true, + detail: true, + eventPhase: true, + metaKey: true, + pageX: true, + pageY: true, + shiftKey: true, + view: true, + "char": true, + charCode: true, + key: true, + keyCode: true, + button: true, + buttons: true, + clientX: true, + clientY: true, + offsetX: true, + offsetY: true, + pointerId: true, + pointerType: true, + screenX: true, + screenY: true, + targetTouches: true, + toElement: true, + touches: true, + + which: function( event ) { + var button = event.button; + + // Add which for key events + if ( event.which == null && rkeyEvent.test( event.type ) ) { + return event.charCode != null ? event.charCode : event.keyCode; + } + + // Add which for click: 1 === left; 2 === middle; 3 === right + if ( !event.which && button !== undefined && rmouseEvent.test( event.type ) ) { + if ( button & 1 ) { + return 1; + } + + if ( button & 2 ) { + return 3; + } + + if ( button & 4 ) { + return 2; + } + + return 0; + } + + return event.which; + } +}, jQuery.event.addProp ); + +// Create mouseenter/leave events using mouseover/out and event-time checks +// so that event delegation works in jQuery. +// Do the same for pointerenter/pointerleave and pointerover/pointerout +// +// Support: Safari 7 only +// Safari sends mouseenter too often; see: +// https://bugs.chromium.org/p/chromium/issues/detail?id=470258 +// for the description of the bug (it existed in older Chrome versions as well). +jQuery.each( { + mouseenter: "mouseover", + mouseleave: "mouseout", + pointerenter: "pointerover", + pointerleave: "pointerout" +}, function( orig, fix ) { + jQuery.event.special[ orig ] = { + delegateType: fix, + bindType: fix, + + handle: function( event ) { + var ret, + target = this, + related = event.relatedTarget, + handleObj = event.handleObj; + + // For mouseenter/leave call the handler if related is outside the target. 
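+ // e.g. a mouseover caused by moving between two children of the bound
+ // element has its relatedTarget inside the target, so no synthetic
+ // mouseenter is produced for it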
+ // NB: No relatedTarget if the mouse left/entered the browser window + if ( !related || ( related !== target && !jQuery.contains( target, related ) ) ) { + event.type = handleObj.origType; + ret = handleObj.handler.apply( this, arguments ); + event.type = fix; + } + return ret; + } + }; +} ); + +jQuery.fn.extend( { + + on: function( types, selector, data, fn ) { + return on( this, types, selector, data, fn ); + }, + one: function( types, selector, data, fn ) { + return on( this, types, selector, data, fn, 1 ); + }, + off: function( types, selector, fn ) { + var handleObj, type; + if ( types && types.preventDefault && types.handleObj ) { + + // ( event ) dispatched jQuery.Event + handleObj = types.handleObj; + jQuery( types.delegateTarget ).off( + handleObj.namespace ? + handleObj.origType + "." + handleObj.namespace : + handleObj.origType, + handleObj.selector, + handleObj.handler + ); + return this; + } + if ( typeof types === "object" ) { + + // ( types-object [, selector] ) + for ( type in types ) { + this.off( type, selector, types[ type ] ); + } + return this; + } + if ( selector === false || typeof selector === "function" ) { + + // ( types [, fn] ) + fn = selector; + selector = undefined; + } + if ( fn === false ) { + fn = returnFalse; + } + return this.each( function() { + jQuery.event.remove( this, types, fn, selector ); + } ); + } +} ); + + +var + + /* eslint-disable max-len */ + + // See https://github.com/eslint/eslint/issues/3229 + rxhtmlTag = /<(?!area|br|col|embed|hr|img|input|link|meta|param)(([a-z][^\/\0>\x20\t\r\n\f]*)[^>]*)\/>/gi, + + /* eslint-enable */ + + // Support: IE <=10 - 11, Edge 12 - 13 + // In IE/Edge using regex groups here causes severe slowdowns. + // See https://connect.microsoft.com/IE/feedback/details/1736512/ + rnoInnerhtml = /\s*$/g; + +// Prefer a tbody over its parent table for containing new rows +function manipulationTarget( elem, content ) { + if ( nodeName( elem, "table" ) && + nodeName( content.nodeType !== 11 ? content : content.firstChild, "tr" ) ) { + + return jQuery( ">tbody", elem )[ 0 ] || elem; + } + + return elem; +} + +// Replace/restore the type attribute of script elements for safe DOM manipulation +function disableScript( elem ) { + elem.type = ( elem.getAttribute( "type" ) !== null ) + "/" + elem.type; + return elem; +} +function restoreScript( elem ) { + var match = rscriptTypeMasked.exec( elem.type ); + + if ( match ) { + elem.type = match[ 1 ]; + } else { + elem.removeAttribute( "type" ); + } + + return elem; +} + +function cloneCopyEvent( src, dest ) { + var i, l, type, pdataOld, pdataCur, udataOld, udataCur, events; + + if ( dest.nodeType !== 1 ) { + return; + } + + // 1. Copy private data: events, handlers, etc. + if ( dataPriv.hasData( src ) ) { + pdataOld = dataPriv.access( src ); + pdataCur = dataPriv.set( dest, pdataOld ); + events = pdataOld.events; + + if ( events ) { + delete pdataCur.handle; + pdataCur.events = {}; + + for ( type in events ) { + for ( i = 0, l = events[ type ].length; i < l; i++ ) { + jQuery.event.add( dest, type, events[ type ][ i ] ); + } + } + } + } + + // 2. Copy user data + if ( dataUser.hasData( src ) ) { + udataOld = dataUser.access( src ); + udataCur = jQuery.extend( {}, udataOld ); + + dataUser.set( dest, udataCur ); + } +} + +// Fix IE bugs, see support tests +function fixInput( src, dest ) { + var nodeName = dest.nodeName.toLowerCase(); + + // Fails to persist the checked state of a cloned checkbox or radio button. 
+ if ( nodeName === "input" && rcheckableType.test( src.type ) ) { + dest.checked = src.checked; + + // Fails to return the selected option to the default selected state when cloning options + } else if ( nodeName === "input" || nodeName === "textarea" ) { + dest.defaultValue = src.defaultValue; + } +} + +function domManip( collection, args, callback, ignored ) { + + // Flatten any nested arrays + args = concat.apply( [], args ); + + var fragment, first, scripts, hasScripts, node, doc, + i = 0, + l = collection.length, + iNoClone = l - 1, + value = args[ 0 ], + isFunction = jQuery.isFunction( value ); + + // We can't cloneNode fragments that contain checked, in WebKit + if ( isFunction || + ( l > 1 && typeof value === "string" && + !support.checkClone && rchecked.test( value ) ) ) { + return collection.each( function( index ) { + var self = collection.eq( index ); + if ( isFunction ) { + args[ 0 ] = value.call( this, index, self.html() ); + } + domManip( self, args, callback, ignored ); + } ); + } + + if ( l ) { + fragment = buildFragment( args, collection[ 0 ].ownerDocument, false, collection, ignored ); + first = fragment.firstChild; + + if ( fragment.childNodes.length === 1 ) { + fragment = first; + } + + // Require either new content or an interest in ignored elements to invoke the callback + if ( first || ignored ) { + scripts = jQuery.map( getAll( fragment, "script" ), disableScript ); + hasScripts = scripts.length; + + // Use the original fragment for the last item + // instead of the first because it can end up + // being emptied incorrectly in certain situations (#8070). + for ( ; i < l; i++ ) { + node = fragment; + + if ( i !== iNoClone ) { + node = jQuery.clone( node, true, true ); + + // Keep references to cloned scripts for later restoration + if ( hasScripts ) { + + // Support: Android <=4.0 only, PhantomJS 1 only + // push.apply(_, arraylike) throws on ancient WebKit + jQuery.merge( scripts, getAll( node, "script" ) ); + } + } + + callback.call( collection[ i ], node, i ); + } + + if ( hasScripts ) { + doc = scripts[ scripts.length - 1 ].ownerDocument; + + // Reenable scripts + jQuery.map( scripts, restoreScript ); + + // Evaluate executable scripts on first document insertion + for ( i = 0; i < hasScripts; i++ ) { + node = scripts[ i ]; + if ( rscriptType.test( node.type || "" ) && + !dataPriv.access( node, "globalEval" ) && + jQuery.contains( doc, node ) ) { + + if ( node.src ) { + + // Optional AJAX dependency, but won't run scripts if not present + if ( jQuery._evalUrl ) { + jQuery._evalUrl( node.src ); + } + } else { + DOMEval( node.textContent.replace( rcleanScript, "" ), doc ); + } + } + } + } + } + } + + return collection; +} + +function remove( elem, selector, keepData ) { + var node, + nodes = selector ? 
jQuery.filter( selector, elem ) : elem, + i = 0; + + for ( ; ( node = nodes[ i ] ) != null; i++ ) { + if ( !keepData && node.nodeType === 1 ) { + jQuery.cleanData( getAll( node ) ); + } + + if ( node.parentNode ) { + if ( keepData && jQuery.contains( node.ownerDocument, node ) ) { + setGlobalEval( getAll( node, "script" ) ); + } + node.parentNode.removeChild( node ); + } + } + + return elem; +} + +jQuery.extend( { + htmlPrefilter: function( html ) { + return html.replace( rxhtmlTag, "<$1>" ); + }, + + clone: function( elem, dataAndEvents, deepDataAndEvents ) { + var i, l, srcElements, destElements, + clone = elem.cloneNode( true ), + inPage = jQuery.contains( elem.ownerDocument, elem ); + + // Fix IE cloning issues + if ( !support.noCloneChecked && ( elem.nodeType === 1 || elem.nodeType === 11 ) && + !jQuery.isXMLDoc( elem ) ) { + + // We eschew Sizzle here for performance reasons: https://jsperf.com/getall-vs-sizzle/2 + destElements = getAll( clone ); + srcElements = getAll( elem ); + + for ( i = 0, l = srcElements.length; i < l; i++ ) { + fixInput( srcElements[ i ], destElements[ i ] ); + } + } + + // Copy the events from the original to the clone + if ( dataAndEvents ) { + if ( deepDataAndEvents ) { + srcElements = srcElements || getAll( elem ); + destElements = destElements || getAll( clone ); + + for ( i = 0, l = srcElements.length; i < l; i++ ) { + cloneCopyEvent( srcElements[ i ], destElements[ i ] ); + } + } else { + cloneCopyEvent( elem, clone ); + } + } + + // Preserve script evaluation history + destElements = getAll( clone, "script" ); + if ( destElements.length > 0 ) { + setGlobalEval( destElements, !inPage && getAll( elem, "script" ) ); + } + + // Return the cloned set + return clone; + }, + + cleanData: function( elems ) { + var data, elem, type, + special = jQuery.event.special, + i = 0; + + for ( ; ( elem = elems[ i ] ) !== undefined; i++ ) { + if ( acceptData( elem ) ) { + if ( ( data = elem[ dataPriv.expando ] ) ) { + if ( data.events ) { + for ( type in data.events ) { + if ( special[ type ] ) { + jQuery.event.remove( elem, type ); + + // This is a shortcut to avoid jQuery.event.remove's overhead + } else { + jQuery.removeEvent( elem, type, data.handle ); + } + } + } + + // Support: Chrome <=35 - 45+ + // Assign undefined instead of using delete, see Data#remove + elem[ dataPriv.expando ] = undefined; + } + if ( elem[ dataUser.expando ] ) { + + // Support: Chrome <=35 - 45+ + // Assign undefined instead of using delete, see Data#remove + elem[ dataUser.expando ] = undefined; + } + } + } + } +} ); + +jQuery.fn.extend( { + detach: function( selector ) { + return remove( this, selector, true ); + }, + + remove: function( selector ) { + return remove( this, selector ); + }, + + text: function( value ) { + return access( this, function( value ) { + return value === undefined ? 
+ jQuery.text( this ) : + this.empty().each( function() { + if ( this.nodeType === 1 || this.nodeType === 11 || this.nodeType === 9 ) { + this.textContent = value; + } + } ); + }, null, value, arguments.length ); + }, + + append: function() { + return domManip( this, arguments, function( elem ) { + if ( this.nodeType === 1 || this.nodeType === 11 || this.nodeType === 9 ) { + var target = manipulationTarget( this, elem ); + target.appendChild( elem ); + } + } ); + }, + + prepend: function() { + return domManip( this, arguments, function( elem ) { + if ( this.nodeType === 1 || this.nodeType === 11 || this.nodeType === 9 ) { + var target = manipulationTarget( this, elem ); + target.insertBefore( elem, target.firstChild ); + } + } ); + }, + + before: function() { + return domManip( this, arguments, function( elem ) { + if ( this.parentNode ) { + this.parentNode.insertBefore( elem, this ); + } + } ); + }, + + after: function() { + return domManip( this, arguments, function( elem ) { + if ( this.parentNode ) { + this.parentNode.insertBefore( elem, this.nextSibling ); + } + } ); + }, + + empty: function() { + var elem, + i = 0; + + for ( ; ( elem = this[ i ] ) != null; i++ ) { + if ( elem.nodeType === 1 ) { + + // Prevent memory leaks + jQuery.cleanData( getAll( elem, false ) ); + + // Remove any remaining nodes + elem.textContent = ""; + } + } + + return this; + }, + + clone: function( dataAndEvents, deepDataAndEvents ) { + dataAndEvents = dataAndEvents == null ? false : dataAndEvents; + deepDataAndEvents = deepDataAndEvents == null ? dataAndEvents : deepDataAndEvents; + + return this.map( function() { + return jQuery.clone( this, dataAndEvents, deepDataAndEvents ); + } ); + }, + + html: function( value ) { + return access( this, function( value ) { + var elem = this[ 0 ] || {}, + i = 0, + l = this.length; + + if ( value === undefined && elem.nodeType === 1 ) { + return elem.innerHTML; + } + + // See if we can take a shortcut and just use innerHTML + if ( typeof value === "string" && !rnoInnerhtml.test( value ) && + !wrapMap[ ( rtagName.exec( value ) || [ "", "" ] )[ 1 ].toLowerCase() ] ) { + + value = jQuery.htmlPrefilter( value ); + + try { + for ( ; i < l; i++ ) { + elem = this[ i ] || {}; + + // Remove element nodes and prevent memory leaks + if ( elem.nodeType === 1 ) { + jQuery.cleanData( getAll( elem, false ) ); + elem.innerHTML = value; + } + } + + elem = 0; + + // If using innerHTML throws an exception, use the fallback method + } catch ( e ) {} + } + + if ( elem ) { + this.empty().append( value ); + } + }, null, value, arguments.length ); + }, + + replaceWith: function() { + var ignored = []; + + // Make the changes, replacing each non-ignored context element with the new content + return domManip( this, arguments, function( elem ) { + var parent = this.parentNode; + + if ( jQuery.inArray( this, ignored ) < 0 ) { + jQuery.cleanData( getAll( this ) ); + if ( parent ) { + parent.replaceChild( elem, this ); + } + } + + // Force callback invocation + }, ignored ); + } +} ); + +jQuery.each( { + appendTo: "append", + prependTo: "prepend", + insertBefore: "before", + insertAfter: "after", + replaceAll: "replaceWith" +}, function( name, original ) { + jQuery.fn[ name ] = function( selector ) { + var elems, + ret = [], + insert = jQuery( selector ), + last = insert.length - 1, + i = 0; + + for ( ; i <= last; i++ ) { + elems = i === last ? 
this : this.clone( true ); + jQuery( insert[ i ] )[ original ]( elems ); + + // Support: Android <=4.0 only, PhantomJS 1 only + // .get() because push.apply(_, arraylike) throws on ancient WebKit + push.apply( ret, elems.get() ); + } + + return this.pushStack( ret ); + }; +} ); +var rmargin = ( /^margin/ ); + +var rnumnonpx = new RegExp( "^(" + pnum + ")(?!px)[a-z%]+$", "i" ); + +var getStyles = function( elem ) { + + // Support: IE <=11 only, Firefox <=30 (#15098, #14150) + // IE throws on elements created in popups + // FF meanwhile throws on frame elements through "defaultView.getComputedStyle" + var view = elem.ownerDocument.defaultView; + + if ( !view || !view.opener ) { + view = window; + } + + return view.getComputedStyle( elem ); + }; + + + +( function() { + + // Executing both pixelPosition & boxSizingReliable tests require only one layout + // so they're executed at the same time to save the second computation. + function computeStyleTests() { + + // This is a singleton, we need to execute it only once + if ( !div ) { + return; + } + + div.style.cssText = + "box-sizing:border-box;" + + "position:relative;display:block;" + + "margin:auto;border:1px;padding:1px;" + + "top:1%;width:50%"; + div.innerHTML = ""; + documentElement.appendChild( container ); + + var divStyle = window.getComputedStyle( div ); + pixelPositionVal = divStyle.top !== "1%"; + + // Support: Android 4.0 - 4.3 only, Firefox <=3 - 44 + reliableMarginLeftVal = divStyle.marginLeft === "2px"; + boxSizingReliableVal = divStyle.width === "4px"; + + // Support: Android 4.0 - 4.3 only + // Some styles come back with percentage values, even though they shouldn't + div.style.marginRight = "50%"; + pixelMarginRightVal = divStyle.marginRight === "4px"; + + documentElement.removeChild( container ); + + // Nullify the div so it wouldn't be stored in the memory and + // it will also be a sign that checks already performed + div = null; + } + + var pixelPositionVal, boxSizingReliableVal, pixelMarginRightVal, reliableMarginLeftVal, + container = document.createElement( "div" ), + div = document.createElement( "div" ); + + // Finish early in limited (non-browser) environments + if ( !div.style ) { + return; + } + + // Support: IE <=9 - 11 only + // Style of cloned element affects source element cloned (#8908) + div.style.backgroundClip = "content-box"; + div.cloneNode( true ).style.backgroundClip = ""; + support.clearCloneStyle = div.style.backgroundClip === "content-box"; + + container.style.cssText = "border:0;width:8px;height:0;top:0;left:-9999px;" + + "padding:0;margin-top:1px;position:absolute"; + container.appendChild( div ); + + jQuery.extend( support, { + pixelPosition: function() { + computeStyleTests(); + return pixelPositionVal; + }, + boxSizingReliable: function() { + computeStyleTests(); + return boxSizingReliableVal; + }, + pixelMarginRight: function() { + computeStyleTests(); + return pixelMarginRightVal; + }, + reliableMarginLeft: function() { + computeStyleTests(); + return reliableMarginLeftVal; + } + } ); +} )(); + + +function curCSS( elem, name, computed ) { + var width, minWidth, maxWidth, ret, + + // Support: Firefox 51+ + // Retrieving style before computed somehow + // fixes an issue with getting wrong values + // on detached elements + style = elem.style; + + computed = computed || getStyles( elem ); + + // getPropertyValue is needed for: + // .css('filter') (IE 9 only, #12537) + // .css('--customProperty) (#3144) + if ( computed ) { + ret = computed.getPropertyValue( name ) || computed[ name ]; + + if ( 
ret === "" && !jQuery.contains( elem.ownerDocument, elem ) ) { + ret = jQuery.style( elem, name ); + } + + // A tribute to the "awesome hack by Dean Edwards" + // Android Browser returns percentage for some values, + // but width seems to be reliably pixels. + // This is against the CSSOM draft spec: + // https://drafts.csswg.org/cssom/#resolved-values + if ( !support.pixelMarginRight() && rnumnonpx.test( ret ) && rmargin.test( name ) ) { + + // Remember the original values + width = style.width; + minWidth = style.minWidth; + maxWidth = style.maxWidth; + + // Put in the new values to get a computed value out + style.minWidth = style.maxWidth = style.width = ret; + ret = computed.width; + + // Revert the changed values + style.width = width; + style.minWidth = minWidth; + style.maxWidth = maxWidth; + } + } + + return ret !== undefined ? + + // Support: IE <=9 - 11 only + // IE returns zIndex value as an integer. + ret + "" : + ret; +} + + +function addGetHookIf( conditionFn, hookFn ) { + + // Define the hook, we'll check on the first run if it's really needed. + return { + get: function() { + if ( conditionFn() ) { + + // Hook not needed (or it's not possible to use it due + // to missing dependency), remove it. + delete this.get; + return; + } + + // Hook needed; redefine it so that the support test is not executed again. + return ( this.get = hookFn ).apply( this, arguments ); + } + }; +} + + +var + + // Swappable if display is none or starts with table + // except "table", "table-cell", or "table-caption" + // See here for display values: https://developer.mozilla.org/en-US/docs/CSS/display + rdisplayswap = /^(none|table(?!-c[ea]).+)/, + rcustomProp = /^--/, + cssShow = { position: "absolute", visibility: "hidden", display: "block" }, + cssNormalTransform = { + letterSpacing: "0", + fontWeight: "400" + }, + + cssPrefixes = [ "Webkit", "Moz", "ms" ], + emptyStyle = document.createElement( "div" ).style; + +// Return a css property mapped to a potentially vendor prefixed property +function vendorPropName( name ) { + + // Shortcut for names that are not vendor prefixed + if ( name in emptyStyle ) { + return name; + } + + // Check for vendor prefixed names + var capName = name[ 0 ].toUpperCase() + name.slice( 1 ), + i = cssPrefixes.length; + + while ( i-- ) { + name = cssPrefixes[ i ] + capName; + if ( name in emptyStyle ) { + return name; + } + } +} + +// Return a property mapped along what jQuery.cssProps suggests or to +// a vendor prefixed property. +function finalPropName( name ) { + var ret = jQuery.cssProps[ name ]; + if ( !ret ) { + ret = jQuery.cssProps[ name ] = vendorPropName( name ) || name; + } + return ret; +} + +function setPositiveNumber( elem, value, subtract ) { + + // Any relative (+/-) values have already been + // normalized at this point + var matches = rcssNum.exec( value ); + return matches ? + + // Guard against undefined "subtract", e.g., when used as in cssHooks + Math.max( 0, matches[ 2 ] - ( subtract || 0 ) ) + ( matches[ 3 ] || "px" ) : + value; +} + +function augmentWidthOrHeight( elem, name, extra, isBorderBox, styles ) { + var i, + val = 0; + + // If we already have the right measurement, avoid augmentation + if ( extra === ( isBorderBox ? "border" : "content" ) ) { + i = 4; + + // Otherwise initialize for horizontal or vertical properties + } else { + i = name === "width" ? 
1 : 0; + } + + for ( ; i < 4; i += 2 ) { + + // Both box models exclude margin, so add it if we want it + if ( extra === "margin" ) { + val += jQuery.css( elem, extra + cssExpand[ i ], true, styles ); + } + + if ( isBorderBox ) { + + // border-box includes padding, so remove it if we want content + if ( extra === "content" ) { + val -= jQuery.css( elem, "padding" + cssExpand[ i ], true, styles ); + } + + // At this point, extra isn't border nor margin, so remove border + if ( extra !== "margin" ) { + val -= jQuery.css( elem, "border" + cssExpand[ i ] + "Width", true, styles ); + } + } else { + + // At this point, extra isn't content, so add padding + val += jQuery.css( elem, "padding" + cssExpand[ i ], true, styles ); + + // At this point, extra isn't content nor padding, so add border + if ( extra !== "padding" ) { + val += jQuery.css( elem, "border" + cssExpand[ i ] + "Width", true, styles ); + } + } + } + + return val; +} + +function getWidthOrHeight( elem, name, extra ) { + + // Start with computed style + var valueIsBorderBox, + styles = getStyles( elem ), + val = curCSS( elem, name, styles ), + isBorderBox = jQuery.css( elem, "boxSizing", false, styles ) === "border-box"; + + // Computed unit is not pixels. Stop here and return. + if ( rnumnonpx.test( val ) ) { + return val; + } + + // Check for style in case a browser which returns unreliable values + // for getComputedStyle silently falls back to the reliable elem.style + valueIsBorderBox = isBorderBox && + ( support.boxSizingReliable() || val === elem.style[ name ] ); + + // Fall back to offsetWidth/Height when value is "auto" + // This happens for inline elements with no explicit setting (gh-3571) + if ( val === "auto" ) { + val = elem[ "offset" + name[ 0 ].toUpperCase() + name.slice( 1 ) ]; + } + + // Normalize "", auto, and prepare for extra + val = parseFloat( val ) || 0; + + // Use the active box-sizing model to add/subtract irrelevant styles + return ( val + + augmentWidthOrHeight( + elem, + name, + extra || ( isBorderBox ? "border" : "content" ), + valueIsBorderBox, + styles + ) + ) + "px"; +} + +jQuery.extend( { + + // Add in style property hooks for overriding the default + // behavior of getting and setting a style property + cssHooks: { + opacity: { + get: function( elem, computed ) { + if ( computed ) { + + // We should always get a number back from opacity + var ret = curCSS( elem, "opacity" ); + return ret === "" ? "1" : ret; + } + } + } + }, + + // Don't automatically add "px" to these possibly-unitless properties + cssNumber: { + "animationIterationCount": true, + "columnCount": true, + "fillOpacity": true, + "flexGrow": true, + "flexShrink": true, + "fontWeight": true, + "lineHeight": true, + "opacity": true, + "order": true, + "orphans": true, + "widows": true, + "zIndex": true, + "zoom": true + }, + + // Add in properties whose names you wish to fix before + // setting or getting the value + cssProps: { + "float": "cssFloat" + }, + + // Get and set the style property on a DOM Node + style: function( elem, name, value, extra ) { + + // Don't set styles on text and comment nodes + if ( !elem || elem.nodeType === 3 || elem.nodeType === 8 || !elem.style ) { + return; + } + + // Make sure that we're working with the right name + var ret, type, hooks, + origName = jQuery.camelCase( name ), + isCustomProp = rcustomProp.test( name ), + style = elem.style; + + // Make sure that we're working with the right name. We don't + // want to query the value if it is a CSS custom property + // since they are user-defined. 
+ if ( !isCustomProp ) { + name = finalPropName( origName ); + } + + // Gets hook for the prefixed version, then unprefixed version + hooks = jQuery.cssHooks[ name ] || jQuery.cssHooks[ origName ]; + + // Check if we're setting a value + if ( value !== undefined ) { + type = typeof value; + + // Convert "+=" or "-=" to relative numbers (#7345) + if ( type === "string" && ( ret = rcssNum.exec( value ) ) && ret[ 1 ] ) { + value = adjustCSS( elem, name, ret ); + + // Fixes bug #9237 + type = "number"; + } + + // Make sure that null and NaN values aren't set (#7116) + if ( value == null || value !== value ) { + return; + } + + // If a number was passed in, add the unit (except for certain CSS properties) + if ( type === "number" ) { + value += ret && ret[ 3 ] || ( jQuery.cssNumber[ origName ] ? "" : "px" ); + } + + // background-* props affect original clone's values + if ( !support.clearCloneStyle && value === "" && name.indexOf( "background" ) === 0 ) { + style[ name ] = "inherit"; + } + + // If a hook was provided, use that value, otherwise just set the specified value + if ( !hooks || !( "set" in hooks ) || + ( value = hooks.set( elem, value, extra ) ) !== undefined ) { + + if ( isCustomProp ) { + style.setProperty( name, value ); + } else { + style[ name ] = value; + } + } + + } else { + + // If a hook was provided get the non-computed value from there + if ( hooks && "get" in hooks && + ( ret = hooks.get( elem, false, extra ) ) !== undefined ) { + + return ret; + } + + // Otherwise just get the value from the style object + return style[ name ]; + } + }, + + css: function( elem, name, extra, styles ) { + var val, num, hooks, + origName = jQuery.camelCase( name ), + isCustomProp = rcustomProp.test( name ); + + // Make sure that we're working with the right name. We don't + // want to modify the value if it is a CSS custom property + // since they are user-defined. + if ( !isCustomProp ) { + name = finalPropName( origName ); + } + + // Try prefixed name followed by the unprefixed name + hooks = jQuery.cssHooks[ name ] || jQuery.cssHooks[ origName ]; + + // If a hook was provided get the computed value from there + if ( hooks && "get" in hooks ) { + val = hooks.get( elem, true, extra ); + } + + // Otherwise, if a way to get the computed value exists, use that + if ( val === undefined ) { + val = curCSS( elem, name, styles ); + } + + // Convert "normal" to computed value + if ( val === "normal" && name in cssNormalTransform ) { + val = cssNormalTransform[ name ]; + } + + // Make numeric if forced or a qualifier was provided and val looks numeric + if ( extra === "" || extra ) { + num = parseFloat( val ); + return extra === true || isFinite( num ) ? num || 0 : val; + } + + return val; + } +} ); + +jQuery.each( [ "height", "width" ], function( i, name ) { + jQuery.cssHooks[ name ] = { + get: function( elem, computed, extra ) { + if ( computed ) { + + // Certain elements can have dimension info if we invisibly show them + // but it must have a current display style that would benefit + return rdisplayswap.test( jQuery.css( elem, "display" ) ) && + + // Support: Safari 8+ + // Table columns in Safari have non-zero offsetWidth & zero + // getBoundingClientRect().width unless display is changed. + // Support: IE <=11 only + // Running getBoundingClientRect on a disconnected node + // in IE throws an error. + ( !elem.getClientRects().length || !elem.getBoundingClientRect().width ) ? 
+ swap( elem, cssShow, function() { + return getWidthOrHeight( elem, name, extra ); + } ) : + getWidthOrHeight( elem, name, extra ); + } + }, + + set: function( elem, value, extra ) { + var matches, + styles = extra && getStyles( elem ), + subtract = extra && augmentWidthOrHeight( + elem, + name, + extra, + jQuery.css( elem, "boxSizing", false, styles ) === "border-box", + styles + ); + + // Convert to pixels if value adjustment is needed + if ( subtract && ( matches = rcssNum.exec( value ) ) && + ( matches[ 3 ] || "px" ) !== "px" ) { + + elem.style[ name ] = value; + value = jQuery.css( elem, name ); + } + + return setPositiveNumber( elem, value, subtract ); + } + }; +} ); + +jQuery.cssHooks.marginLeft = addGetHookIf( support.reliableMarginLeft, + function( elem, computed ) { + if ( computed ) { + return ( parseFloat( curCSS( elem, "marginLeft" ) ) || + elem.getBoundingClientRect().left - + swap( elem, { marginLeft: 0 }, function() { + return elem.getBoundingClientRect().left; + } ) + ) + "px"; + } + } +); + +// These hooks are used by animate to expand properties +jQuery.each( { + margin: "", + padding: "", + border: "Width" +}, function( prefix, suffix ) { + jQuery.cssHooks[ prefix + suffix ] = { + expand: function( value ) { + var i = 0, + expanded = {}, + + // Assumes a single number if not a string + parts = typeof value === "string" ? value.split( " " ) : [ value ]; + + for ( ; i < 4; i++ ) { + expanded[ prefix + cssExpand[ i ] + suffix ] = + parts[ i ] || parts[ i - 2 ] || parts[ 0 ]; + } + + return expanded; + } + }; + + if ( !rmargin.test( prefix ) ) { + jQuery.cssHooks[ prefix + suffix ].set = setPositiveNumber; + } +} ); + +jQuery.fn.extend( { + css: function( name, value ) { + return access( this, function( elem, name, value ) { + var styles, len, + map = {}, + i = 0; + + if ( Array.isArray( name ) ) { + styles = getStyles( elem ); + len = name.length; + + for ( ; i < len; i++ ) { + map[ name[ i ] ] = jQuery.css( elem, name[ i ], false, styles ); + } + + return map; + } + + return value !== undefined ? + jQuery.style( elem, name, value ) : + jQuery.css( elem, name ); + }, name, value, arguments.length > 1 ); + } +} ); + + +function Tween( elem, options, prop, end, easing ) { + return new Tween.prototype.init( elem, options, prop, end, easing ); +} +jQuery.Tween = Tween; + +Tween.prototype = { + constructor: Tween, + init: function( elem, options, prop, end, easing, unit ) { + this.elem = elem; + this.prop = prop; + this.easing = easing || jQuery.easing._default; + this.options = options; + this.start = this.now = this.cur(); + this.end = end; + this.unit = unit || ( jQuery.cssNumber[ prop ] ? "" : "px" ); + }, + cur: function() { + var hooks = Tween.propHooks[ this.prop ]; + + return hooks && hooks.get ? 
+ hooks.get( this ) : + Tween.propHooks._default.get( this ); + }, + run: function( percent ) { + var eased, + hooks = Tween.propHooks[ this.prop ]; + + if ( this.options.duration ) { + this.pos = eased = jQuery.easing[ this.easing ]( + percent, this.options.duration * percent, 0, 1, this.options.duration + ); + } else { + this.pos = eased = percent; + } + this.now = ( this.end - this.start ) * eased + this.start; + + if ( this.options.step ) { + this.options.step.call( this.elem, this.now, this ); + } + + if ( hooks && hooks.set ) { + hooks.set( this ); + } else { + Tween.propHooks._default.set( this ); + } + return this; + } +}; + +Tween.prototype.init.prototype = Tween.prototype; + +Tween.propHooks = { + _default: { + get: function( tween ) { + var result; + + // Use a property on the element directly when it is not a DOM element, + // or when there is no matching style property that exists. + if ( tween.elem.nodeType !== 1 || + tween.elem[ tween.prop ] != null && tween.elem.style[ tween.prop ] == null ) { + return tween.elem[ tween.prop ]; + } + + // Passing an empty string as a 3rd parameter to .css will automatically + // attempt a parseFloat and fallback to a string if the parse fails. + // Simple values such as "10px" are parsed to Float; + // complex values such as "rotate(1rad)" are returned as-is. + result = jQuery.css( tween.elem, tween.prop, "" ); + + // Empty strings, null, undefined and "auto" are converted to 0. + return !result || result === "auto" ? 0 : result; + }, + set: function( tween ) { + + // Use step hook for back compat. + // Use cssHook if its there. + // Use .style if available and use plain properties where available. + if ( jQuery.fx.step[ tween.prop ] ) { + jQuery.fx.step[ tween.prop ]( tween ); + } else if ( tween.elem.nodeType === 1 && + ( tween.elem.style[ jQuery.cssProps[ tween.prop ] ] != null || + jQuery.cssHooks[ tween.prop ] ) ) { + jQuery.style( tween.elem, tween.prop, tween.now + tween.unit ); + } else { + tween.elem[ tween.prop ] = tween.now; + } + } + } +}; + +// Support: IE <=9 only +// Panic based approach to setting things on disconnected nodes +Tween.propHooks.scrollTop = Tween.propHooks.scrollLeft = { + set: function( tween ) { + if ( tween.elem.nodeType && tween.elem.parentNode ) { + tween.elem[ tween.prop ] = tween.now; + } + } +}; + +jQuery.easing = { + linear: function( p ) { + return p; + }, + swing: function( p ) { + return 0.5 - Math.cos( p * Math.PI ) / 2; + }, + _default: "swing" +}; + +jQuery.fx = Tween.prototype.init; + +// Back compat <1.8 extension point +jQuery.fx.step = {}; + + + + +var + fxNow, inProgress, + rfxtypes = /^(?:toggle|show|hide)$/, + rrun = /queueHooks$/; + +function schedule() { + if ( inProgress ) { + if ( document.hidden === false && window.requestAnimationFrame ) { + window.requestAnimationFrame( schedule ); + } else { + window.setTimeout( schedule, jQuery.fx.interval ); + } + + jQuery.fx.tick(); + } +} + +// Animations created synchronously will run synchronously +function createFxNow() { + window.setTimeout( function() { + fxNow = undefined; + } ); + return ( fxNow = jQuery.now() ); +} + +// Generate parameters to create a standard animation +function genFx( type, includeWidth ) { + var which, + i = 0, + attrs = { height: type }; + + // If we include width, step value is 1 to do all cssExpand values, + // otherwise step value is 2 to skip over Left and Right + includeWidth = includeWidth ? 
1 : 0; + for ( ; i < 4; i += 2 - includeWidth ) { + which = cssExpand[ i ]; + attrs[ "margin" + which ] = attrs[ "padding" + which ] = type; + } + + if ( includeWidth ) { + attrs.opacity = attrs.width = type; + } + + return attrs; +} + +function createTween( value, prop, animation ) { + var tween, + collection = ( Animation.tweeners[ prop ] || [] ).concat( Animation.tweeners[ "*" ] ), + index = 0, + length = collection.length; + for ( ; index < length; index++ ) { + if ( ( tween = collection[ index ].call( animation, prop, value ) ) ) { + + // We're done with this property + return tween; + } + } +} + +function defaultPrefilter( elem, props, opts ) { + var prop, value, toggle, hooks, oldfire, propTween, restoreDisplay, display, + isBox = "width" in props || "height" in props, + anim = this, + orig = {}, + style = elem.style, + hidden = elem.nodeType && isHiddenWithinTree( elem ), + dataShow = dataPriv.get( elem, "fxshow" ); + + // Queue-skipping animations hijack the fx hooks + if ( !opts.queue ) { + hooks = jQuery._queueHooks( elem, "fx" ); + if ( hooks.unqueued == null ) { + hooks.unqueued = 0; + oldfire = hooks.empty.fire; + hooks.empty.fire = function() { + if ( !hooks.unqueued ) { + oldfire(); + } + }; + } + hooks.unqueued++; + + anim.always( function() { + + // Ensure the complete handler is called before this completes + anim.always( function() { + hooks.unqueued--; + if ( !jQuery.queue( elem, "fx" ).length ) { + hooks.empty.fire(); + } + } ); + } ); + } + + // Detect show/hide animations + for ( prop in props ) { + value = props[ prop ]; + if ( rfxtypes.test( value ) ) { + delete props[ prop ]; + toggle = toggle || value === "toggle"; + if ( value === ( hidden ? "hide" : "show" ) ) { + + // Pretend to be hidden if this is a "show" and + // there is still data from a stopped show/hide + if ( value === "show" && dataShow && dataShow[ prop ] !== undefined ) { + hidden = true; + + // Ignore all other no-op show/hide data + } else { + continue; + } + } + orig[ prop ] = dataShow && dataShow[ prop ] || jQuery.style( elem, prop ); + } + } + + // Bail out if this is a no-op like .hide().hide() + propTween = !jQuery.isEmptyObject( props ); + if ( !propTween && jQuery.isEmptyObject( orig ) ) { + return; + } + + // Restrict "overflow" and "display" styles during box animations + if ( isBox && elem.nodeType === 1 ) { + + // Support: IE <=9 - 11, Edge 12 - 13 + // Record all 3 overflow attributes because IE does not infer the shorthand + // from identically-valued overflowX and overflowY + opts.overflow = [ style.overflow, style.overflowX, style.overflowY ]; + + // Identify a display type, preferring old show/hide data over the CSS cascade + restoreDisplay = dataShow && dataShow.display; + if ( restoreDisplay == null ) { + restoreDisplay = dataPriv.get( elem, "display" ); + } + display = jQuery.css( elem, "display" ); + if ( display === "none" ) { + if ( restoreDisplay ) { + display = restoreDisplay; + } else { + + // Get nonempty value(s) by temporarily forcing visibility + showHide( [ elem ], true ); + restoreDisplay = elem.style.display || restoreDisplay; + display = jQuery.css( elem, "display" ); + showHide( [ elem ] ); + } + } + + // Animate inline elements as inline-block + if ( display === "inline" || display === "inline-block" && restoreDisplay != null ) { + if ( jQuery.css( elem, "float" ) === "none" ) { + + // Restore the original display value at the end of pure show/hide animations + if ( !propTween ) { + anim.done( function() { + style.display = restoreDisplay; + } ); + if ( 
restoreDisplay == null ) { + display = style.display; + restoreDisplay = display === "none" ? "" : display; + } + } + style.display = "inline-block"; + } + } + } + + if ( opts.overflow ) { + style.overflow = "hidden"; + anim.always( function() { + style.overflow = opts.overflow[ 0 ]; + style.overflowX = opts.overflow[ 1 ]; + style.overflowY = opts.overflow[ 2 ]; + } ); + } + + // Implement show/hide animations + propTween = false; + for ( prop in orig ) { + + // General show/hide setup for this element animation + if ( !propTween ) { + if ( dataShow ) { + if ( "hidden" in dataShow ) { + hidden = dataShow.hidden; + } + } else { + dataShow = dataPriv.access( elem, "fxshow", { display: restoreDisplay } ); + } + + // Store hidden/visible for toggle so `.stop().toggle()` "reverses" + if ( toggle ) { + dataShow.hidden = !hidden; + } + + // Show elements before animating them + if ( hidden ) { + showHide( [ elem ], true ); + } + + /* eslint-disable no-loop-func */ + + anim.done( function() { + + /* eslint-enable no-loop-func */ + + // The final step of a "hide" animation is actually hiding the element + if ( !hidden ) { + showHide( [ elem ] ); + } + dataPriv.remove( elem, "fxshow" ); + for ( prop in orig ) { + jQuery.style( elem, prop, orig[ prop ] ); + } + } ); + } + + // Per-property setup + propTween = createTween( hidden ? dataShow[ prop ] : 0, prop, anim ); + if ( !( prop in dataShow ) ) { + dataShow[ prop ] = propTween.start; + if ( hidden ) { + propTween.end = propTween.start; + propTween.start = 0; + } + } + } +} + +function propFilter( props, specialEasing ) { + var index, name, easing, value, hooks; + + // camelCase, specialEasing and expand cssHook pass + for ( index in props ) { + name = jQuery.camelCase( index ); + easing = specialEasing[ name ]; + value = props[ index ]; + if ( Array.isArray( value ) ) { + easing = value[ 1 ]; + value = props[ index ] = value[ 0 ]; + } + + if ( index !== name ) { + props[ name ] = value; + delete props[ index ]; + } + + hooks = jQuery.cssHooks[ name ]; + if ( hooks && "expand" in hooks ) { + value = hooks.expand( value ); + delete props[ name ]; + + // Not quite $.extend, this won't overwrite existing keys. 
+ // Reusing 'index' because we have the correct "name" + for ( index in value ) { + if ( !( index in props ) ) { + props[ index ] = value[ index ]; + specialEasing[ index ] = easing; + } + } + } else { + specialEasing[ name ] = easing; + } + } +} + +function Animation( elem, properties, options ) { + var result, + stopped, + index = 0, + length = Animation.prefilters.length, + deferred = jQuery.Deferred().always( function() { + + // Don't match elem in the :animated selector + delete tick.elem; + } ), + tick = function() { + if ( stopped ) { + return false; + } + var currentTime = fxNow || createFxNow(), + remaining = Math.max( 0, animation.startTime + animation.duration - currentTime ), + + // Support: Android 2.3 only + // Archaic crash bug won't allow us to use `1 - ( 0.5 || 0 )` (#12497) + temp = remaining / animation.duration || 0, + percent = 1 - temp, + index = 0, + length = animation.tweens.length; + + for ( ; index < length; index++ ) { + animation.tweens[ index ].run( percent ); + } + + deferred.notifyWith( elem, [ animation, percent, remaining ] ); + + // If there's more to do, yield + if ( percent < 1 && length ) { + return remaining; + } + + // If this was an empty animation, synthesize a final progress notification + if ( !length ) { + deferred.notifyWith( elem, [ animation, 1, 0 ] ); + } + + // Resolve the animation and report its conclusion + deferred.resolveWith( elem, [ animation ] ); + return false; + }, + animation = deferred.promise( { + elem: elem, + props: jQuery.extend( {}, properties ), + opts: jQuery.extend( true, { + specialEasing: {}, + easing: jQuery.easing._default + }, options ), + originalProperties: properties, + originalOptions: options, + startTime: fxNow || createFxNow(), + duration: options.duration, + tweens: [], + createTween: function( prop, end ) { + var tween = jQuery.Tween( elem, animation.opts, prop, end, + animation.opts.specialEasing[ prop ] || animation.opts.easing ); + animation.tweens.push( tween ); + return tween; + }, + stop: function( gotoEnd ) { + var index = 0, + + // If we are going to the end, we want to run all the tweens + // otherwise we skip this part + length = gotoEnd ? 
animation.tweens.length : 0; + if ( stopped ) { + return this; + } + stopped = true; + for ( ; index < length; index++ ) { + animation.tweens[ index ].run( 1 ); + } + + // Resolve when we played the last frame; otherwise, reject + if ( gotoEnd ) { + deferred.notifyWith( elem, [ animation, 1, 0 ] ); + deferred.resolveWith( elem, [ animation, gotoEnd ] ); + } else { + deferred.rejectWith( elem, [ animation, gotoEnd ] ); + } + return this; + } + } ), + props = animation.props; + + propFilter( props, animation.opts.specialEasing ); + + for ( ; index < length; index++ ) { + result = Animation.prefilters[ index ].call( animation, elem, props, animation.opts ); + if ( result ) { + if ( jQuery.isFunction( result.stop ) ) { + jQuery._queueHooks( animation.elem, animation.opts.queue ).stop = + jQuery.proxy( result.stop, result ); + } + return result; + } + } + + jQuery.map( props, createTween, animation ); + + if ( jQuery.isFunction( animation.opts.start ) ) { + animation.opts.start.call( elem, animation ); + } + + // Attach callbacks from options + animation + .progress( animation.opts.progress ) + .done( animation.opts.done, animation.opts.complete ) + .fail( animation.opts.fail ) + .always( animation.opts.always ); + + jQuery.fx.timer( + jQuery.extend( tick, { + elem: elem, + anim: animation, + queue: animation.opts.queue + } ) + ); + + return animation; +} + +jQuery.Animation = jQuery.extend( Animation, { + + tweeners: { + "*": [ function( prop, value ) { + var tween = this.createTween( prop, value ); + adjustCSS( tween.elem, prop, rcssNum.exec( value ), tween ); + return tween; + } ] + }, + + tweener: function( props, callback ) { + if ( jQuery.isFunction( props ) ) { + callback = props; + props = [ "*" ]; + } else { + props = props.match( rnothtmlwhite ); + } + + var prop, + index = 0, + length = props.length; + + for ( ; index < length; index++ ) { + prop = props[ index ]; + Animation.tweeners[ prop ] = Animation.tweeners[ prop ] || []; + Animation.tweeners[ prop ].unshift( callback ); + } + }, + + prefilters: [ defaultPrefilter ], + + prefilter: function( callback, prepend ) { + if ( prepend ) { + Animation.prefilters.unshift( callback ); + } else { + Animation.prefilters.push( callback ); + } + } +} ); + +jQuery.speed = function( speed, easing, fn ) { + var opt = speed && typeof speed === "object" ? 
jQuery.extend( {}, speed ) : { + complete: fn || !fn && easing || + jQuery.isFunction( speed ) && speed, + duration: speed, + easing: fn && easing || easing && !jQuery.isFunction( easing ) && easing + }; + + // Go to the end state if fx are off + if ( jQuery.fx.off ) { + opt.duration = 0; + + } else { + if ( typeof opt.duration !== "number" ) { + if ( opt.duration in jQuery.fx.speeds ) { + opt.duration = jQuery.fx.speeds[ opt.duration ]; + + } else { + opt.duration = jQuery.fx.speeds._default; + } + } + } + + // Normalize opt.queue - true/undefined/null -> "fx" + if ( opt.queue == null || opt.queue === true ) { + opt.queue = "fx"; + } + + // Queueing + opt.old = opt.complete; + + opt.complete = function() { + if ( jQuery.isFunction( opt.old ) ) { + opt.old.call( this ); + } + + if ( opt.queue ) { + jQuery.dequeue( this, opt.queue ); + } + }; + + return opt; +}; + +jQuery.fn.extend( { + fadeTo: function( speed, to, easing, callback ) { + + // Show any hidden elements after setting opacity to 0 + return this.filter( isHiddenWithinTree ).css( "opacity", 0 ).show() + + // Animate to the value specified + .end().animate( { opacity: to }, speed, easing, callback ); + }, + animate: function( prop, speed, easing, callback ) { + var empty = jQuery.isEmptyObject( prop ), + optall = jQuery.speed( speed, easing, callback ), + doAnimation = function() { + + // Operate on a copy of prop so per-property easing won't be lost + var anim = Animation( this, jQuery.extend( {}, prop ), optall ); + + // Empty animations, or finishing resolves immediately + if ( empty || dataPriv.get( this, "finish" ) ) { + anim.stop( true ); + } + }; + doAnimation.finish = doAnimation; + + return empty || optall.queue === false ? + this.each( doAnimation ) : + this.queue( optall.queue, doAnimation ); + }, + stop: function( type, clearQueue, gotoEnd ) { + var stopQueue = function( hooks ) { + var stop = hooks.stop; + delete hooks.stop; + stop( gotoEnd ); + }; + + if ( typeof type !== "string" ) { + gotoEnd = clearQueue; + clearQueue = type; + type = undefined; + } + if ( clearQueue && type !== false ) { + this.queue( type || "fx", [] ); + } + + return this.each( function() { + var dequeue = true, + index = type != null && type + "queueHooks", + timers = jQuery.timers, + data = dataPriv.get( this ); + + if ( index ) { + if ( data[ index ] && data[ index ].stop ) { + stopQueue( data[ index ] ); + } + } else { + for ( index in data ) { + if ( data[ index ] && data[ index ].stop && rrun.test( index ) ) { + stopQueue( data[ index ] ); + } + } + } + + for ( index = timers.length; index--; ) { + if ( timers[ index ].elem === this && + ( type == null || timers[ index ].queue === type ) ) { + + timers[ index ].anim.stop( gotoEnd ); + dequeue = false; + timers.splice( index, 1 ); + } + } + + // Start the next in the queue if the last step wasn't forced. + // Timers currently will call their complete callbacks, which + // will dequeue but only if they were gotoEnd. + if ( dequeue || !gotoEnd ) { + jQuery.dequeue( this, type ); + } + } ); + }, + finish: function( type ) { + if ( type !== false ) { + type = type || "fx"; + } + return this.each( function() { + var index, + data = dataPriv.get( this ), + queue = data[ type + "queue" ], + hooks = data[ type + "queueHooks" ], + timers = jQuery.timers, + length = queue ? 
queue.length : 0; + + // Enable finishing flag on private data + data.finish = true; + + // Empty the queue first + jQuery.queue( this, type, [] ); + + if ( hooks && hooks.stop ) { + hooks.stop.call( this, true ); + } + + // Look for any active animations, and finish them + for ( index = timers.length; index--; ) { + if ( timers[ index ].elem === this && timers[ index ].queue === type ) { + timers[ index ].anim.stop( true ); + timers.splice( index, 1 ); + } + } + + // Look for any animations in the old queue and finish them + for ( index = 0; index < length; index++ ) { + if ( queue[ index ] && queue[ index ].finish ) { + queue[ index ].finish.call( this ); + } + } + + // Turn off finishing flag + delete data.finish; + } ); + } +} ); + +jQuery.each( [ "toggle", "show", "hide" ], function( i, name ) { + var cssFn = jQuery.fn[ name ]; + jQuery.fn[ name ] = function( speed, easing, callback ) { + return speed == null || typeof speed === "boolean" ? + cssFn.apply( this, arguments ) : + this.animate( genFx( name, true ), speed, easing, callback ); + }; +} ); + +// Generate shortcuts for custom animations +jQuery.each( { + slideDown: genFx( "show" ), + slideUp: genFx( "hide" ), + slideToggle: genFx( "toggle" ), + fadeIn: { opacity: "show" }, + fadeOut: { opacity: "hide" }, + fadeToggle: { opacity: "toggle" } +}, function( name, props ) { + jQuery.fn[ name ] = function( speed, easing, callback ) { + return this.animate( props, speed, easing, callback ); + }; +} ); + +jQuery.timers = []; +jQuery.fx.tick = function() { + var timer, + i = 0, + timers = jQuery.timers; + + fxNow = jQuery.now(); + + for ( ; i < timers.length; i++ ) { + timer = timers[ i ]; + + // Run the timer and safely remove it when done (allowing for external removal) + if ( !timer() && timers[ i ] === timer ) { + timers.splice( i--, 1 ); + } + } + + if ( !timers.length ) { + jQuery.fx.stop(); + } + fxNow = undefined; +}; + +jQuery.fx.timer = function( timer ) { + jQuery.timers.push( timer ); + jQuery.fx.start(); +}; + +jQuery.fx.interval = 13; +jQuery.fx.start = function() { + if ( inProgress ) { + return; + } + + inProgress = true; + schedule(); +}; + +jQuery.fx.stop = function() { + inProgress = null; +}; + +jQuery.fx.speeds = { + slow: 600, + fast: 200, + + // Default speed + _default: 400 +}; + + +// Based off of the plugin by Clint Helfers, with permission. +// https://web.archive.org/web/20100324014747/http://blindsignals.com/index.php/2009/07/jquery-delay/ +jQuery.fn.delay = function( time, type ) { + time = jQuery.fx ? 
jQuery.fx.speeds[ time ] || time : time; + type = type || "fx"; + + return this.queue( type, function( next, hooks ) { + var timeout = window.setTimeout( next, time ); + hooks.stop = function() { + window.clearTimeout( timeout ); + }; + } ); +}; + + +( function() { + var input = document.createElement( "input" ), + select = document.createElement( "select" ), + opt = select.appendChild( document.createElement( "option" ) ); + + input.type = "checkbox"; + + // Support: Android <=4.3 only + // Default value for a checkbox should be "on" + support.checkOn = input.value !== ""; + + // Support: IE <=11 only + // Must access selectedIndex to make default options select + support.optSelected = opt.selected; + + // Support: IE <=11 only + // An input loses its value after becoming a radio + input = document.createElement( "input" ); + input.value = "t"; + input.type = "radio"; + support.radioValue = input.value === "t"; +} )(); + + +var boolHook, + attrHandle = jQuery.expr.attrHandle; + +jQuery.fn.extend( { + attr: function( name, value ) { + return access( this, jQuery.attr, name, value, arguments.length > 1 ); + }, + + removeAttr: function( name ) { + return this.each( function() { + jQuery.removeAttr( this, name ); + } ); + } +} ); + +jQuery.extend( { + attr: function( elem, name, value ) { + var ret, hooks, + nType = elem.nodeType; + + // Don't get/set attributes on text, comment and attribute nodes + if ( nType === 3 || nType === 8 || nType === 2 ) { + return; + } + + // Fallback to prop when attributes are not supported + if ( typeof elem.getAttribute === "undefined" ) { + return jQuery.prop( elem, name, value ); + } + + // Attribute hooks are determined by the lowercase version + // Grab necessary hook if one is defined + if ( nType !== 1 || !jQuery.isXMLDoc( elem ) ) { + hooks = jQuery.attrHooks[ name.toLowerCase() ] || + ( jQuery.expr.match.bool.test( name ) ? boolHook : undefined ); + } + + if ( value !== undefined ) { + if ( value === null ) { + jQuery.removeAttr( elem, name ); + return; + } + + if ( hooks && "set" in hooks && + ( ret = hooks.set( elem, value, name ) ) !== undefined ) { + return ret; + } + + elem.setAttribute( name, value + "" ); + return value; + } + + if ( hooks && "get" in hooks && ( ret = hooks.get( elem, name ) ) !== null ) { + return ret; + } + + ret = jQuery.find.attr( elem, name ); + + // Non-existent attributes return null, we normalize to undefined + return ret == null ? 
undefined : ret; + }, + + attrHooks: { + type: { + set: function( elem, value ) { + if ( !support.radioValue && value === "radio" && + nodeName( elem, "input" ) ) { + var val = elem.value; + elem.setAttribute( "type", value ); + if ( val ) { + elem.value = val; + } + return value; + } + } + } + }, + + removeAttr: function( elem, value ) { + var name, + i = 0, + + // Attribute names can contain non-HTML whitespace characters + // https://html.spec.whatwg.org/multipage/syntax.html#attributes-2 + attrNames = value && value.match( rnothtmlwhite ); + + if ( attrNames && elem.nodeType === 1 ) { + while ( ( name = attrNames[ i++ ] ) ) { + elem.removeAttribute( name ); + } + } + } +} ); + +// Hooks for boolean attributes +boolHook = { + set: function( elem, value, name ) { + if ( value === false ) { + + // Remove boolean attributes when set to false + jQuery.removeAttr( elem, name ); + } else { + elem.setAttribute( name, name ); + } + return name; + } +}; + +jQuery.each( jQuery.expr.match.bool.source.match( /\w+/g ), function( i, name ) { + var getter = attrHandle[ name ] || jQuery.find.attr; + + attrHandle[ name ] = function( elem, name, isXML ) { + var ret, handle, + lowercaseName = name.toLowerCase(); + + if ( !isXML ) { + + // Avoid an infinite loop by temporarily removing this function from the getter + handle = attrHandle[ lowercaseName ]; + attrHandle[ lowercaseName ] = ret; + ret = getter( elem, name, isXML ) != null ? + lowercaseName : + null; + attrHandle[ lowercaseName ] = handle; + } + return ret; + }; +} ); + + + + +var rfocusable = /^(?:input|select|textarea|button)$/i, + rclickable = /^(?:a|area)$/i; + +jQuery.fn.extend( { + prop: function( name, value ) { + return access( this, jQuery.prop, name, value, arguments.length > 1 ); + }, + + removeProp: function( name ) { + return this.each( function() { + delete this[ jQuery.propFix[ name ] || name ]; + } ); + } +} ); + +jQuery.extend( { + prop: function( elem, name, value ) { + var ret, hooks, + nType = elem.nodeType; + + // Don't get/set properties on text, comment and attribute nodes + if ( nType === 3 || nType === 8 || nType === 2 ) { + return; + } + + if ( nType !== 1 || !jQuery.isXMLDoc( elem ) ) { + + // Fix name and attach hooks + name = jQuery.propFix[ name ] || name; + hooks = jQuery.propHooks[ name ]; + } + + if ( value !== undefined ) { + if ( hooks && "set" in hooks && + ( ret = hooks.set( elem, value, name ) ) !== undefined ) { + return ret; + } + + return ( elem[ name ] = value ); + } + + if ( hooks && "get" in hooks && ( ret = hooks.get( elem, name ) ) !== null ) { + return ret; + } + + return elem[ name ]; + }, + + propHooks: { + tabIndex: { + get: function( elem ) { + + // Support: IE <=9 - 11 only + // elem.tabIndex doesn't always return the + // correct value when it hasn't been explicitly set + // https://web.archive.org/web/20141116233347/http://fluidproject.org/blog/2008/01/09/getting-setting-and-removing-tabindex-values-with-javascript/ + // Use proper attribute retrieval(#12072) + var tabindex = jQuery.find.attr( elem, "tabindex" ); + + if ( tabindex ) { + return parseInt( tabindex, 10 ); + } + + if ( + rfocusable.test( elem.nodeName ) || + rclickable.test( elem.nodeName ) && + elem.href + ) { + return 0; + } + + return -1; + } + } + }, + + propFix: { + "for": "htmlFor", + "class": "className" + } +} ); + +// Support: IE <=11 only +// Accessing the selectedIndex property +// forces the browser to respect setting selected +// on the option +// The getter ensures a default option is selected +// when in an 
optgroup +// eslint rule "no-unused-expressions" is disabled for this code +// since it considers such accessions noop +if ( !support.optSelected ) { + jQuery.propHooks.selected = { + get: function( elem ) { + + /* eslint no-unused-expressions: "off" */ + + var parent = elem.parentNode; + if ( parent && parent.parentNode ) { + parent.parentNode.selectedIndex; + } + return null; + }, + set: function( elem ) { + + /* eslint no-unused-expressions: "off" */ + + var parent = elem.parentNode; + if ( parent ) { + parent.selectedIndex; + + if ( parent.parentNode ) { + parent.parentNode.selectedIndex; + } + } + } + }; +} + +jQuery.each( [ + "tabIndex", + "readOnly", + "maxLength", + "cellSpacing", + "cellPadding", + "rowSpan", + "colSpan", + "useMap", + "frameBorder", + "contentEditable" +], function() { + jQuery.propFix[ this.toLowerCase() ] = this; +} ); + + + + + // Strip and collapse whitespace according to HTML spec + // https://html.spec.whatwg.org/multipage/infrastructure.html#strip-and-collapse-whitespace + function stripAndCollapse( value ) { + var tokens = value.match( rnothtmlwhite ) || []; + return tokens.join( " " ); + } + + +function getClass( elem ) { + return elem.getAttribute && elem.getAttribute( "class" ) || ""; +} + +jQuery.fn.extend( { + addClass: function( value ) { + var classes, elem, cur, curValue, clazz, j, finalValue, + i = 0; + + if ( jQuery.isFunction( value ) ) { + return this.each( function( j ) { + jQuery( this ).addClass( value.call( this, j, getClass( this ) ) ); + } ); + } + + if ( typeof value === "string" && value ) { + classes = value.match( rnothtmlwhite ) || []; + + while ( ( elem = this[ i++ ] ) ) { + curValue = getClass( elem ); + cur = elem.nodeType === 1 && ( " " + stripAndCollapse( curValue ) + " " ); + + if ( cur ) { + j = 0; + while ( ( clazz = classes[ j++ ] ) ) { + if ( cur.indexOf( " " + clazz + " " ) < 0 ) { + cur += clazz + " "; + } + } + + // Only assign if different to avoid unneeded rendering. + finalValue = stripAndCollapse( cur ); + if ( curValue !== finalValue ) { + elem.setAttribute( "class", finalValue ); + } + } + } + } + + return this; + }, + + removeClass: function( value ) { + var classes, elem, cur, curValue, clazz, j, finalValue, + i = 0; + + if ( jQuery.isFunction( value ) ) { + return this.each( function( j ) { + jQuery( this ).removeClass( value.call( this, j, getClass( this ) ) ); + } ); + } + + if ( !arguments.length ) { + return this.attr( "class", "" ); + } + + if ( typeof value === "string" && value ) { + classes = value.match( rnothtmlwhite ) || []; + + while ( ( elem = this[ i++ ] ) ) { + curValue = getClass( elem ); + + // This expression is here for better compressibility (see addClass) + cur = elem.nodeType === 1 && ( " " + stripAndCollapse( curValue ) + " " ); + + if ( cur ) { + j = 0; + while ( ( clazz = classes[ j++ ] ) ) { + + // Remove *all* instances + while ( cur.indexOf( " " + clazz + " " ) > -1 ) { + cur = cur.replace( " " + clazz + " ", " " ); + } + } + + // Only assign if different to avoid unneeded rendering. + finalValue = stripAndCollapse( cur ); + if ( curValue !== finalValue ) { + elem.setAttribute( "class", finalValue ); + } + } + } + } + + return this; + }, + + toggleClass: function( value, stateVal ) { + var type = typeof value; + + if ( typeof stateVal === "boolean" && type === "string" ) { + return stateVal ? 
this.addClass( value ) : this.removeClass( value ); + } + + if ( jQuery.isFunction( value ) ) { + return this.each( function( i ) { + jQuery( this ).toggleClass( + value.call( this, i, getClass( this ), stateVal ), + stateVal + ); + } ); + } + + return this.each( function() { + var className, i, self, classNames; + + if ( type === "string" ) { + + // Toggle individual class names + i = 0; + self = jQuery( this ); + classNames = value.match( rnothtmlwhite ) || []; + + while ( ( className = classNames[ i++ ] ) ) { + + // Check each className given, space separated list + if ( self.hasClass( className ) ) { + self.removeClass( className ); + } else { + self.addClass( className ); + } + } + + // Toggle whole class name + } else if ( value === undefined || type === "boolean" ) { + className = getClass( this ); + if ( className ) { + + // Store className if set + dataPriv.set( this, "__className__", className ); + } + + // If the element has a class name or if we're passed `false`, + // then remove the whole classname (if there was one, the above saved it). + // Otherwise bring back whatever was previously saved (if anything), + // falling back to the empty string if nothing was stored. + if ( this.setAttribute ) { + this.setAttribute( "class", + className || value === false ? + "" : + dataPriv.get( this, "__className__" ) || "" + ); + } + } + } ); + }, + + hasClass: function( selector ) { + var className, elem, + i = 0; + + className = " " + selector + " "; + while ( ( elem = this[ i++ ] ) ) { + if ( elem.nodeType === 1 && + ( " " + stripAndCollapse( getClass( elem ) ) + " " ).indexOf( className ) > -1 ) { + return true; + } + } + + return false; + } +} ); + + + + +var rreturn = /\r/g; + +jQuery.fn.extend( { + val: function( value ) { + var hooks, ret, isFunction, + elem = this[ 0 ]; + + if ( !arguments.length ) { + if ( elem ) { + hooks = jQuery.valHooks[ elem.type ] || + jQuery.valHooks[ elem.nodeName.toLowerCase() ]; + + if ( hooks && + "get" in hooks && + ( ret = hooks.get( elem, "value" ) ) !== undefined + ) { + return ret; + } + + ret = elem.value; + + // Handle most common string cases + if ( typeof ret === "string" ) { + return ret.replace( rreturn, "" ); + } + + // Handle cases where value is null/undef or number + return ret == null ? "" : ret; + } + + return; + } + + isFunction = jQuery.isFunction( value ); + + return this.each( function( i ) { + var val; + + if ( this.nodeType !== 1 ) { + return; + } + + if ( isFunction ) { + val = value.call( this, i, jQuery( this ).val() ); + } else { + val = value; + } + + // Treat null/undefined as ""; convert numbers to string + if ( val == null ) { + val = ""; + + } else if ( typeof val === "number" ) { + val += ""; + + } else if ( Array.isArray( val ) ) { + val = jQuery.map( val, function( value ) { + return value == null ? "" : value + ""; + } ); + } + + hooks = jQuery.valHooks[ this.type ] || jQuery.valHooks[ this.nodeName.toLowerCase() ]; + + // If set returns undefined, fall back to normal setting + if ( !hooks || !( "set" in hooks ) || hooks.set( this, val, "value" ) === undefined ) { + this.value = val; + } + } ); + } +} ); + +jQuery.extend( { + valHooks: { + option: { + get: function( elem ) { + + var val = jQuery.find.attr( elem, "value" ); + return val != null ? 
+ val : + + // Support: IE <=10 - 11 only + // option.text throws exceptions (#14686, #14858) + // Strip and collapse whitespace + // https://html.spec.whatwg.org/#strip-and-collapse-whitespace + stripAndCollapse( jQuery.text( elem ) ); + } + }, + select: { + get: function( elem ) { + var value, option, i, + options = elem.options, + index = elem.selectedIndex, + one = elem.type === "select-one", + values = one ? null : [], + max = one ? index + 1 : options.length; + + if ( index < 0 ) { + i = max; + + } else { + i = one ? index : 0; + } + + // Loop through all the selected options + for ( ; i < max; i++ ) { + option = options[ i ]; + + // Support: IE <=9 only + // IE8-9 doesn't update selected after form reset (#2551) + if ( ( option.selected || i === index ) && + + // Don't return options that are disabled or in a disabled optgroup + !option.disabled && + ( !option.parentNode.disabled || + !nodeName( option.parentNode, "optgroup" ) ) ) { + + // Get the specific value for the option + value = jQuery( option ).val(); + + // We don't need an array for one selects + if ( one ) { + return value; + } + + // Multi-Selects return an array + values.push( value ); + } + } + + return values; + }, + + set: function( elem, value ) { + var optionSet, option, + options = elem.options, + values = jQuery.makeArray( value ), + i = options.length; + + while ( i-- ) { + option = options[ i ]; + + /* eslint-disable no-cond-assign */ + + if ( option.selected = + jQuery.inArray( jQuery.valHooks.option.get( option ), values ) > -1 + ) { + optionSet = true; + } + + /* eslint-enable no-cond-assign */ + } + + // Force browsers to behave consistently when non-matching value is set + if ( !optionSet ) { + elem.selectedIndex = -1; + } + return values; + } + } + } +} ); + +// Radios and checkboxes getter/setter +jQuery.each( [ "radio", "checkbox" ], function() { + jQuery.valHooks[ this ] = { + set: function( elem, value ) { + if ( Array.isArray( value ) ) { + return ( elem.checked = jQuery.inArray( jQuery( elem ).val(), value ) > -1 ); + } + } + }; + if ( !support.checkOn ) { + jQuery.valHooks[ this ].get = function( elem ) { + return elem.getAttribute( "value" ) === null ? "on" : elem.value; + }; + } +} ); + + + + +// Return jQuery for attributes-only inclusion + + +var rfocusMorph = /^(?:focusinfocus|focusoutblur)$/; + +jQuery.extend( jQuery.event, { + + trigger: function( event, data, elem, onlyHandlers ) { + + var i, cur, tmp, bubbleType, ontype, handle, special, + eventPath = [ elem || document ], + type = hasOwn.call( event, "type" ) ? event.type : event, + namespaces = hasOwn.call( event, "namespace" ) ? event.namespace.split( "." ) : []; + + cur = tmp = elem = elem || document; + + // Don't do events on text and comment nodes + if ( elem.nodeType === 3 || elem.nodeType === 8 ) { + return; + } + + // focus/blur morphs to focusin/out; ensure we're not firing them right now + if ( rfocusMorph.test( type + jQuery.event.triggered ) ) { + return; + } + + if ( type.indexOf( "." ) > -1 ) { + + // Namespaced trigger; create a regexp to match event type in handle() + namespaces = type.split( "." ); + type = namespaces.shift(); + namespaces.sort(); + } + ontype = type.indexOf( ":" ) < 0 && "on" + type; + + // Caller can pass in a jQuery.Event object, Object, or just an event type string + event = event[ jQuery.expando ] ? + event : + new jQuery.Event( type, typeof event === "object" && event ); + + // Trigger bitmask: & 1 for native handlers; & 2 for jQuery (always true) + event.isTrigger = onlyHandlers ? 
2 : 3; + event.namespace = namespaces.join( "." ); + event.rnamespace = event.namespace ? + new RegExp( "(^|\\.)" + namespaces.join( "\\.(?:.*\\.|)" ) + "(\\.|$)" ) : + null; + + // Clean up the event in case it is being reused + event.result = undefined; + if ( !event.target ) { + event.target = elem; + } + + // Clone any incoming data and prepend the event, creating the handler arg list + data = data == null ? + [ event ] : + jQuery.makeArray( data, [ event ] ); + + // Allow special events to draw outside the lines + special = jQuery.event.special[ type ] || {}; + if ( !onlyHandlers && special.trigger && special.trigger.apply( elem, data ) === false ) { + return; + } + + // Determine event propagation path in advance, per W3C events spec (#9951) + // Bubble up to document, then to window; watch for a global ownerDocument var (#9724) + if ( !onlyHandlers && !special.noBubble && !jQuery.isWindow( elem ) ) { + + bubbleType = special.delegateType || type; + if ( !rfocusMorph.test( bubbleType + type ) ) { + cur = cur.parentNode; + } + for ( ; cur; cur = cur.parentNode ) { + eventPath.push( cur ); + tmp = cur; + } + + // Only add window if we got to document (e.g., not plain obj or detached DOM) + if ( tmp === ( elem.ownerDocument || document ) ) { + eventPath.push( tmp.defaultView || tmp.parentWindow || window ); + } + } + + // Fire handlers on the event path + i = 0; + while ( ( cur = eventPath[ i++ ] ) && !event.isPropagationStopped() ) { + + event.type = i > 1 ? + bubbleType : + special.bindType || type; + + // jQuery handler + handle = ( dataPriv.get( cur, "events" ) || {} )[ event.type ] && + dataPriv.get( cur, "handle" ); + if ( handle ) { + handle.apply( cur, data ); + } + + // Native handler + handle = ontype && cur[ ontype ]; + if ( handle && handle.apply && acceptData( cur ) ) { + event.result = handle.apply( cur, data ); + if ( event.result === false ) { + event.preventDefault(); + } + } + } + event.type = type; + + // If nobody prevented the default action, do it now + if ( !onlyHandlers && !event.isDefaultPrevented() ) { + + if ( ( !special._default || + special._default.apply( eventPath.pop(), data ) === false ) && + acceptData( elem ) ) { + + // Call a native DOM method on the target with the same name as the event. 
+ // Don't do default actions on window, that's where global variables be (#6170) + if ( ontype && jQuery.isFunction( elem[ type ] ) && !jQuery.isWindow( elem ) ) { + + // Don't re-trigger an onFOO event when we call its FOO() method + tmp = elem[ ontype ]; + + if ( tmp ) { + elem[ ontype ] = null; + } + + // Prevent re-triggering of the same event, since we already bubbled it above + jQuery.event.triggered = type; + elem[ type ](); + jQuery.event.triggered = undefined; + + if ( tmp ) { + elem[ ontype ] = tmp; + } + } + } + } + + return event.result; + }, + + // Piggyback on a donor event to simulate a different one + // Used only for `focus(in | out)` events + simulate: function( type, elem, event ) { + var e = jQuery.extend( + new jQuery.Event(), + event, + { + type: type, + isSimulated: true + } + ); + + jQuery.event.trigger( e, null, elem ); + } + +} ); + +jQuery.fn.extend( { + + trigger: function( type, data ) { + return this.each( function() { + jQuery.event.trigger( type, data, this ); + } ); + }, + triggerHandler: function( type, data ) { + var elem = this[ 0 ]; + if ( elem ) { + return jQuery.event.trigger( type, data, elem, true ); + } + } +} ); + + +jQuery.each( ( "blur focus focusin focusout resize scroll click dblclick " + + "mousedown mouseup mousemove mouseover mouseout mouseenter mouseleave " + + "change select submit keydown keypress keyup contextmenu" ).split( " " ), + function( i, name ) { + + // Handle event binding + jQuery.fn[ name ] = function( data, fn ) { + return arguments.length > 0 ? + this.on( name, null, data, fn ) : + this.trigger( name ); + }; +} ); + +jQuery.fn.extend( { + hover: function( fnOver, fnOut ) { + return this.mouseenter( fnOver ).mouseleave( fnOut || fnOver ); + } +} ); + + + + +support.focusin = "onfocusin" in window; + + +// Support: Firefox <=44 +// Firefox doesn't have focus(in | out) events +// Related ticket - https://bugzilla.mozilla.org/show_bug.cgi?id=687787 +// +// Support: Chrome <=48 - 49, Safari <=9.0 - 9.1 +// focus(in | out) events fire after focus & blur events, +// which is spec violation - http://www.w3.org/TR/DOM-Level-3-Events/#events-focusevent-event-order +// Related ticket - https://bugs.chromium.org/p/chromium/issues/detail?id=449857 +if ( !support.focusin ) { + jQuery.each( { focus: "focusin", blur: "focusout" }, function( orig, fix ) { + + // Attach a single capturing handler on the document while someone wants focusin/focusout + var handler = function( event ) { + jQuery.event.simulate( fix, event.target, jQuery.event.fix( event ) ); + }; + + jQuery.event.special[ fix ] = { + setup: function() { + var doc = this.ownerDocument || this, + attaches = dataPriv.access( doc, fix ); + + if ( !attaches ) { + doc.addEventListener( orig, handler, true ); + } + dataPriv.access( doc, fix, ( attaches || 0 ) + 1 ); + }, + teardown: function() { + var doc = this.ownerDocument || this, + attaches = dataPriv.access( doc, fix ) - 1; + + if ( !attaches ) { + doc.removeEventListener( orig, handler, true ); + dataPriv.remove( doc, fix ); + + } else { + dataPriv.access( doc, fix, attaches ); + } + } + }; + } ); +} +var location = window.location; + +var nonce = jQuery.now(); + +var rquery = ( /\?/ ); + + + +// Cross-browser xml parsing +jQuery.parseXML = function( data ) { + var xml; + if ( !data || typeof data !== "string" ) { + return null; + } + + // Support: IE 9 - 11 only + // IE throws on parseFromString with invalid input. 
+ try { + xml = ( new window.DOMParser() ).parseFromString( data, "text/xml" ); + } catch ( e ) { + xml = undefined; + } + + if ( !xml || xml.getElementsByTagName( "parsererror" ).length ) { + jQuery.error( "Invalid XML: " + data ); + } + return xml; +}; + + +var + rbracket = /\[\]$/, + rCRLF = /\r?\n/g, + rsubmitterTypes = /^(?:submit|button|image|reset|file)$/i, + rsubmittable = /^(?:input|select|textarea|keygen)/i; + +function buildParams( prefix, obj, traditional, add ) { + var name; + + if ( Array.isArray( obj ) ) { + + // Serialize array item. + jQuery.each( obj, function( i, v ) { + if ( traditional || rbracket.test( prefix ) ) { + + // Treat each array item as a scalar. + add( prefix, v ); + + } else { + + // Item is non-scalar (array or object), encode its numeric index. + buildParams( + prefix + "[" + ( typeof v === "object" && v != null ? i : "" ) + "]", + v, + traditional, + add + ); + } + } ); + + } else if ( !traditional && jQuery.type( obj ) === "object" ) { + + // Serialize object item. + for ( name in obj ) { + buildParams( prefix + "[" + name + "]", obj[ name ], traditional, add ); + } + + } else { + + // Serialize scalar item. + add( prefix, obj ); + } +} + +// Serialize an array of form elements or a set of +// key/values into a query string +jQuery.param = function( a, traditional ) { + var prefix, + s = [], + add = function( key, valueOrFunction ) { + + // If value is a function, invoke it and use its return value + var value = jQuery.isFunction( valueOrFunction ) ? + valueOrFunction() : + valueOrFunction; + + s[ s.length ] = encodeURIComponent( key ) + "=" + + encodeURIComponent( value == null ? "" : value ); + }; + + // If an array was passed in, assume that it is an array of form elements. + if ( Array.isArray( a ) || ( a.jquery && !jQuery.isPlainObject( a ) ) ) { + + // Serialize the form elements + jQuery.each( a, function() { + add( this.name, this.value ); + } ); + + } else { + + // If traditional, encode the "old" way (the way 1.3.2 or older + // did it), otherwise encode params recursively. + for ( prefix in a ) { + buildParams( prefix, a[ prefix ], traditional, add ); + } + } + + // Return the resulting serialization + return s.join( "&" ); +}; + +jQuery.fn.extend( { + serialize: function() { + return jQuery.param( this.serializeArray() ); + }, + serializeArray: function() { + return this.map( function() { + + // Can add propHook for "elements" to filter or add form elements + var elements = jQuery.prop( this, "elements" ); + return elements ? 
jQuery.makeArray( elements ) : this; + } ) + .filter( function() { + var type = this.type; + + // Use .is( ":disabled" ) so that fieldset[disabled] works + return this.name && !jQuery( this ).is( ":disabled" ) && + rsubmittable.test( this.nodeName ) && !rsubmitterTypes.test( type ) && + ( this.checked || !rcheckableType.test( type ) ); + } ) + .map( function( i, elem ) { + var val = jQuery( this ).val(); + + if ( val == null ) { + return null; + } + + if ( Array.isArray( val ) ) { + return jQuery.map( val, function( val ) { + return { name: elem.name, value: val.replace( rCRLF, "\r\n" ) }; + } ); + } + + return { name: elem.name, value: val.replace( rCRLF, "\r\n" ) }; + } ).get(); + } +} ); + + +var + r20 = /%20/g, + rhash = /#.*$/, + rantiCache = /([?&])_=[^&]*/, + rheaders = /^(.*?):[ \t]*([^\r\n]*)$/mg, + + // #7653, #8125, #8152: local protocol detection + rlocalProtocol = /^(?:about|app|app-storage|.+-extension|file|res|widget):$/, + rnoContent = /^(?:GET|HEAD)$/, + rprotocol = /^\/\//, + + /* Prefilters + * 1) They are useful to introduce custom dataTypes (see ajax/jsonp.js for an example) + * 2) These are called: + * - BEFORE asking for a transport + * - AFTER param serialization (s.data is a string if s.processData is true) + * 3) key is the dataType + * 4) the catchall symbol "*" can be used + * 5) execution will start with transport dataType and THEN continue down to "*" if needed + */ + prefilters = {}, + + /* Transports bindings + * 1) key is the dataType + * 2) the catchall symbol "*" can be used + * 3) selection will start with transport dataType and THEN go to "*" if needed + */ + transports = {}, + + // Avoid comment-prolog char sequence (#10098); must appease lint and evade compression + allTypes = "*/".concat( "*" ), + + // Anchor tag for parsing the document origin + originAnchor = document.createElement( "a" ); + originAnchor.href = location.href; + +// Base "constructor" for jQuery.ajaxPrefilter and jQuery.ajaxTransport +function addToPrefiltersOrTransports( structure ) { + + // dataTypeExpression is optional and defaults to "*" + return function( dataTypeExpression, func ) { + + if ( typeof dataTypeExpression !== "string" ) { + func = dataTypeExpression; + dataTypeExpression = "*"; + } + + var dataType, + i = 0, + dataTypes = dataTypeExpression.toLowerCase().match( rnothtmlwhite ) || []; + + if ( jQuery.isFunction( func ) ) { + + // For each dataType in the dataTypeExpression + while ( ( dataType = dataTypes[ i++ ] ) ) { + + // Prepend if requested + if ( dataType[ 0 ] === "+" ) { + dataType = dataType.slice( 1 ) || "*"; + ( structure[ dataType ] = structure[ dataType ] || [] ).unshift( func ); + + // Otherwise append + } else { + ( structure[ dataType ] = structure[ dataType ] || [] ).push( func ); + } + } + } + }; +} + +// Base inspection function for prefilters and transports +function inspectPrefiltersOrTransports( structure, options, originalOptions, jqXHR ) { + + var inspected = {}, + seekingTransport = ( structure === transports ); + + function inspect( dataType ) { + var selected; + inspected[ dataType ] = true; + jQuery.each( structure[ dataType ] || [], function( _, prefilterOrFactory ) { + var dataTypeOrTransport = prefilterOrFactory( options, originalOptions, jqXHR ); + if ( typeof dataTypeOrTransport === "string" && + !seekingTransport && !inspected[ dataTypeOrTransport ] ) { + + options.dataTypes.unshift( dataTypeOrTransport ); + inspect( dataTypeOrTransport ); + return false; + } else if ( seekingTransport ) { + return !( selected = dataTypeOrTransport 
); + } + } ); + return selected; + } + + return inspect( options.dataTypes[ 0 ] ) || !inspected[ "*" ] && inspect( "*" ); +} + +// A special extend for ajax options +// that takes "flat" options (not to be deep extended) +// Fixes #9887 +function ajaxExtend( target, src ) { + var key, deep, + flatOptions = jQuery.ajaxSettings.flatOptions || {}; + + for ( key in src ) { + if ( src[ key ] !== undefined ) { + ( flatOptions[ key ] ? target : ( deep || ( deep = {} ) ) )[ key ] = src[ key ]; + } + } + if ( deep ) { + jQuery.extend( true, target, deep ); + } + + return target; +} + +/* Handles responses to an ajax request: + * - finds the right dataType (mediates between content-type and expected dataType) + * - returns the corresponding response + */ +function ajaxHandleResponses( s, jqXHR, responses ) { + + var ct, type, finalDataType, firstDataType, + contents = s.contents, + dataTypes = s.dataTypes; + + // Remove auto dataType and get content-type in the process + while ( dataTypes[ 0 ] === "*" ) { + dataTypes.shift(); + if ( ct === undefined ) { + ct = s.mimeType || jqXHR.getResponseHeader( "Content-Type" ); + } + } + + // Check if we're dealing with a known content-type + if ( ct ) { + for ( type in contents ) { + if ( contents[ type ] && contents[ type ].test( ct ) ) { + dataTypes.unshift( type ); + break; + } + } + } + + // Check to see if we have a response for the expected dataType + if ( dataTypes[ 0 ] in responses ) { + finalDataType = dataTypes[ 0 ]; + } else { + + // Try convertible dataTypes + for ( type in responses ) { + if ( !dataTypes[ 0 ] || s.converters[ type + " " + dataTypes[ 0 ] ] ) { + finalDataType = type; + break; + } + if ( !firstDataType ) { + firstDataType = type; + } + } + + // Or just use first one + finalDataType = finalDataType || firstDataType; + } + + // If we found a dataType + // We add the dataType to the list if needed + // and return the corresponding response + if ( finalDataType ) { + if ( finalDataType !== dataTypes[ 0 ] ) { + dataTypes.unshift( finalDataType ); + } + return responses[ finalDataType ]; + } +} + +/* Chain conversions given the request and the original response + * Also sets the responseXXX fields on the jqXHR instance + */ +function ajaxConvert( s, response, jqXHR, isSuccess ) { + var conv2, current, conv, tmp, prev, + converters = {}, + + // Work with a copy of dataTypes in case we need to modify it for conversion + dataTypes = s.dataTypes.slice(); + + // Create converters map with lowercased keys + if ( dataTypes[ 1 ] ) { + for ( conv in s.converters ) { + converters[ conv.toLowerCase() ] = s.converters[ conv ]; + } + } + + current = dataTypes.shift(); + + // Convert to each sequential dataType + while ( current ) { + + if ( s.responseFields[ current ] ) { + jqXHR[ s.responseFields[ current ] ] = response; + } + + // Apply the dataFilter if provided + if ( !prev && isSuccess && s.dataFilter ) { + response = s.dataFilter( response, s.dataType ); + } + + prev = current; + current = dataTypes.shift(); + + if ( current ) { + + // There's only work to do if current dataType is non-auto + if ( current === "*" ) { + + current = prev; + + // Convert response if prev dataType is non-auto and differs from current + } else if ( prev !== "*" && prev !== current ) { + + // Seek a direct converter + conv = converters[ prev + " " + current ] || converters[ "* " + current ]; + + // If none found, seek a pair + if ( !conv ) { + for ( conv2 in converters ) { + + // If conv2 outputs current + tmp = conv2.split( " " ); + if ( tmp[ 1 ] === current ) { + + 
// If prev can be converted to accepted input + conv = converters[ prev + " " + tmp[ 0 ] ] || + converters[ "* " + tmp[ 0 ] ]; + if ( conv ) { + + // Condense equivalence converters + if ( conv === true ) { + conv = converters[ conv2 ]; + + // Otherwise, insert the intermediate dataType + } else if ( converters[ conv2 ] !== true ) { + current = tmp[ 0 ]; + dataTypes.unshift( tmp[ 1 ] ); + } + break; + } + } + } + } + + // Apply converter (if not an equivalence) + if ( conv !== true ) { + + // Unless errors are allowed to bubble, catch and return them + if ( conv && s.throws ) { + response = conv( response ); + } else { + try { + response = conv( response ); + } catch ( e ) { + return { + state: "parsererror", + error: conv ? e : "No conversion from " + prev + " to " + current + }; + } + } + } + } + } + } + + return { state: "success", data: response }; +} + +jQuery.extend( { + + // Counter for holding the number of active queries + active: 0, + + // Last-Modified header cache for next request + lastModified: {}, + etag: {}, + + ajaxSettings: { + url: location.href, + type: "GET", + isLocal: rlocalProtocol.test( location.protocol ), + global: true, + processData: true, + async: true, + contentType: "application/x-www-form-urlencoded; charset=UTF-8", + + /* + timeout: 0, + data: null, + dataType: null, + username: null, + password: null, + cache: null, + throws: false, + traditional: false, + headers: {}, + */ + + accepts: { + "*": allTypes, + text: "text/plain", + html: "text/html", + xml: "application/xml, text/xml", + json: "application/json, text/javascript" + }, + + contents: { + xml: /\bxml\b/, + html: /\bhtml/, + json: /\bjson\b/ + }, + + responseFields: { + xml: "responseXML", + text: "responseText", + json: "responseJSON" + }, + + // Data converters + // Keys separate source (or catchall "*") and destination types with a single space + converters: { + + // Convert anything to text + "* text": String, + + // Text to html (true = no transformation) + "text html": true, + + // Evaluate text as a json expression + "text json": JSON.parse, + + // Parse text as xml + "text xml": jQuery.parseXML + }, + + // For options that shouldn't be deep extended: + // you can add your own custom options here if + // and when you create one that shouldn't be + // deep extended (see ajaxExtend) + flatOptions: { + url: true, + context: true + } + }, + + // Creates a full fledged settings object into target + // with both ajaxSettings and settings fields. + // If target is omitted, writes into ajaxSettings. + ajaxSetup: function( target, settings ) { + return settings ? 
+ + // Building a settings object + ajaxExtend( ajaxExtend( target, jQuery.ajaxSettings ), settings ) : + + // Extending ajaxSettings + ajaxExtend( jQuery.ajaxSettings, target ); + }, + + ajaxPrefilter: addToPrefiltersOrTransports( prefilters ), + ajaxTransport: addToPrefiltersOrTransports( transports ), + + // Main method + ajax: function( url, options ) { + + // If url is an object, simulate pre-1.5 signature + if ( typeof url === "object" ) { + options = url; + url = undefined; + } + + // Force options to be an object + options = options || {}; + + var transport, + + // URL without anti-cache param + cacheURL, + + // Response headers + responseHeadersString, + responseHeaders, + + // timeout handle + timeoutTimer, + + // Url cleanup var + urlAnchor, + + // Request state (becomes false upon send and true upon completion) + completed, + + // To know if global events are to be dispatched + fireGlobals, + + // Loop variable + i, + + // uncached part of the url + uncached, + + // Create the final options object + s = jQuery.ajaxSetup( {}, options ), + + // Callbacks context + callbackContext = s.context || s, + + // Context for global events is callbackContext if it is a DOM node or jQuery collection + globalEventContext = s.context && + ( callbackContext.nodeType || callbackContext.jquery ) ? + jQuery( callbackContext ) : + jQuery.event, + + // Deferreds + deferred = jQuery.Deferred(), + completeDeferred = jQuery.Callbacks( "once memory" ), + + // Status-dependent callbacks + statusCode = s.statusCode || {}, + + // Headers (they are sent all at once) + requestHeaders = {}, + requestHeadersNames = {}, + + // Default abort message + strAbort = "canceled", + + // Fake xhr + jqXHR = { + readyState: 0, + + // Builds headers hashtable if needed + getResponseHeader: function( key ) { + var match; + if ( completed ) { + if ( !responseHeaders ) { + responseHeaders = {}; + while ( ( match = rheaders.exec( responseHeadersString ) ) ) { + responseHeaders[ match[ 1 ].toLowerCase() ] = match[ 2 ]; + } + } + match = responseHeaders[ key.toLowerCase() ]; + } + return match == null ? null : match; + }, + + // Raw string + getAllResponseHeaders: function() { + return completed ? 
responseHeadersString : null; + }, + + // Caches the header + setRequestHeader: function( name, value ) { + if ( completed == null ) { + name = requestHeadersNames[ name.toLowerCase() ] = + requestHeadersNames[ name.toLowerCase() ] || name; + requestHeaders[ name ] = value; + } + return this; + }, + + // Overrides response content-type header + overrideMimeType: function( type ) { + if ( completed == null ) { + s.mimeType = type; + } + return this; + }, + + // Status-dependent callbacks + statusCode: function( map ) { + var code; + if ( map ) { + if ( completed ) { + + // Execute the appropriate callbacks + jqXHR.always( map[ jqXHR.status ] ); + } else { + + // Lazy-add the new callbacks in a way that preserves old ones + for ( code in map ) { + statusCode[ code ] = [ statusCode[ code ], map[ code ] ]; + } + } + } + return this; + }, + + // Cancel the request + abort: function( statusText ) { + var finalText = statusText || strAbort; + if ( transport ) { + transport.abort( finalText ); + } + done( 0, finalText ); + return this; + } + }; + + // Attach deferreds + deferred.promise( jqXHR ); + + // Add protocol if not provided (prefilters might expect it) + // Handle falsy url in the settings object (#10093: consistency with old signature) + // We also use the url parameter if available + s.url = ( ( url || s.url || location.href ) + "" ) + .replace( rprotocol, location.protocol + "//" ); + + // Alias method option to type as per ticket #12004 + s.type = options.method || options.type || s.method || s.type; + + // Extract dataTypes list + s.dataTypes = ( s.dataType || "*" ).toLowerCase().match( rnothtmlwhite ) || [ "" ]; + + // A cross-domain request is in order when the origin doesn't match the current origin. + if ( s.crossDomain == null ) { + urlAnchor = document.createElement( "a" ); + + // Support: IE <=8 - 11, Edge 12 - 13 + // IE throws exception on accessing the href property if url is malformed, + // e.g. 
http://example.com:80x/ + try { + urlAnchor.href = s.url; + + // Support: IE <=8 - 11 only + // Anchor's host property isn't correctly set when s.url is relative + urlAnchor.href = urlAnchor.href; + s.crossDomain = originAnchor.protocol + "//" + originAnchor.host !== + urlAnchor.protocol + "//" + urlAnchor.host; + } catch ( e ) { + + // If there is an error parsing the URL, assume it is crossDomain, + // it can be rejected by the transport if it is invalid + s.crossDomain = true; + } + } + + // Convert data if not already a string + if ( s.data && s.processData && typeof s.data !== "string" ) { + s.data = jQuery.param( s.data, s.traditional ); + } + + // Apply prefilters + inspectPrefiltersOrTransports( prefilters, s, options, jqXHR ); + + // If request was aborted inside a prefilter, stop there + if ( completed ) { + return jqXHR; + } + + // We can fire global events as of now if asked to + // Don't fire events if jQuery.event is undefined in an AMD-usage scenario (#15118) + fireGlobals = jQuery.event && s.global; + + // Watch for a new set of requests + if ( fireGlobals && jQuery.active++ === 0 ) { + jQuery.event.trigger( "ajaxStart" ); + } + + // Uppercase the type + s.type = s.type.toUpperCase(); + + // Determine if request has content + s.hasContent = !rnoContent.test( s.type ); + + // Save the URL in case we're toying with the If-Modified-Since + // and/or If-None-Match header later on + // Remove hash to simplify url manipulation + cacheURL = s.url.replace( rhash, "" ); + + // More options handling for requests with no content + if ( !s.hasContent ) { + + // Remember the hash so we can put it back + uncached = s.url.slice( cacheURL.length ); + + // If data is available, append data to url + if ( s.data ) { + cacheURL += ( rquery.test( cacheURL ) ? "&" : "?" ) + s.data; + + // #9682: remove data so that it's not used in an eventual retry + delete s.data; + } + + // Add or update anti-cache param if needed + if ( s.cache === false ) { + cacheURL = cacheURL.replace( rantiCache, "$1" ); + uncached = ( rquery.test( cacheURL ) ? "&" : "?" ) + "_=" + ( nonce++ ) + uncached; + } + + // Put hash and anti-cache on the URL that will be requested (gh-1732) + s.url = cacheURL + uncached; + + // Change '%20' to '+' if this is encoded form body content (gh-2658) + } else if ( s.data && s.processData && + ( s.contentType || "" ).indexOf( "application/x-www-form-urlencoded" ) === 0 ) { + s.data = s.data.replace( r20, "+" ); + } + + // Set the If-Modified-Since and/or If-None-Match header, if in ifModified mode. + if ( s.ifModified ) { + if ( jQuery.lastModified[ cacheURL ] ) { + jqXHR.setRequestHeader( "If-Modified-Since", jQuery.lastModified[ cacheURL ] ); + } + if ( jQuery.etag[ cacheURL ] ) { + jqXHR.setRequestHeader( "If-None-Match", jQuery.etag[ cacheURL ] ); + } + } + + // Set the correct header, if data is being sent + if ( s.data && s.hasContent && s.contentType !== false || options.contentType ) { + jqXHR.setRequestHeader( "Content-Type", s.contentType ); + } + + // Set the Accepts header for the server, depending on the dataType + jqXHR.setRequestHeader( + "Accept", + s.dataTypes[ 0 ] && s.accepts[ s.dataTypes[ 0 ] ] ? + s.accepts[ s.dataTypes[ 0 ] ] + + ( s.dataTypes[ 0 ] !== "*" ? 
", " + allTypes + "; q=0.01" : "" ) : + s.accepts[ "*" ] + ); + + // Check for headers option + for ( i in s.headers ) { + jqXHR.setRequestHeader( i, s.headers[ i ] ); + } + + // Allow custom headers/mimetypes and early abort + if ( s.beforeSend && + ( s.beforeSend.call( callbackContext, jqXHR, s ) === false || completed ) ) { + + // Abort if not done already and return + return jqXHR.abort(); + } + + // Aborting is no longer a cancellation + strAbort = "abort"; + + // Install callbacks on deferreds + completeDeferred.add( s.complete ); + jqXHR.done( s.success ); + jqXHR.fail( s.error ); + + // Get transport + transport = inspectPrefiltersOrTransports( transports, s, options, jqXHR ); + + // If no transport, we auto-abort + if ( !transport ) { + done( -1, "No Transport" ); + } else { + jqXHR.readyState = 1; + + // Send global event + if ( fireGlobals ) { + globalEventContext.trigger( "ajaxSend", [ jqXHR, s ] ); + } + + // If request was aborted inside ajaxSend, stop there + if ( completed ) { + return jqXHR; + } + + // Timeout + if ( s.async && s.timeout > 0 ) { + timeoutTimer = window.setTimeout( function() { + jqXHR.abort( "timeout" ); + }, s.timeout ); + } + + try { + completed = false; + transport.send( requestHeaders, done ); + } catch ( e ) { + + // Rethrow post-completion exceptions + if ( completed ) { + throw e; + } + + // Propagate others as results + done( -1, e ); + } + } + + // Callback for when everything is done + function done( status, nativeStatusText, responses, headers ) { + var isSuccess, success, error, response, modified, + statusText = nativeStatusText; + + // Ignore repeat invocations + if ( completed ) { + return; + } + + completed = true; + + // Clear timeout if it exists + if ( timeoutTimer ) { + window.clearTimeout( timeoutTimer ); + } + + // Dereference transport for early garbage collection + // (no matter how long the jqXHR object will be used) + transport = undefined; + + // Cache response headers + responseHeadersString = headers || ""; + + // Set readyState + jqXHR.readyState = status > 0 ? 4 : 0; + + // Determine if successful + isSuccess = status >= 200 && status < 300 || status === 304; + + // Get response data + if ( responses ) { + response = ajaxHandleResponses( s, jqXHR, responses ); + } + + // Convert no matter what (that way responseXXX fields are always set) + response = ajaxConvert( s, response, jqXHR, isSuccess ); + + // If successful, handle type chaining + if ( isSuccess ) { + + // Set the If-Modified-Since and/or If-None-Match header, if in ifModified mode. 
+ if ( s.ifModified ) { + modified = jqXHR.getResponseHeader( "Last-Modified" ); + if ( modified ) { + jQuery.lastModified[ cacheURL ] = modified; + } + modified = jqXHR.getResponseHeader( "etag" ); + if ( modified ) { + jQuery.etag[ cacheURL ] = modified; + } + } + + // if no content + if ( status === 204 || s.type === "HEAD" ) { + statusText = "nocontent"; + + // if not modified + } else if ( status === 304 ) { + statusText = "notmodified"; + + // If we have data, let's convert it + } else { + statusText = response.state; + success = response.data; + error = response.error; + isSuccess = !error; + } + } else { + + // Extract error from statusText and normalize for non-aborts + error = statusText; + if ( status || !statusText ) { + statusText = "error"; + if ( status < 0 ) { + status = 0; + } + } + } + + // Set data for the fake xhr object + jqXHR.status = status; + jqXHR.statusText = ( nativeStatusText || statusText ) + ""; + + // Success/Error + if ( isSuccess ) { + deferred.resolveWith( callbackContext, [ success, statusText, jqXHR ] ); + } else { + deferred.rejectWith( callbackContext, [ jqXHR, statusText, error ] ); + } + + // Status-dependent callbacks + jqXHR.statusCode( statusCode ); + statusCode = undefined; + + if ( fireGlobals ) { + globalEventContext.trigger( isSuccess ? "ajaxSuccess" : "ajaxError", + [ jqXHR, s, isSuccess ? success : error ] ); + } + + // Complete + completeDeferred.fireWith( callbackContext, [ jqXHR, statusText ] ); + + if ( fireGlobals ) { + globalEventContext.trigger( "ajaxComplete", [ jqXHR, s ] ); + + // Handle the global AJAX counter + if ( !( --jQuery.active ) ) { + jQuery.event.trigger( "ajaxStop" ); + } + } + } + + return jqXHR; + }, + + getJSON: function( url, data, callback ) { + return jQuery.get( url, data, callback, "json" ); + }, + + getScript: function( url, callback ) { + return jQuery.get( url, undefined, callback, "script" ); + } +} ); + +jQuery.each( [ "get", "post" ], function( i, method ) { + jQuery[ method ] = function( url, data, callback, type ) { + + // Shift arguments if data argument was omitted + if ( jQuery.isFunction( data ) ) { + type = type || callback; + callback = data; + data = undefined; + } + + // The url can be an options object (which then must have .url) + return jQuery.ajax( jQuery.extend( { + url: url, + type: method, + dataType: type, + data: data, + success: callback + }, jQuery.isPlainObject( url ) && url ) ); + }; +} ); + + +jQuery._evalUrl = function( url ) { + return jQuery.ajax( { + url: url, + + // Make this explicit, since user can override this through ajaxSetup (#11264) + type: "GET", + dataType: "script", + cache: true, + async: false, + global: false, + "throws": true + } ); +}; + + +jQuery.fn.extend( { + wrapAll: function( html ) { + var wrap; + + if ( this[ 0 ] ) { + if ( jQuery.isFunction( html ) ) { + html = html.call( this[ 0 ] ); + } + + // The elements to wrap the target around + wrap = jQuery( html, this[ 0 ].ownerDocument ).eq( 0 ).clone( true ); + + if ( this[ 0 ].parentNode ) { + wrap.insertBefore( this[ 0 ] ); + } + + wrap.map( function() { + var elem = this; + + while ( elem.firstElementChild ) { + elem = elem.firstElementChild; + } + + return elem; + } ).append( this ); + } + + return this; + }, + + wrapInner: function( html ) { + if ( jQuery.isFunction( html ) ) { + return this.each( function( i ) { + jQuery( this ).wrapInner( html.call( this, i ) ); + } ); + } + + return this.each( function() { + var self = jQuery( this ), + contents = self.contents(); + + if ( contents.length ) { + 
contents.wrapAll( html ); + + } else { + self.append( html ); + } + } ); + }, + + wrap: function( html ) { + var isFunction = jQuery.isFunction( html ); + + return this.each( function( i ) { + jQuery( this ).wrapAll( isFunction ? html.call( this, i ) : html ); + } ); + }, + + unwrap: function( selector ) { + this.parent( selector ).not( "body" ).each( function() { + jQuery( this ).replaceWith( this.childNodes ); + } ); + return this; + } +} ); + + +jQuery.expr.pseudos.hidden = function( elem ) { + return !jQuery.expr.pseudos.visible( elem ); +}; +jQuery.expr.pseudos.visible = function( elem ) { + return !!( elem.offsetWidth || elem.offsetHeight || elem.getClientRects().length ); +}; + + + + +jQuery.ajaxSettings.xhr = function() { + try { + return new window.XMLHttpRequest(); + } catch ( e ) {} +}; + +var xhrSuccessStatus = { + + // File protocol always yields status code 0, assume 200 + 0: 200, + + // Support: IE <=9 only + // #1450: sometimes IE returns 1223 when it should be 204 + 1223: 204 + }, + xhrSupported = jQuery.ajaxSettings.xhr(); + +support.cors = !!xhrSupported && ( "withCredentials" in xhrSupported ); +support.ajax = xhrSupported = !!xhrSupported; + +jQuery.ajaxTransport( function( options ) { + var callback, errorCallback; + + // Cross domain only allowed if supported through XMLHttpRequest + if ( support.cors || xhrSupported && !options.crossDomain ) { + return { + send: function( headers, complete ) { + var i, + xhr = options.xhr(); + + xhr.open( + options.type, + options.url, + options.async, + options.username, + options.password + ); + + // Apply custom fields if provided + if ( options.xhrFields ) { + for ( i in options.xhrFields ) { + xhr[ i ] = options.xhrFields[ i ]; + } + } + + // Override mime type if needed + if ( options.mimeType && xhr.overrideMimeType ) { + xhr.overrideMimeType( options.mimeType ); + } + + // X-Requested-With header + // For cross-domain requests, seeing as conditions for a preflight are + // akin to a jigsaw puzzle, we simply never set it to be sure. + // (it can always be set on a per-request basis or even using ajaxSetup) + // For same-domain requests, won't change header if already provided. + if ( !options.crossDomain && !headers[ "X-Requested-With" ] ) { + headers[ "X-Requested-With" ] = "XMLHttpRequest"; + } + + // Set headers + for ( i in headers ) { + xhr.setRequestHeader( i, headers[ i ] ); + } + + // Callback + callback = function( type ) { + return function() { + if ( callback ) { + callback = errorCallback = xhr.onload = + xhr.onerror = xhr.onabort = xhr.onreadystatechange = null; + + if ( type === "abort" ) { + xhr.abort(); + } else if ( type === "error" ) { + + // Support: IE <=9 only + // On a manual native abort, IE9 throws + // errors on any property access that is not readyState + if ( typeof xhr.status !== "number" ) { + complete( 0, "error" ); + } else { + complete( + + // File: protocol always yields status 0; see #8605, #14207 + xhr.status, + xhr.statusText + ); + } + } else { + complete( + xhrSuccessStatus[ xhr.status ] || xhr.status, + xhr.statusText, + + // Support: IE <=9 only + // IE9 has no XHR2 but throws on binary (trac-11426) + // For XHR2 non-text, let the caller handle it (gh-2498) + ( xhr.responseType || "text" ) !== "text" || + typeof xhr.responseText !== "string" ? 
+ { binary: xhr.response } : + { text: xhr.responseText }, + xhr.getAllResponseHeaders() + ); + } + } + }; + }; + + // Listen to events + xhr.onload = callback(); + errorCallback = xhr.onerror = callback( "error" ); + + // Support: IE 9 only + // Use onreadystatechange to replace onabort + // to handle uncaught aborts + if ( xhr.onabort !== undefined ) { + xhr.onabort = errorCallback; + } else { + xhr.onreadystatechange = function() { + + // Check readyState before timeout as it changes + if ( xhr.readyState === 4 ) { + + // Allow onerror to be called first, + // but that will not handle a native abort + // Also, save errorCallback to a variable + // as xhr.onerror cannot be accessed + window.setTimeout( function() { + if ( callback ) { + errorCallback(); + } + } ); + } + }; + } + + // Create the abort callback + callback = callback( "abort" ); + + try { + + // Do send the request (this may raise an exception) + xhr.send( options.hasContent && options.data || null ); + } catch ( e ) { + + // #14683: Only rethrow if this hasn't been notified as an error yet + if ( callback ) { + throw e; + } + } + }, + + abort: function() { + if ( callback ) { + callback(); + } + } + }; + } +} ); + + + + +// Prevent auto-execution of scripts when no explicit dataType was provided (See gh-2432) +jQuery.ajaxPrefilter( function( s ) { + if ( s.crossDomain ) { + s.contents.script = false; + } +} ); + +// Install script dataType +jQuery.ajaxSetup( { + accepts: { + script: "text/javascript, application/javascript, " + + "application/ecmascript, application/x-ecmascript" + }, + contents: { + script: /\b(?:java|ecma)script\b/ + }, + converters: { + "text script": function( text ) { + jQuery.globalEval( text ); + return text; + } + } +} ); + +// Handle cache's special case and crossDomain +jQuery.ajaxPrefilter( "script", function( s ) { + if ( s.cache === undefined ) { + s.cache = false; + } + if ( s.crossDomain ) { + s.type = "GET"; + } +} ); + +// Bind script tag hack transport +jQuery.ajaxTransport( "script", function( s ) { + + // This transport only deals with cross domain requests + if ( s.crossDomain ) { + var script, callback; + return { + send: function( _, complete ) { + script = jQuery( " - - - - - - - - - -

Behavioral Cloning

-

Actions space: Discrete|Continuous

-

Network Structure

-

- - - -

- -

Algorithm Description

-

Training the network

-

The replay buffer contains the expert demonstrations for the task. These demonstrations are given as (state, action) tuples, with no reward. The training goal is to reduce the difference between the actions predicted by the network and the actions taken by the expert for each state.

-
  1. Sample a batch of transitions from the replay buffer.
  2. Use the current states as input to the network, and the expert actions as the targets of the network.
  3. The loss function for the network is MSE, and therefore we use the Q head to minimize this loss (a minimal sketch of this step follows below).
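A minimal PyTorch sketch of this training step, for illustration only; the network shape and the names policy_net, states and expert_actions are assumptions, not Coach's actual implementation.

import torch
import torch.nn as nn

# Hypothetical policy network mapping states to continuous actions.
state_dim, action_dim, batch_size = 8, 2, 64
policy_net = nn.Sequential(nn.Linear(state_dim, 64), nn.ReLU(), nn.Linear(64, action_dim))
optimizer = torch.optim.Adam(policy_net.parameters(), lr=1e-3)
loss_fn = nn.MSELoss()

# A sampled batch of expert demonstrations: (state, action) pairs, no rewards needed.
states = torch.randn(batch_size, state_dim)
expert_actions = torch.randn(batch_size, action_dim)

# Regress the predicted actions onto the expert actions with an MSE loss.
predicted_actions = policy_net(states)
loss = loss_fn(predicted_actions, expert_actions)
optimizer.zero_grad()
loss.backward()
optimizer.step()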
diff --git a/docs/algorithms/other/dfp/index.html b/docs/algorithms/other/dfp/index.html deleted file mode 100644 index c40d187..0000000 --- a/docs/algorithms/other/dfp/index.html +++ /dev/null @@ -1,299 +0,0 @@
Direct Future Prediction

-

Actions space: Discrete

-

References: Learning to Act by Predicting the Future

-

Network Structure

-

- - - -

- -

Algorithm Description

-

Choosing an action

-
  1. The current states (observations and measurements) and the corresponding goal vector are passed as an input to the network. The output of the network is the predicted future measurements for a set of future time-steps and for each possible action.
  2. For each action, the measurements of each predicted time-step are multiplied by the goal vector, and the result is a single vector of future values for each action.
  3. Then, a weighted sum of the future values of each action is calculated, and the result is a single value for each action (see the sketch below).
  4. The action values are passed to the exploration policy to decide on the action to use.
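A small NumPy sketch of this action-scoring procedure, for illustration only; the prediction tensor layout, the goal vector and the time-step weights are assumptions.

import numpy as np

num_actions, num_timesteps, num_measurements = 3, 6, 2

# Network output: predicted future measurements per action and per future time-step.
predicted = np.random.randn(num_actions, num_timesteps, num_measurements)

goal_vector = np.array([0.5, 1.0])                        # weight of each measurement
timestep_weights = np.linspace(0.0, 1.0, num_timesteps)   # emphasis on later time-steps

# 1) Multiply each predicted time-step by the goal vector -> future values per action.
future_values = predicted @ goal_vector                   # shape: (num_actions, num_timesteps)

# 2) Weighted sum over time-steps -> a single value per action.
action_values = future_values @ timestep_weights

# 3) Hand the action values to the exploration policy (greedy here for simplicity).
action = int(np.argmax(action_values))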

Training the network

-

Given a batch of transitions, run them through the network to get the current predictions of the future measurements per action, and set them as the initial targets for training the network. For each transition in the batch, the target of the network for the action that was taken is the actual measurements that were observed at the corresponding future time-steps. For the actions that were not taken, the targets are the current values.

diff --git a/docs/algorithms/policy_optimization/ac/index.html b/docs/algorithms/policy_optimization/ac/index.html deleted file mode 100644 index feae706..0000000 --- a/docs/algorithms/policy_optimization/ac/index.html +++ /dev/null @@ -1,299 +0,0 @@

Actor-Critic

-

Actions space: Discrete|Continuous

-

References: Asynchronous Methods for Deep Reinforcement Learning

-

Network Structure

-

- -

-

Algorithm Description

-

Choosing an action - Discrete actions

-

The policy network is used in order to predict action probabilities. While training, a sample is taken from a categorical distribution assigned with these probabilities. When testing, the action with the highest probability is used.

-

Training the network

-

A batch of transitions is used, and the advantages are calculated from it.

-

Advantages can be calculated by either of the following methods (configured by the selected preset):

-
  1. A_VALUE - Estimating the advantage directly: $A(s_t, a_t) = \sum_{i=t}^{t+k-1} \gamma^{i-t} r_i + \gamma^{k} V(s_{t+k}) - V(s_t)$, where $k$ is the number of steps until the end of the batch for each state.
  2. GAE - By following the Generalized Advantage Estimation paper.

The advantages are then used in order to accumulate gradients according to the policy gradient $\nabla_\theta \log \pi_\theta(a_t \mid s_t) \cdot A(s_t, a_t)$ (a sketch of the A_VALUE computation follows below).
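A NumPy sketch of the A_VALUE advantage estimate described above, for illustration only; it computes a bootstrapped discounted return minus the critic's state value, with assumed array names.

import numpy as np

def a_value_advantages(rewards, state_values, bootstrap_value, gamma=0.99):
    """A(s_t) = sum_{i=t}^{T-1} gamma^(i-t) * r_i + gamma^(T-t) * V(s_T) - V(s_t)."""
    T = len(rewards)
    advantages = np.zeros(T)
    ret = bootstrap_value                      # V(s_T) predicted by the critic
    for t in reversed(range(T)):
        ret = rewards[t] + gamma * ret         # discounted return with bootstrap
        advantages[t] = ret - state_values[t]  # subtract the critic's estimate
    return advantages

# The advantages then scale the policy gradient: grad log pi(a_t|s_t) * A(s_t, a_t).
adv = a_value_advantages(rewards=np.array([1.0, 0.0, 1.0]),
                         state_values=np.array([0.5, 0.4, 0.6]),
                         bootstrap_value=0.3)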

diff --git a/docs/algorithms/policy_optimization/cppo/index.html b/docs/algorithms/policy_optimization/cppo/index.html deleted file mode 100644 index c0a2146..0000000 --- a/docs/algorithms/policy_optimization/cppo/index.html +++ /dev/null @@ -1,309 +0,0 @@

Clipped Proximal Policy Optimization

-

Actions space: Discrete|Continuous

-

References: Proximal Policy Optimization Algorithms

-

Network Structure

-

- -

- -

Algorithm Description

-

Choosing an action - Continuous action

-

Same as in PPO.

-

Training the network

-

Very similar to PPO, with several small (but very simplifying) changes:

-
  1. Train both the value and policy networks simultaneously, by defining a single loss function which is the sum of the two networks' loss functions. Then, backpropagate gradients only once from this unified loss function.
  2. The unified network's optimizer is set to Adam (instead of L-BFGS for the value network as in PPO).
  3. Value targets are now also calculated based on the GAE advantages. In this method, the values are predicted from the critic network, and then added to the GAE-based advantages, in order to get a value for each action. Since the critic network predicts a value for each state, setting the calculated action-values as a target will, on average, serve as a state-value target.
  4. Instead of adapting the penalizing KL divergence coefficient used in PPO, the likelihood ratio is clipped to achieve a similar effect. This is done by defining the policy's loss function to be the minimum between the standard surrogate loss and an epsilon-clipped surrogate loss (see the sketch after this list):

$L^{CLIP}(\theta) = \hat{E}_t \left[ \min\left( r_t(\theta) \hat{A}_t,\; \mathrm{clip}(r_t(\theta), 1-\epsilon, 1+\epsilon) \hat{A}_t \right) \right], \quad r_t(\theta) = \frac{\pi_\theta(a_t \mid s_t)}{\pi_{\theta_{old}}(a_t \mid s_t)}$
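A PyTorch sketch of the clipped surrogate loss, for illustration only; tensor names such as new_log_probs, old_log_probs and advantages are assumptions.

import torch

def clipped_surrogate_loss(new_log_probs, old_log_probs, advantages, clip_epsilon=0.2):
    """Minimum between the standard surrogate and the epsilon-clipped surrogate."""
    ratio = torch.exp(new_log_probs - old_log_probs)           # pi_new / pi_old
    clipped_ratio = torch.clamp(ratio, 1.0 - clip_epsilon, 1.0 + clip_epsilon)
    surrogate = torch.min(ratio * advantages, clipped_ratio * advantages)
    return -surrogate.mean()                                   # minimized by the optimizer

loss = clipped_surrogate_loss(new_log_probs=torch.randn(64),
                              old_log_probs=torch.randn(64),
                              advantages=torch.randn(64))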
diff --git a/docs/algorithms/policy_optimization/ddpg/index.html b/docs/algorithms/policy_optimization/ddpg/index.html deleted file mode 100644 index 49ba6bd..0000000 --- a/docs/algorithms/policy_optimization/ddpg/index.html +++ /dev/null @@ -1,307 +0,0 @@

Deep Deterministic Policy Gradient

-

Actions space: Continuous

-

References: Continuous control with deep reinforcement learning

-

Network Structure

-

- - - -

- -

Algorithm Description

-

Choosing an action

-

Pass the current states through the actor network, and get an action mean vector $\mu(s_t)$. While in the training phase, use a continuous exploration policy, such as the Ornstein-Uhlenbeck process, to add exploration noise to the action. When testing, use the mean vector as-is.

-

Training the network

-

Start by sampling a batch of transitions from the experience replay.

-
  • To train the critic network, use the following targets:

$y_t = r(s_t, a_t) + \gamma \cdot Q(s_{t+1}, \mu(s_{t+1}))$

First run the actor target network, using the next states as the inputs, and get $\mu(s_{t+1})$. Next, run the critic target network using the next states and $\mu(s_{t+1})$, and use the output to calculate $y_t$ according to the equation above. To train the network, use the current states and actions as the inputs, and $y_t$ as the targets.

-
  • To train the actor network, use the following equation:

$\nabla_{\theta^\mu} J \approx E\left[ \nabla_a Q(s, a)\big|_{a=\mu(s)} \cdot \nabla_{\theta^\mu} \mu(s) \right]$

Use the actor's online network to get the action mean values $\mu(s_t)$, using the current states as the inputs. Then, use the critic online network in order to get the gradients of the critic output with respect to the action mean values, $\nabla_a Q(s, a)\big|_{a=\mu(s_t)}$. Using the chain rule, calculate the gradients of the actor's output with respect to the actor weights, given these critic gradients. Finally, apply those gradients to the actor network.

-

After every training step, do a soft update of the critic and actor target networks' weights from the online networks.
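A PyTorch sketch of the critic targets and the soft target-network update, for illustration only; the network objects and their call signatures (actor_target, critic_target taking state and action) are assumptions.

import torch

def critic_targets(rewards, next_states, actor_target, critic_target, gamma=0.99):
    """y_t = r_t + gamma * Q_target(s_{t+1}, mu_target(s_{t+1}))."""
    with torch.no_grad():
        next_actions = actor_target(next_states)
        return rewards + gamma * critic_target(next_states, next_actions).squeeze(-1)

def soft_update(online_net, target_net, tau=0.001):
    """Move each target parameter a small step towards the corresponding online parameter."""
    for online_p, target_p in zip(online_net.parameters(), target_net.parameters()):
        target_p.data.mul_(1.0 - tau).add_(tau * online_p.data)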

diff --git a/docs/algorithms/policy_optimization/pg/index.html b/docs/algorithms/policy_optimization/pg/index.html deleted file mode 100644 index 777b434..0000000 --- a/docs/algorithms/policy_optimization/pg/index.html +++ /dev/null @@ -1,299 +0,0 @@

Policy Gradient

-

Actions space: Discrete|Continuous

-

References: Simple Statistical Gradient-Following Algorithms for Connectionist Reinforcement Learning

-

Network Structure

-

- - - -

- -

Algorithm Description

-

Choosing an action - Discrete actions

-

Run the current states through the network and get a policy distribution over the actions. While training, sample from the policy distribution. When testing, take the action with the highest probability.

-

Training the network

-

The policy head loss is defined as $L = -\log \pi(a_t \mid s_t) \cdot B_t$, where $B_t$ is a rescaled return. The rescaler is used in order to reduce the policy gradient variance, since noisy gradient updates might destabilize the policy's convergence. The rescaler is a configurable parameter and there are a few options to choose from:

  • Total Episode Return - The sum of all the discounted rewards during the episode.
  • Future Return - The return from each transition until the end of the episode.
  • Future Return Normalized by Episode - Future returns across the episode, normalized by the episode's mean and standard deviation.
  • Future Return Normalized by Timestep - Future returns normalized using running means and standard deviations, which are calculated separately for each timestep, across different episodes.

-

Gradients are accumulated over a number of fully played episodes. Accumulating the gradients over several episodes serves the same purpose of reducing the update variance. After accumulating gradients for several episodes, the gradients are then applied to the network (a sketch of the future-return rescaler follows below).
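A NumPy sketch of the "Future Return" rescaler and the resulting loss term, for illustration only; the log-probability array is assumed to come from the policy head.

import numpy as np

def future_returns(rewards, gamma=0.99):
    """Discounted return from each transition until the end of the episode."""
    returns = np.zeros_like(rewards, dtype=float)
    running = 0.0
    for t in reversed(range(len(rewards))):
        running = rewards[t] + gamma * running
        returns[t] = running
    return returns

rewards = np.array([0.0, 0.0, 1.0])
log_probs = np.array([-0.7, -1.2, -0.3])       # log pi(a_t | s_t) from the policy head
rescaler = future_returns(rewards)

# Policy gradient loss: -log pi(a_t | s_t) * B_t, averaged over the episode.
loss = -(log_probs * rescaler).mean()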

diff --git a/docs/algorithms/policy_optimization/ppo/index.html b/docs/algorithms/policy_optimization/ppo/index.html deleted file mode 100644 index 51ca06e..0000000 --- a/docs/algorithms/policy_optimization/ppo/index.html +++ /dev/null @@ -1,300 +0,0 @@

Proximal Policy Optimization

-

Actions space: Discrete|Continuous

-

References: Proximal Policy Optimization Algorithms

-

Network Structure

-

- - - -

- -

Algorithm Description

-

Choosing an action - Continuous actions

-

Run the observation through the policy network, and get the mean and standard deviation vectors for this observation. While in training phase, sample from a multi-dimensional Gaussian distribution with these mean and standard deviation values. When testing, just take the mean values predicted by the network.

-

Training the network

-
  1. Collect a large chunk of experience (in the order of thousands of transitions, sampled from multiple episodes).
  2. Calculate the advantages for each transition, using the Generalized Advantage Estimation method (Schulman et al., 2015).
  3. Run a single training iteration of the value network using an L-BFGS optimizer. Unlike first order optimizers, the L-BFGS optimizer runs on the entire dataset at once, without batching. It continues running until some low loss threshold is reached. To prevent overfitting to the current dataset, the value targets are updated in a soft manner, using an Exponentially Weighted Moving Average, based on the total discounted returns of each state in each episode.
  4. Run several training iterations of the policy network. This is done by using the previously calculated advantages as targets. The loss function penalizes policies that deviate too far from the old policy (the policy that was used before starting to run the current set of training iterations) using a regularization term.
  5. After training is done, the last sampled KL divergence value is compared with the target KL divergence value, in order to adapt the penalty coefficient used in the policy loss. If the KL divergence went too high, increase the penalty; if it went too low, reduce it. Otherwise, leave it unchanged (see the sketch below).
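A small Python sketch of the KL-based penalty adaptation in step 5, for illustration only; the scaling factor and tolerance are assumed values, not Coach's exact defaults.

def adapt_kl_penalty(penalty, sampled_kl, target_kl, scale=1.5, tolerance=2.0):
    """Increase the penalty when the policy moved too far, decrease it when it barely moved."""
    if sampled_kl > tolerance * target_kl:
        penalty *= scale       # KL too high -> penalize deviation more strongly
    elif sampled_kl < target_kl / tolerance:
        penalty /= scale       # KL too low -> allow larger policy updates
    return penalty             # otherwise, leave it unchanged

penalty = adapt_kl_penalty(penalty=1.0, sampled_kl=0.05, target_kl=0.01)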
diff --git a/docs/algorithms/value_optimization/bs_dqn/index.html b/docs/algorithms/value_optimization/bs_dqn/index.html deleted file mode 100644 index e00e11d..0000000 --- a/docs/algorithms/value_optimization/bs_dqn/index.html +++ /dev/null @@ -1,301 +0,0 @@

Bootstrapped DQN

-

Actions space: Discrete

-

References: Deep Exploration via Bootstrapped DQN

-

Network Structure

-

- - - -

- -

Algorithm Description

-

Choosing an action

-

The current states are used as the input to the network. The network contains several heads, which are used for returning different estimations of the action values. For each episode, the bootstrapped exploration policy selects a single head to play with during the episode. According to the selected head, only the relevant output values are used. Using those values, the exploration policy then selects the action for acting.

-

Storing the transitions

-

For each transition, a Binomial mask is generated according to a predefined probability, and the number of output heads. The mask is a binary vector where each element holds a 0 for heads that shouldn't train on the specific transition, and 1 for heads that should use the transition for training. The mask is stored as part of the transition info in the replay buffer.

-

Training the network

-

First, sample a batch of transitions from the replay buffer. Run the current states through the network and get the current value predictions for all the heads and all the actions. For each transition in the batch, and for each output head, if the transition mask is 1, change the target of the played action to $y_t$, according to the standard DQN update rule:

$y_t = r(s_t, a_t) + \gamma \cdot \max_a Q(s_{t+1}, a)$

-

Otherwise, leave it intact so that the transition does not affect the learning of this head. Then, train the online network according to the calculated targets.

-

As in DQN, once in every few thousand steps, copy the weights from the online network to the target network.
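A NumPy sketch of the per-head mask and the masked target update, for illustration only; the mask probability, head count and array names are assumptions.

import numpy as np

num_heads, batch_size, num_actions = 10, 4, 6
gamma, mask_probability = 0.99, 0.5

# Stored with each transition: which heads are allowed to train on it.
mask = np.random.binomial(1, mask_probability, size=(batch_size, num_heads))

# Current predictions per head; targets default to the current values (i.e. no update).
q_current = np.random.randn(num_heads, batch_size, num_actions)
q_next_target = np.random.randn(num_heads, batch_size, num_actions)
rewards = np.random.randn(batch_size)
actions = np.random.randint(num_actions, size=batch_size)

targets = q_current.copy()
for head in range(num_heads):
    for i in range(batch_size):
        if mask[i, head] == 1:   # only masked-in heads learn from this transition
            targets[head, i, actions[i]] = rewards[i] + gamma * q_next_target[head, i].max()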

diff --git a/docs/algorithms/value_optimization/categorical_dqn/index.html b/docs/algorithms/value_optimization/categorical_dqn/index.html deleted file mode 100644 index 602bd1a..0000000 --- a/docs/algorithms/value_optimization/categorical_dqn/index.html +++ /dev/null @@ -1,310 +0,0 @@

Categorical DQN

-

Actions space: Discrete

-

References: A Distributional Perspective on Reinforcement Learning

-

Network Structure

-

- - - -

- -

Algorithm Description

-

Training the network

-
  1. Sample a batch of transitions from the replay buffer.
  2. The Bellman update is projected to the set of atoms representing the values distribution, such that the $i$-th component of the projected update is calculated as follows (a sketch of this projection follows below):

     $(\Phi \hat{T} Z_{\theta}(s_t, a_t))_i = \sum_{j=0}^{N-1} \left[ 1 - \frac{\left| [\hat{T} z_j]_{V_{MIN}}^{V_{MAX}} - z_i \right|}{\Delta z} \right]_0^1 p_j(s_{t+1}, \pi(s_{t+1}))$

     where:
       • $[\cdot]_a^b$ bounds its argument in the range $[a, b]$
       • $\hat{T} z_j$ is the Bellman update for atom $z_j$: $\hat{T} z_j := r + \gamma z_j$
  3. The network is trained with the cross entropy loss between the resulting probability distribution and the target probability distribution. Only the target of the actions that were actually taken is updated.
  4. Once in every few thousand steps, weights are copied from the online network to the target network.
- -
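A NumPy sketch of the projection step above, for illustration only; array shapes and names (next_probs holding the next-state distribution of the selected action) are assumptions.

import numpy as np

def project_distribution(rewards, dones, next_probs, v_min, v_max, gamma=0.99):
    """Project the Bellman-updated atoms back onto the fixed atom grid (C51-style)."""
    batch_size, num_atoms = next_probs.shape
    z = np.linspace(v_min, v_max, num_atoms)            # fixed atom locations z_j
    delta_z = (v_max - v_min) / (num_atoms - 1)

    # Bellman update of each atom: T z_j = r + gamma * z_j (no bootstrap on terminal states).
    tz = rewards[:, None] + gamma * z[None, :] * (1.0 - dones[:, None])
    tz = np.clip(tz, v_min, v_max)

    # Distribute each shifted atom's probability to its two nearest grid atoms.
    b = (tz - v_min) / delta_z
    lower, upper = np.floor(b).astype(int), np.ceil(b).astype(int)
    projected = np.zeros_like(next_probs)
    for i in range(batch_size):
        for j in range(num_atoms):
            if lower[i, j] == upper[i, j]:
                projected[i, lower[i, j]] += next_probs[i, j]
            else:
                projected[i, lower[i, j]] += next_probs[i, j] * (upper[i, j] - b[i, j])
                projected[i, upper[i, j]] += next_probs[i, j] * (b[i, j] - lower[i, j])
    return projected

target_dist = project_distribution(rewards=np.array([1.0, 0.0]),
                                   dones=np.array([0.0, 1.0]),
                                   next_probs=np.full((2, 51), 1.0 / 51),
                                   v_min=-10.0, v_max=10.0)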
diff --git a/docs/algorithms/value_optimization/double_dqn/index.html b/docs/algorithms/value_optimization/double_dqn/index.html deleted file mode 100644 index ea94365..0000000 --- a/docs/algorithms/value_optimization/double_dqn/index.html +++ /dev/null @@ -1,305 +0,0 @@

Double DQN

-

Actions space: Discrete

-

References: Deep Reinforcement Learning with Double Q-learning

-

Network Structure

-

- - - -

- -

Algorithm Description

-

Training the network

-
  1. Sample a batch of transitions from the replay buffer.
  2. Using the next states from the sampled batch, run the online network in order to find the maximizing action $\mathrm{argmax}_a Q(s_{t+1}, a)$. For these actions, use the corresponding next states and run the target network to calculate $Q(s_{t+1}, \mathrm{argmax}_a Q(s_{t+1}, a))$.
  3. In order to zero out the updates for the actions that were not played (resulting from zeroing the MSE loss), use the current states from the sampled batch, and run the online network to get the current Q values predictions. Set those values as the targets for the actions that were not actually played.
  4. For each action that was played, use the following equation for calculating the targets of the network (see the sketch below):

     $y_t = r(s_t, a_t) + \gamma \cdot Q(s_{t+1}, \mathrm{argmax}_a Q(s_{t+1}, a))$

  5. Finally, train the online network using the current states as inputs, and with the aforementioned targets.
  6. Once in every few thousand steps, copy the weights from the online network to the target network.
- -
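A NumPy sketch of the decoupled target computation in steps 2 and 4, for illustration only; array names such as q_next_online and q_next_target are assumptions (terminal-state handling omitted for brevity).

import numpy as np

def double_dqn_targets(rewards, q_next_online, q_next_target, gamma=0.99):
    """y_t = r_t + gamma * Q_target(s_{t+1}, argmax_a Q_online(s_{t+1}, a))."""
    best_actions = np.argmax(q_next_online, axis=1)        # online net picks the action
    batch_index = np.arange(len(rewards))
    bootstrap = q_next_target[batch_index, best_actions]   # target net evaluates it
    return rewards + gamma * bootstrap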
diff --git a/docs/algorithms/value_optimization/dqn/index.html b/docs/algorithms/value_optimization/dqn/index.html deleted file mode 100644 index ea2adc0..0000000 --- a/docs/algorithms/value_optimization/dqn/index.html +++ /dev/null @@ -1,304 +0,0 @@

Deep Q Networks

-

Actions space: Discrete

-

References: Playing Atari with Deep Reinforcement Learning

-

Network Structure

-

- - - -

- -

Algorithm Description

-

Training the network

-
  1. Sample a batch of transitions from the replay buffer.
  2. Using the next states from the sampled batch, run the target network to calculate the values $Q(s_{t+1}, a)$ for each of the actions $a$, and keep only the maximum value for each state.
  3. In order to zero out the updates for the actions that were not played (resulting from zeroing the MSE loss), use the current states from the sampled batch, and run the online network to get the current Q values predictions. Set those values as the targets for the actions that were not actually played.
  4. For each action that was played, use the following equation for calculating the targets of the network (see the sketch below):

     $y_t = r(s_t, a_t) + \gamma \cdot \max_a Q(s_{t+1}, a)$

  5. Finally, train the online network using the current states as inputs, and with the aforementioned targets.
  6. Once in every few thousand steps, copy the weights from the online network to the target network.
- -
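A NumPy sketch of steps 2-4: the targets equal the current predictions everywhere except for the played actions. Illustration only; array names are assumptions and terminal-state handling is omitted for brevity.

import numpy as np

def dqn_targets(q_current, q_next_target, actions, rewards, gamma=0.99):
    """Only the played action's target is replaced; other actions keep their current values."""
    targets = q_current.copy()                     # zero update for actions not played
    batch_index = np.arange(len(actions))
    max_next_q = q_next_target.max(axis=1)         # max_a Q_target(s_{t+1}, a)
    targets[batch_index, actions] = rewards + gamma * max_next_q
    return targets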
diff --git a/docs/algorithms/value_optimization/dueling_dqn/index.html b/docs/algorithms/value_optimization/dueling_dqn/index.html deleted file mode 100644 index 199ba06..0000000 --- a/docs/algorithms/value_optimization/dueling_dqn/index.html +++ /dev/null @@ -1,294 +0,0 @@

Dueling DQN

-

Actions space: Discrete

-

References: Dueling Network Architectures for Deep Reinforcement Learning

-

Network Structure

-

- - - -

- -

General Description

-

Dueling DQN presents a change in the network structure compared to DQN.

-

Dueling DQN uses a specialized Dueling Q Head in order to separate the Q values into an advantage stream $A(s, a)$ and a state-value stream $V(s)$. Adding this type of structure to the network head allows the network to better differentiate actions from one another, and significantly improves the learning.

-

In many states, the values of the different actions are very similar, and it is less important which action to take. This is especially important in environments where there are many actions to choose from. In DQN, on each training iteration, for each of the states in the batch, we update the Q values only for the specific actions taken in those states. This results in slower learning, as we do not learn the Q values for actions that were not taken yet. With the dueling architecture, on the other hand, learning is faster, since we start learning the state-value even if only a single action has been taken at this state.
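A PyTorch sketch of a dueling Q head that combines the two streams as Q = V + (A - mean(A)). Illustration only, not Coach's exact head implementation; the feature dimension and layer sizes are assumptions.

import torch
import torch.nn as nn

class DuelingQHead(nn.Module):
    def __init__(self, feature_dim, num_actions):
        super().__init__()
        self.value_stream = nn.Linear(feature_dim, 1)                 # V(s)
        self.advantage_stream = nn.Linear(feature_dim, num_actions)   # A(s, a)

    def forward(self, features):
        value = self.value_stream(features)
        advantage = self.advantage_stream(features)
        # Subtracting the mean advantage keeps V and A identifiable.
        return value + advantage - advantage.mean(dim=1, keepdim=True)

q_values = DuelingQHead(feature_dim=128, num_actions=4)(torch.randn(2, 128))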

- -
-
- - -
-
- -
- -
- -
- - - - - - - - diff --git a/docs/algorithms/value_optimization/mmc/index.html b/docs/algorithms/value_optimization/mmc/index.html deleted file mode 100644 index 8668757..0000000 --- a/docs/algorithms/value_optimization/mmc/index.html +++ /dev/null @@ -1,306 +0,0 @@ - - - - - - - - - - - Mixed Monte Carlo - Reinforcement Learning Coach - - - - - - - - - - - - - - - - - -
- - - - -
- - - - - -
-
-
-
-
-
-
-
- -

Mixed Monte Carlo

-

Actions space: Discrete

-

References: Count-Based Exploration with Neural Density Models

-

Network Structure

-

- - - -

- -

Algorithm Description

-

Training the network

-

In MMC, targets are calculated as a mixture between Double DQN targets and full Monte Carlo samples (total discounted returns).

-

The DDQN targets are calculated in the same manner as in the DDQN agent:

-

\(y_t^{DDQN} = r(s_t, a_t) + \gamma \cdot Q^{target}\big(s_{t+1}, \mathrm{argmax}_a Q(s_{t+1}, a)\big)\)

-

The Monte Carlo targets are calculated by summing up the discounted rewards across the entire episode:

-

\(y_t^{MC} = \sum_{i=t}^{T} \gamma^{i-t} r_i\)

-

A mixing ratio \(\alpha\) is then used to get the final targets:

-

\(y_t = (1 - \alpha) \cdot y_t^{DDQN} + \alpha \cdot y_t^{MC}\)

-

Finally, the online network is trained using the current states as inputs, and the calculated targets. Once in every few thousand steps, copy the weights from the online network to the target network.
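A minimal sketch of the target mixing, assuming the DDQN targets and Monte Carlo returns were already computed for the batch (the mixing_ratio value is illustrative only):

import numpy as np

def mmc_targets(ddqn_targets, monte_carlo_returns, mixing_ratio=0.1):
    # Weight the full-episode Monte Carlo returns by the mixing ratio and the
    # 1-step DDQN targets by its complement.
    ddqn_targets = np.asarray(ddqn_targets, dtype=float)
    monte_carlo_returns = np.asarray(monte_carlo_returns, dtype=float)
    return (1.0 - mixing_ratio) * ddqn_targets + mixing_ratio * monte_carlo_returns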

- -
-
- - -
-
- -
- -
- -
- - - - - - - - diff --git a/docs/algorithms/value_optimization/n_step/index.html b/docs/algorithms/value_optimization/n_step/index.html deleted file mode 100644 index 3f848cb..0000000 --- a/docs/algorithms/value_optimization/n_step/index.html +++ /dev/null @@ -1,305 +0,0 @@ - - - - - - - - - - - N-Step Q Learning - Reinforcement Learning Coach - - - - - - - - - - - - - - - - - -
- - - - -
- - - - - -
-
-
-
-
-
-
-
- -

N-Step Q Learning

-

Actions space: Discrete

-

References: Asynchronous Methods for Deep Reinforcement Learning

-

Network Structure

-

- - - -

- -

Algorithm Description

-

Training the network

-

The \(N\)-step Q learning algorithm works in a similar manner to DQN, except for the following changes:

-
    -
  1. -

    No replay buffer is used. Instead of sampling random batches of transitions, the network is trained every \(N\) steps using the latest \(N\) steps played by the agent.

    -
  2. -
  3. -

    In order to stabilize the learning, multiple workers work together to update the network. This creates the same effect as decorrelating the samples used for training.

    -
  4. -
  5. -

    Instead of using single-step Q targets for the network, the rewards from \(N\) consecutive steps are accumulated to form the \(N\)-step Q targets, according to the following equation: \(R(s_t, a_t) = \sum_{i=t}^{i=t+k-1} \gamma^{i-t} r_i + \gamma^{k} \max_a Q(s_{t+k}, a)\), where \(k\) is \(T_{max} - State\_Index\) for each state in the batch (a sketch of this accumulation follows the list).

    -
  6. -
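A sketch of the N-step target accumulation, assuming the rewards of the last N steps and the bootstrap value max_a Q(s_{t+N}, a) are given (not Coach's actual implementation):

import numpy as np

def n_step_q_targets(rewards, bootstrap_value, gamma=0.99):
    # rewards: the N most recent rewards, ordered from oldest to newest.
    # bootstrap_value: max_a Q(s_{t+N}, a) predicted for the state that follows the batch.
    targets = np.zeros(len(rewards))
    running_return = bootstrap_value
    for i in reversed(range(len(rewards))):
        # Accumulate the discounted return backwards from the bootstrap value.
        running_return = rewards[i] + gamma * running_return
        targets[i] = running_return
    return targets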
- -
-
- - -
-
- -
- -
- -
- - - - - - - - diff --git a/docs/algorithms/value_optimization/naf/index.html b/docs/algorithms/value_optimization/naf/index.html deleted file mode 100644 index d67e211..0000000 --- a/docs/algorithms/value_optimization/naf/index.html +++ /dev/null @@ -1,297 +0,0 @@ - - - - - - - - - - - Normalized Advantage Functions - Reinforcement Learning Coach - - - - - - - - - - - - - - - - - -
- - - - -
- - - - - -
-
-
-
-
-
-
-
- -

Normalized Advantage Functions

-

Actions space: Continuous

-

References: Continuous Deep Q-Learning with Model-based Acceleration

-

Network Structure

-

- - - -

- -

Algorithm Description

-

Choosing an action

-

The current state is used as an input to the network. The action mean \(\mu(s_t)\) is extracted from the network's output head. It is then passed to the exploration policy, which adds noise in order to encourage exploration.

-

Training the network

-

The network is trained by using the following targets: \(y_t = r(s_t, a_t) + \gamma \cdot V(s_{t+1})\). Use the next states as the inputs to the target network and extract the \(V\) value from within the head, to get \(V(s_{t+1})\). Then, update the online network using the current states and actions as inputs, and \(y_t\) as the targets. After every training step, use a soft update in order to copy the weights from the online network to the target network (a sketch of both steps follows).
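A sketch of the target computation and the soft target update, under the assumption that the V values of the next states were already extracted from the target network's head (illustrative only):

import numpy as np

def naf_targets(rewards, next_state_values, dones, gamma=0.99):
    # 1-step targets bootstrapped from the V head of the target network.
    return rewards + gamma * (1.0 - np.asarray(dones, dtype=float)) * next_state_values

def soft_update(target_weights, online_weights, tau=0.001):
    # Move every target-network weight a small step towards the online network.
    return [tau * w_online + (1.0 - tau) * w_target
            for w_online, w_target in zip(online_weights, target_weights)]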

- -
-
- - -
-
- -
- -
- -
- - - - - - - - diff --git a/docs/algorithms/value_optimization/nec/index.html b/docs/algorithms/value_optimization/nec/index.html deleted file mode 100644 index ee9fb8b..0000000 --- a/docs/algorithms/value_optimization/nec/index.html +++ /dev/null @@ -1,304 +0,0 @@ - - - - - - - - - - - Neural Episodic Control - Reinforcement Learning Coach - - - - - - - - - - - - - - - - - -
- - - - -
- - - - - -
-
-
-
-
-
-
-
- -

Neural Episodic Control

-

Actions space: Discrete

-

References: Neural Episodic Control

-

Network Structure

-

- - - -

- -

Algorithm Description

-

Choosing an action

-
    -
  1. Use the current state as an input to the online network and extract the state embedding, which is the intermediate output from the middleware.
  2. -
  3. For each possible action \(a\), run the DND head using the state embedding and the selected action as inputs. The DND is queried and returns the \(P\) nearest neighbor keys and values. The keys and values are used to calculate and return the action value \(Q(s_t, a)\) from the network (a sketch of this lookup follows the list).
  4. -
  5. Pass all the \(Q\) values to the exploration policy and choose an action accordingly.
  6. -
  7. Store the state embeddings and actions taken during the current episode in a small buffer \(B\), in order to accumulate transitions until it is possible to calculate the total discounted returns over the entire episode.
  8. -
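The DND lookup can be sketched as follows, using the inverse-distance kernel from the Neural Episodic Control paper; the number of neighbors p and the delta constant are illustrative assumptions:

import numpy as np

def dnd_lookup(query_embedding, keys, values, p=50, delta=1e-3):
    # keys: (n, d) stored state embeddings for one action, values: (n,) stored returns.
    distances = np.sum((keys - query_embedding) ** 2, axis=1)
    nearest = np.argsort(distances)[:p]
    # Kernel weights k(h, h_i) = 1 / (||h - h_i||^2 + delta), normalized over the neighbors.
    weights = 1.0 / (distances[nearest] + delta)
    weights /= weights.sum()
    # Q(s, a) is the kernel-weighted average of the stored returns.
    return float(np.dot(weights, values[nearest]))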
-

Finalizing an episode

-

For each step in the episode, the state embeddings and the taken actions are stored in the buffer \(B\). When the episode is finished, the replay buffer calculates the \(N\)-step total return of each transition in the buffer, bootstrapped using the maximum \(Q\) value of the \(N\)-th transition. The state embeddings are then inserted into the DND along with their total returns, and the buffer is reset.

-

Training the network

-

Train the network only when the DND has enough entries for querying.

-

To train the network, the current states are used as the inputs and the \(N\)-step returns are used as the targets. The \(N\)-step return takes \(N\) consecutive steps into account, and bootstraps the last value from the network if necessary: \(y_t = \sum_{i=t}^{t+N-1} \gamma^{i-t} r_i + \gamma^{N} \max_a Q(s_{t+N}, a)\)

- -
-
- - -
-
- -
- -
- -
- - - - - - - - diff --git a/docs/algorithms/value_optimization/pal/index.html b/docs/algorithms/value_optimization/pal/index.html deleted file mode 100644 index 2c39a13..0000000 --- a/docs/algorithms/value_optimization/pal/index.html +++ /dev/null @@ -1,318 +0,0 @@ - - - - - - - - - - - Persistent Advantage Learning - Reinforcement Learning Coach - - - - - - - - - - - - - - - - - -
- - - - -
- - - - - -
-
-
-
-
-
-
-
- -

Persistent Advantage Learning

-

Actions space: Discrete

-

References: Increasing the Action Gap: New Operators for Reinforcement Learning

-

Network Structure

-

- - - -

- -

Algorithm Description

-

Training the network

-
    -
  1. -

    Sample a batch of transitions from the replay buffer.

    -
  2. -
  3. -

    Start by calculating the initial target values in the same manner as they are calculated in DDQN: \(y_t^{DDQN} = r(s_t, a_t) + \gamma \cdot Q^{target}\big(s_{t+1}, \mathrm{argmax}_a Q(s_{t+1}, a)\big)\)

    -
  4. -
  5. The action gap should then be subtracted from each of the calculated targets. To calculate the action gap, run the target network using the current states and get the \(Q\) values for all the actions. Then estimate \(V(s_t)\) as the maximum predicted \(Q\) value for the current state: \(V(s_t) = \max_a Q(s_t, a)\), so that the action gap is \(V(s_t) - Q(s_t, a_t)\).
  6. -
  7. For advantage learning (AL), subtract the action gap, weighted by a predefined parameter \(\alpha\), from the targets \(y_t^{DDQN}\): \(y_t^{AL} = y_t^{DDQN} - \alpha \cdot (V(s_t) - Q(s_t, a_t))\)
  8. -
  9. For persistent advantage learning (PAL), the target network is also used in order to calculate the action gap for the next state: \(V(s_{t+1}) - Q(s_{t+1}, a_{t+1})\), where \(a_{t+1}\) is chosen by running the next states through the online network and choosing the action that has the highest predicted value. Finally, the targets will be defined as \(y_t^{PAL} = \max \big( y_t^{AL},\; y_t^{DDQN} - \alpha \cdot (V(s_{t+1}) - Q(s_{t+1}, a_{t+1})) \big)\) (a sketch of this computation follows the list).
  10. -
  11. -

    Train the online network using the current states as inputs, and with the aforementioned targets.

    -
  12. -
  13. -

    Once in every few thousand steps, copy the weights from the online network to the target network.

    -
  14. -
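The PAL target computation above can be sketched as follows, assuming the DDQN targets and the target network's Q values for the current and next states were already computed (the alpha value is illustrative):

import numpy as np

def pal_targets(ddqn_targets, q_current, q_next, actions, next_actions, alpha=0.9):
    # q_current / q_next: target-network Q values for s_t and s_{t+1}, shape (batch, num_actions).
    idx = np.arange(len(actions))
    gap_current = q_current.max(axis=1) - q_current[idx, actions]      # V(s_t) - Q(s_t, a_t)
    gap_next = q_next.max(axis=1) - q_next[idx, next_actions]          # V(s_{t+1}) - Q(s_{t+1}, a_{t+1})
    al_targets = ddqn_targets - alpha * gap_current                    # advantage learning targets
    # PAL keeps the larger of the AL target and the next-state-gap variant.
    return np.maximum(al_targets, ddqn_targets - alpha * gap_next)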
- -
-
- - -
-
- -
- -
- -
- - - - - - - - diff --git a/docs/components/additional_parameters.html b/docs/components/additional_parameters.html new file mode 100644 index 0000000..327ebcf --- /dev/null +++ b/docs/components/additional_parameters.html @@ -0,0 +1,391 @@ + + + + + + + + + + + Additional Parameters — Reinforcement Learning Coach 0.11.0 documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + +
+ + + + + +
+ +
+ + + + + + + + + + + + + + + + + +
+ + + + +
+
+
+
+ +
+

Additional Parameters

+
+

VisualizationParameters

+
+
+class rl_coach.base_parameters.VisualizationParameters(print_networks_summary=False, dump_csv=True, dump_signals_to_csv_every_x_episodes=5, dump_gifs=False, dump_mp4=False, video_dump_methods=None, dump_in_episode_signals=False, dump_parameters_documentation=True, render=False, native_rendering=False, max_fps_for_human_control=10, tensorboard=False, add_rendered_image_to_env_response=False)[source]
+
+++ + + + +
Parameters:
    +
  • print_networks_summary – If set to True, a summary of all the networks structure will be printed at the beginning of the experiment
  • +
  • dump_csv – If set to True, the logger will dump logs to a csv file once in every dump_signals_to_csv_every_x_episodes +episodes. The logs can be later used to visualize the training process using Coach Dashboard.
  • +
  • dump_signals_to_csv_every_x_episodes – Defines the number of episodes between writing new data to the csv log files. Lower values can affect +performance, as writing to disk may take time, and it is done synchronously.
  • +
  • dump_gifs – If set to True, GIF videos of the environment will be stored into the experiment directory according to +the filters defined in video_dump_methods.
  • +
  • dump_mp4 – If set to True, MP4 videos of the environment will be stored into the experiment directory according to +the filters defined in video_dump_methods.
  • +
  • dump_in_episode_signals – If set to True, csv files will be dumped for each episode for inspecting different metrics within the +episode. This means that for each step in each episode, different metrics such as the reward, the +future return, etc. will be saved. Setting this to True may affect performance severely, and therefore +this should be used only for debugging purposes.
  • +
  • dump_parameters_documentation – If set to True, a json file containing all the agent parameters will be saved in the experiment directory. +This may be very useful for inspecting the values defined for each parameters and making sure that all +the parameters are defined as expected.
  • +
  • render – If set to True, the environment render function will be called for each step, rendering the image of the +environment. This may affect the performance of training, and is highly dependent on the environment. +By default, Coach uses PyGame to render the environment image instead of the environment specific rendered. +To change this, use the native_rendering flag.
  • +
  • native_rendering – If set to True, the environment native renderer will be used for rendering the environment image. +In some cases this can be slower than rendering using PyGame through Coach, but in other cases the +environment opens its native renderer by default, so rendering with PyGame is an unnecessary overhead.
  • +
  • max_fps_for_human_control – The maximum number of frames per second used while playing the environment as a human. This only has +effect while using the –play flag for Coach.
  • +
  • tensorboard – If set to True, TensorBoard summaries will be stored in the experiment directory. This can later be +loaded in TensorBoard in order to visualize the training process.
  • +
  • video_dump_methods – A list of dump methods that will be used as filters for deciding when to save videos. +The filters in the list will be checked one after the other until the first dump method that returns +false for should_dump() in the environment class. This list will only be used if dump_mp4 or dump_gif are +set to True.
  • +
  • add_rendered_image_to_env_response – Some environments have a different observation compared to the one displayed while rendering. +For some cases it can be useful to pass the rendered image to the agent for visualization purposes. +If this flag is set to True, the rendered image will be added to the environment EnvResponse object, +which will be passed to the agent and allow using those images.
  • +
+
+
+ +
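For example, a VisualizationParameters instance that dumps MP4 videos and TensorBoard summaries and writes csv logs every 10 episodes could be constructed as follows (the chosen values are illustrative only):

from rl_coach.base_parameters import VisualizationParameters

vis_params = VisualizationParameters(
    dump_mp4=True,                              # store MP4 videos of the environment
    tensorboard=True,                           # store TensorBoard summaries
    dump_signals_to_csv_every_x_episodes=10,    # write csv logs every 10 episodes
)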
+
+

PresetValidationParameters

+
+
+class rl_coach.base_parameters.PresetValidationParameters(test=False, min_reward_threshold=0, max_episodes_to_achieve_reward=1, num_workers=1, reward_test_level=None, test_using_a_trace_test=True, trace_test_levels=None, trace_max_env_steps=5000)[source]
+
+++ + + + +
Parameters:
    +
  • test – A flag which specifies if the preset should be tested as part of the validation process.
  • +
  • min_reward_threshold – The minimum reward that the agent should pass after max_episodes_to_achieve_reward episodes when the +preset is run.
  • +
  • max_episodes_to_achieve_reward – The maximum number of episodes that the agent should train using the preset in order to achieve the +reward specified by min_reward_threshold.
  • +
  • num_workers – The number of workers that should be used when running this preset in the test suite for validation.
  • +
  • reward_test_level – The environment level or levels, given by a list of strings, that should be tested as part of the +reward tests suite.
  • +
  • test_using_a_trace_test – A flag that specifies if the preset should be run as part of the trace tests suite.
  • +
  • trace_test_levels – The environment level or levels, given by a list of strings, that should be tested as part of the +trace tests suite.
  • +
  • trace_max_env_steps – An integer representing the maximum number of environment steps to run when running this preset as part +of the trace tests suite.
  • +
+
+
+ +
+
+

TaskParameters

+
+
+class rl_coach.base_parameters.TaskParameters(framework_type: rl_coach.base_parameters.Frameworks = <Frameworks.tensorflow: 'TensorFlow'>, evaluate_only: bool = False, use_cpu: bool = False, experiment_path='/tmp', seed=None, checkpoint_save_secs=None, checkpoint_restore_dir=None, checkpoint_save_dir=None, export_onnx_graph: bool = False)[source]
+
+++ + + + +
Parameters:
    +
  • framework_type – deep learning framework type. currently only tensorflow is supported
  • +
  • evaluate_only – the task will be used only for evaluating the model
  • +
  • use_cpu – use the cpu for this task
  • +
  • experiment_path – the path to the directory which will store all the experiment outputs
  • +
  • seed – a seed to use for the random numbers generator
  • +
  • checkpoint_save_secs – the number of seconds between each checkpoint saving
  • +
  • checkpoint_restore_dir – the directory to restore the checkpoints from
  • +
  • checkpoint_save_dir – the directory to store the checkpoints in
  • +
  • export_onnx_graph – If set to True, this will export an onnx graph each time a checkpoint is saved
  • +
+
+
+ +
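For example, a TaskParameters instance for a CPU-only run that stores its outputs under a custom experiment path and checkpoints every 10 minutes might look as follows (the chosen values are illustrative only):

from rl_coach.base_parameters import TaskParameters

task_params = TaskParameters(
    use_cpu=True,                               # run the task on the CPU
    experiment_path='./experiments/my_run',     # where to store the experiment outputs
    checkpoint_save_secs=600,                   # save a checkpoint every 10 minutes
)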
+
+

DistributedTaskParameters

+
+
+class rl_coach.base_parameters.DistributedTaskParameters(framework_type: rl_coach.base_parameters.Frameworks, parameters_server_hosts: str, worker_hosts: str, job_type: str, task_index: int, evaluate_only: bool = False, num_tasks: int = None, num_training_tasks: int = None, use_cpu: bool = False, experiment_path=None, dnd=None, shared_memory_scratchpad=None, seed=None, checkpoint_save_secs=None, checkpoint_restore_dir=None, checkpoint_save_dir=None, export_onnx_graph: bool = False)[source]
+
+++ + + + +
Parameters:
    +
  • framework_type – deep learning framework type. currently only tensorflow is supported
  • +
  • evaluate_only – the task will be used only for evaluating the model
  • +
  • parameters_server_hosts – comma-separated list of hostname:port pairs to which the parameter servers are +assigned
  • +
  • worker_hosts – comma-separated list of hostname:port pairs to which the workers are assigned
  • +
  • job_type – the job type - either ps (short for parameters server) or worker
  • +
  • task_index – the index of the process
  • +
  • num_tasks – the number of total tasks that are running (not including the parameters server)
  • +
  • num_training_tasks – the number of tasks that are training (not including the parameters server)
  • +
  • use_cpu – use the cpu for this task
  • +
  • experiment_path – the path to the directory which will store all the experiment outputs
  • +
  • dnd – an external DND to use for NEC. This is a workaround needed for a shared DND not using the scratchpad.
  • +
  • seed – a seed to use for the random numbers generator
  • +
  • checkpoint_save_secs – the number of seconds between each checkpoint saving
  • +
  • checkpoint_restore_dir – the directory to restore the checkpoints from
  • +
  • checkpoint_save_dir – the directory to store the checkpoints in
  • +
  • export_onnx_graph – If set to True, this will export an onnx graph each time a checkpoint is saved
  • +
+
+
+ +
+
+ + +
+ +
+ + +
+
+ +
+ +
+ + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/docs/components/agents/imitation/bc.html b/docs/components/agents/imitation/bc.html new file mode 100644 index 0000000..ea5841e --- /dev/null +++ b/docs/components/agents/imitation/bc.html @@ -0,0 +1,298 @@ + + + + + + + + + + + Behavioral Cloning — Reinforcement Learning Coach 0.11.0 documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + +
+ + + + + +
+ +
+ + + + + + + + + + + + + + + + + +
+ + + + +
+
+
+
+ +
+

Behavioral Cloning

+

Actions space: Discrete | Continuous

+
+

Network Structure

+../../../_images/pg.png +
+
+

Algorithm Description

+
+

Training the network

+

The replay buffer contains the expert demonstrations for the task. +These demonstrations are given as state, action tuples, and with no reward. +The training goal is to reduce the difference between the actions predicted by the network and the actions taken by +the expert for each state.

+
    +
  1. Sample a batch of transitions from the replay buffer.
  2. +
  3. Use the current states as input to the network, and the expert actions as the targets of the network.
  4. +
  5. For the network head, we use the policy head, which uses the cross entropy loss function.
  6. +
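A minimal sketch of the resulting loss for discrete actions, assuming the network outputs a probability distribution over the actions (not Coach's actual head implementation):

import numpy as np

def behavioral_cloning_loss(predicted_action_probs, expert_actions):
    # predicted_action_probs: (batch, num_actions) probabilities from the policy head.
    # expert_actions: (batch,) integer actions taken by the expert.
    idx = np.arange(len(expert_actions))
    log_likelihood = np.log(predicted_action_probs[idx, expert_actions] + 1e-12)
    # Cross entropy between the predicted distribution and the expert's actions.
    return -log_likelihood.mean()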
+
+
+class rl_coach.agents.bc_agent.BCAlgorithmParameters[source]
+
+ +
+
+
+ + +
+ +
+ + +
+
+ +
+ +
+ + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/docs/components/agents/imitation/cil.html b/docs/components/agents/imitation/cil.html new file mode 100644 index 0000000..60557e5 --- /dev/null +++ b/docs/components/agents/imitation/cil.html @@ -0,0 +1,313 @@ + + + + + + + + + + + Conditional Imitation Learning — Reinforcement Learning Coach 0.11.0 documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + +
+ + + + + +
+ +
+ + + + + + + + + + + + + + + + + +
+ + + + +
+
+
+
+ +
+

Conditional Imitation Learning

+

Actions space: Discrete | Continuous

+

References: End-to-end Driving via Conditional Imitation Learning

+
+

Network Structure

+../../../_images/cil.png +
+
+

Algorithm Description

+
+

Training the network

+

The replay buffer contains the expert demonstrations for the task. +These demonstrations are given as state, action tuples, and with no reward. +The training goal is to reduce the difference between the actions predicted by the network and the actions taken by +the expert for each state. +In conditional imitation learning, each transition is assigned a class, which determines the goal that was pursued +in that transition. For example, 3 possible classes can be: turn right, turn left, and follow lane.

+
    +
  1. Sample a batch of transitions from the replay buffer, where the batch is balanced, meaning that an equal number +of transitions will be sampled from each class index.
  2. +
  3. Use the current states as input to the network, and assign the expert actions as the targets of the network heads +corresponding to the state classes. For the other heads, set the targets to match the currently predicted values, +so that the loss for the other heads will be zeroed out.
  4. +
  5. We use a regression head, that minimizes the MSE loss between the network predicted values and the target values.
  6. +
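The per-head masking described above can be sketched as follows, assuming one regression head per class and continuous actions (an illustration, not Coach's exact implementation):

import numpy as np

def cil_targets(predicted, expert_actions, class_indices):
    # predicted: (batch, num_heads, action_dim) current network outputs.
    # expert_actions: (batch, action_dim), class_indices: (batch,) class of each transition.
    targets = predicted.copy()
    idx = np.arange(len(class_indices))
    # Only the head matching each transition's class gets the expert action as a target;
    # the other heads keep their current predictions, so their MSE loss is zero.
    targets[idx, class_indices] = expert_actions
    return targets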
+
+
+class rl_coach.agents.cil_agent.CILAlgorithmParameters[source]
+
+++ + + + +
Parameters:state_key_with_the_class_index – (str) +The key of the state dictionary which corresponds to the value that will be used to control the class index.
+
+ +
+
+
+ + +
+ +
+ + +
+
+ +
+ +
+ + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/docs/components/agents/index.html b/docs/components/agents/index.html new file mode 100644 index 0000000..454759f --- /dev/null +++ b/docs/components/agents/index.html @@ -0,0 +1,819 @@ + + + + + + + + + + + Agents — Reinforcement Learning Coach 0.11.0 documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + +
+ + + + + +
+ +
+ + + + + + + + + + + + + + + + + +
+ + + + +
+
+
+
+ +
+

Agents

+

Coach supports many state-of-the-art reinforcement learning algorithms, which are separated into three main classes - +value optimization, policy optimization and imitation learning. +A detailed description of those algorithms can be found by navigating to each of the algorithm pages.

+../../_images/algorithms.png + +
+
+class rl_coach.base_parameters.AgentParameters(algorithm: rl_coach.base_parameters.AlgorithmParameters, exploration: ExplorationParameters, memory: MemoryParameters, networks: Dict[str, rl_coach.base_parameters.NetworkParameters], visualization: rl_coach.base_parameters.VisualizationParameters = <rl_coach.base_parameters.VisualizationParameters object>)[source]
+
+++ + + + +
Parameters:
    +
  • algorithm – A class inheriting AlgorithmParameters. +The parameters used for the specific algorithm used by the agent. +These parameters can be later referenced in the agent implementation through self.ap.algorithm.
  • +
  • exploration – Either a class inheriting ExplorationParameters or a dictionary mapping between action +space types and their corresponding ExplorationParameters. If a dictionary is used, +when the agent is instantiated, the correct exploration policy parameters will be used +according to the real type of the environment action space. +These parameters will be used to instantiate the exploration policy.
  • +
  • memory – A class inheriting MemoryParameters. It defines all the parameters used by the memory module.
  • +
  • networks – A dictionary mapping between network names and their corresponding network parameters, defined +as a class inheriting NetworkParameters. Each element will be used in order to instantiate +a NetworkWrapper class, and all the network wrappers will be stored in the agent under +self.network_wrappers. self.network_wrappers is a dict mapping between the network name that +was given in the networks dict, and the instantiated network wrapper.
  • +
  • visualization – A class inheriting VisualizationParameters and defining various parameters that can be +used for visualization purposes, such as printing to the screen, rendering, and saving videos.
  • +
+
+
+ +
+
+class rl_coach.agents.agent.Agent(agent_parameters: rl_coach.base_parameters.AgentParameters, parent: Union[LevelManager, CompositeAgent] = None)[source]
+
+++ + + + +
Parameters:agent_parameters – A AgentParameters class instance with all the agent parameters
+
+
+act() → rl_coach.core_types.ActionInfo[source]
+

Given the agent's current knowledge, decide on the next action to apply to the environment

+ +++ + + + +
Returns:An ActionInfo object, which contains the action and any additional info from the action decision process
+
+ +
+
+call_memory(func, args=())[source]
+

This function is a wrapper to allow having the same calls for shared or unshared memories. +It should be used instead of calling the memory directly in order to allow different algorithms to work +both with a shared and a local memory.

+ +++ + + + + + +
Parameters:
    +
  • func – the name of the memory function to call
  • +
  • args – the arguments to supply to the function
  • +
+
Returns:

the return value of the function

+
+
+ +
+
+choose_action(curr_state)[source]
+

choose an action to act with in the current episode being played. Different behavior might be exhibited when +training or testing.

+ +++ + + + + + +
Parameters:curr_state – the current state to act upon.
Returns:chosen action, some action value describing the action (q-value, probability, etc)
+
+ +
+
+create_networks() → Dict[str, rl_coach.architectures.network_wrapper.NetworkWrapper][source]
+

Create all the networks of the agent. +The network creation will be done after setting the environment parameters for the agent, since they are needed +for creating the network.

+ +++ + + + +
Returns:A list containing all the networks
+
+ +
+
+get_predictions(states: List[Dict[str, numpy.ndarray]], prediction_type: rl_coach.core_types.PredictionType)[source]
+

Get a prediction from the agent with regard to the requested prediction_type. +If the agent cannot predict this type of prediction_type, or if there is more than one possible way to do so, +raise a ValueException.

+ +++ + + + + + +
Parameters:
    +
  • states – The states to get a prediction for
  • +
  • prediction_type – The type of prediction to get for the states. For example, the state-value prediction.
  • +
+
Returns:

the predicted values

+
+
+ +
+
+get_state_embedding(state: dict) → numpy.ndarray[source]
+

Given a state, get the corresponding state embedding from the main network

+ +++ + + + + + +
Parameters:state – a state dict
Returns:a numpy embedding vector
+
+ +
+
+handle_episode_ended() → None[source]
+

Make any changes needed when each episode is ended. +This includes incrementing counters, updating full episode dependent values, updating logs, etc. +This function is called right after each episode is ended.

+ +++ + + + +
Returns:None
+
+ +
+
+init_environment_dependent_modules() → None[source]
+

Initialize any modules that depend on knowing information about the environment such as the action space or +the observation space

+ +++ + + + +
Returns:None
+
+ +
+
+learn_from_batch(batch) → Tuple[float, List, List][source]
+

Given a batch of transitions, calculates their target values and updates the network.

+ +++ + + + + + +
Parameters:batch – A list of transitions
Returns:The total loss of the training, the loss per head and the unclipped gradients
+
+ +
+
+log_to_screen() → None[source]
+

Write an episode summary line to the terminal

+ +++ + + + +
Returns:None
+
+ +
+
+observe(env_response: rl_coach.core_types.EnvResponse) → bool[source]
+

Given a response from the environment, distill the observation from it and store it for later use. +The response should be a dictionary containing the performed action, the new observation and measurements, +the reward, a game over flag and any additional information necessary.

+ +++ + + + + + +
Parameters:env_response – result of call from environment.step(action)
Returns:a boolean value which determines if the agent has decided to terminate the episode after seeing the +given observation
+
+ +
+
+parent
+

Get the parent class of the agent

+ +++ + + + +
Returns:the parent of the agent
+
+ +
+
+phase
+

The current running phase of the agent

+ +++ + + + +
Returns:RunPhase
+
+ +
+
+post_training_commands() → None[source]
+

A function which allows adding any functionality that is required to run right after the training phase ends.

+ +++ + + + +
Returns:None
+
+ +
+
+prepare_batch_for_inference(states: Union[Dict[str, numpy.ndarray], List[Dict[str, numpy.ndarray]]], network_name: str) → Dict[str, numpy.core.multiarray.array][source]
+

Convert curr_state into input tensors tensorflow is expecting. i.e. if we have several input states, stack all +observations together, measurements together, etc.

+ +++ + + + + + +
Parameters:
    +
  • states – A list of environment states, where each one is a dict mapping from an observation name to its +corresponding observation
  • +
  • network_name – The agent network name to prepare the batch for. this is needed in order to extract only +the observation relevant for the network from the states.
  • +
+
Returns:

A dictionary containing a list of values from all the given states for each of the observations

+
+
+ +
+
+register_signal(signal_name: str, dump_one_value_per_episode: bool = True, dump_one_value_per_step: bool = False) → rl_coach.utils.Signal[source]
+

Register a signal such that its statistics will be dumped and be viewable through dashboard

+ +++ + + + + + +
Parameters:
    +
  • signal_name – the name of the signal as it will appear in dashboard
  • +
  • dump_one_value_per_episode – should the signal value be written for each episode?
  • +
  • dump_one_value_per_step – should the signal value be written for each step?
  • +
+
Returns:

the created signal

+
+
+ +
+
+reset_evaluation_state(val: rl_coach.core_types.RunPhase) → None[source]
+

Perform accumulators initialization when entering an evaluation phase, and signal dumping when exiting an +evaluation phase. Entering or exiting the evaluation phase is determined according to the new phase given +by val, and by the current phase set in self.phase.

+ +++ + + + + + +
Parameters:val – The new phase to change to
Returns:None
+
+ +
+
+reset_internal_state() → None[source]
+

Reset all the episodic parameters. This function is called right before each episode starts.

+ +++ + + + +
Returns:None
+
+ +
+
+run_pre_network_filter_for_inference(state: Dict[str, numpy.ndarray]) → Dict[str, numpy.ndarray][source]
+

Run filters which were defined to be applied right before using the state for inference.

+ +++ + + + + + +
Parameters:state – The state to run the filters on
Returns:The filtered state
+
+ +
+
+save_checkpoint(checkpoint_id: int) → None[source]
+

Allows agents to store additional information when saving checkpoints.

+ +++ + + + + + +
Parameters:checkpoint_id – the id of the checkpoint
Returns:None
+
+ +
+
+set_environment_parameters(spaces: rl_coach.spaces.SpacesDefinition)[source]
+

Sets the parameters that are environment dependent. As a side effect, initializes all the components that are +dependent on those values, by calling init_environment_dependent_modules

+ +++ + + + + + +
Parameters:spaces – the environment spaces definition
Returns:None
+
+ +
+
+set_incoming_directive(action: Union[int, float, numpy.ndarray, List]) → None[source]
+

Allows setting a directive for the agent to follow. This is useful in hierarchy structures, where the agent +has another master agent that is controlling it. In such cases, the master agent can define the goals for the +slave agent, define its observation, possible actions, etc. The directive type is defined by the agent +in-action-space.

+ +++ + + + + + +
Parameters:action – The action that should be set as the directive
Returns:
+
+ +
+
+set_session(sess) → None[source]
+

Set the deep learning framework session for all the agents in the composite agent

+ +++ + + + +
Returns:None
+
+ +
+
+setup_logger() → None[source]
+

Setup the logger for the agent

+ +++ + + + +
Returns:None
+
+ +
+
+sync() → None[source]
+

Sync the global network parameters to local networks

+ +++ + + + +
Returns:None
+
+ +
+
+train() → float[source]
+

Check if a training phase should be done as configured by num_consecutive_playing_steps. +If it should, then do several training steps as configured by num_consecutive_training_steps. +A single training iteration: Sample a batch, train on it and update target networks.

+ +++ + + + +
Returns:The total training loss during the training iterations.
+
+ +
+
+update_log() → None[source]
+

Updates the episodic log file with all the signal values from the most recent episode. +Additional signals for logging can be set by the creating a new signal using self.register_signal, +and then updating it with some internal agent values.

+ +++ + + + +
Returns:None
+
+ +
+
+update_step_in_episode_log() → None[source]
+

Updates the in-episode log file with all the signal values from the most recent step.

+ +++ + + + +
Returns:None
+
+ +
+
+update_transition_before_adding_to_replay_buffer(transition: rl_coach.core_types.Transition) → rl_coach.core_types.Transition[source]
+

Allows agents to update the transition just before adding it to the replay buffer. +Can be useful for agents that want to tweak the reward, termination signal, etc.

+ +++ + + + + + +
Parameters:transition – the transition to update
Returns:the updated transition
+
+ +
+ +
+ + +
+ +
+ + +
+
+ +
+ +
+ + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/docs/components/agents/other/dfp.html b/docs/components/agents/other/dfp.html new file mode 100644 index 0000000..03a7bba --- /dev/null +++ b/docs/components/agents/other/dfp.html @@ -0,0 +1,341 @@ + + + + + + + + + + + Direct Future Prediction — Reinforcement Learning Coach 0.11.0 documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + +
+ + + + + +
+ +
+ + + + + + + + + + + + + + + + + +
+ + + + +
+
+
+
+ +
+

Direct Future Prediction

+

Actions space: Discrete

+

References: Learning to Act by Predicting the Future

+
+

Network Structure

+../../../_images/dfp.png +
+
+

Algorithm Description

+
+

Choosing an action

+
    +
  1. The current states (observations and measurements) and the corresponding goal vector are passed as an input to the network. +The output of the network is the predicted future measurements for time-steps \(t+1,t+2,t+4,t+8,t+16\) and +\(t+32\) for each possible action.
  2. +
  3. For each action, the measurements of each predicted time-step are multiplied by the goal vector, +and the result is a single vector of future values for each action.
  4. +
  5. Then, a weighted sum of the future values of each action is calculated, and the result is a single value for each action.
  6. +
  7. The action values are passed to the exploration policy to decide on the action to use.
  8. +
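The weighting of the predicted future measurements by the goal vector can be sketched as follows (shapes and names are assumptions made for illustration):

import numpy as np

def dfp_action_values(predicted_measurements, goal_vector, timestep_weights):
    # predicted_measurements: (num_actions, num_timesteps, num_measurements)
    # goal_vector: (num_measurements,), timestep_weights: (num_timesteps,)
    future_values = predicted_measurements @ goal_vector    # (num_actions, num_timesteps)
    # Weighted sum over the predicted time-steps gives a single value per action.
    return future_values @ timestep_weights                 # (num_actions,)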
+
+
+

Training the network

+

Given a batch of transitions, run them through the network to get the current predictions of the future measurements per action, and set them as the initial targets for training the network. For each transition \((s_t,a_t,r_t,s_{t+1})\) in the batch, the target of the network for the action that was taken is the actual measurements that were seen in time-steps \(t+1,t+2,t+4,t+8,t+16\) and \(t+32\). For the actions that were not taken, the targets are the current values.
+
+
+class rl_coach.agents.dfp_agent.DFPAlgorithmParameters[source]
+
+++ + + + +
Parameters:
    +
  • num_predicted_steps_ahead – (int) +Number of future steps to predict measurements for. The future steps won’t be sequential, but rather jump +in multiples of 2. For example, if num_predicted_steps_ahead = 3, then the steps will be: t+1, t+2, t+4
  • +
  • goal_vector – (List[float]) +The goal vector will weight each of the measurements to form an optimization goal. The vector should have +the same length as the number of measurements, and it will be vector multiplied by the measurements. +Positive values correspond to trying to maximize the particular measurement, and negative values +correspond to trying to minimize the particular measurement.
  • +
  • future_measurements_weights – (List[float]) +The future_measurements_weights weight the contribution of each of the predicted timesteps to the optimization +goal. For example, if there are 6 steps predicted ahead, and a future_measurements_weights vector with 3 values, +then only the 3 last timesteps will be taken into account, according to the weights in the +future_measurements_weights vector.
  • +
  • use_accumulated_reward_as_measurement – (bool) +If set to True, the accumulated reward from the beginning of the episode will be added as a measurement to +the measurements vector in the state. This can be useful in environments where the given measurements don’t +include enough information for the particular goal the agent should achieve.
  • +
  • handling_targets_after_episode_end – (HandlingTargetsAfterEpisodeEnd) +Dictates how to handle measurements that are outside the episode length.
  • +
  • scale_measurements_targets – (Dict[str, float]) +Allows rescaling the values of each of the measurements available. This can be useful when the measurements +have a different scale and you want to normalize them to the same scale.
  • +
+
+
+ +
+
+
+ + +
+ +
+ + +
+
+ +
+ +
+ + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/docs/components/agents/policy_optimization/ac.html b/docs/components/agents/policy_optimization/ac.html new file mode 100644 index 0000000..dc5a03c --- /dev/null +++ b/docs/components/agents/policy_optimization/ac.html @@ -0,0 +1,331 @@ + + + + + + + + + + + Actor-Critic — Reinforcement Learning Coach 0.11.0 documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + +
+ + + + + +
+ +
+ + + + + + + + + + + + + + + + + +
+ + + + +
+
+
+
+ +
+

Actor-Critic

+

Actions space: Discrete | Continuous

+

References: Asynchronous Methods for Deep Reinforcement Learning

+
+

Network Structure

+../../../_images/ac.png +
+
+

Algorithm Description

+
+

Choosing an action - Discrete actions

+

The policy network is used in order to predict action probabilities. While training, a sample is taken from a categorical +distribution assigned with these probabilities. When testing, the action with the highest probability is used.

+
+
+

Training the network

+

A batch of \(T_{max}\) transitions is used, and the advantages are calculated upon it.

+

Advantages can be calculated by either of the following methods (configured by the selected preset) -

+
    +
  1. A_VALUE - Estimating advantage directly: +\(A(s_t, a_t) = \underbrace{\sum_{i=t}^{i=t + k - 1} \gamma^{i-t}r_i +\gamma^{k} V(s_{t+k})}_{Q(s_t, a_t)} - V(s_t)\) +where \(k\) is \(T_{max} - State\_Index\) for each state in the batch.
  2. +
  3. GAE - By following the Generalized Advantage Estimation paper.
  4. +
+

The advantages are then used in order to accumulate gradients according to +\(L = -\mathop{\mathbb{E}} [log (\pi) \cdot A]\)
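A sketch of the A_VALUE advantage estimation over a batch of T_max transitions, assuming the critic's V(s_t) predictions and the bootstrap value V(s_{t+k}) are given (illustrative only):

import numpy as np

def n_step_advantages(rewards, values, bootstrap_value, gamma=0.99):
    # rewards, values: arrays over the T_max transitions of the batch, ordered in time.
    advantages = np.zeros(len(rewards))
    running_return = bootstrap_value            # V(s_{t+k}) of the state following the batch
    for i in reversed(range(len(rewards))):
        running_return = rewards[i] + gamma * running_return
        advantages[i] = running_return - values[i]   # Q(s_t, a_t) estimate minus V(s_t)
    return advantages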

+
+
+class rl_coach.agents.actor_critic_agent.ActorCriticAlgorithmParameters[source]
+
+++ + + + +
Parameters:
    +
  • policy_gradient_rescaler – (PolicyGradientRescaler) +The value that will be used to rescale the policy gradient
  • +
  • apply_gradients_every_x_episodes – (int) +The number of episodes to wait before applying the accumulated gradients to the network. +The training iterations only accumulate gradients without actually applying them.
  • +
  • beta_entropy – (float) +The weight that will be given to the entropy regularization which is used in order to improve exploration.
  • +
  • num_steps_between_gradient_updates – (int) +Every num_steps_between_gradient_updates transitions will be considered as a single batch and use for +accumulating gradients. This is also the number of steps used for bootstrapping according to the n-step formulation.
  • +
  • gae_lambda – (float) +If the policy gradient rescaler was defined as PolicyGradientRescaler.GAE, the generalized advantage estimation +scheme will be used, in which case the lambda value controls the decay for the different n-step lengths.
  • +
  • estimate_state_value_using_gae – (bool) +If set to True, the state value targets for the V head will be estimated using the GAE scheme.
  • +
+
+
+ +
+
+
+ + +
+ +
+ + +
+
+ +
+ +
+ + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/docs/components/agents/policy_optimization/cppo.html b/docs/components/agents/policy_optimization/cppo.html new file mode 100644 index 0000000..8f9b1c2 --- /dev/null +++ b/docs/components/agents/policy_optimization/cppo.html @@ -0,0 +1,354 @@ + + + + + + + + + + + Clipped Proximal Policy Optimization — Reinforcement Learning Coach 0.11.0 documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + +
+ + + + + +
+ +
+ + + + + + + + + + + + + + + + + +
+ + + + +
+
+
+
+ +
+

Clipped Proximal Policy Optimization

+

Actions space: Discrete | Continuous

+

References: Proximal Policy Optimization Algorithms

+
+

Network Structure

+../../../_images/ppo.png +
+
+

Algorithm Description

+
+

Choosing an action - Continuous action

+

Same as in PPO.

+
+
+

Training the network

+

Very similar to PPO, with several small (but very simplifying) changes:

+
    +
  1. Train both the value and policy networks, simultaneously, by defining a single loss function, +which is the sum of each of the networks loss functions. Then, back propagate gradients only once from this unified loss function.

    +
  2. +
  3. The unified network’s optimizer is set to Adam (instead of L-BFGS for the value network as in PPO).

    +
  4. +
  5. Value targets are now also calculated based on the GAE advantages. +In this method, the \(V\) values are predicted from the critic network, and then added to the GAE based advantages, +in order to get a \(Q\) value for each action. Now, since our critic network is predicting a \(V\) value for +each state, setting the \(Q\) calculated action-values as a target, will on average serve as a \(V\) state-value target.

    +
  6. +
  7. Instead of adapting the penalizing KL divergence coefficient used in PPO, the likelihood ratio +\(r_t(\theta) =\frac{\pi_{\theta}(a|s)}{\pi_{\theta_{old}}(a|s)}\) is clipped, to achieve a similar effect. +This is done by defining the policy’s loss function to be the minimum between the standard surrogate loss and an epsilon +clipped surrogate loss:

    +

    \(L^{CLIP}(\theta)=E_{t}[min(r_t(\theta)\cdot \hat{A}_t, clip(r_t(\theta), 1-\epsilon, 1+\epsilon) \cdot \hat{A}_t)]\)

    +
  8. +
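A sketch of the clipped surrogate loss from the last item above, given per-transition log probabilities under the new and old policies and the GAE advantages (the epsilon value is illustrative):

import numpy as np

def clipped_surrogate_loss(new_log_probs, old_log_probs, advantages, epsilon=0.2):
    # Likelihood ratio r_t(theta) between the new and old policies.
    ratio = np.exp(new_log_probs - old_log_probs)
    clipped_ratio = np.clip(ratio, 1.0 - epsilon, 1.0 + epsilon)
    # L^CLIP = -E[min(r * A, clip(r, 1-eps, 1+eps) * A)]
    return -np.mean(np.minimum(ratio * advantages, clipped_ratio * advantages))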
+
+
+class rl_coach.agents.clipped_ppo_agent.ClippedPPOAlgorithmParameters[source]
+
+++ + + + +
Parameters:
    +
  • policy_gradient_rescaler – (PolicyGradientRescaler) +This represents how the critic will be used to update the actor. The critic value function is typically used +to rescale the gradients calculated by the actor. There are several ways for doing this, such as using the +advantage of the action, or the generalized advantage estimation (GAE) value.
  • +
  • gae_lambda – (float) +The \(\lambda\) value is used within the GAE function in order to weight different bootstrap length +estimations. Typical values are in the range 0.9-1, and define an exponential decay over the different +n-step estimations.
  • +
  • clip_likelihood_ratio_using_epsilon – (float) +If not None, the likelihood ratio between the current and new policy in the PPO loss function will be +clipped to the range [1-clip_likelihood_ratio_using_epsilon, 1+clip_likelihood_ratio_using_epsilon]. +This is typically used in the Clipped PPO version of PPO, and should be set to None in regular PPO +implementations.
  • +
  • value_targets_mix_fraction – (float) +The targets for the value network are an exponential weighted moving average which uses this mix fraction to +define how much of the new targets will be taken into account when calculating the loss. +This value should be set to the range (0,1], where 1 means that only the new targets will be taken into account.
  • +
  • estimate_state_value_using_gae – (bool) +If set to True, the state value will be estimated using the GAE technique.
  • +
  • use_kl_regularization – (bool) +If set to True, the loss function will be regularized using the KL divergence between the current and new +policy, to bound the change of the policy during the network update.
  • +
  • beta_entropy – (float) +An entropy regularization term can be added to the loss function in order to control exploration. This term +is weighted using the \(\beta\) value defined by beta_entropy.
  • +
  • optimization_epochs – (int) +For each training phase, the collected dataset will be used for multiple epochs, which are defined by the +optimization_epochs value.
  • +
  • optimization_epochs – (Schedule) +Can be used to define a schedule over the clipping of the likelihood ratio.
  • +
+
+
+ +
+
+
+ + +
+ +
+ + +
+
+ +
+ +
+ + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/docs/components/agents/policy_optimization/ddpg.html b/docs/components/agents/policy_optimization/ddpg.html new file mode 100644 index 0000000..97261d8 --- /dev/null +++ b/docs/components/agents/policy_optimization/ddpg.html @@ -0,0 +1,345 @@ + + + + + + + + + + + Deep Deterministic Policy Gradient — Reinforcement Learning Coach 0.11.0 documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + +
+ + + + + +
+ +
+ + + + + + + + + + + + + + + + + +
+ + + + +
+
+
+
+ +
+

Deep Deterministic Policy Gradient

+

Actions space: Continuous

+

References: Continuous control with deep reinforcement learning

+
+

Network Structure

+../../../_images/ddpg.png +
+
+

Algorithm Description

+
+

Choosing an action

+

Pass the current states through the actor network, and get an action mean vector \(\mu\). +While in training phase, use a continuous exploration policy, such as the Ornstein-Uhlenbeck process, +to add exploration noise to the action. When testing, use the mean vector \(\mu\) as-is.

+
+
+

Training the network

+

Start by sampling a batch of transitions from the experience replay.

+
    +
  • To train the critic network, use the following targets:

    +

    \(y_t=r(s_t,a_t )+\gamma \cdot Q(s_{t+1},\mu(s_{t+1} ))\)

    +

    First run the actor target network, using the next states as the inputs, and get \(\mu (s_{t+1} )\). +Next, run the critic target network using the next states and \(\mu (s_{t+1} )\), and use the output to +calculate \(y_t\) according to the equation above. To train the network, use the current states and actions +as the inputs, and \(y_t\) as the targets.

    +
  • +
  • To train the actor network, use the following equation:

    +

    \(\nabla_{\theta^\mu } J \approx E_{s_t \tilde{} \rho^\beta } [\nabla_a Q(s,a)|_{s=s_t,a=\mu (s_t ) } \cdot \nabla_{\theta^\mu} \mu(s)|_{s=s_t} ]\)

    +

    Use the actor’s online network to get the action mean values using the current states as the inputs. +Then, use the critic online network in order to get the gradients of the critic output with respect to the +action mean values \(\nabla _a Q(s,a)|_{s=s_t,a=\mu(s_t ) }\). +Using the chain rule, calculate the gradients of the actor’s output, with respect to the actor weights, +given \(\nabla_a Q(s,a)\). Finally, apply those gradients to the actor network.

    +
  • +
+

After every training step, do a soft update of the critic and actor target networks’ weights from the online networks.
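The critic targets can be sketched as follows, assuming target_actor and target_critic are callables wrapping the target networks (an illustration, not Coach's implementation):

import numpy as np

def ddpg_critic_targets(rewards, dones, next_states, target_actor, target_critic, gamma=0.99):
    # y_t = r(s_t, a_t) + gamma * Q'(s_{t+1}, mu'(s_{t+1})), using the target networks.
    next_actions = target_actor(next_states)              # mu'(s_{t+1})
    next_q = target_critic(next_states, next_actions)     # Q'(s_{t+1}, mu'(s_{t+1}))
    return rewards + gamma * (1.0 - np.asarray(dones, dtype=float)) * next_q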

+
+
+class rl_coach.agents.ddpg_agent.DDPGAlgorithmParameters[source]
+
+++ + + + +
Parameters:
    +
  • num_steps_between_copying_online_weights_to_target – (StepMethod) +The number of steps between copying the online network weights to the target network weights.
  • +
  • rate_for_copying_weights_to_target – (float) +When copying the online network weights to the target network weights, a soft update will be used, which +weight the new online network weights by rate_for_copying_weights_to_target
  • +
  • num_consecutive_playing_steps – (StepMethod) +The number of consecutive steps to act between every two training iterations
  • +
  • use_target_network_for_evaluation – (bool) +If set to True, the target network will be used for predicting the actions when choosing actions to act. +Since the target network weights change more slowly, the predicted actions will be more consistent.
  • +
  • action_penalty – (float) +The amount by which to penalize the network on high action feature (pre-activation) values. +This can prevent the actions features from saturating the TanH activation function, and therefore prevent the +gradients from becoming very low.
  • +
  • clip_critic_targets – (Tuple[float, float] or None) +The range to clip the critic target to in order to prevent overestimation of the action values.
  • +
  • use_non_zero_discount_for_terminal_states – (bool) +If set to True, the discount factor will be used for terminal states to bootstrap the next predicted state +values. If set to False, the terminal states reward will be taken as the target return for the network.
  • +
+
+
+ +
+
+
+ + +
+ +
+ + +
+
+ +
+ +
+ + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/docs/components/agents/policy_optimization/hac.html b/docs/components/agents/policy_optimization/hac.html new file mode 100644 index 0000000..2a91895 --- /dev/null +++ b/docs/components/agents/policy_optimization/hac.html @@ -0,0 +1,249 @@ + + + + + + + + + + + Hierarchical Actor Critic — Reinforcement Learning Coach 0.11.0 documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + +
+ + + + + +
+ +
+ + + + + + + + + + + + + + + + + +
+ + + + +
+
+
+
+ +
+

Hierarchical Actor Critic

+

Actions space: Continuous

+

References: Hierarchical Reinforcement Learning with Hindsight

+
+

Network Structure

+../../../_images/ddpg.png +
+
+

Algorithm Description

+
+

Choosing an action

+

Pass the current states through the actor network, and get an action mean vector \(\mu\). +While in training phase, use a continuous exploration policy, such as the Ornstein-Uhlenbeck process, +to add exploration noise to the action. When testing, use the mean vector \(\mu\) as-is.

+
+
+

Training the network

+
+
+
+ + +
+ +
+ + +
+
+ +
+ +
+ + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/docs/components/agents/policy_optimization/pg.html b/docs/components/agents/policy_optimization/pg.html new file mode 100644 index 0000000..a14998a --- /dev/null +++ b/docs/components/agents/policy_optimization/pg.html @@ -0,0 +1,336 @@ + + + + + + + + + + + Policy Gradient — Reinforcement Learning Coach 0.11.0 documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + +
+ + + + + +
+ +
+ + + + + + + + + + + + + + + + + +
+ + + + +
+
+
+
+ +
+

Policy Gradient

+

Actions space: Discrete | Continuous

+

References: Simple Statistical Gradient-Following Algorithms for Connectionist Reinforcement Learning

+
+

Network Structure

+../../../_images/pg.png +
+
+

Algorithm Description

+
+

Choosing an action - Discrete actions

+

Run the current states through the network and get a policy distribution over the actions. +While training, sample from the policy distribution. When testing, take the action with the highest probability.

+
+
+

Training the network

+

The policy head loss is defined as \(L=-log (\pi) \cdot PolicyGradientRescaler\). +The PolicyGradientRescaler is used in order to reduce the policy gradient variance, which might be very noisy. +This is done in order to reduce the variance of the updates, since noisy gradient updates might destabilize the policy’s +convergence. The rescaler is a configurable parameter and there are few options to choose from:

+
    +
  • Total Episode Return - The sum of all the discounted rewards during the episode.
  • +
  • Future Return - Return from each transition until the end of the episode.
  • +
  • Future Return Normalized by Episode - Future returns across the episode normalized by the episode’s mean and standard deviation.
  • +
  • Future Return Normalized by Timestep - Future returns normalized using running means and standard deviations, +which are calculated separately for each timestep, across different episodes.
  • +
+

Gradients are accumulated over a number of full played episodes. The gradients accumulation over several episodes +serves the same purpose - reducing the update variance. After accumulating gradients for several episodes, +the gradients are then applied to the network.
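As an example of one of the rescalers above, the 'Future Return' rescaler can be computed per transition as follows (a sketch, not Coach's implementation):

import numpy as np

def future_returns(rewards, gamma=0.99):
    # Discounted return from each transition until the end of the episode.
    returns = np.zeros(len(rewards))
    running = 0.0
    for i in reversed(range(len(rewards))):
        running = rewards[i] + gamma * running
        returns[i] = running
    return returns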

+
+
+class rl_coach.agents.policy_gradients_agent.PolicyGradientAlgorithmParameters[source]
+
+++ + + + +
Parameters:
    +
  • policy_gradient_rescaler – (PolicyGradientRescaler) +The rescaler type to use for the policy gradient loss. For policy gradients, we calculate log probability of +the action and then multiply it by the policy gradient rescaler. The most basic rescaler is the discounter +return, but there are other rescalers that are intended for reducing the variance of the updates.
  • +
  • apply_gradients_every_x_episodes – (int) +The number of episodes between applying the accumulated gradients to the network. After every +num_steps_between_gradient_updates steps, the agent will calculate the gradients for the collected data, +it will then accumulate it in internal accumulators, and will only apply them to the network once in every +apply_gradients_every_x_episodes episodes.
  • +
  • beta_entropy – (float) +A factor which defines the amount of entropy regularization to apply to the network. The entropy of the actions +will be added to the loss and scaled by the given beta factor.
  • +
  • num_steps_between_gradient_updates – (int) +The number of steps between calculating gradients for the collected data. In the A3C paper, this parameter is +called t_max. Since this algorithm is on-policy, only the steps collected between each two gradient calculations +are used in the batch.
  • +
+
+
+ +
+
+
+ + +
+ +
+ + +
+
+ +
+ +
+ + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/docs/components/agents/policy_optimization/ppo.html b/docs/components/agents/policy_optimization/ppo.html new file mode 100644 index 0000000..19abb7b --- /dev/null +++ b/docs/components/agents/policy_optimization/ppo.html @@ -0,0 +1,355 @@ + + + + + + + + + + + Proximal Policy Optimization — Reinforcement Learning Coach 0.11.0 documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + +
+ + + + + +
+ +
+ + + + + + + + + + + + + + + + + +
+ + + + +
+
+
+
+ +
+

Proximal Policy Optimization

+

Actions space: Discrete | Continuous

+

References: Proximal Policy Optimization Algorithms

+
+

Network Structure

+../../../_images/ppo.png +
+
+

Algorithm Description

+
+

Choosing an action - Continuous actions

+

Run the observation through the policy network, and get the mean and standard deviation vectors for this observation. +While in training phase, sample from a multi-dimensional Gaussian distribution with these mean and standard deviation values. +When testing, just take the mean values predicted by the network.

+
+
+

Training the network

+
    +
  1. Collect a big chunk of experience (in the order of thousands of transitions, sampled from multiple episodes).
  2. +
  3. Calculate the advantages for each transition, using the Generalized Advantage Estimation method (Schulman ‘2015).
  4. +
  5. Run a single training iteration of the value network using an L-BFGS optimizer. Unlike first order optimizers, +the L-BFGS optimizer runs on the entire dataset at once, without batching. +It continues running until some low loss threshold is reached. To prevent overfitting to the current dataset, +the value targets are updated in a soft manner, using an Exponentially Weighted Moving Average, based on the total +discounted returns of each state in each episode.
  6. +
  7. Run several training iterations of the policy network. This is done by using the previously calculated advantages as +targets. The loss function penalizes policies that deviate too far from the old policy (the policy that was used before +starting to run the current set of training iterations) using a regularization term.
  8. +
  9. After training is done, the last sampled KL divergence value will be compared with the target KL divergence value, +in order to adapt the penalty coefficient used in the policy loss. If the KL divergence went too high, +increase the penalty, if it went too low, reduce it. Otherwise, leave it unchanged.
  10. +
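The KL penalty adaptation in the last step above can be sketched as follows; the factors 1.5 and 2 follow the heuristic suggested in the PPO paper, and are illustrative rather than Coach's exact values:

def adapt_kl_penalty(kl_divergence, target_kl, penalty_coefficient):
    # Grow the penalty when the measured KL is well above target, shrink it when
    # well below, and leave it unchanged otherwise.
    if kl_divergence > 1.5 * target_kl:
        return penalty_coefficient * 2.0
    if kl_divergence < target_kl / 1.5:
        return penalty_coefficient / 2.0
    return penalty_coefficient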
+
+
+class rl_coach.agents.ppo_agent.PPOAlgorithmParameters[source]
+
+++ + + + +
Parameters:
    +
  • policy_gradient_rescaler – (PolicyGradientRescaler) +This represents how the critic will be used to update the actor. The critic value function is typically used +to rescale the gradients calculated by the actor. There are several ways for doing this, such as using the +advantage of the action, or the generalized advantage estimation (GAE) value.
  • +
  • gae_lambda – (float) +The \(\lambda\) value is used within the GAE function in order to weight different bootstrap length +estimations. Typical values are in the range 0.9-1, and define an exponential decay over the different +n-step estimations.
  • +
  • target_kl_divergence – (float) The target KL divergence between the current policy distribution and the new policy. PPO uses a heuristic to bring the KL divergence to this value, by adding a penalty if the KL divergence is higher.
  • +
  • initial_kl_coefficient – (float) +The initial weight that will be given to the KL divergence between the current and the new policy in the +regularization factor.
  • +
  • high_kl_penalty_coefficient – (float) The penalty that will be given for KL divergence values which are higher than the defined target.
  • +
  • clip_likelihood_ratio_using_epsilon – (float) +If not None, the likelihood ratio between the current and new policy in the PPO loss function will be +clipped to the range [1-clip_likelihood_ratio_using_epsilon, 1+clip_likelihood_ratio_using_epsilon]. +This is typically used in the Clipped PPO version of PPO, and should be set to None in regular PPO +implementations.
  • +
  • value_targets_mix_fraction – (float) The targets for the value network are an exponentially weighted moving average which uses this mix fraction to define how much of the new targets will be taken into account when calculating the loss. This value should be in the range (0, 1], where 1 means that only the new targets will be taken into account.
  • +
  • estimate_state_value_using_gae – (bool) +If set to True, the state value will be estimated using the GAE technique.
  • +
  • use_kl_regularization – (bool) If set to True, the loss function will be regularized using the KL divergence between the current and new policy, to bound the change of the policy during the network update.
  • +
  • beta_entropy – (float) An entropy regularization term can be added to the loss function in order to control exploration. This term is weighted using the \(\beta\) value defined by beta_entropy.
  • +
+
+
+ +
+
+
+ + +
+ +
+ + +
+
+ +
+ +
+ + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/docs/components/agents/value_optimization/bs_dqn.html b/docs/components/agents/value_optimization/bs_dqn.html new file mode 100644 index 0000000..36f3af3 --- /dev/null +++ b/docs/components/agents/value_optimization/bs_dqn.html @@ -0,0 +1,309 @@ + + + + + + + + + + + Bootstrapped DQN — Reinforcement Learning Coach 0.11.0 documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + +
+ + + + + +
+ +
+ + + + + + + + + + + + + + + + + +
+ + + + +
+
+
+
+ +
+

Bootstrapped DQN

+

Actions space: Discrete

+

References: Deep Exploration via Bootstrapped DQN

+
+

Network Structure

+../../../_images/bs_dqn.png +
+
+

Algorithm Description

+
+

Choosing an action

+

The current states are used as the input to the network. The network contains several \(Q\) heads, which are used for returning different estimations of the action \(Q\) values. For each episode, the bootstrapped exploration policy selects a single head to play with during the episode. According to the selected head, only the relevant output \(Q\) values are used. Using those \(Q\) values, the exploration policy then selects the action for acting.

+
+
+

Storing the transitions

+

For each transition, a Binomial mask is generated according to a predefined probability, and the number of output heads. +The mask is a binary vector where each element holds a 0 for heads that shouldn’t train on the specific transition, +and 1 for heads that should use the transition for training. The mask is stored as part of the transition info in +the replay buffer.

+
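For illustration, the per-transition head mask could be generated as in the following sketch; num_heads and the sharing probability p are placeholder values:

```python
import numpy as np

num_heads, p = 10, 0.5                              # placeholder values
mask = np.random.binomial(1, p, size=num_heads)     # 1 = this head trains on the transition
# the mask would then be stored as part of the transition info in the replay buffer
print(mask)
```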
+
+

Training the network

+

First, sample a batch of transitions from the replay buffer. Run the current states through the network and get the current \(Q\) value predictions for all the heads and all the actions. For each transition in the batch, and for each output head, if the transition mask is 1, change the target of the played action to \(y_t\), according to the standard DQN update rule:

+

\(y_t=r(s_t,a_t )+\gamma\cdot max_a Q(s_{t+1},a)\)

+

Otherwise, leave it intact so that the transition does not affect the learning of this head. +Then, train the online network according to the calculated targets.

+
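A rough sketch of how the stored mask gates the per-head targets; the array shapes and random values are assumptions used only for illustration:

```python
import numpy as np

batch_size, num_heads, num_actions, gamma = 4, 3, 2, 0.99
q_current = np.random.rand(batch_size, num_heads, num_actions)   # online net on current states
q_next = np.random.rand(batch_size, num_heads, num_actions)      # target net on next states
rewards = np.random.rand(batch_size)
actions = np.random.randint(num_actions, size=batch_size)
masks = np.random.binomial(1, 0.5, size=(batch_size, num_heads)) # stored with each transition

targets = q_current.copy()        # start from the current predictions -> zero loss by default
for i in range(batch_size):
    for head in range(num_heads):
        if masks[i, head] == 1:   # only masked-in heads learn from this transition
            targets[i, head, actions[i]] = rewards[i] + gamma * q_next[i, head].max()
```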

As in DQN, once in every few thousand steps, copy the weights from the online network to the target network.

+
+
+
+ + +
+ +
+ + +
+
+ +
+ +
+ + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/docs/components/agents/value_optimization/categorical_dqn.html b/docs/components/agents/value_optimization/categorical_dqn.html new file mode 100644 index 0000000..7e152f7 --- /dev/null +++ b/docs/components/agents/value_optimization/categorical_dqn.html @@ -0,0 +1,325 @@ + + + + + + + + + + + Categorical DQN — Reinforcement Learning Coach 0.11.0 documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + +
+ + + + + +
+ +
+ + + + + + + + + + + + + + + + + +
+ + + + +
+
+
+
+ +
+

Categorical DQN

+

Actions space: Discrete

+

References: A Distributional Perspective on Reinforcement Learning

+
+

Network Structure

+../../../_images/distributional_dqn.png +
+
+

Algorithm Description

+
+

Training the network

+
    +
  1. Sample a batch of transitions from the replay buffer.

    +
  2. +
  3. The Bellman update is projected to the set of atoms representing the \(Q\) values distribution, such +that the \(i-th\) component of the projected update is calculated as follows:

    +

    \((\Phi \hat{T} Z_{\theta}(s_t,a_t))_i=\sum_{j=0}^{N-1}\Big[1-\frac{\lvert[\hat{T}_{z_{j}}]^{V_{MAX}}_{V_{MIN}}-z_i\rvert}{\Delta z}\Big]^1_0 \ p_j(s_{t+1}, \pi(s_{t+1}))\)

    +

    where:
  • \([ \cdot ]^b_a\) bounds its argument in the range \([a, b]\)
  • \(\hat{T}_{z_{j}}\) is the Bellman update for atom \(z_j\): \(\hat{T}_{z_{j}} := r+\gamma z_j\)

    +
  4. +
  5. Network is trained with the cross entropy loss between the resulting probability distribution and the target +probability distribution. Only the target of the actions that were actually taken is updated.

    +
  6. +
  7. Once in every few thousand steps, weights are copied from the online network to the target network.

    +
  8. +
+
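The projection in step 3 can be sketched in numpy as follows. This is a simplified, single-transition, non-terminal-state version; v_min, v_max and the atom count follow the parameters documented below, and the next-state distribution is a placeholder:

```python
import numpy as np

v_min, v_max, num_atoms, gamma = -10.0, 10.0, 51, 0.99
z = np.linspace(v_min, v_max, num_atoms)                  # atom support
delta_z = z[1] - z[0]

def project_distribution(reward, next_probs):
    """Project the Bellman-updated atoms back onto the fixed support (single transition)."""
    projected = np.zeros(num_atoms)
    for j in range(num_atoms):
        tz_j = np.clip(reward + gamma * z[j], v_min, v_max)   # Bellman update for atom j
        b = (tz_j - v_min) / delta_z                          # continuous index on the support
        lower, upper = int(np.floor(b)), int(np.ceil(b))
        if lower == upper:
            projected[lower] += next_probs[j]                 # update landed exactly on an atom
        else:
            projected[lower] += next_probs[j] * (upper - b)   # split mass between neighbors
            projected[upper] += next_probs[j] * (b - lower)
    return projected

next_probs = np.full(num_atoms, 1.0 / num_atoms)          # placeholder next-state distribution
print(project_distribution(reward=1.0, next_probs=next_probs).sum())   # ~1.0
```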
+
+class rl_coach.agents.categorical_dqn_agent.CategoricalDQNAlgorithmParameters[source]
+
+++ + + + +
Parameters:
    +
  • v_min – (float) +The minimal value that will be represented in the network output for predicting the Q value. +Corresponds to \(v_{min}\) in the paper.
  • +
  • v_max – (float) +The maximum value that will be represented in the network output for predicting the Q value. +Corresponds to \(v_{max}\) in the paper.
  • +
  • atoms – (int) +The number of atoms that will be used to discretize the range between v_min and v_max. +For the C51 algorithm described in the paper, the number of atoms is 51.
  • +
+
+
+ +
+
+
+ + +
+ +
+ + +
+
+ +
+ +
+ + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/docs/components/agents/value_optimization/double_dqn.html b/docs/components/agents/value_optimization/double_dqn.html new file mode 100644 index 0000000..cad5e2b --- /dev/null +++ b/docs/components/agents/value_optimization/double_dqn.html @@ -0,0 +1,298 @@ + + + + + + + + + + + Double DQN — Reinforcement Learning Coach 0.11.0 documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + +
+ + + + + +
+ +
+ + + + + + + + + + + + + + + + + +
+ + + + +
+
+
+
+ +
+

Double DQN

+

Actions space: Discrete

+

References: Deep Reinforcement Learning with Double Q-learning

+
+

Network Structure

+../../../_images/dqn.png +
+
+

Algorithm Description

+
+

Training the network

+
    +
  1. Sample a batch of transitions from the replay buffer.
  2. +
  3. Using the next states from the sampled batch, run the online network in order to find the \(Q\)-maximizing action \(argmax_a Q(s_{t+1},a)\). For these actions, use the corresponding next states and run the target network to calculate \(Q(s_{t+1},argmax_a Q(s_{t+1},a))\) (see the sketch following this list).
  4. +
  5. In order to zero out the updates for the actions that were not played (resulting from zeroing the MSE loss), +use the current states from the sampled batch, and run the online network to get the current Q values predictions. +Set those values as the targets for the actions that were not actually played.
  6. +
  7. For each action that was played, use the following equation for calculating the targets of the network: +\(y_t=r(s_t,a_t )+\gamma \cdot Q(s_{t+1},argmax_a Q(s_{t+1},a))\)
  8. +
  9. Finally, train the online network using the current states as inputs, and with the aforementioned targets.
  10. +
  11. Once in every few thousand steps, copy the weights from the online network to the target network.
  12. +
+
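A sketch of the Double DQN target computation described above, using random numpy arrays in place of real network outputs:

```python
import numpy as np

gamma, batch_size, num_actions = 0.99, 32, 4
rewards = np.random.rand(batch_size)
q_next_online = np.random.rand(batch_size, num_actions)   # online net on the next states
q_next_target = np.random.rand(batch_size, num_actions)   # target net on the next states

best_actions = q_next_online.argmax(axis=1)                # argmax_a Q_online(s_{t+1}, a)
targets = rewards + gamma * q_next_target[np.arange(batch_size), best_actions]
```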
+
+
+ + +
+ +
+ + +
+
+ +
+ +
+ + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/docs/components/agents/value_optimization/dqn.html b/docs/components/agents/value_optimization/dqn.html new file mode 100644 index 0000000..103d45e --- /dev/null +++ b/docs/components/agents/value_optimization/dqn.html @@ -0,0 +1,302 @@ + + + + + + + + + + + Deep Q Networks — Reinforcement Learning Coach 0.11.0 documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + +
+ + + + + +
+ +
+ + + + + + + + + + + + + + + + + +
+ + + + +
+
+
+
+ +
+

Deep Q Networks

+

Actions space: Discrete

+

References: Playing Atari with Deep Reinforcement Learning

+
+

Network Structure

+../../../_images/dqn.png +
+
+

Algorithm Description

+
+

Training the network

+
    +
  1. Sample a batch of transitions from the replay buffer.
  2. +
  3. Using the next states from the sampled batch, run the target network to calculate the \(Q\) values for each of +the actions \(Q(s_{t+1},a)\), and keep only the maximum value for each state.
  4. +
  5. In order to zero out the updates for the actions that were not played (resulting from zeroing the MSE loss), +use the current states from the sampled batch, and run the online network to get the current Q values predictions. +Set those values as the targets for the actions that were not actually played.
  6. +
  7. For each action that was played, use the following equation for calculating the targets of the network: \(y_t=r(s_t,a_t )+\gamma \cdot max_a Q(s_{t+1},a)\) (see the sketch following this list).
  8. +
  9. Finally, train the online network using the current states as inputs, and with the aforementioned targets.
  10. +
  11. Once in every few thousand steps, copy the weights from the online network to the target network.
  12. +
+
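The "zero out the non-played actions" trick in steps 3-4 amounts to copying the current predictions and overwriting only the played action, roughly as in this sketch (placeholder arrays stand in for the network outputs):

```python
import numpy as np

gamma, batch_size, num_actions = 0.99, 32, 4
rewards = np.random.rand(batch_size)
actions = np.random.randint(num_actions, size=batch_size)
q_current = np.random.rand(batch_size, num_actions)   # online net on the current states
q_next = np.random.rand(batch_size, num_actions)      # target net on the next states

targets = q_current.copy()                             # identical targets -> zero MSE gradient
targets[np.arange(batch_size), actions] = rewards + gamma * q_next.max(axis=1)
# the online network is then trained on (current states, targets)
```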
+
+class rl_coach.agents.dqn_agent.DQNAlgorithmParameters[source]
+
+ +
+
+
+ + +
+ +
+ + +
+
+ +
+ +
+ + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/docs/components/agents/value_optimization/dueling_dqn.html b/docs/components/agents/value_optimization/dueling_dqn.html new file mode 100644 index 0000000..0b9e91a --- /dev/null +++ b/docs/components/agents/value_optimization/dueling_dqn.html @@ -0,0 +1,289 @@ + + + + + + + + + + + Dueling DQN — Reinforcement Learning Coach 0.11.0 documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + +
+ + + + + +
+ +
+ + + + + + + + + + + + + + + + + +
+ + + + +
+
+
+
+ +
+

Dueling DQN

+

Actions space: Discrete

+

References: Dueling Network Architectures for Deep Reinforcement Learning

+
+

Network Structure

+../../../_images/dueling_dqn.png +
+
+

General Description

+

Dueling DQN presents a change in the network structure compared to DQN.

+

Dueling DQN uses a specialized Dueling Q Head in order to separate \(Q\) into an \(A\) (advantage) stream and a \(V\) (state-value) stream. Adding this type of structure to the network head allows the network to better differentiate actions from one another, and significantly improves learning.

+

In many states, the values of the different actions are very similar, and it matters less which action is taken. This is especially important in environments with many actions to choose from. In DQN, on each training iteration, for each of the states in the batch, we update the \(Q\) values only for the specific actions taken in those states. This results in slower learning, as we do not learn the \(Q\) values for actions that were not taken yet. With the dueling architecture, on the other hand, learning is faster, since we start learning the state-value even if only a single action has been taken in this state.

+
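The dueling head combines the two streams into \(Q\) values; a common formulation (used in the paper) subtracts the mean advantage for identifiability, as in this hedged sketch with placeholder stream outputs:

```python
import numpy as np

def dueling_q_values(v, a):
    """Combine a state-value stream V (batch, 1) and an advantage stream A (batch, actions)."""
    return v + a - a.mean(axis=1, keepdims=True)    # zero-mean advantages for identifiability

v = np.array([[1.0], [0.5]])                         # placeholder V stream outputs
a = np.array([[0.2, -0.2, 0.0], [1.0, 0.0, -1.0]])   # placeholder A stream outputs
print(dueling_q_values(v, a))
```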
+
+ + +
+ +
+ + +
+
+ +
+ +
+ + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/docs/components/agents/value_optimization/mmc.html b/docs/components/agents/value_optimization/mmc.html new file mode 100644 index 0000000..9a883d1 --- /dev/null +++ b/docs/components/agents/value_optimization/mmc.html @@ -0,0 +1,309 @@ + + + + + + + + + + + Mixed Monte Carlo — Reinforcement Learning Coach 0.11.0 documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + +
+ + + + + +
+ +
+ + + + + + + + + + + + + + + + + +
+ + + + +
+
+
+
+ +
+

Mixed Monte Carlo

+

Actions space: Discrete

+

References: Count-Based Exploration with Neural Density Models

+
+

Network Structure

+../../../_images/dqn.png +
+
+

Algorithm Description

+
+

Training the network

+

In MMC, targets are calculated as a mixture between Double DQN targets and full Monte Carlo samples (total discounted returns).

+

The DDQN targets are calculated in the same manner as in the DDQN agent:

+

\(y_t^{DDQN}=r(s_t,a_t )+\gamma Q(s_{t+1},argmax_a Q(s_{t+1},a))\)

+

The Monte Carlo targets are calculated by summing up the discounted rewards across the entire episode:

+

\(y_t^{MC}=\sum_{j=0}^T\gamma^j r(s_{t+j},a_{t+j} )\)

+

A mixing ratio \(\alpha\) is then used to get the final targets:

+

\(y_t=(1-\alpha)\cdot y_t^{DDQN}+\alpha \cdot y_t^{MC}\)

+

Finally, the online network is trained using the current states as inputs, and the calculated targets. +Once in every few thousand steps, copy the weights from the online network to the target network.

+
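Putting the pieces together, the mixed target is a simple convex combination; the following sketch uses a placeholder episode and placeholder DDQN targets:

```python
import numpy as np

alpha, gamma = 0.1, 0.99                     # alpha is the monte_carlo_mixing_rate (assumed value)
rewards = np.array([1.0, 0.0, 0.0, 1.0])     # rewards of one episode (placeholder)

# Monte Carlo targets: total discounted return from each step to the end of the episode
y_mc = np.zeros_like(rewards)
running = 0.0
for t in reversed(range(len(rewards))):
    running = rewards[t] + gamma * running
    y_mc[t] = running

y_ddqn = np.random.rand(len(rewards))        # placeholder DDQN targets for the same steps
y = (1 - alpha) * y_ddqn + alpha * y_mc      # final mixed targets
```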
+
+class rl_coach.agents.mmc_agent.MixedMonteCarloAlgorithmParameters[source]
+
+++ + + + +
Parameters:monte_carlo_mixing_rate – (float) The mixing rate is used for setting the amount of the Monte Carlo estimate (full return) that will be mixed into the single-step bootstrapped targets.
+
+ +
+
+
+ + +
+ +
+ + +
+
+ +
+ +
+ + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/docs/components/agents/value_optimization/n_step.html b/docs/components/agents/value_optimization/n_step.html new file mode 100644 index 0000000..0a006fe --- /dev/null +++ b/docs/components/agents/value_optimization/n_step.html @@ -0,0 +1,326 @@ + + + + + + + + + + + N-Step Q Learning — Reinforcement Learning Coach 0.11.0 documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + +
+ + + + + +
+ +
+ + + + + + + + + + + + + + + + + +
+ + + + +
+
+
+
+ +
+

N-Step Q Learning

+

Actions space: Discrete

+

References: Asynchronous Methods for Deep Reinforcement Learning

+
+

Network Structure

+../../../_images/dqn.png +
+
+

Algorithm Description

+
+

Training the network

+

The \(N\)-step Q learning algorithm works in a similar manner to DQN, except for the following changes:

+
    +
  1. No replay buffer is used. Instead of sampling random batches of transitions, the network is trained every +\(N\) steps using the latest \(N\) steps played by the agent.
  2. +
  3. In order to stabilize the learning, multiple workers work together to update the network. This has a decorrelating effect on the samples used for training, similar to that of a replay buffer.
  4. +
  5. Instead of using single-step Q targets for the network, the rewards from \(N\) consecutive steps are accumulated to form the \(N\)-step Q targets, according to the following equation (see the sketch below): \(R(s_t, a_t) = \sum_{i=t}^{i=t + k - 1} \gamma^{i-t}r_i + \gamma^{k} V(s_{t+k})\), where \(k\) is \(T_{max} - State\_Index\) for each state in the batch.
  6. +
+
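The N-step target in step 3 can be sketched as follows for a single rollout; the rewards, rollout length, and bootstrap value are placeholders:

```python
import numpy as np

gamma = 0.99
rewards = np.random.rand(20)            # the latest T_max steps played by the agent (placeholder)
v_bootstrap = 1.5                       # V(s_{t+k}): value of the state after the last step

targets = np.zeros_like(rewards)
running = v_bootstrap                   # for each state, k = T_max - state_index
for t in reversed(range(len(rewards))):
    running = rewards[t] + gamma * running
    targets[t] = running                # R(s_t, a_t) as defined above
```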
+
+class rl_coach.agents.n_step_q_agent.NStepQAlgorithmParameters[source]
+
+++ + + + +
Parameters:
    +
  • num_steps_between_copying_online_weights_to_target – (StepMethod) +The number of steps between copying the online network weights to the target network weights.
  • +
  • apply_gradients_every_x_episodes – (int) The number of episodes between applying the accumulated gradients to the network. After every num_steps_between_gradient_updates steps, the agent calculates the gradients for the collected data, accumulates them in internal accumulators, and only applies them to the network once every apply_gradients_every_x_episodes episodes.
  • +
  • num_steps_between_gradient_updates – (int) +The number of steps between calculating gradients for the collected data. In the A3C paper, this parameter is +called t_max. Since this algorithm is on-policy, only the steps collected between each two gradient calculations +are used in the batch.
  • +
  • targets_horizon – (str) +Should be either ‘N-Step’ or ‘1-Step’, and defines the length for which to bootstrap the network values over. +Essentially, 1-Step follows the regular 1 step bootstrapping Q learning update. For more information, +please refer to the original paper (https://arxiv.org/abs/1602.01783)
  • +
+
+
+ +
+
+
+ + +
+ +
+ + +
+
+ +
+ +
+ + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/docs/components/agents/value_optimization/naf.html b/docs/components/agents/value_optimization/naf.html new file mode 100644 index 0000000..047c6f8 --- /dev/null +++ b/docs/components/agents/value_optimization/naf.html @@ -0,0 +1,302 @@ + + + + + + + + + + + Normalized Advantage Functions — Reinforcement Learning Coach 0.11.0 documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + +
+ + + + + +
+ +
+ + + + + + + + + + + + + + + + + +
+ + + + +
+
+
+
+ +
+

Normalized Advantage Functions

+

Actions space: Continuous

+

References: Continuous Deep Q-Learning with Model-based Acceleration

+
+

Network Structure

+../../../_images/naf.png +
+
+

Algorithm Description

+
+

Choosing an action

+

The current state is used as an input to the network. The action mean \(\mu(s_t )\) is extracted from the output head. +It is then passed to the exploration policy which adds noise in order to encourage exploration.

+
+
+

Training the network

+

The network is trained by using the following targets: +\(y_t=r(s_t,a_t )+\gamma\cdot V(s_{t+1})\) +Use the next states as the inputs to the target network and extract the \(V\) value, from within the head, +to get \(V(s_{t+1} )\). Then, update the online network using the current states and actions as inputs, +and \(y_t\) as the targets. +After every training step, use a soft update in order to copy the weights from the online network to the target network.

+
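The soft target update mentioned above can be sketched as a per-parameter mix between the online and target weights; tau is an assumed rate and the weight arrays are placeholders:

```python
import numpy as np

def soft_update(online_weights, target_weights, tau=0.001):
    """Move the target weights a small step towards the online weights after every training step."""
    return [tau * w_online + (1.0 - tau) * w_target
            for w_online, w_target in zip(online_weights, target_weights)]

online = [np.ones((2, 2))]
target = [np.zeros((2, 2))]
target = soft_update(online, target)
```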
+
+class rl_coach.agents.naf_agent.NAFAlgorithmParameters[source]
+
+ +
+
+
+ + +
+ +
+ + +
+
+ +
+ +
+ + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/docs/components/agents/value_optimization/nec.html b/docs/components/agents/value_optimization/nec.html new file mode 100644 index 0000000..244814c --- /dev/null +++ b/docs/components/agents/value_optimization/nec.html @@ -0,0 +1,351 @@ + + + + + + + + + + + Neural Episodic Control — Reinforcement Learning Coach 0.11.0 documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + +
+ + + + + +
+ +
+ + + + + + + + + + + + + + + + + +
+ + + + +
+
+
+
+ +
+

Neural Episodic Control

+

Actions space: Discrete

+

References: Neural Episodic Control

+
+

Network Structure

+../../../_images/nec.png +
+
+

Algorithm Description

+
+

Choosing an action

+
    +
  1. Use the current state as an input to the online network and extract the state embedding, which is the intermediate +output from the middleware.
  2. +
  3. For each possible action \(a_i\), run the DND head using the state embedding and the selected action \(a_i\) as inputs. +The DND is queried and returns the \(P\) nearest neighbor keys and values. The keys and values are used to calculate +and return the action \(Q\) value from the network.
  4. +
  5. Pass all the \(Q\) values to the exploration policy and choose an action accordingly.
  6. +
  7. Store the state embeddings and actions taken during the current episode in a small buffer \(B\), in order to +accumulate transitions until it is possible to calculate the total discounted returns over the entire episode.
  8. +
+
+
+

Finalizing an episode

+

For each step in the episode, the state embeddings and the taken actions are stored in the buffer \(B\). +When the episode is finished, the replay buffer calculates the \(N\)-step total return of each transition in the +buffer, bootstrapped using the maximum \(Q\) value of the \(N\)-th transition. Those values are inserted +along with the total return into the DND, and the buffer \(B\) is reset.

+
+
+

Training the network

+

Train the network only when the DND has enough entries for querying.

+

To train the network, the current states are used as the inputs and the \(N\)-step returns are used as the targets. +The \(N\)-step return used takes into account \(N\) consecutive steps, and bootstraps the last value from +the network if necessary: +\(y_t=\sum_{j=0}^{N-1}\gamma^j r(s_{t+j},a_{t+j} ) +\gamma^N max_a Q(s_{t+N},a)\)

+
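A sketch of the \(N\)-step return computed for each buffered transition before it is written to the DND. The rewards and the per-step maximum \(Q\) values are placeholders; the real implementation bootstraps from the network's maximum \(Q\) value at step \(t+N\):

```python
import numpy as np

gamma, n_step = 0.99, 5
rewards = np.random.rand(12)                 # rewards of one finished episode (placeholder)
max_q = np.random.rand(12)                   # max_a Q(s_t, a) for each step (placeholder)

returns = np.zeros_like(rewards)
for t in range(len(rewards)):
    horizon = min(t + n_step, len(rewards))
    discounts = gamma ** np.arange(horizon - t)
    returns[t] = np.sum(discounts * rewards[t:horizon])
    if t + n_step < len(rewards):            # bootstrap only if the episode didn't end first
        returns[t] += gamma ** n_step * max_q[t + n_step]
# (state embedding, action) -> returns[t] pairs are then inserted into the DND
```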
+
+class rl_coach.agents.nec_agent.NECAlgorithmParameters[source]
+
+++ + + + +
Parameters:
    +
  • dnd_size – (int) +Defines the number of transitions that will be stored in each one of the DNDs. Note that the total number +of transitions that will be stored is dnd_size x num_actions.
  • +
  • l2_norm_added_delta – (float) A small value that will be added when calculating the weight of each of the DND entries. This follows the \(\delta\) parameter defined in the paper.
  • +
  • new_value_shift_coefficient – (float) In the case where a new embedding that is added to the DND is already present, the value that will be stored in the DND is a mix between the existing value and the new value. The mix rate is defined by new_value_shift_coefficient.
  • +
  • number_of_knn – (int) +The number of neighbors that will be retrieved for each DND query.
  • +
  • DND_key_error_threshold – (float) +When the DND is queried for a specific embedding, this threshold will be used to determine if the embedding +exists in the DND, since exact matches of embeddings are very rare.
  • +
  • propagate_updates_to_DND – (bool) +If set to True, when the gradients of the network will be calculated, the gradients will also be +backpropagated through the keys of the DND. The keys will then be updated as well, as if they were regular +network weights.
  • +
  • n_step – (int) +The bootstrap length that will be used when calculating the state values to store in the DND.
  • +
  • bootstrap_total_return_from_old_policy – (bool) +If set to True, the bootstrap that will be used to calculate each state-action value, is the network value +when the state was first seen, and not the latest, most up-to-date network value.
  • +
+
+
+ +
+
+
+ + +
+ +
+ + +
+
+ +
+ +
+ + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/docs/components/agents/value_optimization/pal.html b/docs/components/agents/value_optimization/pal.html new file mode 100644 index 0000000..7e26d5a --- /dev/null +++ b/docs/components/agents/value_optimization/pal.html @@ -0,0 +1,329 @@ + + + + + + + + + + + Persistent Advantage Learning — Reinforcement Learning Coach 0.11.0 documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + +
+ + + + + +
+ +
+ + + + + + + + + + + + + + + + + +
+ + + + +
+
+
+
+ +
+

Persistent Advantage Learning

+

Actions space: Discrete

+

References: Increasing the Action Gap: New Operators for Reinforcement Learning

+
+

Network Structure

+../../../_images/dqn.png +
+
+

Algorithm Description

+
+

Training the network

+
    +
  1. Sample a batch of transitions from the replay buffer.
  2. +
  3. Start by calculating the initial target values in the same manner as they are calculated in DDQN +\(y_t^{DDQN}=r(s_t,a_t )+\gamma Q(s_{t+1},argmax_a Q(s_{t+1},a))\)
  4. +
  5. The action gap \(V(s_t )-Q(s_t,a_t)\) should then be subtracted from each of the calculated targets. +To calculate the action gap, run the target network using the current states and get the \(Q\) values +for all the actions. Then estimate \(V\) as the maximum predicted \(Q\) value for the current state: +\(V(s_t )=max_a Q(s_t,a)\)
  6. +
  7. For advantage learning (AL), subtract the action gap, weighted by a predefined parameter \(\alpha\), from the targets \(y_t^{DDQN}\): \(y_t=y_t^{DDQN}-\alpha \cdot (V(s_t )-Q(s_t,a_t ))\)
  8. +
  9. For persistent advantage learning (PAL), the target network is also used in order to calculate the action gap for the next state: \(V(s_{t+1} )-Q(s_{t+1},a_{t+1})\), where \(a_{t+1}\) is chosen by running the next states through the online network and choosing the action that has the highest predicted \(Q\) value. Finally, the targets are defined as \(y_t=y_t^{DDQN}-\alpha \cdot min(V(s_t )-Q(s_t,a_t ),V(s_{t+1} )-Q(s_{t+1},a_{t+1} ))\) (see the sketch following this list).
  10. +
  11. Train the online network using the current states as inputs, and with the aforementioned targets.
  12. +
  13. Once in every few thousand steps, copy the weights from the online network to the target network.
  14. +
+
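The AL and PAL corrections in steps 4-5 reduce to a few array operations; the following sketch uses placeholder network outputs and an assumed \(\alpha\):

```python
import numpy as np

alpha, gamma, batch_size, num_actions = 0.9, 0.99, 8, 4
rewards = np.random.rand(batch_size)
actions = np.random.randint(num_actions, size=batch_size)
next_actions = np.random.randint(num_actions, size=batch_size)   # argmax of the online net (placeholder)
q_target_current = np.random.rand(batch_size, num_actions)       # target net, current states
q_target_next = np.random.rand(batch_size, num_actions)          # target net, next states

idx = np.arange(batch_size)
y_ddqn = rewards + gamma * q_target_next[idx, next_actions]

gap_current = q_target_current.max(axis=1) - q_target_current[idx, actions]
gap_next = q_target_next.max(axis=1) - q_target_next[idx, next_actions]

y_al = y_ddqn - alpha * gap_current                               # advantage learning
y_pal = y_ddqn - alpha * np.minimum(gap_current, gap_next)        # persistent advantage learning
```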
+
+class rl_coach.agents.pal_agent.PALAlgorithmParameters[source]
+
+++ + + + +
Parameters:
    +
  • pal_alpha – (float) +A factor that weights the amount by which the advantage learning update will be taken into account.
  • +
  • persistent_advantage_learning – (bool) +If set to True, the persistent mode of advantage learning will be used, which encourages the agent to take +the same actions one after the other instead of changing actions.
  • +
  • monte_carlo_mixing_rate – (float) +The amount of monte carlo values to mix into the targets of the network. The monte carlo values are just the +total discounted returns, and they can help reduce the time it takes for the network to update to the newly +seen values, since it is not based on bootstrapping the current network values.
  • +
+
+
+ +
+
+
+ + +
+ +
+ + +
+
+ +
+ +
+ + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/docs/components/agents/value_optimization/qr_dqn.html b/docs/components/agents/value_optimization/qr_dqn.html new file mode 100644 index 0000000..9542fa6 --- /dev/null +++ b/docs/components/agents/value_optimization/qr_dqn.html @@ -0,0 +1,315 @@ + + + + + + + + + + + Quantile Regression DQN — Reinforcement Learning Coach 0.11.0 documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + +
+ + + + + +
+ +
+ + + + + + + + + + + + + + + + + +
+ + + + +
+
+
+
+ +
+

Quantile Regression DQN

+

Actions space: Discrete

+

References: Distributional Reinforcement Learning with Quantile Regression

+
+

Network Structure

+../../../_images/qr_dqn.png +
+
+

Algorithm Description

+
+

Training the network

+
    +
  1. Sample a batch of transitions from the replay buffer.
  2. +
  3. First, the next state quantiles are predicted. These are used in order to calculate the targets for the network, +by following the Bellman equation. +Next, the current quantile locations for the current states are predicted, sorted, and used for calculating the +quantile midpoints targets.
  4. +
  5. The network is trained with the quantile regression loss between the resulting quantile locations and the target +quantile locations. Only the targets of the actions that were actually taken are updated.
  6. +
  7. Once in every few thousand steps, weights are copied from the online network to the target network.
  8. +
+
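The quantile regression loss in step 3 combines a Huber loss (with the huber_loss_interval \(\kappa\) described below) with an asymmetric quantile weighting. Here is a small numpy sketch for a single sample; the reduction over quantile pairs is simplified relative to the paper:

```python
import numpy as np

def quantile_huber_loss(pred_quantiles, target_quantiles, kappa=1.0):
    """Quantile regression loss between predicted and target quantile locations (one sample)."""
    n = len(pred_quantiles)
    taus = (np.arange(n) + 0.5) / n                      # quantile midpoints
    u = target_quantiles[None, :] - pred_quantiles[:, None]   # pairwise TD errors
    huber = np.where(np.abs(u) <= kappa,
                     0.5 * u ** 2,                       # quadratic (MSE-like) inside [-kappa, kappa]
                     kappa * (np.abs(u) - 0.5 * kappa))  # linear outside
    weight = np.abs(taus[:, None] - (u < 0).astype(float))
    return (weight * huber / kappa).mean()

print(quantile_huber_loss(np.sort(np.random.rand(51)), np.random.rand(51)))
```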
+
+class rl_coach.agents.qr_dqn_agent.QuantileRegressionDQNAlgorithmParameters[source]
+
+++ + + + +
Parameters:
    +
  • atoms – (int) +the number of atoms to predict for each action
  • +
  • huber_loss_interval – (float) One of the Huber loss parameters, referred to as \(\kappa\) in the paper. It describes the interval \([-\kappa, \kappa]\) in which the Huber loss acts as an MSE loss.
  • +
+
+
+ +
+
+
+ + +
+ +
+ + +
+
+ +
+ +
+ + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/docs/components/agents/value_optimization/rainbow.html b/docs/components/agents/value_optimization/rainbow.html new file mode 100644 index 0000000..9aa1c93 --- /dev/null +++ b/docs/components/agents/value_optimization/rainbow.html @@ -0,0 +1,337 @@ + + + + + + + + + + + Rainbow — Reinforcement Learning Coach 0.11.0 documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + +
+ + + + + +
+ +
+ + + + + + + + + + + + + + + + + +
+ + + + +
+
+
+
+ +
+

Rainbow

+

Actions space: Discrete

+

References: Rainbow: Combining Improvements in Deep Reinforcement Learning

+
+

Network Structure

+../../../_images/rainbow.png +
+
+

Algorithm Description

+

Rainbow combines 6 recent advancements in reinforcement learning:

+
    +
  • N-step returns
  • +
  • Distributional state-action value learning
  • +
  • Dueling networks
  • +
  • Noisy Networks
  • +
  • Double DQN
  • +
  • Prioritized Experience Replay
  • +
+
+

Training the network

+
    +
  1. Sample a batch of transitions from the replay buffer.

    +
  2. +
  3. The Bellman update is projected to the set of atoms representing the \(Q\) values distribution, such +that the \(i-th\) component of the projected update is calculated as follows:

    +

    \((\Phi \hat{T} Z_{\theta}(s_t,a_t))_i=\sum_{j=0}^{N-1}\Big[1-\frac{\lvert[\hat{T}_{z_{j}}]^{V_{MAX}}_{V_{MIN}}-z_i\rvert}{\Delta z}\Big]^1_0 \ p_j(s_{t+1}, \pi(s_{t+1}))\)

    +

    where:
  • \([ \cdot ]^b_a\) bounds its argument in the range \([a, b]\)
  • \(\hat{T}_{z_{j}}\) is the \(n\)-step Bellman update for atom \(z_j\): \(\hat{T}_{z_{j}} := r_t+\gamma r_{t+1} + ... + \gamma^{n-1} r_{t+n-1} + \gamma^{n} z_j\)

    +
  4. +
  5. Network is trained with the cross entropy loss between the resulting probability distribution and the target +probability distribution. Only the target of the actions that were actually taken is updated.

    +
  6. +
  7. Once in every few thousand steps, weights are copied from the online network to the target network.

    +
  8. +
  9. After every training step, the priorities of the batch transitions are updated in the prioritized replay buffer +using the KL divergence loss that is returned from the network.

    +
  10. +
+
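Relative to the Categorical DQN projection, the main change in step 3 is the \(n\)-step Bellman update applied to each atom before projecting. A sketch of that accumulation, with placeholder rewards and support values:

```python
import numpy as np

gamma, n = 0.99, 3
rewards = np.array([1.0, 0.0, 0.5])              # r_t, r_{t+1}, r_{t+2} (placeholder)
z = np.linspace(-10.0, 10.0, 51)                 # atom support, as in Categorical DQN

n_step_reward = np.sum(gamma ** np.arange(n) * rewards)
shifted_atoms = n_step_reward + gamma ** n * z   # \hat{T} z_j for every atom j
# shifted_atoms are then clipped to [v_min, v_max] and projected onto the support
```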
+
+class rl_coach.agents.rainbow_dqn_agent.RainbowDQNAlgorithmParameters[source]
+
+++ + + + +
Parameters:
    +
  • n_step – (int) The number of steps to bootstrap the network over. The actual rewards of the first N-1 steps are accumulated with increasing powers of the discount factor, and the Nth step is bootstrapped from the network prediction.
  • +
  • store_transitions_only_when_episodes_are_terminated – (bool) If set to True, the transitions will be stored in an Episode object until the episode ends, and only then written to the memory. This is useful since we want to calculate the N-step discounted rewards before saving the transitions into the memory, and to do so we need the entire episode first.
  • +
+
+
+ +
+
+
+ + +
+ +
+ + +
+
+ +
+ +
+ + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/docs/components/architectures/index.html b/docs/components/architectures/index.html new file mode 100644 index 0000000..60c455a --- /dev/null +++ b/docs/components/architectures/index.html @@ -0,0 +1,793 @@ + + + + + + + + + + + Architectures — Reinforcement Learning Coach 0.11.0 documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + +
+ + + + + +
+ +
+ + + + + + + + + + + + + + + + + +
+ + + + +
+
+
+
+ +
+

Architectures

+

Architectures contain all the classes that implement the neural network components of the agent. Since Coach is intended to work with multiple neural network frameworks, each framework implements its own components under a dedicated directory. For example, the tensorflow components contain all the neural network parts that are implemented using TensorFlow.

+
+
+class rl_coach.base_parameters.NetworkParameters(force_cpu=False, async_training=False, shared_optimizer=True, scale_down_gradients_by_number_of_workers_for_sync_training=True, clip_gradients=None, gradients_clipping_method=<GradientClippingMethod.ClipByGlobalNorm: 0>, l2_regularization=0, learning_rate=0.00025, learning_rate_decay_rate=0, learning_rate_decay_steps=0, input_embedders_parameters={}, embedding_merger_type=<EmbeddingMergerType.Concat: 0>, middleware_parameters=None, heads_parameters=[], use_separate_networks_per_head=False, optimizer_type='Adam', optimizer_epsilon=0.0001, adam_optimizer_beta1=0.9, adam_optimizer_beta2=0.99, rms_prop_optimizer_decay=0.9, batch_size=32, replace_mse_with_huber_loss=False, create_target_network=False, tensorflow_support=True)[source]
+
+++ + + + +
Parameters:
    +
  • force_cpu – Force the neural networks to run on the CPU even if a GPU is available
  • +
  • async_training – If set to True, asynchronous training will be used, meaning that each worker will progress at its own pace, without waiting for the rest of the workers to calculate their gradients.
  • +
  • shared_optimizer – If set to True, a central optimizer which will be shared with all the workers will be used for applying +gradients to the network. Otherwise, each worker will have its own optimizer with its own internal +parameters that will only be affected by the gradients calculated by that worker
  • +
  • scale_down_gradients_by_number_of_workers_for_sync_training – If set to True, in synchronous training, the gradients of each worker will be scaled down by the +number of workers. This essentially means that the gradients applied to the network are the average +of the gradients over all the workers.
  • +
  • clip_gradients – A value that will be used for clipping the gradients of the network. If set to None, no gradient clipping +will be applied. Otherwise, the gradients will be clipped according to the gradients_clipping_method.
  • +
  • gradients_clipping_method – A gradient clipping method, defined by a GradientClippingMethod enum, and that will be used to clip the +gradients of the network. This will only be used if the clip_gradients value is defined as a value other +than None.
  • +
  • l2_regularization – A L2 regularization weight that will be applied to the network weights while calculating the loss function
  • +
  • learning_rate – The learning rate for the network
  • +
  • learning_rate_decay_rate – If this value is larger than 0, an exponential decay will be applied to the network learning rate. +The rate of the decay is defined by this parameter, and the number of training steps the decay will be +applied is defined by learning_rate_decay_steps. Notice that both parameters should be defined in order +for this to work correctly.
  • +
  • learning_rate_decay_steps – If the learning_rate_decay_rate of the network is larger than 0, an exponential decay will be applied to +the network learning rate. The number of steps the decay will be applied is defined by this parameter. +Notice that both this parameter, as well as learning_rate_decay_rate should be defined in order for the +learning rate decay to work correctly.
  • +
  • input_embedders_parameters – A dictionary mapping between input names and input embedders (InputEmbedderParameters) to use for the +network. Each of the keys is an input name as returned from the environment in the state. +For example, if the environment returns a state containing ‘observation’ and ‘measurements’, then +the keys for the input embedders dictionary can be either ‘observation’ to use the observation as input, +‘measurements’ to use the measurements as input, or both. +The embedder type will be automatically selected according to the input type. Vector inputs will +produce a fully connected embedder, and image inputs will produce a convolutional embedder.
  • +
  • embedding_merger_type – The type of embedding merging to use, given by one of the EmbeddingMergerType enum values. This will be used to merge the outputs of all the input embedders into a single embedding.
  • +
  • middleware_parameters – The parameters of the middleware to use, given by a MiddlewareParameters object. +Each network will have only a single middleware embedder which will take the merged embeddings from the +input embedders and pass them through more neural network layers.
  • +
  • heads_parameters – A list of heads for the network given by their corresponding HeadParameters. +Each network can have one or multiple network heads, where each one will take the output of the middleware +and make some additional computation on top of it. Additionally, each head calculates a weighted loss value, +and the loss values from all the heads will be summed later on.
  • +
  • use_separate_networks_per_head – A flag that allows using different copies of the input embedders and middleware for each one of the heads. +Regularly, the heads will have a shared input, but in the case where use_separate_networks_per_head is set +to True, each one of the heads will get a different input.
  • +
  • optimizer_type – A string specifying the optimizer type to use for updating the network. The available optimizers are +Adam, RMSProp and LBFGS.
  • +
  • optimizer_epsilon – An internal optimizer parameter used for Adam and RMSProp.
  • +
  • adam_optimizer_beta1 – The beta1 internal optimizer parameter used for Adam. It will be used only if Adam was selected as the optimizer for the network.
  • +
  • adam_optimizer_beta2 – The beta2 internal optimizer parameter used for Adam. It will be used only if Adam was selected as the optimizer for the network.
  • +
  • rms_prop_optimizer_decay – The decay value for the RMSProp optimizer, which will be used only in case the RMSProp optimizer was +selected for this network.
  • +
  • batch_size – The batch size to use when updating the network.
  • +
  • replace_mse_with_huber_loss
  • +
  • create_target_network – If this flag is set to True, an additional copy of the network will be created and initialized with the +same weights as the online network. It can then be queried, and its weights can be synced from the +online network at will.
  • +
  • tensorflow_support – A flag which specifies if the network is supported by the TensorFlow framework.
  • +
+
+
+ +
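For example, a network configuration might be tweaked roughly as in the following sketch. Only keyword arguments that appear in the signature above are used, and the values are arbitrary illustrations rather than recommended settings:

```python
from rl_coach.base_parameters import NetworkParameters

# A minimal sketch: override a few of the documented defaults.
network_params = NetworkParameters(
    learning_rate=0.0001,            # replaces the default 0.00025
    batch_size=64,                   # batch size used when updating the network
    clip_gradients=40.0,             # enable gradient clipping (uses gradients_clipping_method)
    l2_regularization=1e-4,          # L2 weight penalty added to the loss
    create_target_network=True,      # also create a slowly-synced copy of the network
)
```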
+

Architecture

+
+
+class rl_coach.architectures.architecture.Architecture(agent_parameters: rl_coach.base_parameters.AgentParameters, spaces: rl_coach.spaces.SpacesDefinition, name: str = '')[source]
+

Creates a neural network ‘architecture’, that can be trained and used for inference.

+ +++ + + + +
Parameters:
    +
  • agent_parameters – the agent parameters
  • +
  • spaces – the spaces (observation, action, etc.) definition of the agent
  • +
  • name – the name of the network
  • +
+
+
+
+accumulate_gradients(inputs: Dict[str, numpy.ndarray], targets: List[numpy.ndarray], additional_fetches: list = None, importance_weights: numpy.ndarray = None, no_accumulation: bool = False) → Tuple[float, List[float], float, list][source]
+

Given a batch of inputs (i.e. states) and targets (e.g. discounted rewards), computes and accumulates the +gradients for model parameters. Will run forward and backward pass to compute gradients, clip the gradient +values if required and then accumulate gradients from all learners. It does not update the model weights, +that’s performed in apply_and_reset_gradients method.

+

Once gradients are accumulated, they are accessed by the accumulated_gradients property of this class.

+ +++ + + + + + +
Parameters:
    +
  • inputs – typically the environment states (but can also contain other data for the loss), e.g. {‘observation’: numpy.ndarray} with observation of shape (batch_size, observation_space_size) or (batch_size, observation_space_size, stack_size), or {‘observation’: numpy.ndarray, ‘output_0_0’: numpy.ndarray} with output_0_0 of shape (batch_size,)

    +
  • +
  • targets – targets for calculating loss. For example discounted rewards for value network +for calculating the value-network loss would be a target. Length of list and order of arrays in +the list matches that of network losses which are defined by network parameters
  • +
  • additional_fetches – list of additional values to fetch and return. The type of each list +element is framework dependent.
  • +
  • importance_weights – ndarray of shape (batch_size,) to multiply with batch loss.
  • +
  • no_accumulation – if True, set gradient values to the new gradients, otherwise sum with previously +calculated gradients
  • +
+
Returns:

tuple of (total_loss, losses, norm_unclipped_grads, fetched_tensors):
  • total_loss (float): sum of all head losses
  • losses (list of float): list of all losses, ordered as the target losses followed by the regularization losses. The specifics of the losses depend on the network parameters (number of heads, etc.)
  • norm_unclipped_grads (float): global norm of all gradients before any gradient clipping is applied
  • fetched_tensors: all values for additional_fetches

+

+
+
+ +
+
+apply_and_reset_gradients(gradients: List[numpy.ndarray], scaler: float = 1.0) → None[source]
+

Applies the given gradients to the network weights and resets the gradient accumulations. +Has the same impact as calling apply_gradients, then reset_accumulated_gradients.

+ +++ + + + +
Parameters:
    +
  • gradients – gradients for the parameter weights, taken from accumulated_gradients property +of an identical network (either self or another identical network)
  • +
  • scaler – A scaling factor that allows rescaling the gradients before applying them
  • +
+
+
+ +
+
+apply_gradients(gradients: List[numpy.ndarray], scaler: float = 1.0) → None[source]
+

Applies the given gradients to the network weights. This will be performed synchronously or asynchronously, depending on network_parameters.async_training.

+ +++ + + + +
Parameters:
    +
  • gradients – gradients for the parameter weights, taken from accumulated_gradients property +of an identical network (either self or another identical network)
  • +
  • scaler – A scaling factor that allows rescaling the gradients before applying them
  • +
+
+
+ +
+
+get_variable_value(variable: Any) → numpy.ndarray[source]
+

Gets the value of a specified variable. The type of variable is dependent on the framework. An example of a variable is head.kl_coefficient, which could be a symbol for evaluation or could be a string representing the value.

+ +++ + + + + + +
Parameters:variable – variable of interest
Returns:value of the specified variable
+
+ +
+
+get_weights() → List[numpy.ndarray][source]
+

Gets model weights as a list of ndarrays. It is used for synchronizing weight between two identical networks.

+ +++ + + + +
Returns:list weights as ndarray
+
+ +
+
+static parallel_predict(sess: Any, network_input_tuples: List[Tuple[Architecture, Dict[str, numpy.ndarray]]]) → Tuple[numpy.ndarray, ...][source]
+
+++ + + + + + +
Parameters:
    +
  • sess – active session to use for prediction
  • +
  • network_input_tuples – tuple of network and corresponding input
  • +
+
Returns:

list or tuple of outputs from all networks

+
+
+ +
+
+predict(inputs: Dict[str, numpy.ndarray], outputs: List[Any] = None, squeeze_output: bool = True, initial_feed_dict: Dict[Any, numpy.ndarray] = None) → Tuple[numpy.ndarray, ...][source]
+

Given input observations, use the model to make predictions (e.g. action or value).

+ +++ + + + + + +
Parameters:
    +
  • inputs – current state (i.e. observations, measurements, goals, etc.) +(e.g. {‘observation’: numpy.ndarray} of shape (batch_size, observation_space_size))
  • +
  • outputs – list of outputs to return. Return all outputs if unspecified. Type of the list elements +depends on the framework backend.
  • +
  • squeeze_output – call squeeze_list on output before returning if True
  • +
  • initial_feed_dict – a dictionary of extra inputs for forward pass.
  • +
+
Returns:

predictions of action or value of shape (batch_size, action_space_size) for action predictions)

+
+
+ +
+
+reset_accumulated_gradients() → None[source]
+

Sets gradient of all parameters to 0.

+

Once gradients are reset, they must be accessible by accumulated_gradients property of this class, +which must return a list of numpy ndarrays. Child class must ensure that accumulated_gradients is set.

+
+ +
+
+set_variable_value(assign_op: Any, value: numpy.ndarray, placeholder: Any)[source]
+

Updates the value of a specified variable. The type of assign_op is dependent on the framework and is a unique identifier for assigning a value to a variable. For example, an agent may use head.assign_kl_coefficient. There is a one-to-one mapping between assign_op and placeholder (in the example above, placeholder would be head.kl_coefficient_ph).

+ +++ + + + +
Parameters:
    +
  • assign_op – a parameter representing the operation for assigning value to a specific variable
  • +
  • value – value of the specified variable used for update
  • +
  • placeholder – a placeholder for binding the value to assign_op.
  • +
+
+
+ +
+
+set_weights(weights: List[numpy.ndarray], rate: float = 1.0) → None[source]
+

Sets model weights for provided layer parameters.

+ +++ + + + + + +
Parameters:
    +
  • weights – list of model weights in the same order as received in get_weights
  • +
  • rate – controls the mixture of given weight values versus old weight values. +i.e. new_weight = rate * given_weight + (1 - rate) * old_weight
  • +
+
Returns:

None

+
+
+ +
+
+train_on_batch(inputs: Dict[str, numpy.ndarray], targets: List[numpy.ndarray], scaler: float = 1.0, additional_fetches: list = None, importance_weights: numpy.ndarray = None) → Tuple[float, List[float], float, list][source]
+

Given a batch of inputs (e.g. states) and targets (e.g. discounted rewards), takes a training step: i.e. runs a +forward pass and backward pass of the network, accumulates the gradients and applies an optimization step to +update the weights. +Calls accumulate_gradients followed by apply_and_reset_gradients. +Note: Currently an unused method.

+ +++ + + + + + +
Parameters:
    +
  • inputs – typically the environment states (but can also contain other data necessary for loss). +(e.g. {‘observation’: numpy.ndarray} with observation of shape (batch_size, observation_space_size) or +(batch_size, observation_space_size, stack_size) or +{‘observation’: numpy.ndarray, ‘output_0_0’: numpy.ndarray} with output_0_0 of shape (batch_size,))
  • +
  • targets – target values of shape (batch_size, ). For example discounted rewards for value network +for calculating the value-network loss would be a target. Length of list and order of arrays in +the list matches that of network losses which are defined by network parameters
  • +
  • scaler – value to scale gradients by before optimizing network weights
  • +
  • additional_fetches – list of additional values to fetch and return. The type of each list +element is framework dependent.
  • +
  • importance_weights – ndarray of shape (batch_size,) to multiply with batch loss.
  • +
+
Returns:

tuple of (total_loss, losses, norm_unclipped_grads, fetched_tensors):
  • total_loss (float): sum of all head losses
  • losses (list of float): list of all losses, ordered as the target losses followed by the regularization losses. The specifics of the losses depend on the network parameters (number of heads, etc.)
  • norm_unclipped_grads (float): global norm of all gradients before any gradient clipping is applied
  • fetched_tensors: all values for additional_fetches

+

+
+
+ +
+ +
+
+

NetworkWrapper

+../../_images/distributed.png +
+
+class rl_coach.architectures.network_wrapper.NetworkWrapper(agent_parameters: rl_coach.base_parameters.AgentParameters, has_target: bool, has_global: bool, name: str, spaces: rl_coach.spaces.SpacesDefinition, replicated_device=None, worker_device=None)[source]
+

The network wrapper contains multiple copies of the same network, each with a different set of weights that is updated on a different time scale. The network wrapper will always contain an online network. It will contain an additional, slowly-updating target network if it was requested by the user, and it will contain a global network shared between different workers if Coach is run in a single-node multi-process distributed mode. The network wrapper contains functionality for managing these networks and syncing between them.

+
+
+apply_gradients_and_sync_networks(reset_gradients=True)[source]
+

Applies the gradients accumulated in the online network to the global network or to itself and syncs the +networks if necessary

+ +++ + + + +
Parameters:reset_gradients – If set to False, the accumulated gradients won't be reset to 0 after applying them to the network. This is useful when the accumulated gradients are overwritten (rather than accumulated) by the accumulate_gradients function, and allows reducing the run time of this function by around 10%.
+
+ +
+
+apply_gradients_to_global_network(gradients=None)[source]
+

Apply gradients from the online network on the global network

+ +++ + + + + + +
Parameters:gradients – optional gradients that will be used instead of the accumulated gradients
Returns:
+
+ +
+
+apply_gradients_to_online_network(gradients=None)[source]
+

Apply gradients from the online network on itself

+ +++ + + + +
Returns:
+
+ +
+
+get_global_variables()[source]
+

Get all the variables that are shared between threads

+ +++ + + + +
Returns:a list of all the variables that are shared between threads
+
+ +
+
+get_local_variables()[source]
+

Get all the variables that are local to the thread

+ +++ + + + +
Returns:a list of all the variables that are local to the thread
+
+ +
+
+parallel_prediction(network_input_tuples: List[Tuple])[source]
+

Run several network predictions in parallel. Currently this only supports running each of the networks once.

+ +++ + + + + + +
Parameters:network_input_tuples – a list of tuples where the first element is the network (online_network, +target_network or global_network) and the second element is the inputs
Returns:the outputs of all the networks in the same order as the inputs were given
+
+ +
+
+set_is_training(state: bool)[source]
+

Set the phase of the network between training and testing

+ +++ + + + + + +
Parameters:state – The current state (True = Training, False = Testing)
Returns:None
+
+ +
+
+sync()[source]
+

Initializes the weights of the networks to match each other

+ +++ + + + +
Returns:
+
+ +
+
+train_and_sync_networks(inputs, targets, additional_fetches=[], importance_weights=None)[source]
+

A generic training function that enables multi-threading training using a global network if necessary.

+ +++ + + + + + +
Parameters:
    +
  • inputs – The inputs for the network.
  • +
  • targets – The targets corresponding to the given inputs
  • +
  • additional_fetches – Any additional tensor the user wants to fetch
  • +
  • importance_weights – A coefficient for each sample in the batch, which will be used to rescale the loss +error of this sample. If it is not given, the samples losses won’t be scaled
  • +
+
Returns:

The loss of the training iteration

+
+
+ +
+
+update_online_network(rate=1.0)[source]
+

Copy weights: global network >>> online network

+ +++ + + + +
Parameters:rate – the rate of copying the weights - 1 for copying exactly
+
+ +
+
+update_target_network(rate=1.0)[source]
+

Copy weights: online network >>> target network

+ +++ + + + +
Parameters:rate – the rate of copying the weights - 1 for copying exactly
+
+ +
+ +
+
+ + +
+ +
+ + +
+
+ +
+ +
+ + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/docs/components/core_types.html b/docs/components/core_types.html new file mode 100644 index 0000000..282b107 --- /dev/null +++ b/docs/components/core_types.html @@ -0,0 +1,696 @@ + + + + + + + + + + + Core Types — Reinforcement Learning Coach 0.11.0 documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + +
+ + + + + +
+ +
+ + + + + + + + + + + + + + + + + +
+ + + + +
+
+
+
+ +
+

Core Types

+
+

ActionInfo

+
+
+class rl_coach.core_types.ActionInfo(action: Union[int, float, numpy.ndarray, List], action_probability: float = 0, action_value: float = 0.0, state_value: float = 0.0, max_action_value: float = None, action_intrinsic_reward: float = 0)[source]
+

Action info is a class that holds an action and various additional details about it.

+ +++ + + + +
Parameters:
    +
  • action – the action
  • +
  • action_probability – the probability that the action was given when selecting it
  • +
  • action_value – the state-action value (Q value) of the action
  • +
  • state_value – the state value (V value) of the state where the action was taken
  • +
  • max_action_value – in case this is an action that was selected randomly, this is the value of the action that received the maximum value. If no value is given, the action is assumed to be the action with the maximum value.
  • +
  • action_intrinsic_reward – can contain any intrinsic reward that the agent wants to add to this action +selection
  • +
+
+
+ +
+
+

Batch

+
+
+class rl_coach.core_types.Batch(transitions: List[rl_coach.core_types.Transition])[source]
+

A wrapper around a list of transitions that helps extract batches of parameters from it. For example, one can extract a list of states corresponding to the list of transitions. The class uses lazy evaluation in order to return each of the available parameters.

+ +++ + + + +
Parameters:transitions – a list of transitions to extract the batch from
+
+
+actions(expand_dims=False) → numpy.ndarray[source]
+

if the actions were not converted to a batch before, extract them to a batch and then return the batch

+ +++ + + + + + +
Parameters:expand_dims – add an extra dimension to the actions batch
Returns:a numpy array containing all the actions of the batch
+
+ +
+
+game_overs(expand_dims=False) → numpy.ndarray[source]
+

if the game_overs were not converted to a batch before, extract them to a batch and then return the batch

+ +++ + + + + + +
Parameters:expand_dims – add an extra dimension to the game_overs batch
Returns:a numpy array containing all the game over flags of the batch
+
+ +
+
+goals(expand_dims=False) → numpy.ndarray[source]
+

if the goals were not converted to a batch before, extract them to a batch and then return the batch +if the goal was not filled, this will raise an exception

+ +++ + + + + + +
Parameters:expand_dims – add an extra dimension to the goals batch
Returns:a numpy array containing all the goals of the batch
+
+ +
+
+info(key, expand_dims=False) → numpy.ndarray[source]
+

if the given info dictionary key was not converted to a batch before, extract it to a batch and then return the +batch. if the key is not part of the keys in the info dictionary, this will raise an exception

+ +++ + + + + + +
Parameters:expand_dims – add an extra dimension to the info batch
Returns:a numpy array containing all the info values of the batch corresponding to the given key
+
+ +
+
+info_as_list(key) → list[source]
+

get the info and store it internally as a list, if it wasn’t stored before, then return it as a list.
:param key: the key of the info dictionary to extract
:return: a list containing all the info values of the batch corresponding to the given key

+
+ +
+
+n_step_discounted_rewards(expand_dims=False) → numpy.ndarray[source]
+
+
if the n_step_discounted_rewards were not converted to a batch before, extract them to a batch and then return the batch. If the n-step discounted rewards were not filled, this will raise an exception.
:param expand_dims: add an extra dimension to the total_returns batch
:return: a numpy array containing all the total return values of the batch

+
+ +
+
+next_states(fetches: List[str], expand_dims=False) → Dict[str, numpy.ndarray][source]
+

follow the keys in fetches to extract the corresponding items from the next states in the batch +if these keys were not already extracted before. return only the values corresponding to those keys

+ +++ + + + + + +
Parameters:
    +
  • fetches – the keys of the state dictionary to extract
  • +
  • expand_dims – add an extra dimension to each of the value batches
  • +
+
Returns:

a dictionary containing a batch of values corresponding to each of the given fetches keys

+
+
+ +
+
+rewards(expand_dims=False) → numpy.ndarray[source]
+

if the rewards were not converted to a batch before, extract them to a batch and then return the batch

+ +++ + + + + + +
Parameters:expand_dims – add an extra dimension to the rewards batch
Returns:a numpy array containing all the rewards of the batch
+
+ +
+
+shuffle() → None[source]
+

Shuffle all the transitions in the batch

+ +++ + + + +
Returns:None
+
+ +
+
+size
+
+++ + + + +
Returns:the size of the batch
+
+ +
+
+slice(start, end) → None[source]
+

Keep a slice from the batch and discard the rest of the batch

+ +++ + + + + + +
Parameters:
    +
  • start – the start index in the slice
  • +
  • end – the end index in the slice
  • +
+
Returns:

None

+
+
+ +
+
+states(fetches: List[str], expand_dims=False) → Dict[str, numpy.ndarray][source]
+

follow the keys in fetches to extract the corresponding items from the states in the batch +if these keys were not already extracted before. return only the values corresponding to those keys

+ +++ + + + + + +
Parameters:
    +
  • fetches – the keys of the state dictionary to extract
  • +
  • expand_dims – add an extra dimension to each of the value batches
  • +
+
Returns:

a dictionary containing a batch of values corresponding to each of the given fetches keys

+
+
+ +
+ +
+
+
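As a minimal sketch of the accessors above (the transition contents are made up; see the Transition class below), a Batch can be built from a list of transitions and queried lazily:

    import numpy as np
    from rl_coach.core_types import Batch, Transition

    # a few made-up transitions; states are dictionaries keyed by 'observation'
    transitions = [
        Transition(state={'observation': np.random.rand(4)},
                   action=0,
                   reward=1.0,
                   next_state={'observation': np.random.rand(4)},
                   game_over=False)
        for _ in range(32)
    ]

    batch = Batch(transitions)
    states = batch.states(['observation'])   # {'observation': array with shape (32, 4)}
    actions = batch.actions()                # numpy array with shape (32,)
    rewards = batch.rewards()
    game_overs = batch.game_overs()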

EnvResponse

+
+
+class rl_coach.core_types.EnvResponse(next_state: Dict[str, numpy.ndarray], reward: Union[int, float, numpy.ndarray], game_over: bool, info: Dict = None, goal: numpy.ndarray = None)[source]
+

An env response is a collection containing the information returning from the environment after a single action +has been performed on it.

+ +++ + + + +
Parameters:
    +
  • next_state – The new state that the environment has transitioned into. Assumed to be a dictionary where the +observation is located at state[‘observation’]
  • +
  • reward – The reward received from the environment
  • +
  • game_over – A boolean which should be True if the episode terminated after +the execution of the action.
  • +
  • info – any additional info from the environment
  • +
  • goal – a goal defined by the environment
  • +
+
+
+ +
+
+
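For illustration only (the observation and info values are hypothetical), an environment wrapper would typically package the result of a single step as:

    import numpy as np
    from rl_coach.core_types import EnvResponse

    env_response = EnvResponse(next_state={'observation': np.zeros(8)},
                               reward=0.5,
                               game_over=False,
                               info={'lives': 3})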

Episode

+
+
+class rl_coach.core_types.Episode(discount: float = 0.99, bootstrap_total_return_from_old_policy: bool = False, n_step: int = -1)[source]
+

An Episode represents a sequence of transitions that ends with a terminal state.

+ +++ + + + +
Parameters:
    +
  • discount – the discount factor to use when calculating total returns
  • +
  • bootstrap_total_return_from_old_policy – should the total return be bootstrapped from the values in the +memory
  • +
  • n_step – the number of future steps to sum the reward over before bootstrapping
  • +
+
+
+
+get_first_transition() → rl_coach.core_types.Transition[source]
+

Get the first transition in the episode, or None if there are no transitions available

+ +++ + + + +
Returns:The first transition in the episode
+
+ +
+
+get_last_transition() → rl_coach.core_types.Transition[source]
+

Get the last transition in the episode, or None if there are no transitions available

+ +++ + + + +
Returns:The last transition in the episode
+
+ +
+
+get_transition(transition_idx: int) → rl_coach.core_types.Transition[source]
+

Get a specific transition by its index.

+ +++ + + + + + +
Parameters:transition_idx – The index of the transition to get
Returns:The transition which is stored in the given index
+
+ +
+
+get_transitions_attribute(attribute_name: str) → List[Any][source]
+

Get the values for some transition attribute from all the transitions in the episode. +For example, this allows getting the rewards for all the transitions as a list by calling +get_transitions_attribute(‘reward’)

+ +++ + + + + + +
Parameters:attribute_name – The name of the attribute to extract from all the transitions
Returns:A list of values from all the transitions according to the attribute given in attribute_name
+
+ +
+
+insert(transition: rl_coach.core_types.Transition) → None[source]
+

Insert a new transition to the episode. If the game_over flag in the transition is set to True, +the episode will be marked as complete.

+ +++ + + + + + +
Parameters:transition – The new transition to insert to the episode
Returns:None
+
+ +
+
+is_empty() → bool[source]
+

Check if the episode is empty

+ +++ + + + +
Returns:A boolean value determining if the episode is empty or not
+
+ +
+
+length() → int[source]
+

Return the length of the episode, which is the number of transitions it holds.

+ +++ + + + +
Returns:The number of transitions in the episode
+
+ +
+
+update_discounted_rewards()[source]
+

Update the discounted returns for all the transitions in the episode. +The returns will be calculated according to the rewards of each transition, together with the number of steps +to bootstrap from and the discount factor, as defined by n_step and discount respectively when initializing +the episode.

+ +++ + + + +
Returns:None
+
+ +
+ +
+
+
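A minimal sketch of how these methods fit together (the transition contents are made up; the Transition class is described below):

    import numpy as np
    from rl_coach.core_types import Episode, Transition

    episode = Episode(discount=0.99)

    # insert made-up transitions; the last one terminates the episode
    for step in range(10):
        episode.insert(Transition(state={'observation': np.random.rand(4)},
                                  action=step % 2,
                                  reward=1.0,
                                  next_state={'observation': np.random.rand(4)},
                                  game_over=(step == 9)))

    episode.update_discounted_rewards()
    print(episode.length())                             # 10
    print(episode.get_transitions_attribute('reward'))  # list of the 10 rewards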

Transition

+
+
+class rl_coach.core_types.Transition(state: Dict[str, numpy.ndarray] = None, action: Union[int, float, numpy.ndarray, List] = None, reward: Union[int, float, numpy.ndarray] = None, next_state: Dict[str, numpy.ndarray] = None, game_over: bool = None, info: Dict = None)[source]
+

A transition is a tuple containing the information of a single step of interaction +between the agent and the environment. The most basic version should contain the following values: +(current state, action, reward, next state, game over) +For imitation learning algorithms, if the reward, next state or game over is not known, +it is sufficient to store the current state and action taken by the expert.

+ +++ + + + +
Parameters:
    +
  • state – The current state. Assumed to be a dictionary where the observation +is located at state[‘observation’]
  • +
  • action – The current action that was taken
  • +
  • reward – The reward received from the environment
  • +
  • next_state – The next state of the environment after applying the action. +The next state should be similar to the state in its structure.
  • +
  • game_over – A boolean which should be True if the episode terminated after +the execution of the action.
  • +
  • info – A dictionary containing any additional information to be stored in the transition
  • +
+
+
+ +
+
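As a small sketch of the two usages described above (observation shapes and values are arbitrary):

    import numpy as np
    from rl_coach.core_types import Transition

    # a full transition for reinforcement learning
    transition = Transition(state={'observation': np.array([0.0, 1.0, 2.0])},
                            action=1,
                            reward=-0.1,
                            next_state={'observation': np.array([1.0, 2.0, 3.0])},
                            game_over=False,
                            info={'frame': 42})

    # for imitation learning, storing only the expert's state and action is sufficient
    expert_transition = Transition(state={'observation': np.zeros(3)}, action=2)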
\ No newline at end of file
diff --git a/docs/components/environments/index.html b/docs/components/environments/index.html new file mode 100644 index 0000000..c9706d4 --- /dev/null +++ b/docs/components/environments/index.html @@ -0,0 +1,650 @@
[HTML page head, navigation and footer markup omitted — Environments — Reinforcement Learning Coach 0.11.0 documentation]

Environments

+
+
+class rl_coach.environments.environment.Environment(level: rl_coach.environments.environment.LevelSelection, seed: int, frame_skip: int, human_control: bool, custom_reward_threshold: Union[int, float], visualization_parameters: rl_coach.base_parameters.VisualizationParameters, target_success_rate: float = 1.0, **kwargs)[source]
+
+++ + + + +
Parameters:
    +
  • level – The environment level. Each environment can have multiple levels
  • +
  • seed – a seed for the random number generator of the environment
  • +
  • frame_skip – number of frames to skip (while repeating the same action) between each two agent directives
  • +
  • human_control – human should control the environment
  • +
  • visualization_parameters – a blob of parameters used for visualization of the environment
  • +
  • **kwargs

    as the class is instantiated by EnvironmentParameters, this is used to support having +additional arguments which will be ignored by this class, but might be used by others

    +
  • +
+
+
+
+action_space
+

Get the action space of the environment

+ +++ + + + +
Returns:the action space
+
+ +
+
+get_action_from_user() → Union[int, float, numpy.ndarray, List][source]
+

Get an action from the user keyboard

+ +++ + + + +
Returns:action index
+
+ +
+
+get_available_keys() → List[Tuple[str, Union[int, float, numpy.ndarray, List]]][source]
+

Return a list of tuples mapping between action names and the keyboard key that triggers them

+ +++ + + + +
Returns:a list of tuples mapping between action names and the keyboard key that triggers them
+
+ +
+
+get_goal() → Union[None, numpy.ndarray][source]
+

Get the current goal that the agent needs to achieve in the environment

+ +++ + + + +
Returns:The goal
+
+ +
+
+get_random_action() → Union[int, float, numpy.ndarray, List][source]
+

Returns an action picked uniformly from the available actions

+ +++ + + + +
Returns:a numpy array with a random action
+
+ +
+
+get_rendered_image() → numpy.ndarray[source]
+

Return a numpy array containing the image that will be rendered to the screen. +This can be different from the observation. For example, mujoco’s observation is a measurements vector.

+ +++ + + + +
Returns:numpy array containing the image that will be rendered to the screen
+
+ +
+
+goal_space
+

Get the goal space of the environment

+ +++ + + + +
Returns:the goal space
+
+ +
+
+handle_episode_ended() → None[source]
+

End an episode

+ +++ + + + +
Returns:None
+
+ +
+
+last_env_response
+

Get the last environment response

+ +++ + + + +
Returns:a dictionary that contains the state, reward, etc.
+
+ +
+
+phase
+

Get the phase of the environment +:return: the current phase

+
+ +
+
+render() → None[source]
+

Call the environment function for rendering to the screen

+ +++ + + + +
Returns:None
+
+ +
+
+reset_internal_state(force_environment_reset=False) → rl_coach.core_types.EnvResponse[source]
+

Reset the environment and all the variables of the wrapper

+ +++ + + + + + +
Parameters:force_environment_reset – forces environment reset even when the game did not end
Returns:A dictionary containing the observation, reward, done flag, action and measurements
+
+ +
+
+set_goal(goal: Union[None, numpy.ndarray]) → None[source]
+

Set the current goal that the agent needs to achieve in the environment

+ +++ + + + + + +
Parameters:goal – the goal that needs to be achieved
Returns:None
+
+ +
+
+state_space
+

Get the state space of the environment

+ +++ + + + +
Returns:the observation space
+
+ +
+
+step(action: Union[int, float, numpy.ndarray, List]) → rl_coach.core_types.EnvResponse[source]
+

Make a single step in the environment using the given action

+ +++ + + + + + +
Parameters:action – an action to use for stepping the environment. Should follow the definition of the action space.
Returns:the environment response as returned in get_last_env_response
+
+ +
+ +
+
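The interaction loop implied by the methods above looks roughly as follows; env stands for an already-constructed instance of any concrete Environment subclass (such as the environments listed below), so the construction itself is omitted:

    # 'env' is assumed to be an already-constructed Environment subclass instance
    env_response = env.reset_internal_state(force_environment_reset=True)

    while not env_response.game_over:
        action = env.get_random_action()   # or an action chosen by an agent
        env_response = env.step(action)

    env.handle_episode_ended()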

DeepMind Control Suite

+

A set of reinforcement learning environments powered by the MuJoCo physics engine.

+

Website: DeepMind Control Suite

+
+
+class rl_coach.environments.control_suite_environment.ControlSuiteEnvironment(level: rl_coach.environments.environment.LevelSelection, frame_skip: int, visualization_parameters: rl_coach.base_parameters.VisualizationParameters, target_success_rate: float = 1.0, seed: Union[None, int] = None, human_control: bool = False, observation_type: rl_coach.environments.control_suite_environment.ObservationType = <ObservationType.Measurements: 1>, custom_reward_threshold: Union[int, float] = None, **kwargs)[source]
+
+++ + + + +
Parameters:
    +
  • level – (str) +A string representing the control suite level to run. This can also be a LevelSelection object. +For example, cartpole:swingup.
  • +
  • frame_skip – (int) +The number of frames to skip between any two actions given by the agent. The action will be repeated +for all the skipped frames.
  • +
  • visualization_parameters – (VisualizationParameters) +The parameters used for visualizing the environment, such as the render flag, storing videos etc.
  • +
  • target_success_rate – (float) +Stop experiment if given target success rate was achieved.
  • +
  • seed – (int) +A seed to use for the random number generator when running the environment.
  • +
  • human_control – (bool) +A flag that allows controlling the environment using the keyboard keys.
  • +
  • observation_type – (ObservationType) +An enum which defines which observation to use. The current options are to use: +* Measurements only - a vector of joint torques and similar measurements +* Image only - an image of the environment as seen by a camera attached to the simulator +* Measurements & Image - both type of observations will be returned in the state using the keys +‘measurements’ and ‘pixels’ respectively.
  • +
  • custom_reward_threshold – (float) +Allows defining a custom reward that will be used to decide when the agent succeeded in passing the environment.
  • +
+
+
+ +
+
+

Blizzard Starcraft II

+

A popular strategy game which was wrapped with a python interface by DeepMind.

+

Website: Blizzard Starcraft II

+
+
+class rl_coach.environments.starcraft2_environment.StarCraft2Environment(level: rl_coach.environments.environment.LevelSelection, frame_skip: int, visualization_parameters: rl_coach.base_parameters.VisualizationParameters, target_success_rate: float = 1.0, seed: Union[None, int] = None, human_control: bool = False, custom_reward_threshold: Union[int, float] = None, screen_size: int = 84, minimap_size: int = 64, feature_minimap_maps_to_use: List = range(0, 7), feature_screen_maps_to_use: List = range(0, 17), observation_type: rl_coach.environments.starcraft2_environment.StarcraftObservationType = <StarcraftObservationType.Features: 0>, disable_fog: bool = False, auto_select_all_army: bool = True, use_full_action_space: bool = False, **kwargs)[source]
+
+ +
+
+

ViZDoom

+

A Doom-based AI research platform for reinforcement learning from raw visual information.

+

Website: ViZDoom

+
+
+class rl_coach.environments.doom_environment.DoomEnvironment(level: rl_coach.environments.environment.LevelSelection, seed: int, frame_skip: int, human_control: bool, custom_reward_threshold: Union[int, float], visualization_parameters: rl_coach.base_parameters.VisualizationParameters, cameras: List[rl_coach.environments.doom_environment.DoomEnvironment.CameraTypes], target_success_rate: float = 1.0, **kwargs)[source]
+
+++ + + + +
Parameters:
    +
  • level – (str) +A string representing the doom level to run. This can also be a LevelSelection object. +This should be one of the levels defined in the DoomLevel enum. For example, HEALTH_GATHERING.
  • +
  • seed – (int) +A seed to use for the random number generator when running the environment.
  • +
  • frame_skip – (int) +The number of frames to skip between any two actions given by the agent. The action will be repeated +for all the skipped frames.
  • +
  • human_control – (bool) +A flag that allows controlling the environment using the keyboard keys.
  • +
  • custom_reward_threshold – (float) +Allows defining a custom reward that will be used to decide when the agent succeeded in passing the environment.
  • +
  • visualization_parameters – (VisualizationParameters) +The parameters used for visualizing the environment, such as the render flag, storing videos etc.
  • +
  • cameras

    (List[CameraTypes]) A list of camera types to use as observation in the state returned from the environment. Each camera should be an enum from CameraTypes, and there are several options like an RGB observation, a depth map, a segmentation map, and a top down map of the environment.

    +
    +
    +++ + + + + +
    param target_success_rate:
     (float) +Stop experiment if given target success rate was achieved.
    +
    +
  • +
+
+
+ +
+
+

CARLA

+

An open-source simulator for autonomous driving research.

+

Website: CARLA

+
+
+class rl_coach.environments.carla_environment.CarlaEnvironment(level: rl_coach.environments.environment.LevelSelection, seed: int, frame_skip: int, human_control: bool, custom_reward_threshold: Union[int, float], visualization_parameters: rl_coach.base_parameters.VisualizationParameters, server_height: int, server_width: int, camera_height: int, camera_width: int, verbose: bool, experiment_suite: carla.driving_benchmark.experiment_suites.experiment_suite.ExperimentSuite, config: str, episode_max_time: int, allow_braking: bool, quality: rl_coach.environments.carla_environment.CarlaEnvironmentParameters.Quality, cameras: List[rl_coach.environments.carla_environment.CameraTypes], weather_id: List[int], experiment_path: str, separate_actions_for_throttle_and_brake: bool, num_speedup_steps: int, max_speed: float, target_success_rate: float = 1.0, **kwargs)[source]
+
+ +
+
+

OpenAI Gym

+

A library which consists of a set of environments, from games to robotics. +Additionally, it can be extended using the API defined by the authors.

+

Website: OpenAI Gym

+

In Coach, we support all the native environments in Gym, along with several extensions such as:

+
    +
  • Roboschool - a set of environments powered by the PyBullet engine, +that offer a free alternative to MuJoCo.
  • +
  • Gym Extensions - a set of environments that extends Gym for +auxiliary tasks (multitask learning, transfer learning, inverse reinforcement learning, etc.)
  • +
  • PyBullet - a physics engine that +includes a set of robotics environments.
  • +
+
+
+class rl_coach.environments.gym_environment.GymEnvironment(level: rl_coach.environments.environment.LevelSelection, frame_skip: int, visualization_parameters: rl_coach.base_parameters.VisualizationParameters, target_success_rate: float = 1.0, additional_simulator_parameters: Dict[str, Any] = {}, seed: Union[None, int] = None, human_control: bool = False, custom_reward_threshold: Union[int, float] = None, random_initialization_steps: int = 1, max_over_num_frames: int = 1, **kwargs)[source]
+
+++ + + + +
Parameters:
    +
  • level – (str) +A string representing the gym level to run. This can also be a LevelSelection object. +For example, BreakoutDeterministic-v0
  • +
  • frame_skip – (int) +The number of frames to skip between any two actions given by the agent. The action will be repeated +for all the skipped frames.
  • +
  • visualization_parameters – (VisualizationParameters) +The parameters used for visualizing the environment, such as the render flag, storing videos etc.
  • +
  • additional_simulator_parameters – (Dict[str, Any]) +Any additional parameters that the user can pass to the Gym environment. These parameters should be +accepted by the __init__ function of the implemented Gym environment.
  • +
  • seed – (int) +A seed to use for the random number generator when running the environment.
  • +
  • human_control – (bool) +A flag that allows controlling the environment using the keyboard keys.
  • +
  • custom_reward_threshold – (float) +Allows defining a custom reward that will be used to decide when the agent succeeded in passing the environment. +If not set, this value will be taken from the Gym environment definition.
  • +
  • random_initialization_steps – (int) +The number of random steps that will be taken in the environment after each reset. +This is a feature presented in the DQN paper, which improves the variability of the episodes the agent sees.
  • +
  • max_over_num_frames – (int) +This value will be used for merging multiple frames into a single frame by taking the maximum value for each +of the pixels in the frame. This is particularly used in Atari games, where the frames flicker, and objects +can be seen in one frame but disappear in the next.
  • +
+
+
+ +
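A rough sketch of constructing a Gym-based environment directly (in practice this is usually done through a preset; the exact constructor defaults may differ between Coach versions, so treat the argument values as assumptions):

    from rl_coach.base_parameters import VisualizationParameters
    from rl_coach.environments.gym_environment import GymEnvironment

    # 'CartPole-v0' is used here only as an example level
    env = GymEnvironment(level='CartPole-v0',
                         frame_skip=1,
                         visualization_parameters=VisualizationParameters(),
                         seed=12)

    env_response = env.reset_internal_state()
    env_response = env.step(env.get_random_action())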
+
\ No newline at end of file
diff --git a/docs/components/exploration_policies/index.html b/docs/components/exploration_policies/index.html new file mode 100644 index 0000000..aa40578 --- /dev/null +++ b/docs/components/exploration_policies/index.html @@ -0,0 +1,663 @@
[HTML page head, navigation and footer markup omitted — Exploration Policies — Reinforcement Learning Coach 0.11.0 documentation]

Exploration Policies

+

Exploration policies are components that allow the agent to trade off exploration and exploitation according to a predefined policy. This is one of the most important aspects of reinforcement learning agents, and can require some tuning to get right. Coach supports several pre-defined exploration policies, and can easily be extended with custom policies. Note that not all exploration policies are expected to work for both discrete and continuous action spaces.

Exploration Policy   | Discrete Action Space | Box Action Space
---------------------|-----------------------|-----------------
AdditiveNoise        | X                     | V
Boltzmann            | V                     | X
Bootstrapped         | V                     | X
Categorical          | V                     | X
ContinuousEntropy    | X                     | V
EGreedy              | V                     | V
Greedy               | V                     | V
OUProcess            | X                     | V
ParameterNoise       | V                     | V
TruncatedNormal      | X                     | V
UCB                  | V                     | X
+
+

ExplorationPolicy

+
+
+class rl_coach.exploration_policies.ExplorationPolicy(action_space: rl_coach.spaces.ActionSpace)[source]
+

An exploration policy takes the predicted actions or action values from the agent, and selects the action to +actually apply to the environment using some predefined algorithm.

+ +++ + + + +
Parameters:action_space – the action space used by the environment
+
+
+change_phase(phase)[source]
+

Change between running phases of the algorithm +:param phase: Either Heatup or Train +:return: none

+
+ +
+
+get_action(action_values: List[Union[int, float, numpy.ndarray, List]]) → Union[int, float, numpy.ndarray, List][source]
+

Given a list of values corresponding to each action, choose one action according to the exploration policy
:param action_values: A list of action values
:return: The chosen action

+
+ +
+
+requires_action_values() → bool[source]
+

Allows exploration policies to define if they require the action values for the current step. This can save a lot of computation. For example, in e-greedy, if the random value generated is smaller than epsilon, the action is completely random, and the action values don't need to be calculated
:return: True if the action values are required. False otherwise

+
+ +
+
+reset()[source]
+

Used for resetting the exploration policy parameters when needed +:return: None

+
+ +
+ +
+
+
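A sketch of a custom exploration policy built on the interface above; the noisy-greedy behavior is made up purely to illustrate which methods a subclass typically overrides:

    import numpy as np
    from rl_coach.exploration_policies import ExplorationPolicy

    class NoisyGreedy(ExplorationPolicy):
        def __init__(self, action_space, noise_std=0.1):
            super().__init__(action_space)
            self.noise_std = noise_std

        def get_action(self, action_values):
            # perturb the predicted action values and act greedily on the result
            values = np.array(action_values).flatten()
            noisy = values + np.random.randn(values.shape[0]) * self.noise_std
            return int(np.argmax(noisy))

        def requires_action_values(self):
            # the noise is applied to the action values, so they are always needed
            return True

In Coach itself, each exploration policy is also paired with a parameters class (ExplorationParameters) so that it can be referenced from presets; that wiring is omitted in this sketch.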

AdditiveNoise

+
+
+class rl_coach.exploration_policies.AdditiveNoise(action_space: rl_coach.spaces.ActionSpace, noise_percentage_schedule: rl_coach.schedules.Schedule, evaluation_noise_percentage: float)[source]
+

AdditiveNoise is an exploration policy intended for continuous action spaces. It takes the action from the agent and adds Gaussian distributed noise to it. The amount of noise added to the action can be given in two different ways:
1. Specified by the user as a noise schedule which is taken in percentiles out of the action space size
2. Specified by the agent's action. In case the agent's action is a list with 2 values, the 1st one is assumed to be the mean of the action, and the 2nd is assumed to be its standard deviation.

+ +++ + + + +
Parameters:
    +
  • action_space – the action space used by the environment
  • +
  • noise_percentage_schedule – the schedule for the noise variance percentage relative to the absolute range +of the action space
  • +
  • evaluation_noise_percentage – the noise variance percentage that will be used during evaluation phases
  • +
+
+
+ +
+
+

Boltzmann

+
+
+class rl_coach.exploration_policies.Boltzmann(action_space: rl_coach.spaces.ActionSpace, temperature_schedule: rl_coach.schedules.Schedule)[source]
+

The Boltzmann exploration policy is intended for discrete action spaces. It assumes that each of the possible +actions has some value assigned to it (such as the Q value), and uses a softmax function to convert these values +into a distribution over the actions. It then samples the action for playing out of the calculated distribution. +An additional temperature schedule can be given by the user, and will control the steepness of the softmax function.

+ +++ + + + +
Parameters:
    +
  • action_space – the action space used by the environment
  • +
  • temperature_schedule – the schedule for the temperature parameter of the softmax
  • +
+
+
+ +
+
+

Bootstrapped

+
+
+class rl_coach.exploration_policies.Bootstrapped(action_space: rl_coach.spaces.ActionSpace, epsilon_schedule: rl_coach.schedules.Schedule, evaluation_epsilon: float, architecture_num_q_heads: int, continuous_exploration_policy_parameters: rl_coach.exploration_policies.exploration_policy.ExplorationParameters = <rl_coach.exploration_policies.additive_noise.AdditiveNoiseParameters object>)[source]
+

Bootstrapped exploration policy is currently only used for discrete action spaces along with the +Bootstrapped DQN agent. It assumes that there is an ensemble of network heads, where each one predicts the +values for all the possible actions. For each episode, a single head is selected to lead the agent, according +to its value predictions. In evaluation, the action is selected using a majority vote over all the heads +predictions.

+
+

Note

+

This exploration policy will only work for Discrete action spaces with Bootstrapped DQN style agents, +since it requires the agent to have a network with multiple heads.

+
+ +++ + + + +
Parameters:
    +
  • action_space – the action space used by the environment
  • +
  • epsilon_schedule – a schedule for the epsilon values
  • +
  • evaluation_epsilon – the epsilon value to use for evaluation phases
  • +
  • continuous_exploration_policy_parameters – the parameters of the continuous exploration policy to use +if the e-greedy is used for a continuous policy
  • +
  • architecture_num_q_heads – the number of q heads to select from
  • +
+
+
+ +
+
+

Categorical

+
+
+class rl_coach.exploration_policies.Categorical(action_space: rl_coach.spaces.ActionSpace)[source]
+

Categorical exploration policy is intended for discrete action spaces. It expects the action values to represent a probability distribution over the actions, from which a single action will be sampled. In evaluation, the action that has the highest probability will be selected. This is particularly useful for actor-critic schemes, where the actor's output is a probability distribution over the actions.

+ +++ + + + +
Parameters:action_space – the action space used by the environment
+
+ +
+
+

ContinuousEntropy

+
+
+class rl_coach.exploration_policies.ContinuousEntropy(action_space: rl_coach.spaces.ActionSpace, noise_percentage_schedule: rl_coach.schedules.Schedule, evaluation_noise_percentage: float)[source]
+

Continuous entropy is an exploration policy that is actually implemented as part of the network. +The exploration policy class is only a placeholder for choosing this policy. The exploration policy is +implemented by adding a regularization factor to the network loss, which regularizes the entropy of the action. +This exploration policy is only intended for continuous action spaces, and assumes that the entire calculation +is implemented as part of the head.

+
+

Warning

+

This exploration policy expects the agent or the network to implement the exploration functionality. +There are only a few heads that actually are relevant and implement the entropy regularization factor.

+
+ +++ + + + +
Parameters:
    +
  • action_space – the action space used by the environment
  • +
  • noise_percentage_schedule – the schedule for the noise variance percentage relative to the absolute range +of the action space
  • +
  • evaluation_noise_percentage – the noise variance percentage that will be used during evaluation phases
  • +
+
+
+ +
+
+

EGreedy

+
+
+class rl_coach.exploration_policies.EGreedy(action_space: rl_coach.spaces.ActionSpace, epsilon_schedule: rl_coach.schedules.Schedule, evaluation_epsilon: float, continuous_exploration_policy_parameters: rl_coach.exploration_policies.exploration_policy.ExplorationParameters = <rl_coach.exploration_policies.additive_noise.AdditiveNoiseParameters object>)[source]
+

e-greedy is an exploration policy that is intended for both discrete and continuous action spaces.

+

For discrete action spaces, it assumes that each action is assigned a value, and it selects the action with the highest value with probability 1 - epsilon. Otherwise, it selects an action sampled uniformly out of all the possible actions. The epsilon value is given by the user and can be given as a schedule. In evaluation, a different epsilon value can be specified.

+

For continuous action spaces, it assumes that the mean action is given by the agent. With probability epsilon, +it samples a random action out of the action space bounds. Otherwise, it selects the action according to a +given continuous exploration policy, which is set to AdditiveNoise by default. In evaluation, the action is +always selected according to the given continuous exploration policy (where its phase is set to evaluation as well).

+ +++ + + + +
Parameters:
    +
  • action_space – the action space used by the environment
  • +
  • epsilon_schedule – a schedule for the epsilon values
  • +
  • evaluation_epsilon – the epsilon value to use for evaluation phases
  • +
  • continuous_exploration_policy_parameters – the parameters of the continuous exploration policy to use +if the e-greedy is used for a continuous policy
  • +
+
+
+ +
+
+
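A sketch of configuring e-greedy exploration for a discrete action space; DiscreteActionSpace and LinearSchedule are assumed to be available from rl_coach.spaces and rl_coach.schedules, and their exact signatures may differ between versions:

    from rl_coach.exploration_policies import EGreedy
    from rl_coach.schedules import LinearSchedule
    from rl_coach.spaces import DiscreteActionSpace

    action_space = DiscreteActionSpace(4)                # 4 discrete actions
    epsilon_schedule = LinearSchedule(1.0, 0.01, 10000)  # decay epsilon from 1.0 to 0.01

    policy = EGreedy(action_space=action_space,
                     epsilon_schedule=epsilon_schedule,
                     evaluation_epsilon=0.001)

    chosen_action = policy.get_action([0.2, 0.6, 0.1, 0.1])  # made-up action values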

Greedy

+
+
+class rl_coach.exploration_policies.Greedy(action_space: rl_coach.spaces.ActionSpace)[source]
+

The Greedy exploration policy is intended for both discrete and continuous action spaces. For discrete action spaces, it always selects the action with the maximum value, as given by the agent. For continuous action spaces, it always returns the exact action, as it was given by the agent.

+ +++ + + + +
Parameters:action_space – the action space used by the environment
+
+ +
+
+

OUProcess

+
+
+class rl_coach.exploration_policies.OUProcess(action_space: rl_coach.spaces.ActionSpace, mu: float = 0, theta: float = 0.15, sigma: float = 0.2, dt: float = 0.01)[source]
+

OUProcess exploration policy is intended for continuous action spaces, and selects the action according to +an Ornstein-Uhlenbeck process. The Ornstein-Uhlenbeck process implements the action as a Gaussian process, where +the samples are correlated between consequent time steps.

+ +++ + + + +
Parameters:action_space – the action space used by the environment
+
+ +
+
+

ParameterNoise

+
+
+class rl_coach.exploration_policies.ParameterNoise(network_params: Dict[str, rl_coach.base_parameters.NetworkParameters], action_space: rl_coach.spaces.ActionSpace)[source]
+

The ParameterNoise exploration policy is intended for both discrete and continuous action spaces. +It applies the exploration policy by replacing all the dense network layers with noisy layers. +The noisy layers have both weight means and weight standard deviations, and for each forward pass of the network +the weights are sampled from a normal distribution that follows the learned weights mean and standard deviation +values.

+

Warning: currently supported only by DQN variants

+ +++ + + + +
Parameters:action_space – the action space used by the environment
+
+ +
+
+

TruncatedNormal

+
+
+class rl_coach.exploration_policies.TruncatedNormal(action_space: rl_coach.spaces.ActionSpace, noise_percentage_schedule: rl_coach.schedules.Schedule, evaluation_noise_percentage: float, clip_low: float, clip_high: float)[source]
+

The TruncatedNormal exploration policy is intended for continuous action spaces. It samples the action from a normal distribution, where the mean action is given by the agent, and the standard deviation can be given in two different ways:
1. Specified by the user as a noise schedule which is taken in percentiles out of the action space size
2. Specified by the agent's action. In case the agent's action is a list with 2 values, the 1st one is assumed to be the mean of the action, and the 2nd is assumed to be its standard deviation.
When the sampled action is outside of the action bounds given by the user, it is sampled again and again, until it is within the bounds.

+ +++ + + + +
Parameters:
    +
  • action_space – the action space used by the environment
  • +
  • noise_percentage_schedule – the schedule for the noise variance percentage relative to the absolute range +of the action space
  • +
  • evaluation_noise_percentage – the noise variance percentage that will be used during evaluation phases
  • +
+
+
+ +
+
+

UCB

+
+
+class rl_coach.exploration_policies.UCB(action_space: rl_coach.spaces.ActionSpace, epsilon_schedule: rl_coach.schedules.Schedule, evaluation_epsilon: float, architecture_num_q_heads: int, lamb: int, continuous_exploration_policy_parameters: rl_coach.exploration_policies.exploration_policy.ExplorationParameters = <rl_coach.exploration_policies.additive_noise.AdditiveNoiseParameters object>)[source]
+

UCB exploration policy follows the upper confidence bound heuristic to sample actions in discrete action spaces. It assumes that there are multiple network heads that are predicting action values, and that the standard deviation between the heads' predictions represents the uncertainty of the agent in each of the actions. It then updates the action value estimates to be mean(actions) + lambda * stdev(actions), where lambda is given by the user. This exploration policy aims to take advantage of the uncertainty of the agent in its predictions, and select the action according to the tradeoff between how uncertain the agent is, and how large it predicts the outcome from those actions to be.

+ +++ + + + +
Parameters:
    +
  • action_space – the action space used by the environment
  • +
  • epsilon_schedule – a schedule for the epsilon values
  • +
  • evaluation_epsilon – the epsilon value to use for evaluation phases
  • +
  • architecture_num_q_heads – the number of q heads to select from
  • +
  • lamb – lambda coefficient for taking the standard deviation into account
  • +
  • continuous_exploration_policy_parameters – the parameters of the continuous exploration policy to use +if the e-greedy is used for a continuous policy
  • +
+
+
+ +
+
\ No newline at end of file
diff --git a/docs/components/filters/index.html b/docs/components/filters/index.html new file mode 100644 index 0000000..52f38e7 --- /dev/null +++ b/docs/components/filters/index.html @@ -0,0 +1,266 @@
[HTML page head, navigation and footer markup omitted — Filters — Reinforcement Learning Coach 0.11.0 documentation]

Filters

+
+

Filters

+ +
+

Filters are a mechanism in Coach that allows pre-processing and post-processing of the information flowing between the agent and the environment. There are two filter categories -

+
    +
  • Input filters - these are filters that process the information passed into the agent from the environment. This information includes the observation and the reward. Input filters therefore allow rescaling observations, normalizing rewards, stacking observations, etc.
  • +
  • Output filters - these are filters that process the information going out of the agent into the environment. +This information includes the action the agent chooses to take. Output filters therefore allow conversion of +actions from one space into another. For example, the agent can take \(N\) discrete actions, that will be mapped by +the output filter onto \(N\) continuous actions.
  • +
+

Filters can be stacked on top of each other in order to build complex processing flows of the inputs or outputs.

+../../_images/filters.png +
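As a sketch of such a stack, in the spirit of the Atari preprocessing used in the Coach presets (the InputFilter container and its add_observation_filter / add_reward_filter methods are assumptions here, and their exact signatures may differ between versions):

    from rl_coach.filters.filter import InputFilter
    from rl_coach.filters.observation import ObservationRGBToYFilter, ObservationStackingFilter
    from rl_coach.filters.reward import RewardClippingFilter

    input_filter = InputFilter()
    # clip the reward to [-1, 1], convert frames to grayscale, then stack the last 4 frames
    input_filter.add_reward_filter('clipping', RewardClippingFilter(-1.0, 1.0))
    input_filter.add_observation_filter('observation', 'to_grayscale', ObservationRGBToYFilter())
    input_filter.add_observation_filter('observation', 'stacking', ObservationStackingFilter(4))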
\ No newline at end of file
diff --git a/docs/components/filters/input_filters.html b/docs/components/filters/input_filters.html new file mode 100644 index 0000000..bd23b26 --- /dev/null +++ b/docs/components/filters/input_filters.html @@ -0,0 +1,587 @@
[HTML page head, navigation and footer markup omitted — Input Filters — Reinforcement Learning Coach 0.11.0 documentation]
+

Input Filters

+

The input filters are separated into two categories - observation filters and reward filters.

+
+

Observation Filters

+
+

ObservationClippingFilter

+
+
+class rl_coach.filters.observation.ObservationClippingFilter(clipping_low: float = -inf, clipping_high: float = inf)[source]
+

Clips the observation values to a given range of values. +For example, if the observation consists of measurements in an arbitrary range, +and we want to control the minimum and maximum values of these observations, +we can define a range and clip the values of the measurements.

+ +++ + + + +
Parameters:
    +
  • clipping_low – The minimum value to allow in the observation after clipping
  • +
  • clipping_high – The maximum value to allow in the observation after clipping
  • +
+
+
+ +
+
+

ObservationCropFilter

+
+
+class rl_coach.filters.observation.ObservationCropFilter(crop_low: numpy.ndarray = None, crop_high: numpy.ndarray = None)[source]
+

Crops the size of the observation to a given crop window. For example, in Atari, the +observations are images with a shape of 210x160. Usually, we will want to crop the size of the observation to a +square of 160x160 before rescaling them.

+ +++ + + + +
Parameters:
    +
  • crop_low – a vector where each dimension describes the start index for cropping the observation in the +corresponding dimension. a negative value of -1 will be mapped to the max size
  • +
  • crop_high – a vector where each dimension describes the end index for cropping the observation in the +corresponding dimension. a negative value of -1 will be mapped to the max size
  • +
+
+
+ +
+
+

ObservationMoveAxisFilter

+
+
+class rl_coach.filters.observation.ObservationMoveAxisFilter(axis_origin: int = None, axis_target: int = None)[source]
+

Reorders the axes of the observation. This can be useful when the observation is an +image, and we want to move the channel axis to be the last axis instead of the first axis.

+ +++ + + + +
Parameters:
    +
  • axis_origin – The axis to move
  • +
  • axis_target – Where to move the selected axis to
  • +
+
+
+ +
+
+

ObservationNormalizationFilter

+
+
+class rl_coach.filters.observation.ObservationNormalizationFilter(clip_min: float = -5.0, clip_max: float = 5.0, name='observation_stats')[source]
+

Normalizes the observation values with a running mean and standard deviation of +all the observations seen so far. The normalization is performed element-wise. Additionally, when working with +multiple workers, the statistics used for the normalization operation are accumulated over all the workers.

+ +++ + + + +
Parameters:
    +
  • clip_min – The minimum value to allow after normalizing the observation
  • +
  • clip_max – The maximum value to allow after normalizing the observation
  • +
+
+
+ +
+
+

ObservationReductionBySubPartsNameFilter

+
+
+class rl_coach.filters.observation.ObservationReductionBySubPartsNameFilter(part_names: List[str], reduction_method: rl_coach.filters.observation.observation_reduction_by_sub_parts_name_filter.ObservationReductionBySubPartsNameFilter.ReductionMethod)[source]
+

Allows keeping only parts of the observation, by specifying their +name. This is useful when the environment has a measurements vector as observation which includes several different +measurements, but you want the agent to only see some of the measurements and not all. +For example, the CARLA environment extracts multiple measurements that can be used by the agent, such as +speed and location. If we want to only use the speed, it can be done using this filter. +This will currently work only for VectorObservationSpace observations

+ +++ + + + +
Parameters:
    +
  • part_names – A list of part names to reduce
  • +
  • reduction_method – A reduction method to use - keep or discard the given parts
  • +
+
+
+ +
+
+

ObservationRescaleSizeByFactorFilter

+
+
+class rl_coach.filters.observation.ObservationRescaleSizeByFactorFilter(rescale_factor: float, rescaling_interpolation_type: rl_coach.filters.observation.observation_rescale_size_by_factor_filter.RescaleInterpolationType)[source]
+

Rescales an image observation by some factor. For example, the image size +can be reduced by a factor of 2. +Warning: this requires the input observation to be of type uint8 due to scipy requirements!

+ +++ + + + +
Parameters:
    +
  • rescale_factor – the factor by which the observation will be rescaled
  • +
  • rescaling_interpolation_type – the interpolation type for rescaling
  • +
+
+
+ +
+
+

ObservationRescaleToSizeFilter

+
+
+class rl_coach.filters.observation.ObservationRescaleToSizeFilter(output_observation_space: rl_coach.spaces.PlanarMapsObservationSpace, rescaling_interpolation_type: rl_coach.filters.observation.observation_rescale_to_size_filter.RescaleInterpolationType = <RescaleInterpolationType.BILINEAR: 'bilinear'>)[source]
+

Rescales an image observation to a given size. The target size does not +necessarily keep the aspect ratio of the original observation. +Warning: this requires the input observation to be of type uint8 due to scipy requirements!

+ +++ + + + +
Parameters:
    +
  • output_observation_space – the output observation space
  • +
  • rescaling_interpolation_type – the interpolation type for rescaling
  • +
+
+
+ +
+
+

ObservationRGBToYFilter

+
+
+class rl_coach.filters.observation.ObservationRGBToYFilter[source]
+

Converts a color image observation specified using the RGB encoding into a grayscale +image observation, by keeping only the luminance (Y) channel of the YUV encoding. This can be useful if the colors +in the original image are not relevant for solving the task at hand. +The channels axis is assumed to be the last axis

+
+ +
+
+

ObservationSqueezeFilter

+
+
+class rl_coach.filters.observation.ObservationSqueezeFilter(axis: int = None)[source]
+

Removes redundant axes from the observation, which are axes with a dimension of 1.

+ +++ + + + +
Parameters:axis – Specifies which axis to remove. If set to None, all the axes of size 1 will be removed.
+
+ +
+
+

ObservationStackingFilter

+
+
+class rl_coach.filters.observation.ObservationStackingFilter(stack_size: int, stacking_axis: int = -1)[source]
+

Stacks several observations on top of each other. For image observation this will +create a 3D blob. The stacking is done in a lazy manner in order to reduce memory consumption. To achieve this, +a LazyStack object is used in order to wrap the observations in the stack. For this reason, the +ObservationStackingFilter must be the last filter in the inputs filters stack. +This filter is stateful since it stores the previous step result and depends on it. +The filter adds an additional dimension to the output observation.

+

Warning!!! The filter replaces the observation with a LazyStack object, so no filters should be applied after this filter. Applying more filters will cause the LazyStack object to be converted to a numpy array and increase the memory footprint.

+ +++ + + + +
Parameters:
    +
  • stack_size – the number of previous observations in the stack
  • +
  • stacking_axis – the axis on which to stack the observation on
  • +
+
+
+ +
+
+

ObservationToUInt8Filter

+
+
+class rl_coach.filters.observation.ObservationToUInt8Filter(input_low: float, input_high: float)[source]
+

Converts a floating point observation into an unsigned int 8 bit observation. This is +mostly useful for reducing memory consumption and is usually used for image observations. The filter will first +spread the observation values over the range 0-255 and then discretize them into integer values.

+ +++ + + + +
Parameters:
    +
  • input_low – The lowest value currently present in the observation
  • +
  • input_high – The highest value currently present in the observation
  • +
+
+
+ +
+
+
+

Reward Filters

+
+

RewardClippingFilter

+
+
+class rl_coach.filters.reward.RewardClippingFilter(clipping_low: float = -inf, clipping_high: float = inf)[source]
+

Clips the reward values into a given range. For example, in DQN, the Atari rewards are +clipped into the range -1 and 1 in order to control the scale of the returns.

+ +++ + + + +
Parameters:
    +
  • clipping_low – The low threshold for reward clipping
  • +
  • clipping_high – The high threshold for reward clipping
  • +
+
+
+ +
+
+

RewardNormalizationFilter

+
+
+class rl_coach.filters.reward.RewardNormalizationFilter(clip_min: float = -5.0, clip_max: float = 5.0)[source]
+

Normalizes the reward values with a running mean and standard deviation of +all the rewards seen so far. When working with multiple workers, the statistics used for the normalization operation +are accumulated over all the workers.

+ +++ + + + +
Parameters:
    +
  • clip_min – The minimum value to allow after normalizing the reward
  • +
  • clip_max – The maximum value to allow after normalizing the reward
  • +
+
+
+ +
+
+

RewardRescaleFilter

+
+
+class rl_coach.filters.reward.RewardRescaleFilter(rescale_factor: float)[source]
+

Rescales the reward by a given factor. Rescaling the rewards of the environment has been +observed to have a large effect (negative or positive) on the behavior of the learning process.

+ +++ + + + +
Parameters:rescale_factor – The reward rescaling factor by which the reward will be multiplied
+
+ +
+
+
\ No newline at end of file
diff --git a/docs/components/filters/output_filters.html b/docs/components/filters/output_filters.html new file mode 100644 index 0000000..65b158a --- /dev/null +++ b/docs/components/filters/output_filters.html @@ -0,0 +1,384 @@
[HTML page head, navigation and footer markup omitted — Output Filters — Reinforcement Learning Coach 0.11.0 documentation]

Output Filters

+

The output filters only process the actions.

+
+

Action Filters

+
+
+class rl_coach.filters.action.AttentionDiscretization(num_bins_per_dimension: Union[int, List[int]], force_int_bins=False)[source]
+

Discretizes an AttentionActionSpace. The attention action space defines the actions +as choosing sub-boxes in a given box. For example, consider an image of size 100x100, where the action is choosing +a crop window of size 20x20 to attend to in the image. AttentionDiscretization allows discretizing the possible crop +windows to choose into a finite number of options, and map a discrete action space into those crop windows.

+

Warning! this will currently only work for attention spaces with 2 dimensions.

+ +++ + + + +
Parameters:
    +
  • num_bins_per_dimension – Number of discrete bins to use for each dimension of the action space
  • +
  • force_int_bins – If set to True, all the bins will represent integer coordinates in space.
  • +
+
+
+ +../../_images/attention_discretization.png +
+
+class rl_coach.filters.action.BoxDiscretization(num_bins_per_dimension: Union[int, List[int]], force_int_bins=False)[source]
+

Discretizes a continuous action space into a discrete action space, allowing the usage of agents such as DQN for continuous environments such as MuJoCo. Given the number of bins to discretize into, the original continuous action space is uniformly separated into the given number of bins, each mapped to a discrete action index. Each discrete action is mapped to a single N dimensional action in the BoxActionSpace action space. For example, if the original action space is between -1 and 1 and 5 bins were selected, the new action space will consist of 5 actions mapped to -1, -0.5, 0, 0.5 and 1.

+ +++ + + + +
Parameters:
    +
  • num_bins_per_dimension – The number of bins to use for each dimension of the target action space. +The bins will be spread out uniformly over this space
  • +
  • force_int_bins – force the bins to represent only integer actions. for example, if the action space is in +the range 0-10 and there are 5 bins, then the bins will be placed at 0, 2, 5, 7, 10, +instead of 0, 2.5, 5, 7.5, 10.
  • +
+
+
+ +../../_images/box_discretization.png +
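The bin placement described above can be checked with a couple of lines of numpy, independently of the filter itself; the filter construction shown afterwards uses the documented constructor:

    import numpy as np
    from rl_coach.filters.action import BoxDiscretization

    # 5 uniform bins over a [-1, 1] box land on -1, -0.5, 0, 0.5 and 1
    print(np.linspace(-1.0, 1.0, num=5))    # [-1.  -0.5  0.   0.5  1. ]

    discretizer = BoxDiscretization(num_bins_per_dimension=5)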
+
+class rl_coach.filters.action.BoxMasking(masked_target_space_low: Union[None, int, float, numpy.ndarray], masked_target_space_high: Union[None, int, float, numpy.ndarray])[source]
+

Masks part of the action space to enforce the agent to work in a defined space. For example, +if the original action space is between -1 and 1, then this filter can be used in order to constrain the agent actions +to the range 0 and 1 instead. This essentially masks the range -1 and 0 from the agent. +The resulting action space will be shifted and will always start from 0 and have the size of the unmasked area.

+ +++ + + + +
Parameters:
    +
  • masked_target_space_low – the lowest values that can be chosen in the target action space
  • +
  • masked_target_space_high – the highest values that can be chosen in the target action space
  • +
+
+
+ +../../_images/box_masking.png +
+
+class rl_coach.filters.action.PartialDiscreteActionSpaceMap(target_actions: List[Union[int, float, numpy.ndarray, List]] = None, descriptions: List[str] = None)[source]
+

Partial map of two countable action spaces. For example, consider an environment +with a MultiSelect action space (select multiple actions at the same time, such as jump and go right), with 8 actual +MultiSelect actions. If we want the agent to be able to select only 5 of those actions by their index (0-4), we can +map a discrete action space with 5 actions into the 5 selected MultiSelect actions. This will both allow the agent to +use regular discrete actions, and mask 3 of the actions from the agent.

+ +++ + + + +
Parameters:
    +
  • target_actions – A partial list of actions from the target space to map to.
  • +
  • descriptions – a list of descriptions of each of the actions
  • +
+
+
+ +../../_images/partial_discrete_action_space_map.png +
+
+class rl_coach.filters.action.FullDiscreteActionSpaceMap[source]
+

Full map of two countable action spaces. This works in a similar way to the +PartialDiscreteActionSpaceMap, but maps the entire source action space into the entire target action space, without +masking any actions. +For example, if there are 10 multiselect actions in the output space, the actions 0-9 will be mapped to those +multiselect actions.

+
+ +../../_images/full_discrete_action_space_map.png +
+
+class rl_coach.filters.action.LinearBoxToBoxMap(input_space_low: Union[None, int, float, numpy.ndarray], input_space_high: Union[None, int, float, numpy.ndarray])[source]
+

A linear mapping of two box action spaces. For example, if the action space of the +environment consists of continuous actions between 0 and 1, and we want the agent to choose actions between -1 and 1, +the LinearBoxToBoxMap can be used to map the range -1 and 1 to the range 0 and 1 in a linear way. This means that the +action -1 will be mapped to 0, the action 1 will be mapped to 1, and the rest of the actions will be linearly mapped +between those values.

+ +++ + + + +
Parameters:
    +
  • input_space_low – the low values of the desired action space
  • +
  • input_space_high – the high values of the desired action space
  • +
+
+
+ +../../_images/linear_box_to_box_map.png +
+
\ No newline at end of file
diff --git a/docs/components/memories/index.html b/docs/components/memories/index.html new file mode 100644 index 0000000..aae392c --- /dev/null +++ b/docs/components/memories/index.html @@ -0,0 +1,431 @@
[HTML page head, navigation and footer markup omitted — Memories — Reinforcement Learning Coach 0.11.0 documentation]
+

Memories

+
+

Episodic Memories

+
+

EpisodicExperienceReplay

+
+
+class rl_coach.memories.episodic.EpisodicExperienceReplay(max_size: Tuple[rl_coach.memories.memory.MemoryGranularity, int] = (<MemoryGranularity.Transitions: 0>, 1000000), n_step=-1)[source]
+

A replay buffer that stores episodes of transitions. The additional structure allows performing various +calculations of total return and other values that depend on the sequential behavior of the transitions +in the episode.

+ +++ + + + +
Parameters:max_size – the maximum number of transitions or episodes to hold in the memory
+
+ +
+
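A sketch of constructing the buffer with the documented max_size format; the store and sample calls follow the common Coach memory interface and should be treated as assumptions:

    import numpy as np
    from rl_coach.core_types import Transition
    from rl_coach.memories.episodic import EpisodicExperienceReplay
    from rl_coach.memories.memory import MemoryGranularity

    memory = EpisodicExperienceReplay(max_size=(MemoryGranularity.Transitions, 1000000))

    # a made-up transition to populate the buffer
    transition = Transition(state={'observation': np.zeros(4)}, action=0, reward=1.0,
                            next_state={'observation': np.zeros(4)}, game_over=True)
    memory.store(transition)      # store/sample follow the common Coach memory interface (assumption)
    batch = memory.sample(1)      # a list of sampled transitions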
+

EpisodicHindsightExperienceReplay

+
+
+class rl_coach.memories.episodic.EpisodicHindsightExperienceReplay(max_size: Tuple[rl_coach.memories.memory.MemoryGranularity, int], hindsight_transitions_per_regular_transition: int, hindsight_goal_selection_method: rl_coach.memories.episodic.episodic_hindsight_experience_replay.HindsightGoalSelectionMethod, goals_space: rl_coach.spaces.GoalsSpace)[source]
+

Implements Hindsight Experience Replay as described in the following paper: https://arxiv.org/pdf/1707.01495.pdf

+ +++ + + + +
Parameters:
    +
  • max_size – The maximum size of the memory. should be defined in a granularity of Transitions
  • +
  • hindsight_transitions_per_regular_transition – The number of hindsight artificial transitions to generate +for each actual transition
  • +
  • hindsight_goal_selection_method – The method that will be used for generating the goals for the +hindsight transitions. Should be one of HindsightGoalSelectionMethod
  • +
  • goals_space – A GoalsSpace which defines the base properties of the goals space
  • +
+
+
+ +
+
+

EpisodicHRLHindsightExperienceReplay

+
+
+class rl_coach.memories.episodic.EpisodicHRLHindsightExperienceReplay(max_size: Tuple[rl_coach.memories.memory.MemoryGranularity, int], hindsight_transitions_per_regular_transition: int, hindsight_goal_selection_method: rl_coach.memories.episodic.episodic_hindsight_experience_replay.HindsightGoalSelectionMethod, goals_space: rl_coach.spaces.GoalsSpace)[source]
+

Implements HRL Hindsight Experience Replay as described in the following paper: https://arxiv.org/abs/1805.08180

+

This is the memory you should use if you want a shared hindsight experience replay buffer between multiple workers

+ +++ + + + +
Parameters:
    +
  • max_size – The maximum size of the memory. should be defined in a granularity of Transitions
  • +
  • hindsight_transitions_per_regular_transition – The number of hindsight artificial transitions to generate +for each actual transition
  • +
  • hindsight_goal_selection_method – The method that will be used for generating the goals for the +hindsight transitions. Should be one of HindsightGoalSelectionMethod
  • +
  • goals_space – A GoalsSpace which defines the properties of the goals
  • +
  • do_action_hindsight – Replace the action (sub-goal) given to a lower layer, with the actual achieved goal
  • +
+
+
+ +
+
+

SingleEpisodeBuffer

+
+
+class rl_coach.memories.episodic.SingleEpisodeBuffer[source]
+
+ +
+
+
+

Non-Episodic Memories

+
+

BalancedExperienceReplay

+
+
+class rl_coach.memories.non_episodic.BalancedExperienceReplay(max_size: Tuple[rl_coach.memories.memory.MemoryGranularity, int], allow_duplicates_in_batch_sampling: bool = True, num_classes: int = 0, state_key_with_the_class_index: Any = 'class')[source]
+
+++ + + + +
Parameters:
    +
  • max_size – the maximum number of transitions or episodes to hold in the memory
  • +
  • allow_duplicates_in_batch_sampling – allow having the same transition multiple times in a batch
  • +
  • num_classes – the number of classes in the replayed data
  • +
  • state_key_with_the_class_index – the class index is assumed to be a value in the state dictionary. +this parameter determines the key to retrieve the class index value
  • +
+
+
+ +
+
+

QDND

+
+
+class rl_coach.memories.non_episodic.QDND(dict_size, key_width, num_actions, new_value_shift_coefficient=0.1, key_error_threshold=0.01, learning_rate=0.01, num_neighbors=50, return_additional_data=False, override_existing_keys=False, rebuild_on_every_update=False)[source]
+
+ +
+
+

ExperienceReplay

+
+
+class rl_coach.memories.non_episodic.ExperienceReplay(max_size: Tuple[rl_coach.memories.memory.MemoryGranularity, int], allow_duplicates_in_batch_sampling: bool = True)[source]
+

A regular replay buffer which stores transitions without any additional structure

Parameters:
  • max_size – the maximum number of transitions or episodes to hold in the memory
  • allow_duplicates_in_batch_sampling – allow having the same transition multiple times in a batch
+
+
+ +
+
+
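A short usage sketch. The store() and sample() calls and the Transition keyword arguments below follow the generic memory/transition interface and are assumptions for the example, not part of this page:

import numpy as np
from rl_coach.core_types import Transition
from rl_coach.memories.memory import MemoryGranularity
from rl_coach.memories.non_episodic import ExperienceReplay

# a plain replay buffer holding up to 1,000,000 transitions
memory = ExperienceReplay(max_size=(MemoryGranularity.Transitions, 1000000))

# store a single transition and sample a (tiny) training batch - assumed interface
memory.store(Transition(state={'observation': np.zeros(4)}, action=0, reward=1.0,
                        next_state={'observation': np.ones(4)}, game_over=False))
batch = memory.sample(1)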

PrioritizedExperienceReplay

+
+
+class rl_coach.memories.non_episodic.PrioritizedExperienceReplay(max_size: Tuple[rl_coach.memories.memory.MemoryGranularity, int], alpha: float = 0.6, beta: rl_coach.schedules.Schedule = <rl_coach.schedules.ConstantSchedule object>, epsilon: float = 1e-06, allow_duplicates_in_batch_sampling: bool = True)[source]
+

This is the proportional sampling variant of the prioritized experience replay as described in https://arxiv.org/pdf/1511.05952.pdf.

Parameters:
  • max_size – the maximum number of transitions or episodes to hold in the memory
  • alpha – the alpha prioritization coefficient
  • beta – the beta parameter used for importance sampling
  • epsilon – a small value added to the priority of each transition
  • allow_duplicates_in_batch_sampling – allow having the same transition multiple times in a batch
+
+
+ +
+
+
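A construction sketch with illustrative hyper-parameter values (the power-of-two size is only a precaution in case the underlying sum-tree implementation expects one; beta is left at its default constant schedule):

from rl_coach.memories.memory import MemoryGranularity
from rl_coach.memories.non_episodic import PrioritizedExperienceReplay

# proportional prioritization: transitions with a larger priority are sampled more often;
# alpha controls how strongly prioritization is applied (0 would mean uniform sampling)
memory = PrioritizedExperienceReplay(
    max_size=(MemoryGranularity.Transitions, 2 ** 20),
    alpha=0.6,
    epsilon=1e-06)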

TransitionCollection

+
+
+class rl_coach.memories.non_episodic.TransitionCollection[source]
+

A simple Python implementation of the transition collection that non-episodic memories are constructed on top of.

+
+ +
+
+
\ No newline at end of file
diff --git a/docs/components/spaces.html b/docs/components/spaces.html new file mode 100644 index 0000000..5368291 --- /dev/null +++ b/docs/components/spaces.html @@ -0,0 +1,720 @@
+ Spaces — Reinforcement Learning Coach 0.11.0 documentation

Spaces

+
+

Space

+
+
+class rl_coach.spaces.Space(shape: Union[int, tuple, list, numpy.ndarray], low: Union[None, int, float, numpy.ndarray] = -inf, high: Union[None, int, float, numpy.ndarray] = inf)[source]
+

A space defines a set of valid values

Parameters:
  • shape – the shape of the space
  • low – the lowest values possible in the space. Can be an array defining the lowest values per point, or a single value defining the general lowest values
  • high – the highest values possible in the space. Can be an array defining the highest values per point, or a single value defining the general highest values
+
+
+
+is_point_in_space_shape(point: numpy.ndarray) → bool[source]
+

Checks if a given multidimensional point is within the bounds of the shape of the space

Parameters: point – a multidimensional point
Returns: True if the point is within the shape of the space. False otherwise
+
+ +
+
+sample() → numpy.ndarray[source]
+

Sample the defined space: uniformly if space bounds are defined, or normally distributed if no bounds are defined

Returns: A numpy array sampled from the space
+
+ +
+
+val_matches_space_definition(val: Union[int, float, numpy.ndarray]) → bool[source]
+

Checks if the given value matches the space definition in terms of shape and values

Parameters: val – a value to check
Returns: True / False depending on whether the value matches the space definition
+
+ +
+ +
+
+
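A small sketch of the base-class interface. Constructing Space directly is done here only for illustration (in practice one of the subclasses below is used), and the commented expectations are assumptions:

import numpy as np
from rl_coach.spaces import Space

# a 2-element space bounded in [0, 1], using the constructor arguments documented above
space = Space(shape=2, low=0.0, high=1.0)
space.val_matches_space_definition(np.array([0.2, 0.7]))  # expected True: right shape, within bounds
space.val_matches_space_definition(np.array([0.2, 1.5]))  # expected False: 1.5 exceeds the high bound
space.sample()                                            # uniform sample, since bounds are defined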

Observation Spaces

+
+
+class rl_coach.spaces.ObservationSpace(shape: Union[int, numpy.ndarray], low: Union[None, int, float, numpy.ndarray] = -inf, high: Union[None, int, float, numpy.ndarray] = inf)[source]
+
+
+is_point_in_space_shape(point: numpy.ndarray) → bool
+

Checks if a given multidimensional point is within the bounds of the shape of the space

Parameters: point – a multidimensional point
Returns: True if the point is within the shape of the space. False otherwise
+
+ +
+
+sample() → numpy.ndarray
+

Sample the defined space: uniformly if space bounds are defined, or normally distributed if no bounds are defined

Returns: A numpy array sampled from the space
+
+ +
+
+val_matches_space_definition(val: Union[int, float, numpy.ndarray]) → bool
+

Checks if the given value matches the space definition in terms of shape and values

Parameters: val – a value to check
Returns: True / False depending on whether the value matches the space definition
+
+ +
+ +
+

VectorObservationSpace

+
+
+class rl_coach.spaces.VectorObservationSpace(shape: int, low: Union[None, int, float, numpy.ndarray] = -inf, high: Union[None, int, float, numpy.ndarray] = inf, measurements_names: List[str] = None)[source]
+

An observation space which is defined as a vector of elements. This can be particularly useful for environments which return measurements, such as in robotic environments.

+
+ +
+
+
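For instance (the dimensionality and measurement names are made up for illustration):

from rl_coach.spaces import VectorObservationSpace

# a 4-dimensional measurements vector, e.g. positions and velocities of a simple robot
observation_space = VectorObservationSpace(shape=4, measurements_names=['x', 'y', 'vx', 'vy'])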

PlanarMapsObservationSpace

+
+
+class rl_coach.spaces.PlanarMapsObservationSpace(shape: numpy.ndarray, low: int, high: int, channels_axis: int = -1)[source]
+

An observation space which defines a stack of 2D observations. For example, an environment which returns a stack of segmentation maps like in Starcraft.

+
+ +
+
+

ImageObservationSpace

+
+
+class rl_coach.spaces.ImageObservationSpace(shape: numpy.ndarray, high: int, channels_axis: int = -1)[source]
+

An observation space which is a special case of the PlanarMapsObservationSpace, where the stack of 2D observations represents an RGB image or a grayscale image.

+
+ +
+
+
+
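For example, a standard 84x84 RGB observation could be described as follows (the values are illustrative):

import numpy as np
from rl_coach.spaces import ImageObservationSpace

# an 84x84 RGB image with pixel values in [0, 255], channels last
observation_space = ImageObservationSpace(shape=np.array([84, 84, 3]), high=255)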

Action Spaces

+
+
+class rl_coach.spaces.ActionSpace(shape: Union[int, numpy.ndarray], low: Union[None, int, float, numpy.ndarray] = -inf, high: Union[None, int, float, numpy.ndarray] = inf, descriptions: Union[None, List, Dict] = None, default_action: Union[int, float, numpy.ndarray, List] = None)[source]
+
+
+clip_action_to_space(action: Union[int, float, numpy.ndarray, List]) → Union[int, float, numpy.ndarray, List][source]
+

Given an action, clip its values to fit to the action space ranges

Parameters: action – a given action
Returns: the clipped action
+
+ +
+
+is_point_in_space_shape(point: numpy.ndarray) → bool
+

Checks if a given multidimensional point is within the bounds of the shape of the space

Parameters: point – a multidimensional point
Returns: True if the point is within the shape of the space. False otherwise
+
+ +
+
+sample() → numpy.ndarray
+

Sample the defined space: uniformly if space bounds are defined, or normally distributed if no bounds are defined

Returns: A numpy array sampled from the space
+
+ +
+
+sample_with_info() → rl_coach.core_types.ActionInfo[source]
+

Get a random action with additional “fake” info

Returns: An action info instance
+
+ +
+
+val_matches_space_definition(val: Union[int, float, numpy.ndarray]) → bool
+

Checks if the given value matches the space definition in terms of shape and values

Parameters: val – a value to check
Returns: True / False depending on whether the value matches the space definition
+
+ +
+ +
+

AttentionActionSpace

+
+
+class rl_coach.spaces.AttentionActionSpace(shape: int, low: Union[None, int, float, numpy.ndarray] = -inf, high: Union[None, int, float, numpy.ndarray] = inf, descriptions: Union[None, List, Dict] = None, default_action: numpy.ndarray = None, forced_attention_size: Union[None, int, float, numpy.ndarray] = None)[source]
+

A box selection continuous action space, meaning that the actions are defined as selecting a multidimensional box from a given range. The actions will be in the form: [[low_x, low_y, …], [high_x, high_y, …]]

+
+ +
+
+

BoxActionSpace

+
+
+class rl_coach.spaces.BoxActionSpace(shape: Union[int, numpy.ndarray], low: Union[None, int, float, numpy.ndarray] = -inf, high: Union[None, int, float, numpy.ndarray] = inf, descriptions: Union[None, List, Dict] = None, default_action: numpy.ndarray = None)[source]
+

A multidimensional bounded or unbounded continuous action space

+
+ +
+
+
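A short sketch of defining such a space and clipping an out-of-range action (the bounds and action values are illustrative):

import numpy as np
from rl_coach.spaces import BoxActionSpace

# a 2-dimensional continuous action space bounded in [-1, 1]
action_space = BoxActionSpace(shape=2, low=-1.0, high=1.0)
clipped = action_space.clip_action_to_space(np.array([1.7, -0.3]))  # expected: [1.0, -0.3]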

DiscreteActionSpace

+
+
+class rl_coach.spaces.DiscreteActionSpace(num_actions: int, descriptions: Union[None, List, Dict] = None, default_action: numpy.ndarray = None)[source]
+

A discrete action space with action indices as actions

+
+ +
+
+
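For example (the action descriptions are illustrative):

from rl_coach.spaces import DiscreteActionSpace

# three discrete actions, identified by their indices 0..2
action_space = DiscreteActionSpace(num_actions=3, descriptions=['left', 'right', 'noop'])
action = action_space.sample()  # a random action index in {0, 1, 2}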

MultiSelectActionSpace

+
+
+class rl_coach.spaces.MultiSelectActionSpace(size: int, max_simultaneous_selected_actions: int = 1, descriptions: Union[None, List, Dict] = None, default_action: numpy.ndarray = None, allow_no_action_to_be_selected=True)[source]
+

A discrete action space where multiple actions can be selected at once. The actions are encoded as multi-hot vectors

+
+ +
+
+

CompoundActionSpace

+
+
+class rl_coach.spaces.CompoundActionSpace(sub_spaces: List[rl_coach.spaces.ActionSpace])[source]
+

An action space which consists of multiple sub-action spaces. For example, in Starcraft the agent should choose an action identifier from ~550 options (Discrete(550)), but it also needs to choose 13 different arguments for the selected action identifier, where each argument is by itself an action space. In Starcraft, the arguments are Discrete action spaces as well, but this is not mandatory.

+
+ +
+
+
+

Goal Spaces

+
+
+class rl_coach.spaces.GoalsSpace(goal_name: str, reward_type: rl_coach.spaces.GoalToRewardConversion, distance_metric: Union[rl_coach.spaces.GoalsSpace.DistanceMetric, Callable])[source]
+

A multidimensional space with a goal type definition. It also behaves as an action space, so that hierarchical agents can use it as an output action space. The class acts as a wrapper to the target space. So after setting the target space, all the values of the class will match the values of the target space (the shape, low, high, etc.)

Parameters:
  • goal_name – the name of the observation space to use as the achieved goal.
  • reward_type – the reward type to use for converting distances from goal to rewards
  • distance_metric – the distance metric to use. Could be either one of the distances in the DistanceMetric enum, or a custom function that gets two vectors as input and returns the distance between them
+
+
+
+class DistanceMetric[source]
+

An enumeration.

+
+ +
+
+clip_action_to_space(action: Union[int, float, numpy.ndarray, List]) → Union[int, float, numpy.ndarray, List]
+

Given an action, clip its values to fit to the action space ranges

Parameters: action – a given action
Returns: the clipped action
+
+ +
+
+distance_from_goal(goal: numpy.ndarray, state: dict) → float[source]
+

Given a state, check its distance from the goal

Parameters:
  • goal – a numpy array representing the goal
  • state – a dict representing the state
Returns: the distance from the goal

+
+
+ +
+
+get_reward_for_goal_and_state(goal: numpy.ndarray, state: dict) → Tuple[float, bool][source]
+

Given a state, check if the goal was reached and return a reward accordingly

Parameters:
  • goal – a numpy array representing the goal
  • state – a dict representing the state
Returns: the reward for the current goal and state pair, and a boolean indicating whether the goal was reached

+
+
+ +
+
+goal_from_state(state: Dict)[source]
+

Given a state, extract an observation according to the goal_name

Parameters: state – a dictionary of observations
Returns: the observation corresponding to the goal_name
+
+ +
+
+is_point_in_space_shape(point: numpy.ndarray) → bool
+

Checks if a given multidimensional point is within the bounds of the shape of the space

Parameters: point – a multidimensional point
Returns: True if the point is within the shape of the space. False otherwise
+
+ +
+
+sample() → numpy.ndarray
+

Sample the defined space: uniformly if space bounds are defined, or normally distributed if no bounds are defined

Returns: A numpy array sampled from the space
+
+ +
+
+sample_with_info() → rl_coach.core_types.ActionInfo
+

Get a random action with additional “fake” info

Returns: An action info instance
+
+ +
+
+val_matches_space_definition(val: Union[int, float, numpy.ndarray]) → bool
+

Checks if the given value matches the space definition in terms of shape and values

Parameters: val – a value to check
Returns: True / False depending on whether the value matches the space definition
+
+ +
+ +
+
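As a rough sketch of how a goals space might be assembled: ReachingGoal and its keyword arguments, the Euclidean member of DistanceMetric, and the 'achieved_goal' observation name are all assumptions made for the example and are not documented on this page:

import numpy as np
from rl_coach.spaces import GoalsSpace, ReachingGoal  # ReachingGoal assumed to be a GoalToRewardConversion

goals_space = GoalsSpace(goal_name='achieved_goal',  # hypothetical observation name
                         reward_type=ReachingGoal(distance_from_goal_threshold=0.075,
                                                  goal_reaching_reward=0,
                                                  default_reward=-1),
                         distance_metric=GoalsSpace.DistanceMetric.Euclidean)

# distance between a goal vector and the 'achieved_goal' entry of a state dict
d = goals_space.distance_from_goal(goal=np.zeros(3), state={'achieved_goal': np.ones(3)})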
\ No newline at end of file
diff --git a/docs/contributing/add_agent.html b/docs/contributing/add_agent.html new file mode 100644 index 0000000..2400842 --- /dev/null +++ b/docs/contributing/add_agent.html @@ -0,0 +1,313 @@
+ Adding a New Agent — Reinforcement Learning Coach 0.11.0 documentation

Adding a New Agent

+

Coach’s modularity makes adding an agent a simple and clean task. We suggest using the following Jupyter notebook tutorial to ramp up on this process. In general, it involves the following steps:

+
    +
  1. Implement your algorithm in a new file. The agent can inherit base classes such as ValueOptimizationAgent or ActorCriticAgent, or the more generic Agent base class.

    +
    +

    Note

    +

    ValueOptimizationAgent, PolicyOptimizationAgent and Agent are abstract classes. learn_from_batch() should be overridden with the desired behavior for the algorithm being implemented. If you decide to inherit from Agent, choose_action() should also be overridden.

    +
    +
    def learn_from_batch(self, batch) -> Tuple[float, List, List]:
    +    """
    +    Given a batch of transitions, calculates their target values and updates the network.
    +    :param batch: A list of transitions
    +    :return: The total loss of the training, the loss per head and the unclipped gradients
    +    """
    +
    +def choose_action(self, curr_state):
    +    """
    +    choose an action to act with in the current episode being played. Different behavior might be exhibited when training
    +     or testing.
    +
    +    :param curr_state: the current state to act upon.
    +    :return: chosen action, some action value describing the action (q-value, probability, etc)
    +    """
    +
    +
    +
  2. Implement your agent’s specific network head, if needed, in the implementation for the framework of your choice, for example architectures/neon_components/heads.py. The head will inherit the generic base class Head. A new output type should be added to configurations.py, and a mapping between the new head and the output type should be defined in the get_output_head() function at architectures/neon_components/general_network.py

    +
  3. Define a new parameters class that inherits AgentParameters. The parameters class defines all the hyperparameters for the agent, and is initialized with 4 main components:

    +
      +
    • algorithm: A class inheriting AlgorithmParameters which defines any algorithm specific parameters
    • exploration: A class inheriting ExplorationParameters which defines the exploration policy parameters. There are several common exploration policies built-in which you can use, and are defined under the exploration sub directory. You can also define your own custom exploration policy.
    • memory: A class inheriting MemoryParameters which defines the memory parameters. There are several common memory types built-in which you can use, and are defined under the memories sub directory. You can also define your own custom memory.
    • networks: A dictionary defining all the networks that will be used by the agent. The keys of the dictionary define the network name and will be used to access each network through the agent class. The dictionary values are a class inheriting NetworkParameters, which defines the network structure and parameters.
    +

    Additionally, set the path property to return the path to your agent class in the following format:

    +

    <path to python module>:<name of agent class>

    +

    For example,

    +
    class RainbowAgentParameters(AgentParameters):
+    def __init__(self):
+        super().__init__(algorithm=RainbowAlgorithmParameters(),
+                         exploration=RainbowExplorationParameters(),
+                         memory=RainbowMemoryParameters(),
+                         networks={"main": RainbowNetworkParameters()})
+
+    @property
+    def path(self):
+        return 'rainbow.rainbow_agent:RainbowAgent'
    +
    +
    +
  4. (Optional) Define a preset using the new agent type with a given environment, and the hyper-parameters that should be used for training on that environment.

    +
+
\ No newline at end of file
diff --git a/docs/contributing/add_agent/index.html b/docs/contributing/add_agent/index.html deleted file mode 100644 index 89fd56e..0000000 --- a/docs/contributing/add_agent/index.html +++ /dev/null @@ -1,340 +0,0 @@
- Adding a New Agent - Reinforcement Learning Coach

Coach's modularity makes adding an agent a simple and clean task, that involves the following steps:

-
    -
  1. -

    Implement your algorithm in a new file. The agent can inherit base classes such as ValueOptimizationAgent or - ActorCriticAgent, or the more generic Agent base class.

    -
      -
    • ValueOptimizationAgent, PolicyOptimizationAgent and Agent are abstract classes. -learn_from_batch() should be overriden with the desired behavior for the algorithm being implemented. -If deciding to inherit from Agent, also choose_action() should be overriden.
      def learn_from_batch(self, batch) -> Tuple[float, List, List]:
      -    """
      -    Given a batch of transitions, calculates their target values and updates the network.
      -    :param batch: A list of transitions
      -    :return: The total loss of the training, the loss per head and the unclipped gradients
      -    """
      -
      -def choose_action(self, curr_state):
      -    """
      -    choose an action to act with in the current episode being played. Different behavior might be exhibited when training
      -     or testing.
      -
      -    :param curr_state: the current state to act upon.
      -    :return: chosen action, some action value describing the action (q-value, probability, etc)
      -    """
      -
      -
    • -
    -
  2. -
  3. -

    Implement your agent's specific network head, if needed, at the implementation for the framework of your choice. - For example architectures/neon_components/heads.py. The head will inherit the generic base class Head. - A new output type should be added to configurations.py, and a mapping between the new head and output type should - be defined in the get_output_head() function at architectures/neon_components/general_network.py

    -
  4. -
  5. -

    Define a new parameters class that inherits AgentParameters. - The parameters class defines all the hyperparameters for the agent, and is initialized with 4 main components:

    -
      -
    • algorithm: A class inheriting AlgorithmParameters which defines any algorithm specific parameters
    • -
    • exploration: A class inheriting ExplorationParameters which defines the exploration policy parameters. - There are several common exploration policies built-in which you can use, and are defined under - the exploration sub directory. You can also define your own custom exploration policy.
    • -
    • memory: A class inheriting MemoryParameters which defined the memory parameters. - There are several common memory types built-in which you can use, and are defined under the memories - sub directory. You can also define your own custom memory.
    • -
    • networks: A dictionary defining all the networks that will be used by the agent. The keys of the dictionary - define the network name and will be used to access each network through the agent class. - The dictionary values are a class inheriting NetworkParameters, which define the network structure - and parameters.
    • -
    -

    Additionally, set the path property to return the path to your agent class in the following format:

    -
        <path to python module>:<name of agent class>
    -
    -

    For example,

    -
        class RainbowAgentParameters(AgentParameters):
    -    def __init__(self):
    -        super().__init__(algorithm=RainbowAlgorithmParameters(),
    -                         exploration=RainbowExplorationParameters(),
    -                         memory=RainbowMemoryParameters(),
    -                         networks={"main": RainbowNetworkParameters()})
    -
    -    @property
    -    def path(self):
    -        return 'rainbow.rainbow_agent:RainbowAgent'
    -
    -
  6. -
  7. -

    (Optional) Define a preset using the new agent type with a given environment, and the hyper-parameters that should - be used for training on that environment.

    -
  8. -
diff --git a/docs/contributing/add_env.html b/docs/contributing/add_env.html new file mode 100644 index 0000000..06dc927 --- /dev/null +++ b/docs/contributing/add_env.html @@ -0,0 +1,332 @@
+ Adding a New Environment — Reinforcement Learning Coach 0.11.0 documentation

Adding a New Environment

+

Adding a new environment to Coach is as easy as solving CartPole.

+

There are essentially two ways to integrate new environments into Coach:

+
+

Using the OpenAI Gym API

+

If your environment already uses the OpenAI Gym API, you are good to go. When selecting the environment parameters in the preset, use GymEnvironmentParameters(), and pass the path to your environment source code using the level parameter. You can specify additional parameters for your environment using the additional_simulator_parameters parameter. Take, for example, the definition used in the Pendulum_HAC preset:

+
env_params = GymEnvironmentParameters()
+env_params.level = "rl_coach.environments.mujoco.pendulum_with_goals:PendulumWithGoals"
+env_params.additional_simulator_parameters = {"time_limit": 1000}
+
+
+
+
+

Using the Coach API

+

There are a few simple steps to follow, and we will walk through them one by one. As an alternative, we highly recommend following the corresponding tutorial in the GitHub repo.

+
    +
  1. Create a new class for your environment, and inherit the Environment class.

    +
  2. Coach defines a simple API for implementing a new environment, which is defined in environment/environment.py. There are several functions to implement, but only some of them are mandatory.

    +

    Here are the important ones:

    +
    def _take_action(self, action_idx: ActionType) -> None:
    +    """
    +    An environment dependent function that sends an action to the simulator.
    +    :param action_idx: the action to perform on the environment
    +    :return: None
    +    """
    +
    +def _update_state(self) -> None:
    +    """
    +    Updates the state from the environment.
    +    Should update self.observation, self.reward, self.done, self.measurements and self.info
    +    :return: None
    +    """
    +
    +def _restart_environment_episode(self, force_environment_reset=False) -> None:
    +    """
    +    Restarts the simulator episode
    +    :param force_environment_reset: Force the environment to reset even if the episode is not done yet.
    +    :return: None
    +    """
    +
    +def _render(self) -> None:
    +    """
    +    Renders the environment using the native simulator renderer
    +    :return: None
    +    """
    +
    +def get_rendered_image(self) -> np.ndarray:
    +    """
    +    Return a numpy array containing the image that will be rendered to the screen.
    +    This can be different from the observation. For example, mujoco's observation is a measurements vector.
    +    :return: numpy array containing the image that will be rendered to the screen
    +    """
    +
    +
    +
  3. Create a new parameters class for your environment, which inherits the EnvironmentParameters class. In the __init__ of your class, define all the parameters you used in your Environment class. Additionally, fill the path property of the class with the path to your Environment class. For example, take a look at the EnvironmentParameters class used for Doom:

    +
    +
    class DoomEnvironmentParameters(EnvironmentParameters):
+    def __init__(self):
+        super().__init__()
+        self.default_input_filter = DoomInputFilter
+        self.default_output_filter = DoomOutputFilter
+        self.cameras = [DoomEnvironment.CameraTypes.OBSERVATION]
+
+    @property
+    def path(self):
+        return 'rl_coach.environments.doom_environment:DoomEnvironment'
    +
    +
    +
    +
  4. And that’s it, you’re done. Now just add a new preset with your newly created environment, and start training an agent on top of it.

    +
+
+
\ No newline at end of file
diff --git a/docs/contributing/add_env/index.html b/docs/contributing/add_env/index.html deleted file mode 100644 index d285c9d..0000000 --- a/docs/contributing/add_env/index.html +++ /dev/null @@ -1,348 +0,0 @@
- Adding a New Environment - Reinforcement Learning Coach

Adding a new environment to Coach is as easy as solving CartPole.

-

There are essentially two ways to integrate new environments to Coach:

-

Using the OpenAI Gym API

-

If your environment is already using the OpenAI Gym API, you are already good to go. -When selecting the environment parameters in the preset, use GymEnvironmentParameters(), -and pass the path to your environment source code using the level parameter. -You can specify additional parameters for your environment using the additional_simulator_parameters parameter. -Take for example the definition used in the Pendulum_HAC preset:

-
    env_params = GymEnvironmentParameters()
-    env_params.level = "rl_coach.environments.mujoco.pendulum_with_goals:PendulumWithGoals"
-    env_params.additional_simulator_parameters = {"time_limit": 1000}
-
-

Using the Coach API

-

There are a few simple steps to follow, and we will walk through them one by one.

-
    -
  1. -

    Create a new class for your environment, and inherit the Environment class.

    -
  2. -
  3. -

    Coach defines a simple API for implementing a new environment, which are defined in environment/environment.py. - There are several functions to implement, but only some of them are mandatory.

    -

    Here are the important ones:

    -
        def _take_action(self, action_idx: ActionType) -> None:
    -        """
    -        An environment dependent function that sends an action to the simulator.
    -        :param action_idx: the action to perform on the environment
    -        :return: None
    -        """
    -
    -    def _update_state(self) -> None:
    -        """
    -        Updates the state from the environment.
    -        Should update self.observation, self.reward, self.done, self.measurements and self.info
    -        :return: None
    -        """
    -
    -    def _restart_environment_episode(self, force_environment_reset=False) -> None:
    -        """
    -        Restarts the simulator episode
    -        :param force_environment_reset: Force the environment to reset even if the episode is not done yet.
    -        :return: None
    -        """
    -
    -    def _render(self) -> None:
    -        """
    -        Renders the environment using the native simulator renderer
    -        :return: None
    -        """
    -
    -    def get_rendered_image(self) -> np.ndarray:
    -        """
    -        Return a numpy array containing the image that will be rendered to the screen.
    -        This can be different from the observation. For example, mujoco's observation is a measurements vector.
    -        :return: numpy array containing the image that will be rendered to the screen
    -        """
    -
    -
  4. -
  5. -

    Create a new parameters class for your environment, which inherits the EnvironmentParameters class. - In the init of your class, define all the parameters you used in your Environment class. - Additionally, fill the path property of the class with the path to your Environment class. - For example, take a look at the EnvironmentParameters class used for Doom:

    -
        class DoomEnvironmentParameters(EnvironmentParameters):
    -    def __init__(self):
    -        super().__init__()
    -        self.default_input_filter = DoomInputFilter
    -        self.default_output_filter = DoomOutputFilter
    -        self.cameras = [DoomEnvironment.CameraTypes.OBSERVATION]
    -
    -    @property
    -    def path(self):
    -        return 'rl_coach.environments.doom_environment:DoomEnvironment'
    -
    -
  6. -
  7. -

    And that's it, you're done. Now just add a new preset with your newly created environment, and start training an agent on top of it.

    -
  8. -
- -
diff --git a/docs/css/highlight.css b/docs/css/highlight.css deleted file mode 100644 index 0ae40a7..0000000 --- a/docs/css/highlight.css +++ /dev/null @@ -1,124 +0,0 @@
-/* This is the GitHub theme for highlight.js (github.com style (c) Vasily Polovnyov) */
diff --git a/docs/css/theme.css b/docs/css/theme.css deleted file mode 100644 index 099a2d8..0000000 --- a/docs/css/theme.css +++ /dev/null @@ -1,12 +0,0 @@
-/* This file is copied from the upstream ReadTheDocs Sphinx theme. To aid upgradability this file should *not* be edited; modifications we need should be included in theme_extra.css. https://github.com/rtfd/readthedocs.org/blob/master/readthedocs/core/static/core/css/theme.css */
.wy-alert-danger.admonition-todo{background:#fdf3f2}.wy-alert.wy-alert-danger .wy-alert-title,.rst-content .wy-alert-danger.note .wy-alert-title,.rst-content .wy-alert-danger.attention .wy-alert-title,.rst-content .wy-alert-danger.caution .wy-alert-title,.rst-content .danger .wy-alert-title,.rst-content .error .wy-alert-title,.rst-content .wy-alert-danger.hint .wy-alert-title,.rst-content .wy-alert-danger.important .wy-alert-title,.rst-content .wy-alert-danger.tip .wy-alert-title,.rst-content .wy-alert-danger.warning .wy-alert-title,.rst-content .wy-alert-danger.seealso .wy-alert-title,.rst-content .wy-alert-danger.admonition-todo .wy-alert-title,.wy-alert.wy-alert-danger .rst-content .admonition-title,.rst-content .wy-alert.wy-alert-danger .admonition-title,.rst-content .wy-alert-danger.note .admonition-title,.rst-content .wy-alert-danger.attention .admonition-title,.rst-content .wy-alert-danger.caution .admonition-title,.rst-content .danger .admonition-title,.rst-content .error .admonition-title,.rst-content .wy-alert-danger.hint .admonition-title,.rst-content .wy-alert-danger.important .admonition-title,.rst-content .wy-alert-danger.tip .admonition-title,.rst-content .wy-alert-danger.warning .admonition-title,.rst-content .wy-alert-danger.seealso .admonition-title,.rst-content .wy-alert-danger.admonition-todo .admonition-title{background:#f29f97}.wy-alert.wy-alert-warning,.rst-content .wy-alert-warning.note,.rst-content .attention,.rst-content .caution,.rst-content .wy-alert-warning.danger,.rst-content .wy-alert-warning.error,.rst-content .wy-alert-warning.hint,.rst-content .wy-alert-warning.important,.rst-content .wy-alert-warning.tip,.rst-content .warning,.rst-content .wy-alert-warning.seealso,.rst-content .admonition-todo{background:#ffedcc}.wy-alert.wy-alert-warning .wy-alert-title,.rst-content .wy-alert-warning.note .wy-alert-title,.rst-content .attention .wy-alert-title,.rst-content .caution .wy-alert-title,.rst-content .wy-alert-warning.danger .wy-alert-title,.rst-content .wy-alert-warning.error .wy-alert-title,.rst-content .wy-alert-warning.hint .wy-alert-title,.rst-content .wy-alert-warning.important .wy-alert-title,.rst-content .wy-alert-warning.tip .wy-alert-title,.rst-content .warning .wy-alert-title,.rst-content .wy-alert-warning.seealso .wy-alert-title,.rst-content .admonition-todo .wy-alert-title,.wy-alert.wy-alert-warning .rst-content .admonition-title,.rst-content .wy-alert.wy-alert-warning .admonition-title,.rst-content .wy-alert-warning.note .admonition-title,.rst-content .attention .admonition-title,.rst-content .caution .admonition-title,.rst-content .wy-alert-warning.danger .admonition-title,.rst-content .wy-alert-warning.error .admonition-title,.rst-content .wy-alert-warning.hint .admonition-title,.rst-content .wy-alert-warning.important .admonition-title,.rst-content .wy-alert-warning.tip .admonition-title,.rst-content .warning .admonition-title,.rst-content .wy-alert-warning.seealso .admonition-title,.rst-content .admonition-todo .admonition-title{background:#f0b37e}.wy-alert.wy-alert-info,.rst-content .note,.rst-content .wy-alert-info.attention,.rst-content .wy-alert-info.caution,.rst-content .wy-alert-info.danger,.rst-content .wy-alert-info.error,.rst-content .wy-alert-info.hint,.rst-content .wy-alert-info.important,.rst-content .wy-alert-info.tip,.rst-content .wy-alert-info.warning,.rst-content .seealso,.rst-content .wy-alert-info.admonition-todo{background:#e7f2fa}.wy-alert.wy-alert-info .wy-alert-title,.rst-content .note .wy-alert-title,.rst-content 
.wy-alert-info.attention .wy-alert-title,.rst-content .wy-alert-info.caution .wy-alert-title,.rst-content .wy-alert-info.danger .wy-alert-title,.rst-content .wy-alert-info.error .wy-alert-title,.rst-content .wy-alert-info.hint .wy-alert-title,.rst-content .wy-alert-info.important .wy-alert-title,.rst-content .wy-alert-info.tip .wy-alert-title,.rst-content .wy-alert-info.warning .wy-alert-title,.rst-content .seealso .wy-alert-title,.rst-content .wy-alert-info.admonition-todo .wy-alert-title,.wy-alert.wy-alert-info .rst-content .admonition-title,.rst-content .wy-alert.wy-alert-info .admonition-title,.rst-content .note .admonition-title,.rst-content .wy-alert-info.attention .admonition-title,.rst-content .wy-alert-info.caution .admonition-title,.rst-content .wy-alert-info.danger .admonition-title,.rst-content .wy-alert-info.error .admonition-title,.rst-content .wy-alert-info.hint .admonition-title,.rst-content .wy-alert-info.important .admonition-title,.rst-content .wy-alert-info.tip .admonition-title,.rst-content .wy-alert-info.warning .admonition-title,.rst-content .seealso .admonition-title,.rst-content .wy-alert-info.admonition-todo .admonition-title{background:#6ab0de}.wy-alert.wy-alert-success,.rst-content .wy-alert-success.note,.rst-content .wy-alert-success.attention,.rst-content .wy-alert-success.caution,.rst-content .wy-alert-success.danger,.rst-content .wy-alert-success.error,.rst-content .hint,.rst-content .important,.rst-content .tip,.rst-content .wy-alert-success.warning,.rst-content .wy-alert-success.seealso,.rst-content .wy-alert-success.admonition-todo{background:#dbfaf4}.wy-alert.wy-alert-success .wy-alert-title,.rst-content .wy-alert-success.note .wy-alert-title,.rst-content .wy-alert-success.attention .wy-alert-title,.rst-content .wy-alert-success.caution .wy-alert-title,.rst-content .wy-alert-success.danger .wy-alert-title,.rst-content .wy-alert-success.error .wy-alert-title,.rst-content .hint .wy-alert-title,.rst-content .important .wy-alert-title,.rst-content .tip .wy-alert-title,.rst-content .wy-alert-success.warning .wy-alert-title,.rst-content .wy-alert-success.seealso .wy-alert-title,.rst-content .wy-alert-success.admonition-todo .wy-alert-title,.wy-alert.wy-alert-success .rst-content .admonition-title,.rst-content .wy-alert.wy-alert-success .admonition-title,.rst-content .wy-alert-success.note .admonition-title,.rst-content .wy-alert-success.attention .admonition-title,.rst-content .wy-alert-success.caution .admonition-title,.rst-content .wy-alert-success.danger .admonition-title,.rst-content .wy-alert-success.error .admonition-title,.rst-content .hint .admonition-title,.rst-content .important .admonition-title,.rst-content .tip .admonition-title,.rst-content .wy-alert-success.warning .admonition-title,.rst-content .wy-alert-success.seealso .admonition-title,.rst-content .wy-alert-success.admonition-todo .admonition-title{background:#1abc9c}.wy-alert.wy-alert-neutral,.rst-content .wy-alert-neutral.note,.rst-content .wy-alert-neutral.attention,.rst-content .wy-alert-neutral.caution,.rst-content .wy-alert-neutral.danger,.rst-content .wy-alert-neutral.error,.rst-content .wy-alert-neutral.hint,.rst-content .wy-alert-neutral.important,.rst-content .wy-alert-neutral.tip,.rst-content .wy-alert-neutral.warning,.rst-content .wy-alert-neutral.seealso,.rst-content .wy-alert-neutral.admonition-todo{background:#f3f6f6}.wy-alert.wy-alert-neutral .wy-alert-title,.rst-content .wy-alert-neutral.note .wy-alert-title,.rst-content .wy-alert-neutral.attention 
.wy-alert-title,.rst-content .wy-alert-neutral.caution .wy-alert-title,.rst-content .wy-alert-neutral.danger .wy-alert-title,.rst-content .wy-alert-neutral.error .wy-alert-title,.rst-content .wy-alert-neutral.hint .wy-alert-title,.rst-content .wy-alert-neutral.important .wy-alert-title,.rst-content .wy-alert-neutral.tip .wy-alert-title,.rst-content .wy-alert-neutral.warning .wy-alert-title,.rst-content .wy-alert-neutral.seealso .wy-alert-title,.rst-content .wy-alert-neutral.admonition-todo .wy-alert-title,.wy-alert.wy-alert-neutral .rst-content .admonition-title,.rst-content .wy-alert.wy-alert-neutral .admonition-title,.rst-content .wy-alert-neutral.note .admonition-title,.rst-content .wy-alert-neutral.attention .admonition-title,.rst-content .wy-alert-neutral.caution .admonition-title,.rst-content .wy-alert-neutral.danger .admonition-title,.rst-content .wy-alert-neutral.error .admonition-title,.rst-content .wy-alert-neutral.hint .admonition-title,.rst-content .wy-alert-neutral.important .admonition-title,.rst-content .wy-alert-neutral.tip .admonition-title,.rst-content .wy-alert-neutral.warning .admonition-title,.rst-content .wy-alert-neutral.seealso .admonition-title,.rst-content .wy-alert-neutral.admonition-todo .admonition-title{color:#404040;background:#e1e4e5}.wy-alert.wy-alert-neutral a,.rst-content .wy-alert-neutral.note a,.rst-content .wy-alert-neutral.attention a,.rst-content .wy-alert-neutral.caution a,.rst-content .wy-alert-neutral.danger a,.rst-content .wy-alert-neutral.error a,.rst-content .wy-alert-neutral.hint a,.rst-content .wy-alert-neutral.important a,.rst-content .wy-alert-neutral.tip a,.rst-content .wy-alert-neutral.warning a,.rst-content .wy-alert-neutral.seealso a,.rst-content .wy-alert-neutral.admonition-todo a{color:#2980B9}.wy-alert p:last-child,.rst-content .note p:last-child,.rst-content .attention p:last-child,.rst-content .caution p:last-child,.rst-content .danger p:last-child,.rst-content .error p:last-child,.rst-content .hint p:last-child,.rst-content .important p:last-child,.rst-content .tip p:last-child,.rst-content .warning p:last-child,.rst-content .seealso p:last-child,.rst-content .admonition-todo p:last-child{margin-bottom:0}.wy-tray-container{position:fixed;bottom:0px;left:0;z-index:600}.wy-tray-container li{display:block;width:300px;background:transparent;color:#fff;text-align:center;box-shadow:0 5px 5px 0 rgba(0,0,0,0.1);padding:0 24px;min-width:20%;opacity:0;height:0;line-height:56px;overflow:hidden;-webkit-transition:all 0.3s ease-in;-moz-transition:all 0.3s ease-in;transition:all 0.3s ease-in}.wy-tray-container li.wy-tray-item-success{background:#27AE60}.wy-tray-container li.wy-tray-item-info{background:#2980B9}.wy-tray-container li.wy-tray-item-warning{background:#E67E22}.wy-tray-container li.wy-tray-item-danger{background:#E74C3C}.wy-tray-container li.on{opacity:1;height:56px}@media screen and (max-width: 768px){.wy-tray-container{bottom:auto;top:0;width:100%}.wy-tray-container li{width:100%}}button{font-size:100%;margin:0;vertical-align:baseline;*vertical-align:middle;cursor:pointer;line-height:normal;-webkit-appearance:button;*overflow:visible}button::-moz-focus-inner,input::-moz-focus-inner{border:0;padding:0}button[disabled]{cursor:default}.btn{display:inline-block;border-radius:2px;line-height:normal;white-space:nowrap;text-align:center;cursor:pointer;font-size:100%;padding:6px 12px 8px 12px;color:#fff;border:1px solid 
rgba(0,0,0,0.1);background-color:#27AE60;text-decoration:none;font-weight:normal;font-family:"Lato","proxima-nova","Helvetica Neue",Arial,sans-serif;box-shadow:0px 1px 2px -1px rgba(255,255,255,0.5) inset,0px -2px 0px 0px rgba(0,0,0,0.1) inset;outline-none:false;vertical-align:middle;*display:inline;zoom:1;-webkit-user-drag:none;-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none;-webkit-transition:all 0.1s linear;-moz-transition:all 0.1s linear;transition:all 0.1s linear}.btn-hover{background:#2e8ece;color:#fff}.btn:hover{background:#2cc36b;color:#fff}.btn:focus{background:#2cc36b;outline:0}.btn:active{box-shadow:0px -1px 0px 0px rgba(0,0,0,0.05) inset,0px 2px 0px 0px rgba(0,0,0,0.1) inset;padding:8px 12px 6px 12px}.btn:visited{color:#fff}.btn:disabled{background-image:none;filter:progid:DXImageTransform.Microsoft.gradient(enabled = false);filter:alpha(opacity=40);opacity:0.4;cursor:not-allowed;box-shadow:none}.btn-disabled{background-image:none;filter:progid:DXImageTransform.Microsoft.gradient(enabled = false);filter:alpha(opacity=40);opacity:0.4;cursor:not-allowed;box-shadow:none}.btn-disabled:hover,.btn-disabled:focus,.btn-disabled:active{background-image:none;filter:progid:DXImageTransform.Microsoft.gradient(enabled = false);filter:alpha(opacity=40);opacity:0.4;cursor:not-allowed;box-shadow:none}.btn::-moz-focus-inner{padding:0;border:0}.btn-small{font-size:80%}.btn-info{background-color:#2980B9 !important}.btn-info:hover{background-color:#2e8ece !important}.btn-neutral{background-color:#f3f6f6 !important;color:#404040 !important}.btn-neutral:hover{background-color:#e5ebeb !important;color:#404040}.btn-neutral:visited{color:#404040 !important}.btn-success{background-color:#27AE60 !important}.btn-success:hover{background-color:#295 !important}.btn-danger{background-color:#E74C3C !important}.btn-danger:hover{background-color:#ea6153 !important}.btn-warning{background-color:#E67E22 !important}.btn-warning:hover{background-color:#e98b39 !important}.btn-invert{background-color:#222}.btn-invert:hover{background-color:#2f2f2f !important}.btn-link{background-color:transparent !important;color:#2980B9;box-shadow:none;border-color:transparent !important}.btn-link:hover{background-color:transparent !important;color:#409ad5 !important;box-shadow:none}.btn-link:active{background-color:transparent !important;color:#409ad5 !important;box-shadow:none}.btn-link:visited{color:#9B59B6}.wy-btn-group .btn,.wy-control .btn{vertical-align:middle}.wy-btn-group{margin-bottom:24px;*zoom:1}.wy-btn-group:before,.wy-btn-group:after{display:table;content:""}.wy-btn-group:after{clear:both}.wy-dropdown{position:relative;display:inline-block}.wy-dropdown-menu{position:absolute;left:0;display:none;float:left;top:100%;min-width:100%;background:#fcfcfc;z-index:100;border:solid 1px #cfd7dd;box-shadow:0 2px 2px 0 rgba(0,0,0,0.1);padding:12px}.wy-dropdown-menu>dd>a{display:block;clear:both;color:#404040;white-space:nowrap;font-size:90%;padding:0 12px;cursor:pointer}.wy-dropdown-menu>dd>a:hover{background:#2980B9;color:#fff}.wy-dropdown-menu>dd.divider{border-top:solid 1px #cfd7dd;margin:6px 0}.wy-dropdown-menu>dd.search{padding-bottom:12px}.wy-dropdown-menu>dd.search input[type="search"]{width:100%}.wy-dropdown-menu>dd.call-to-action{background:#e3e3e3;text-transform:uppercase;font-weight:500;font-size:80%}.wy-dropdown-menu>dd.call-to-action:hover{background:#e3e3e3}.wy-dropdown-menu>dd.call-to-action .btn{color:#fff}.wy-dropdown.wy-dropdown-up 
.wy-dropdown-menu{bottom:100%;top:auto;left:auto;right:0}.wy-dropdown.wy-dropdown-bubble .wy-dropdown-menu{background:#fcfcfc;margin-top:2px}.wy-dropdown.wy-dropdown-bubble .wy-dropdown-menu a{padding:6px 12px}.wy-dropdown.wy-dropdown-bubble .wy-dropdown-menu a:hover{background:#2980B9;color:#fff}.wy-dropdown.wy-dropdown-left .wy-dropdown-menu{right:0;text-align:right}.wy-dropdown-arrow:before{content:" ";border-bottom:5px solid #f5f5f5;border-left:5px solid transparent;border-right:5px solid transparent;position:absolute;display:block;top:-4px;left:50%;margin-left:-3px}.wy-dropdown-arrow.wy-dropdown-arrow-left:before{left:11px}.wy-form-stacked select{display:block}.wy-form-aligned input,.wy-form-aligned textarea,.wy-form-aligned select,.wy-form-aligned .wy-help-inline,.wy-form-aligned label{display:inline-block;*display:inline;*zoom:1;vertical-align:middle}.wy-form-aligned .wy-control-group>label{display:inline-block;vertical-align:middle;width:10em;margin:6px 12px 0 0;float:left}.wy-form-aligned .wy-control{float:left}.wy-form-aligned .wy-control label{display:block}.wy-form-aligned .wy-control select{margin-top:6px}fieldset{border:0;margin:0;padding:0}legend{display:block;width:100%;border:0;padding:0;white-space:normal;margin-bottom:24px;font-size:150%;*margin-left:-7px}label{display:block;margin:0 0 0.3125em 0;color:#999;font-size:90%}input,select,textarea{font-size:100%;margin:0;vertical-align:baseline;*vertical-align:middle}.wy-control-group{margin-bottom:24px;*zoom:1;max-width:68em;margin-left:auto;margin-right:auto;*zoom:1}.wy-control-group:before,.wy-control-group:after{display:table;content:""}.wy-control-group:after{clear:both}.wy-control-group:before,.wy-control-group:after{display:table;content:""}.wy-control-group:after{clear:both}.wy-control-group.wy-control-group-required>label:after{content:" *";color:#E74C3C}.wy-control-group .wy-form-full,.wy-control-group .wy-form-halves,.wy-control-group .wy-form-thirds{padding-bottom:12px}.wy-control-group .wy-form-full select,.wy-control-group .wy-form-halves select,.wy-control-group .wy-form-thirds select{width:100%}.wy-control-group .wy-form-full input[type="text"],.wy-control-group .wy-form-full input[type="password"],.wy-control-group .wy-form-full input[type="email"],.wy-control-group .wy-form-full input[type="url"],.wy-control-group .wy-form-full input[type="date"],.wy-control-group .wy-form-full input[type="month"],.wy-control-group .wy-form-full input[type="time"],.wy-control-group .wy-form-full input[type="datetime"],.wy-control-group .wy-form-full input[type="datetime-local"],.wy-control-group .wy-form-full input[type="week"],.wy-control-group .wy-form-full input[type="number"],.wy-control-group .wy-form-full input[type="search"],.wy-control-group .wy-form-full input[type="tel"],.wy-control-group .wy-form-full input[type="color"],.wy-control-group .wy-form-halves input[type="text"],.wy-control-group .wy-form-halves input[type="password"],.wy-control-group .wy-form-halves input[type="email"],.wy-control-group .wy-form-halves input[type="url"],.wy-control-group .wy-form-halves input[type="date"],.wy-control-group .wy-form-halves input[type="month"],.wy-control-group .wy-form-halves input[type="time"],.wy-control-group .wy-form-halves input[type="datetime"],.wy-control-group .wy-form-halves input[type="datetime-local"],.wy-control-group .wy-form-halves input[type="week"],.wy-control-group .wy-form-halves input[type="number"],.wy-control-group .wy-form-halves input[type="search"],.wy-control-group .wy-form-halves 
input[type="tel"],.wy-control-group .wy-form-halves input[type="color"],.wy-control-group .wy-form-thirds input[type="text"],.wy-control-group .wy-form-thirds input[type="password"],.wy-control-group .wy-form-thirds input[type="email"],.wy-control-group .wy-form-thirds input[type="url"],.wy-control-group .wy-form-thirds input[type="date"],.wy-control-group .wy-form-thirds input[type="month"],.wy-control-group .wy-form-thirds input[type="time"],.wy-control-group .wy-form-thirds input[type="datetime"],.wy-control-group .wy-form-thirds input[type="datetime-local"],.wy-control-group .wy-form-thirds input[type="week"],.wy-control-group .wy-form-thirds input[type="number"],.wy-control-group .wy-form-thirds input[type="search"],.wy-control-group .wy-form-thirds input[type="tel"],.wy-control-group .wy-form-thirds input[type="color"]{width:100%}.wy-control-group .wy-form-full{float:left;display:block;margin-right:2.35765%;width:100%;margin-right:0}.wy-control-group .wy-form-full:last-child{margin-right:0}.wy-control-group .wy-form-halves{float:left;display:block;margin-right:2.35765%;width:48.82117%}.wy-control-group .wy-form-halves:last-child{margin-right:0}.wy-control-group .wy-form-halves:nth-of-type(2n){margin-right:0}.wy-control-group .wy-form-halves:nth-of-type(2n+1){clear:left}.wy-control-group .wy-form-thirds{float:left;display:block;margin-right:2.35765%;width:31.76157%}.wy-control-group .wy-form-thirds:last-child{margin-right:0}.wy-control-group .wy-form-thirds:nth-of-type(3n){margin-right:0}.wy-control-group .wy-form-thirds:nth-of-type(3n+1){clear:left}.wy-control-group.wy-control-group-no-input .wy-control{margin:6px 0 0 0;font-size:90%}.wy-control-no-input{display:inline-block;margin:6px 0 0 0;font-size:90%}.wy-control-group.fluid-input input[type="text"],.wy-control-group.fluid-input input[type="password"],.wy-control-group.fluid-input input[type="email"],.wy-control-group.fluid-input input[type="url"],.wy-control-group.fluid-input input[type="date"],.wy-control-group.fluid-input input[type="month"],.wy-control-group.fluid-input input[type="time"],.wy-control-group.fluid-input input[type="datetime"],.wy-control-group.fluid-input input[type="datetime-local"],.wy-control-group.fluid-input input[type="week"],.wy-control-group.fluid-input input[type="number"],.wy-control-group.fluid-input input[type="search"],.wy-control-group.fluid-input input[type="tel"],.wy-control-group.fluid-input input[type="color"]{width:100%}.wy-form-message-inline{display:inline-block;padding-left:0.3em;color:#666;vertical-align:middle;font-size:90%}.wy-form-message{display:block;color:#999;font-size:70%;margin-top:0.3125em;font-style:italic}input{line-height:normal}input[type="button"],input[type="reset"],input[type="submit"]{-webkit-appearance:button;cursor:pointer;font-family:"Lato","proxima-nova","Helvetica Neue",Arial,sans-serif;*overflow:visible}input[type="text"],input[type="password"],input[type="email"],input[type="url"],input[type="date"],input[type="month"],input[type="time"],input[type="datetime"],input[type="datetime-local"],input[type="week"],input[type="number"],input[type="search"],input[type="tel"],input[type="color"]{-webkit-appearance:none;padding:6px;display:inline-block;border:1px solid #ccc;font-size:80%;font-family:"Lato","proxima-nova","Helvetica Neue",Arial,sans-serif;box-shadow:inset 0 1px 3px #ddd;border-radius:0;-webkit-transition:border 0.3s linear;-moz-transition:border 0.3s linear;transition:border 0.3s linear}input[type="datetime-local"]{padding:0.34375em 
0.625em}input[disabled]{cursor:default}input[type="checkbox"],input[type="radio"]{-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box;padding:0;margin-right:0.3125em;*height:13px;*width:13px}input[type="search"]{-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box}input[type="search"]::-webkit-search-cancel-button,input[type="search"]::-webkit-search-decoration{-webkit-appearance:none}input[type="text"]:focus,input[type="password"]:focus,input[type="email"]:focus,input[type="url"]:focus,input[type="date"]:focus,input[type="month"]:focus,input[type="time"]:focus,input[type="datetime"]:focus,input[type="datetime-local"]:focus,input[type="week"]:focus,input[type="number"]:focus,input[type="search"]:focus,input[type="tel"]:focus,input[type="color"]:focus{outline:0;outline:thin dotted \9;border-color:#333}input.no-focus:focus{border-color:#ccc !important}input[type="file"]:focus,input[type="radio"]:focus,input[type="checkbox"]:focus{outline:thin dotted #333;outline:1px auto #129FEA}input[type="text"][disabled],input[type="password"][disabled],input[type="email"][disabled],input[type="url"][disabled],input[type="date"][disabled],input[type="month"][disabled],input[type="time"][disabled],input[type="datetime"][disabled],input[type="datetime-local"][disabled],input[type="week"][disabled],input[type="number"][disabled],input[type="search"][disabled],input[type="tel"][disabled],input[type="color"][disabled]{cursor:not-allowed;background-color:#f3f6f6;color:#cad2d3}input:focus:invalid,textarea:focus:invalid,select:focus:invalid{color:#E74C3C;border:1px solid #E74C3C}input:focus:invalid:focus,textarea:focus:invalid:focus,select:focus:invalid:focus{border-color:#E74C3C}input[type="file"]:focus:invalid:focus,input[type="radio"]:focus:invalid:focus,input[type="checkbox"]:focus:invalid:focus{outline-color:#E74C3C}input.wy-input-large{padding:12px;font-size:100%}textarea{overflow:auto;vertical-align:top;width:100%;font-family:"Lato","proxima-nova","Helvetica Neue",Arial,sans-serif}select,textarea{padding:0.5em 0.625em;display:inline-block;border:1px solid #ccc;font-size:80%;box-shadow:inset 0 1px 3px #ddd;-webkit-transition:border 0.3s linear;-moz-transition:border 0.3s linear;transition:border 0.3s linear}select{border:1px solid #ccc;background-color:#fff}select[multiple]{height:auto}select:focus,textarea:focus{outline:0}select[disabled],textarea[disabled],input[readonly],select[readonly],textarea[readonly]{cursor:not-allowed;background-color:#fff;color:#cad2d3;border-color:transparent}.wy-checkbox,.wy-radio{margin:6px 0;color:#404040;display:block}.wy-checkbox input,.wy-radio input{vertical-align:baseline}.wy-form-message-inline{display:inline-block;*display:inline;*zoom:1;vertical-align:middle}.wy-input-prefix,.wy-input-suffix{white-space:nowrap}.wy-input-prefix .wy-input-context,.wy-input-suffix .wy-input-context{padding:6px;display:inline-block;font-size:80%;background-color:#f3f6f6;border:solid 1px #ccc;color:#999}.wy-input-suffix .wy-input-context{border-left:0}.wy-input-prefix .wy-input-context{border-right:0}.wy-control-group.wy-control-group-error .wy-form-message,.wy-control-group.wy-control-group-error>label{color:#E74C3C}.wy-control-group.wy-control-group-error input[type="text"],.wy-control-group.wy-control-group-error input[type="password"],.wy-control-group.wy-control-group-error input[type="email"],.wy-control-group.wy-control-group-error input[type="url"],.wy-control-group.wy-control-group-error 
input[type="date"],.wy-control-group.wy-control-group-error input[type="month"],.wy-control-group.wy-control-group-error input[type="time"],.wy-control-group.wy-control-group-error input[type="datetime"],.wy-control-group.wy-control-group-error input[type="datetime-local"],.wy-control-group.wy-control-group-error input[type="week"],.wy-control-group.wy-control-group-error input[type="number"],.wy-control-group.wy-control-group-error input[type="search"],.wy-control-group.wy-control-group-error input[type="tel"],.wy-control-group.wy-control-group-error input[type="color"]{border:solid 1px #E74C3C}.wy-control-group.wy-control-group-error textarea{border:solid 1px #E74C3C}.wy-inline-validate{white-space:nowrap}.wy-inline-validate .wy-input-context{padding:0.5em 0.625em;display:inline-block;font-size:80%}.wy-inline-validate.wy-inline-validate-success .wy-input-context{color:#27AE60}.wy-inline-validate.wy-inline-validate-danger .wy-input-context{color:#E74C3C}.wy-inline-validate.wy-inline-validate-warning .wy-input-context{color:#E67E22}.wy-inline-validate.wy-inline-validate-info .wy-input-context{color:#2980B9}.rotate-90{-webkit-transform:rotate(90deg);-moz-transform:rotate(90deg);-ms-transform:rotate(90deg);-o-transform:rotate(90deg);transform:rotate(90deg)}.rotate-180{-webkit-transform:rotate(180deg);-moz-transform:rotate(180deg);-ms-transform:rotate(180deg);-o-transform:rotate(180deg);transform:rotate(180deg)}.rotate-270{-webkit-transform:rotate(270deg);-moz-transform:rotate(270deg);-ms-transform:rotate(270deg);-o-transform:rotate(270deg);transform:rotate(270deg)}.mirror{-webkit-transform:scaleX(-1);-moz-transform:scaleX(-1);-ms-transform:scaleX(-1);-o-transform:scaleX(-1);transform:scaleX(-1)}.mirror.rotate-90{-webkit-transform:scaleX(-1) rotate(90deg);-moz-transform:scaleX(-1) rotate(90deg);-ms-transform:scaleX(-1) rotate(90deg);-o-transform:scaleX(-1) rotate(90deg);transform:scaleX(-1) rotate(90deg)}.mirror.rotate-180{-webkit-transform:scaleX(-1) rotate(180deg);-moz-transform:scaleX(-1) rotate(180deg);-ms-transform:scaleX(-1) rotate(180deg);-o-transform:scaleX(-1) rotate(180deg);transform:scaleX(-1) rotate(180deg)}.mirror.rotate-270{-webkit-transform:scaleX(-1) rotate(270deg);-moz-transform:scaleX(-1) rotate(270deg);-ms-transform:scaleX(-1) rotate(270deg);-o-transform:scaleX(-1) rotate(270deg);transform:scaleX(-1) rotate(270deg)}@media only screen and (max-width: 480px){.wy-form button[type="submit"]{margin:0.7em 0 0}.wy-form input[type="text"],.wy-form input[type="password"],.wy-form input[type="email"],.wy-form input[type="url"],.wy-form input[type="date"],.wy-form input[type="month"],.wy-form input[type="time"],.wy-form input[type="datetime"],.wy-form input[type="datetime-local"],.wy-form input[type="week"],.wy-form input[type="number"],.wy-form input[type="search"],.wy-form input[type="tel"],.wy-form input[type="color"]{margin-bottom:0.3em;display:block}.wy-form label{margin-bottom:0.3em;display:block}.wy-form input[type="password"],.wy-form input[type="email"],.wy-form input[type="url"],.wy-form input[type="date"],.wy-form input[type="month"],.wy-form input[type="time"],.wy-form input[type="datetime"],.wy-form input[type="datetime-local"],.wy-form input[type="week"],.wy-form input[type="number"],.wy-form input[type="search"],.wy-form input[type="tel"],.wy-form input[type="color"]{margin-bottom:0}.wy-form-aligned .wy-control-group label{margin-bottom:0.3em;text-align:left;display:block;width:100%}.wy-form-aligned .wy-control{margin:1.5em 0 0 0}.wy-form 
.wy-help-inline,.wy-form-message-inline,.wy-form-message{display:block;font-size:80%;padding:6px 0}}@media screen and (max-width: 768px){.tablet-hide{display:none}}@media screen and (max-width: 480px){.mobile-hide{display:none}}.float-left{float:left}.float-right{float:right}.full-width{width:100%}.wy-table,.rst-content table.docutils,.rst-content table.field-list{border-collapse:collapse;border-spacing:0;empty-cells:show;margin-bottom:24px}.wy-table caption,.rst-content table.docutils caption,.rst-content table.field-list caption{color:#000;font:italic 85%/1 arial,sans-serif;padding:1em 0;text-align:center}.wy-table td,.rst-content table.docutils td,.rst-content table.field-list td,.wy-table th,.rst-content table.docutils th,.rst-content table.field-list th{font-size:90%;margin:0;overflow:visible;padding:8px 16px}.wy-table td:first-child,.rst-content table.docutils td:first-child,.rst-content table.field-list td:first-child,.wy-table th:first-child,.rst-content table.docutils th:first-child,.rst-content table.field-list th:first-child{border-left-width:0}.wy-table thead,.rst-content table.docutils thead,.rst-content table.field-list thead{color:#000;text-align:left;vertical-align:bottom;white-space:nowrap}.wy-table thead th,.rst-content table.docutils thead th,.rst-content table.field-list thead th{font-weight:bold;border-bottom:solid 2px #e1e4e5}.wy-table td,.rst-content table.docutils td,.rst-content table.field-list td{background-color:transparent;vertical-align:middle}.wy-table td p,.rst-content table.docutils td p,.rst-content table.field-list td p{line-height:18px}.wy-table td p:last-child,.rst-content table.docutils td p:last-child,.rst-content table.field-list td p:last-child{margin-bottom:0}.wy-table .wy-table-cell-min,.rst-content table.docutils .wy-table-cell-min,.rst-content table.field-list .wy-table-cell-min{width:1%;padding-right:0}.wy-table .wy-table-cell-min input[type=checkbox],.rst-content table.docutils .wy-table-cell-min input[type=checkbox],.rst-content table.field-list .wy-table-cell-min input[type=checkbox],.wy-table .wy-table-cell-min input[type=checkbox],.rst-content table.docutils .wy-table-cell-min input[type=checkbox],.rst-content table.field-list .wy-table-cell-min input[type=checkbox]{margin:0}.wy-table-secondary{color:gray;font-size:90%}.wy-table-tertiary{color:gray;font-size:80%}.wy-table-odd td,.wy-table-striped tr:nth-child(2n-1) td,.rst-content table.docutils:not(.field-list) tr:nth-child(2n-1) td{background-color:#f3f6f6}.wy-table-backed{background-color:#f3f6f6}.wy-table-bordered-all,.rst-content table.docutils{border:1px solid #e1e4e5}.wy-table-bordered-all td,.rst-content table.docutils td{border-bottom:1px solid #e1e4e5;border-left:1px solid #e1e4e5}.wy-table-bordered-all tbody>tr:last-child td,.rst-content table.docutils tbody>tr:last-child td{border-bottom-width:0}.wy-table-bordered{border:1px solid #e1e4e5}.wy-table-bordered-rows td{border-bottom:1px solid #e1e4e5}.wy-table-bordered-rows tbody>tr:last-child td{border-bottom-width:0}.wy-table-horizontal tbody>tr:last-child td{border-bottom-width:0}.wy-table-horizontal td,.wy-table-horizontal th{border-width:0 0 1px 0;border-bottom:1px solid #e1e4e5}.wy-table-horizontal tbody>tr:last-child td{border-bottom-width:0}.wy-table-responsive{margin-bottom:24px;max-width:100%;overflow:auto}.wy-table-responsive table{margin-bottom:0 !important}.wy-table-responsive table td,.wy-table-responsive table 
th{white-space:nowrap}a{color:#2980B9;text-decoration:none}a:hover{color:#3091d1}a:visited{color:#9B59B6}html{height:100%;overflow-x:hidden}body{font-family:"Lato","proxima-nova","Helvetica Neue",Arial,sans-serif;font-weight:normal;color:#404040;min-height:100%;overflow-x:hidden;background:#edf0f2}.wy-text-left{text-align:left}.wy-text-center{text-align:center}.wy-text-right{text-align:right}.wy-text-large{font-size:120%}.wy-text-normal{font-size:100%}.wy-text-small,small{font-size:80%}.wy-text-strike{text-decoration:line-through}.wy-text-warning{color:#E67E22 !important}a.wy-text-warning:hover{color:#eb9950 !important}.wy-text-info{color:#2980B9 !important}a.wy-text-info:hover{color:#409ad5 !important}.wy-text-success{color:#27AE60 !important}a.wy-text-success:hover{color:#36d278 !important}.wy-text-danger{color:#E74C3C !important}a.wy-text-danger:hover{color:#ed7669 !important}.wy-text-neutral{color:#404040 !important}a.wy-text-neutral:hover{color:#595959 !important}h1,h2,h3,h4,h5,h6,legend{margin-top:0;font-weight:700;font-family:"Roboto Slab","ff-tisa-web-pro","Georgia",Arial,sans-serif}p{line-height:24px;margin:0;font-size:16px;margin-bottom:24px}h1{font-size:175%}h2{font-size:150%}h3{font-size:125%}h4{font-size:115%}h5{font-size:110%}h6{font-size:100%}hr{display:block;height:1px;border:0;border-top:1px solid #e1e4e5;margin:24px 0;padding:0}code,.rst-content tt{white-space:nowrap;max-width:100%;background:#fff;border:solid 1px #e1e4e5;font-size:75%;padding:0 5px;font-family:Consolas,"Andale Mono WT","Andale Mono","Lucida Console","Lucida Sans Typewriter","DejaVu Sans Mono","Bitstream Vera Sans Mono","Liberation Mono","Nimbus Mono L",Monaco,"Courier New",Courier,monospace;color:#E74C3C;overflow-x:auto}code.code-large,.rst-content tt.code-large{font-size:90%}.wy-plain-list-disc,.rst-content .section ul,.rst-content .toctree-wrapper ul,article ul{list-style:disc;line-height:24px;margin-bottom:24px}.wy-plain-list-disc li,.rst-content .section ul li,.rst-content .toctree-wrapper ul li,article ul li{list-style:disc;margin-left:24px}.wy-plain-list-disc li p:last-child,.rst-content .section ul li p:last-child,.rst-content .toctree-wrapper ul li p:last-child,article ul li p:last-child{margin-bottom:0}.wy-plain-list-disc li ul,.rst-content .section ul li ul,.rst-content .toctree-wrapper ul li ul,article ul li ul{margin-bottom:0}.wy-plain-list-disc li li,.rst-content .section ul li li,.rst-content .toctree-wrapper ul li li,article ul li li{list-style:circle}.wy-plain-list-disc li li li,.rst-content .section ul li li li,.rst-content .toctree-wrapper ul li li li,article ul li li li{list-style:square}.wy-plain-list-disc li ol li,.rst-content .section ul li ol li,.rst-content .toctree-wrapper ul li ol li,article ul li ol li{list-style:decimal}.wy-plain-list-decimal,.rst-content .section ol,.rst-content ol.arabic,article ol{list-style:decimal;line-height:24px;margin-bottom:24px}.wy-plain-list-decimal li,.rst-content .section ol li,.rst-content ol.arabic li,article ol li{list-style:decimal;margin-left:24px}.wy-plain-list-decimal li p:last-child,.rst-content .section ol li p:last-child,.rst-content ol.arabic li p:last-child,article ol li p:last-child{margin-bottom:0}.wy-plain-list-decimal li ul,.rst-content .section ol li ul,.rst-content ol.arabic li ul,article ol li ul{margin-bottom:0}.wy-plain-list-decimal li ul li,.rst-content .section ol li ul li,.rst-content ol.arabic li ul li,article ol li ul li{list-style:disc}.codeblock-example{border:1px solid 
#e1e4e5;border-bottom:none;padding:24px;padding-top:48px;font-weight:500;background:#fff;position:relative}.codeblock-example:after{content:"Example";position:absolute;top:0px;left:0px;background:#9B59B6;color:#fff;padding:6px 12px}.codeblock-example.prettyprint-example-only{border:1px solid #e1e4e5;margin-bottom:24px}.codeblock,pre.literal-block,.rst-content .literal-block,.rst-content pre.literal-block,div[class^='highlight']{border:1px solid #e1e4e5;padding:0px;overflow-x:auto;background:#fff;margin:1px 0 24px 0}.codeblock div[class^='highlight'],pre.literal-block div[class^='highlight'],.rst-content .literal-block div[class^='highlight'],div[class^='highlight'] div[class^='highlight']{border:none;background:none;margin:0}div[class^='highlight'] td.code{width:100%}.linenodiv pre{border-right:solid 1px #e6e9ea;margin:0;padding:12px 12px;font-family:Consolas,"Andale Mono WT","Andale Mono","Lucida Console","Lucida Sans Typewriter","DejaVu Sans Mono","Bitstream Vera Sans Mono","Liberation Mono","Nimbus Mono L",Monaco,"Courier New",Courier,monospace;font-size:12px;line-height:1.5;color:#d9d9d9}div[class^='highlight'] pre{white-space:pre;margin:0;padding:12px 12px;font-family:Consolas,"Andale Mono WT","Andale Mono","Lucida Console","Lucida Sans Typewriter","DejaVu Sans Mono","Bitstream Vera Sans Mono","Liberation Mono","Nimbus Mono L",Monaco,"Courier New",Courier,monospace;font-size:12px;line-height:1.5;display:block;overflow:auto;color:#404040}@media print{.codeblock,pre.literal-block,.rst-content .literal-block,.rst-content pre.literal-block,div[class^='highlight'],div[class^='highlight'] pre{white-space:pre-wrap}}.hll{background-color:#ffc;margin:0 -12px;padding:0 12px;display:block}.c{color:#998;font-style:italic}.err{color:#a61717;background-color:#e3d2d2}.k{font-weight:bold}.o{font-weight:bold}.cm{color:#998;font-style:italic}.cp{color:#999;font-weight:bold}.c1{color:#998;font-style:italic}.cs{color:#999;font-weight:bold;font-style:italic}.gd{color:#000;background-color:#fdd}.gd .x{color:#000;background-color:#faa}.ge{font-style:italic}.gr{color:#a00}.gh{color:#999}.gi{color:#000;background-color:#dfd}.gi .x{color:#000;background-color:#afa}.go{color:#888}.gp{color:#555}.gs{font-weight:bold}.gu{color:purple;font-weight:bold}.gt{color:#a00}.kc{font-weight:bold}.kd{font-weight:bold}.kn{font-weight:bold}.kp{font-weight:bold}.kr{font-weight:bold}.kt{color:#458;font-weight:bold}.m{color:#099}.s{color:#d14}.n{color:#333}.na{color:teal}.nb{color:#0086b3}.nc{color:#458;font-weight:bold}.no{color:teal}.ni{color:purple}.ne{color:#900;font-weight:bold}.nf{color:#900;font-weight:bold}.nn{color:#555}.nt{color:navy}.nv{color:teal}.ow{font-weight:bold}.w{color:#bbb}.mf{color:#099}.mh{color:#099}.mi{color:#099}.mo{color:#099}.sb{color:#d14}.sc{color:#d14}.sd{color:#d14}.s2{color:#d14}.se{color:#d14}.sh{color:#d14}.si{color:#d14}.sx{color:#d14}.sr{color:#009926}.s1{color:#d14}.ss{color:#990073}.bp{color:#999}.vc{color:teal}.vg{color:teal}.vi{color:teal}.il{color:#099}.gc{color:#999;background-color:#EAF2F5}.wy-breadcrumbs li{display:inline-block}.wy-breadcrumbs li.wy-breadcrumbs-aside{float:right}.wy-breadcrumbs li a{display:inline-block;padding:5px}.wy-breadcrumbs li a:first-child{padding-left:0}.wy-breadcrumbs-extra{margin-bottom:0;color:#b3b3b3;font-size:80%;display:inline-block}@media screen and (max-width: 480px){.wy-breadcrumbs-extra{display:none}.wy-breadcrumbs li.wy-breadcrumbs-aside{display:none}}@media print{.wy-breadcrumbs 
li.wy-breadcrumbs-aside{display:none}}.wy-affix{position:fixed;top:1.618em}.wy-menu a:hover{text-decoration:none}.wy-menu-horiz{*zoom:1}.wy-menu-horiz:before,.wy-menu-horiz:after{display:table;content:""}.wy-menu-horiz:after{clear:both}.wy-menu-horiz ul,.wy-menu-horiz li{display:inline-block}.wy-menu-horiz li:hover{background:rgba(255,255,255,0.1)}.wy-menu-horiz li.divide-left{border-left:solid 1px #404040}.wy-menu-horiz li.divide-right{border-right:solid 1px #404040}.wy-menu-horiz a{height:32px;display:inline-block;line-height:32px;padding:0 16px}.wy-menu-vertical header{height:32px;display:inline-block;line-height:32px;padding:0 1.618em;display:block;font-weight:bold;text-transform:uppercase;font-size:80%;color:#2980B9;white-space:nowrap}.wy-menu-vertical ul{margin-bottom:0}.wy-menu-vertical li.divide-top{border-top:solid 1px #404040}.wy-menu-vertical li.divide-bottom{border-bottom:solid 1px #404040}.wy-menu-vertical li.current{background:#e3e3e3}.wy-menu-vertical li.current a{color:gray;border-right:solid 1px #c9c9c9;padding:0.4045em 2.427em}.wy-menu-vertical li.current a:hover{background:#d6d6d6}.wy-menu-vertical li.on a,.wy-menu-vertical li.current>a{color:#404040;padding:0.4045em 1.618em;font-weight:bold;position:relative;background:#fcfcfc;border:none;border-bottom:solid 1px #c9c9c9;border-top:solid 1px #c9c9c9;padding-left:1.618em -4px}.wy-menu-vertical li.on a:hover,.wy-menu-vertical li.current>a:hover{background:#fcfcfc}.wy-menu-vertical li.toctree-l2.current>a{background:#c9c9c9;padding:0.4045em 2.427em}.wy-menu-vertical li.current ul{display:block}.wy-menu-vertical li ul{margin-bottom:0;display:none}.wy-menu-vertical .local-toc li ul{display:block}.wy-menu-vertical li ul li a{margin-bottom:0;color:#b3b3b3;font-weight:normal}.wy-menu-vertical a{display:inline-block;line-height:18px;padding:0.4045em 1.618em;display:block;position:relative;font-size:90%;color:#b3b3b3}.wy-menu-vertical a:hover{background-color:#4e4a4a;cursor:pointer}.wy-menu-vertical a:active{background-color:#2980B9;cursor:pointer;color:#fff}.wy-side-nav-search{z-index:200;background-color:#2980B9;text-align:center;padding:0.809em;display:block;color:#fcfcfc;margin-bottom:0.809em}.wy-side-nav-search input[type=text]{width:100%;border-radius:50px;padding:6px 12px;border-color:#2472a4}.wy-side-nav-search img{display:block;margin:auto auto 0.809em auto;height:45px;width:45px;background-color:#2980B9;padding:5px;border-radius:100%}.wy-side-nav-search>a,.wy-side-nav-search .wy-dropdown>a{color:#fcfcfc;font-size:100%;font-weight:bold;display:inline-block;padding:4px 6px;margin-bottom:0.809em}.wy-side-nav-search>a:hover,.wy-side-nav-search .wy-dropdown>a:hover{background:rgba(255,255,255,0.1)}.wy-nav .wy-menu-vertical header{color:#2980B9}.wy-nav .wy-menu-vertical a{color:#b3b3b3}.wy-nav .wy-menu-vertical a:hover{background-color:#2980B9;color:#fff}[data-menu-wrap]{-webkit-transition:all 0.2s ease-in;-moz-transition:all 0.2s ease-in;transition:all 0.2s ease-in;position:absolute;opacity:1;width:100%;opacity:0}[data-menu-wrap].move-center{left:0;right:auto;opacity:1}[data-menu-wrap].move-left{right:auto;left:-100%;opacity:0}[data-menu-wrap].move-right{right:-100%;left:auto;opacity:0}.wy-body-for-nav{background:left repeat-y 
#fcfcfc;background-image:url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAIAAACQd1PeAAAAGXRFWHRTb2Z0d2FyZQBBZG9iZSBJbWFnZVJlYWR5ccllPAAAAyRpVFh0WE1MOmNvbS5hZG9iZS54bXAAAAAAADw/eHBhY2tldCBiZWdpbj0i77u/IiBpZD0iVzVNME1wQ2VoaUh6cmVTek5UY3prYzlkIj8+IDx4OnhtcG1ldGEgeG1sbnM6eD0iYWRvYmU6bnM6bWV0YS8iIHg6eG1wdGs9IkFkb2JlIFhNUCBDb3JlIDUuMy1jMDExIDY2LjE0NTY2MSwgMjAxMi8wMi8wNi0xNDo1NjoyNyAgICAgICAgIj4gPHJkZjpSREYgeG1sbnM6cmRmPSJodHRwOi8vd3d3LnczLm9yZy8xOTk5LzAyLzIyLXJkZi1zeW50YXgtbnMjIj4gPHJkZjpEZXNjcmlwdGlvbiByZGY6YWJvdXQ9IiIgeG1sbnM6eG1wPSJodHRwOi8vbnMuYWRvYmUuY29tL3hhcC8xLjAvIiB4bWxuczp4bXBNTT0iaHR0cDovL25zLmFkb2JlLmNvbS94YXAvMS4wL21tLyIgeG1sbnM6c3RSZWY9Imh0dHA6Ly9ucy5hZG9iZS5jb20veGFwLzEuMC9zVHlwZS9SZXNvdXJjZVJlZiMiIHhtcDpDcmVhdG9yVG9vbD0iQWRvYmUgUGhvdG9zaG9wIENTNiAoTWFjaW50b3NoKSIgeG1wTU06SW5zdGFuY2VJRD0ieG1wLmlpZDoxOERBMTRGRDBFMUUxMUUzODUwMkJCOThDMEVFNURFMCIgeG1wTU06RG9jdW1lbnRJRD0ieG1wLmRpZDoxOERBMTRGRTBFMUUxMUUzODUwMkJCOThDMEVFNURFMCI+IDx4bXBNTTpEZXJpdmVkRnJvbSBzdFJlZjppbnN0YW5jZUlEPSJ4bXAuaWlkOjE4REExNEZCMEUxRTExRTM4NTAyQkI5OEMwRUU1REUwIiBzdFJlZjpkb2N1bWVudElEPSJ4bXAuZGlkOjE4REExNEZDMEUxRTExRTM4NTAyQkI5OEMwRUU1REUwIi8+IDwvcmRmOkRlc2NyaXB0aW9uPiA8L3JkZjpSREY+IDwveDp4bXBtZXRhPiA8P3hwYWNrZXQgZW5kPSJyIj8+EwrlwAAAAA5JREFUeNpiMDU0BAgwAAE2AJgB9BnaAAAAAElFTkSuQmCC);background-size:300px 1px}.wy-grid-for-nav{position:absolute;width:100%;height:100%}.wy-nav-side{position:absolute;top:0;left:0;width:300px;overflow:hidden;min-height:100%;background:#343131;z-index:200}.wy-nav-top{display:none;background:#2980B9;color:#fff;padding:0.4045em 0.809em;position:relative;line-height:50px;text-align:center;font-size:100%;*zoom:1}.wy-nav-top:before,.wy-nav-top:after{display:table;content:""}.wy-nav-top:after{clear:both}.wy-nav-top a{color:#fff;font-weight:bold}.wy-nav-top img{margin-right:12px;height:45px;width:45px;background-color:#2980B9;padding:5px;border-radius:100%}.wy-nav-top i{font-size:30px;float:left;cursor:pointer}.wy-nav-content-wrap{margin-left:300px;background:#fcfcfc;min-height:100%}.wy-nav-content{padding:1.618em 3.236em;height:100%;max-width:800px;margin:auto}.wy-body-mask{position:fixed;width:100%;height:100%;background:rgba(0,0,0,0.2);display:none;z-index:499}.wy-body-mask.on{display:block}footer{color:#999}footer p{margin-bottom:12px}.rst-footer-buttons{*zoom:1}.rst-footer-buttons:before,.rst-footer-buttons:after{display:table;content:""}.rst-footer-buttons:after{clear:both}#search-results .search li{margin-bottom:24px;border-bottom:solid 1px #e1e4e5;padding-bottom:24px}#search-results .search li:first-child{border-top:solid 1px #e1e4e5;padding-top:24px}#search-results .search li a{font-size:120%;margin-bottom:12px;display:inline-block}#search-results .context{color:gray;font-size:90%}@media screen and (max-width: 768px){.wy-body-for-nav{background:#fcfcfc}.wy-nav-top{display:block}.wy-nav-side{left:-300px}.wy-nav-side.shift{width:85%;left:0}.wy-nav-content-wrap{margin-left:0}.wy-nav-content-wrap .wy-nav-content{padding:1.618em}.wy-nav-content-wrap.shift{position:fixed;min-width:100%;left:85%;top:0;height:100%;overflow:hidden}}@media screen and (min-width: 1400px){.wy-nav-content-wrap{background:rgba(0,0,0,0.05)}.wy-nav-content{margin:0;background:#fcfcfc}}@media print{.rst-versions,footer,.wy-nav-side{display:none}.wy-nav-content-wrap{margin-left:0}}nav.stickynav{position:fixed;top:0}.rst-versions{position:fixed;bottom:0;left:0;width:300px;color:#fcfcfc;background:#1f1d1d;border-top:solid 10px #343131;font-family:"Lato","proxima-nova","Helvetica 
Neue",Arial,sans-serif;z-index:400}.rst-versions a{color:#2980B9;text-decoration:none}.rst-versions .rst-badge-small{display:none}.rst-versions .rst-current-version{padding:12px;background-color:#272525;display:block;text-align:right;font-size:90%;cursor:pointer;color:#27AE60;*zoom:1}.rst-versions .rst-current-version:before,.rst-versions .rst-current-version:after{display:table;content:""}.rst-versions .rst-current-version:after{clear:both}.rst-versions .rst-current-version .fa,.rst-versions .rst-current-version .rst-content .admonition-title,.rst-content .rst-versions .rst-current-version .admonition-title,.rst-versions .rst-current-version .rst-content h1 .headerlink,.rst-content h1 .rst-versions .rst-current-version .headerlink,.rst-versions .rst-current-version .rst-content h2 .headerlink,.rst-content h2 .rst-versions .rst-current-version .headerlink,.rst-versions .rst-current-version .rst-content h3 .headerlink,.rst-content h3 .rst-versions .rst-current-version .headerlink,.rst-versions .rst-current-version .rst-content h4 .headerlink,.rst-content h4 .rst-versions .rst-current-version .headerlink,.rst-versions .rst-current-version .rst-content h5 .headerlink,.rst-content h5 .rst-versions .rst-current-version .headerlink,.rst-versions .rst-current-version .rst-content h6 .headerlink,.rst-content h6 .rst-versions .rst-current-version .headerlink,.rst-versions .rst-current-version .rst-content dl dt .headerlink,.rst-content dl dt .rst-versions .rst-current-version .headerlink,.rst-versions .rst-current-version .icon{color:#fcfcfc}.rst-versions .rst-current-version .fa-book,.rst-versions .rst-current-version .icon-book{float:left}.rst-versions .rst-current-version .icon-book{float:left}.rst-versions .rst-current-version.rst-out-of-date{background-color:#E74C3C;color:#fff}.rst-versions .rst-current-version.rst-active-old-version{background-color:#F1C40F;color:#000}.rst-versions.shift-up .rst-other-versions{display:block}.rst-versions .rst-other-versions{font-size:90%;padding:12px;color:gray;display:none}.rst-versions .rst-other-versions hr{display:block;height:1px;border:0;margin:20px 0;padding:0;border-top:solid 1px #413d3d}.rst-versions .rst-other-versions dd{display:inline-block;margin:0}.rst-versions .rst-other-versions dd a{display:inline-block;padding:6px;color:#fcfcfc}.rst-versions.rst-badge{width:auto;bottom:20px;right:20px;left:auto;border:none;max-width:300px}.rst-versions.rst-badge .icon-book{float:none}.rst-versions.rst-badge .fa-book,.rst-versions.rst-badge .icon-book{float:none}.rst-versions.rst-badge.shift-up .rst-current-version{text-align:right}.rst-versions.rst-badge.shift-up .rst-current-version .fa-book,.rst-versions.rst-badge.shift-up .rst-current-version .icon-book{float:left}.rst-versions.rst-badge.shift-up .rst-current-version .icon-book{float:left}.rst-versions.rst-badge .rst-current-version{width:auto;height:30px;line-height:30px;padding:0 6px;display:block;text-align:center}@media screen and (max-width: 768px){.rst-versions{width:85%;display:none}.rst-versions.shift{display:block}img{width:100%;height:auto}}.rst-content img{max-width:100%;height:auto !important}.rst-content div.figure{margin-bottom:24px}.rst-content div.figure.align-center{text-align:center}.rst-content .section>img{margin-bottom:24px}.rst-content blockquote{margin-left:24px;line-height:24px;margin-bottom:24px}.rst-content .note .last,.rst-content .attention .last,.rst-content .caution .last,.rst-content .danger .last,.rst-content .error .last,.rst-content .hint .last,.rst-content .important 
.last,.rst-content .tip .last,.rst-content .warning .last,.rst-content .seealso .last,.rst-content .admonition-todo .last{margin-bottom:0}.rst-content .admonition-title:before{margin-right:4px}.rst-content .admonition table{border-color:rgba(0,0,0,0.1)}.rst-content .admonition table td,.rst-content .admonition table th{background:transparent !important;border-color:rgba(0,0,0,0.1) !important}.rst-content .section ol.loweralpha,.rst-content .section ol.loweralpha li{list-style:lower-alpha}.rst-content .section ol.upperalpha,.rst-content .section ol.upperalpha li{list-style:upper-alpha}.rst-content .section ol p,.rst-content .section ul p{margin-bottom:12px}.rst-content .line-block{margin-left:24px}.rst-content .topic-title{font-weight:bold;margin-bottom:12px}.rst-content .toc-backref{color:#404040}.rst-content .align-right{float:right;margin:0px 0px 24px 24px}.rst-content .align-left{float:left;margin:0px 24px 24px 0px}.rst-content .align-center{margin:auto;display:block}.rst-content h1 .headerlink,.rst-content h2 .headerlink,.rst-content h3 .headerlink,.rst-content h4 .headerlink,.rst-content h5 .headerlink,.rst-content h6 .headerlink,.rst-content dl dt .headerlink{display:none;visibility:hidden;font-size:14px}.rst-content h1 .headerlink:after,.rst-content h2 .headerlink:after,.rst-content h3 .headerlink:after,.rst-content h4 .headerlink:after,.rst-content h5 .headerlink:after,.rst-content h6 .headerlink:after,.rst-content dl dt .headerlink:after{visibility:visible;content:"";font-family:FontAwesome;display:inline-block}.rst-content h1:hover .headerlink,.rst-content h2:hover .headerlink,.rst-content h3:hover .headerlink,.rst-content h4:hover .headerlink,.rst-content h5:hover .headerlink,.rst-content h6:hover .headerlink,.rst-content dl dt:hover .headerlink{display:inline-block}.rst-content .sidebar{float:right;width:40%;display:block;margin:0 0 24px 24px;padding:24px;background:#f3f6f6;border:solid 1px #e1e4e5}.rst-content .sidebar p,.rst-content .sidebar ul,.rst-content .sidebar dl{font-size:90%}.rst-content .sidebar .last{margin-bottom:0}.rst-content .sidebar .sidebar-title{display:block;font-family:"Roboto Slab","ff-tisa-web-pro","Georgia",Arial,sans-serif;font-weight:bold;background:#e1e4e5;padding:6px 12px;margin:-24px;margin-bottom:24px;font-size:100%}.rst-content .highlighted{background:#F1C40F;display:inline-block;font-weight:bold;padding:0 6px}.rst-content .footnote-reference,.rst-content .citation-reference{vertical-align:super;font-size:90%}.rst-content table.docutils.citation,.rst-content table.docutils.footnote{background:none;border:none;color:#999}.rst-content table.docutils.citation td,.rst-content table.docutils.citation tr,.rst-content table.docutils.footnote td,.rst-content table.docutils.footnote tr{border:none;background-color:transparent !important;white-space:normal}.rst-content table.docutils.citation td.label,.rst-content table.docutils.footnote td.label{padding-left:0;padding-right:0;vertical-align:top}.rst-content table.field-list{border:none}.rst-content table.field-list td{border:none;padding-top:5px}.rst-content table.field-list td>strong{display:inline-block;margin-top:3px}.rst-content table.field-list .field-name{padding-right:10px;text-align:left;white-space:nowrap}.rst-content table.field-list .field-body{text-align:left;padding-left:0}.rst-content tt{color:#000}.rst-content tt big,.rst-content tt em{font-size:100% !important;line-height:normal}.rst-content tt .xref,a .rst-content tt{font-weight:bold}.rst-content a tt{color:#2980B9}.rst-content 
dl{margin-bottom:24px}.rst-content dl dt{font-weight:bold}.rst-content dl p,.rst-content dl table,.rst-content dl ul,.rst-content dl ol{margin-bottom:12px !important}.rst-content dl dd{margin:0 0 12px 24px}.rst-content dl:not(.docutils){margin-bottom:24px}.rst-content dl:not(.docutils) dt{display:inline-block;margin:6px 0;font-size:90%;line-height:normal;background:#e7f2fa;color:#2980B9;border-top:solid 3px #6ab0de;padding:6px;position:relative}.rst-content dl:not(.docutils) dt:before{color:#6ab0de}.rst-content dl:not(.docutils) dt .headerlink{color:#404040;font-size:100% !important}.rst-content dl:not(.docutils) dl dt{margin-bottom:6px;border:none;border-left:solid 3px #ccc;background:#f0f0f0;color:gray}.rst-content dl:not(.docutils) dl dt .headerlink{color:#404040;font-size:100% !important}.rst-content dl:not(.docutils) dt:first-child{margin-top:0}.rst-content dl:not(.docutils) tt{font-weight:bold}.rst-content dl:not(.docutils) tt.descname,.rst-content dl:not(.docutils) tt.descclassname{background-color:transparent;border:none;padding:0;font-size:100% !important}.rst-content dl:not(.docutils) tt.descname{font-weight:bold}.rst-content dl:not(.docutils) .optional{display:inline-block;padding:0 4px;color:#000;font-weight:bold}.rst-content dl:not(.docutils) .property{display:inline-block;padding-right:8px}.rst-content .viewcode-link,.rst-content .viewcode-back{display:inline-block;color:#27AE60;font-size:80%;padding-left:24px}.rst-content .viewcode-back{display:block;float:right}.rst-content p.rubric{margin-bottom:12px;font-weight:bold}@media screen and (max-width: 480px){.rst-content .sidebar{width:100%}}span[id*='MathJax-Span']{color:#404040}.math{text-align:center} diff --git a/docs/css/theme_extra.css b/docs/css/theme_extra.css deleted file mode 100644 index cf8123e..0000000 --- a/docs/css/theme_extra.css +++ /dev/null @@ -1,194 +0,0 @@ -/* - * Sphinx doesn't have support for section dividers like we do in - * MkDocs, this styles the section titles in the nav - * - * https://github.com/mkdocs/mkdocs/issues/175 - */ -.wy-menu-vertical span { - line-height: 18px; - padding: 0.4045em 1.618em; - display: block; - position: relative; - font-size: 90%; - color: #838383; -} - -.wy-menu-vertical .subnav a { - padding: 0.4045em 2.427em; -} - -/* - * Long navigations run off the bottom of the screen as the nav - * area doesn't scroll. - * - * https://github.com/mkdocs/mkdocs/pull/202 - * - * Builds upon pull 202 https://github.com/mkdocs/mkdocs/pull/202 - * to make toc scrollbar end before navigations buttons to not be overlapping. - */ -.wy-nav-side { - height: calc(100% - 45px); - overflow-y: auto; - min-height: 0; -} - -.rst-versions{ - border-top: 0; - height: 45px; -} - -@media screen and (max-width: 768px) { - .wy-nav-side { - height: 100%; - } -} - -/* - * readthedocs theme hides nav items when the window height is - * too small to contain them. - * - * https://github.com/mkdocs/mkdocs/issues/#348 - */ -.wy-menu-vertical ul { - margin-bottom: 2em; -} - -/* - * Wrap inline code samples otherwise they shoot of the side and - * can't be read at all. - * - * https://github.com/mkdocs/mkdocs/issues/313 - * https://github.com/mkdocs/mkdocs/issues/233 - * https://github.com/mkdocs/mkdocs/issues/834 - */ -code { - white-space: pre-wrap; - word-wrap: break-word; - padding: 2px 5px; -} - -/** - * Make code blocks display as blocks and give them the appropriate - * font size and padding. 
- * - * https://github.com/mkdocs/mkdocs/issues/855 - * https://github.com/mkdocs/mkdocs/issues/834 - * https://github.com/mkdocs/mkdocs/issues/233 - */ -pre code { - white-space: pre; - word-wrap: normal; - display: block; - padding: 12px; - font-size: 12px; -} - -/* - * Fix link colors when the link text is inline code. - * - * https://github.com/mkdocs/mkdocs/issues/718 - */ -a code { - color: #2980B9; -} -a:hover code { - color: #3091d1; -} -a:visited code { - color: #9B59B6; -} - -/* - * The CSS classes from highlight.js seem to clash with the - * ReadTheDocs theme causing some code to be incorrectly made - * bold and italic. - * - * https://github.com/mkdocs/mkdocs/issues/411 - */ -pre .cs, pre .c { - font-weight: inherit; - font-style: inherit; -} - -/* - * Fix some issues with the theme and non-highlighted code - * samples. Without and highlighting styles attached the - * formatting is broken. - * - * https://github.com/mkdocs/mkdocs/issues/319 - */ -.no-highlight { - display: block; - padding: 0.5em; - color: #333; -} - - -/* - * Additions specific to the search functionality provided by MkDocs - */ - -.search-results article { - margin-top: 23px; - border-top: 1px solid #E1E4E5; - padding-top: 24px; -} - -.search-results article:first-child { - border-top: none; -} - -form .search-query { - width: 100%; - border-radius: 50px; - padding: 6px 12px; /* csslint allow: box-model */ - border-color: #D1D4D5; -} - -.wy-menu-vertical li ul { - display: inherit; -} - -.wy-menu-vertical li ul.subnav ul.subnav{ - padding-left: 1em; -} - -.wy-menu-vertical .subnav li.current > a { - padding-left: 2.42em; -} -.wy-menu-vertical .subnav li.current > ul li a { - padding-left: 3.23em; -} - -/* - * Improve inline code blocks within admonitions. - * - * https://github.com/mkdocs/mkdocs/issues/656 - */ - .admonition code { - color: #404040; - border: 1px solid #c7c9cb; - border: 1px solid rgba(0, 0, 0, 0.2); - background: #f8fbfd; - background: rgba(255, 255, 255, 0.7); -} - -/* - * Account for wide tables which go off the side. - * Override borders to avoid wierdness on narrow tables. - * - * https://github.com/mkdocs/mkdocs/issues/834 - * https://github.com/mkdocs/mkdocs/pull/1034 - */ -.rst-content .section .docutils { - width: 100%; - overflow: auto; - display: block; - border: none; -} - -td, th { - border: 1px solid #e1e4e5 !important; /* csslint allow: important */ - border-collapse: collapse; -} - diff --git a/docs/dashboard.html b/docs/dashboard.html new file mode 100644 index 0000000..0b92a8f --- /dev/null +++ b/docs/dashboard.html @@ -0,0 +1,279 @@ + + + + + + + + + + + Coach Dashboard — Reinforcement Learning Coach 0.11.0 documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + +
+ + + + + +
+ +
+ + + + + + + + + + + + + + + + + +
+ + + + +
+
+
+
+ +
+

Coach Dashboard

+

Reinforcement learning algorithms are neat. That is - when they work. But when they don’t, RL algorithms are often quite tricky to debug.

+

Finding the root cause of why things break in RL is rather difficult. Moreover, different RL algorithms shine in some aspects but lack in others. Comparing algorithms faithfully is also a hard task, one that requires the right tools.

+

Coach Dashboard is a visualization tool which simplifies the analysis of the training process. Each run of Coach extracts a lot of information from within the algorithm and stores it in the experiment directory. This information is very valuable for debugging, analyzing and comparing different algorithms, but without a good visualization tool it cannot be utilized. This is where Coach Dashboard comes in.

+
+

Visualizing Signals

+

Coach Dashboard exposes a convenient user interface for visualizing the training signals. The signals are updated dynamically during agent training. Additionally, it allows selecting a subset of the available signals and overlaying them on top of each other.

+_images/updating_dynamically.gif +
    +
  • Holding the CTRL key while selecting signals will allow visualizing more than one signal.
  • +
  • Signals can be plotted against either of the Y-axes, which makes it possible to compare signals with different scales. To move a signal to the second Y-axis, select it and press the ‘Toggle Second Axis’ button.
  • +
+
+
+

Tracking Statistics

+

When running parallel algorithms, such as A3C, it often helps to visualize the learning of all the workers at the same time. Coach Dashboard allows viewing multiple signals (and even smoothing them out, if required) from multiple workers. In addition, it supports viewing the mean and standard deviation of the same signal across different workers, using Bollinger bands.

+
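As a rough illustration of what the Bollinger-band view computes, here is a minimal numpy/pandas sketch (not Coach Dashboard code) that aggregates the same signal from several workers into a mean and a one-standard-deviation band; the per-worker series and the smoothing window are assumptions made for the example.

```python
import numpy as np
import pandas as pd

# hypothetical per-worker reward signals, one column per worker (assumed data)
rewards = pd.DataFrame({
    "worker_0": np.random.randn(1000).cumsum(),
    "worker_1": np.random.randn(1000).cumsum(),
    "worker_2": np.random.randn(1000).cumsum(),
})

window = 50  # smoothing window, chosen arbitrarily for the example
mean = rewards.mean(axis=1).rolling(window, min_periods=1).mean()
std = rewards.std(axis=1).rolling(window, min_periods=1).mean()

upper_band = mean + std  # upper Bollinger band
lower_band = mean - std  # lower Bollinger band
```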
+_images/bollinger_bands.png +

Displaying Bollinger Bands

+
+
+_images/separate_signals.png +

Displaying all the Workers

+
+
+
+

Comparing Runs

+

Reinforcement learning algorithms are notoriously unstable and suffer from high run-to-run variance. This makes benchmarking and comparing different algorithms even harder. To ease this process, it is common to execute several runs of the same algorithm and average over them. This is easy to do with Coach Dashboard: centralize all the experiment directories in a single directory and load them as a single group. Loading several groups of different algorithms then allows comparing the averaged signals, such as the total episode reward.

+
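Outside the dashboard, the same grouping-and-averaging idea can be approximated with a few lines of pandas. The directory layout, file name and column name below are assumptions made for illustration; the exact files written into an experiment directory depend on the Coach version.

```python
from pathlib import Path

import pandas as pd

group_dir = Path("experiments/my_algorithm")  # one sub-directory per run (assumed layout)

# load the signal file of every run and average the episode reward across runs
runs = [pd.read_csv(csv_path) for csv_path in group_dir.glob("*/worker_0.csv")]  # assumed file name
rewards = pd.concat([run["Training Reward"] for run in runs], axis=1)            # assumed column name
mean_reward = rewards.mean(axis=1)
print(mean_reward.tail())
```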

In RL, there are several interesting performance metrics to consider, and comparing against each of them is easy to do by controlling the X-axis units in Coach Dashboard. It is possible to switch between several options, such as the total number of steps or the total training time.

+
+_images/compare_by_time.png +

Comparing Several Algorithms According to the Time Passed

+
+
+_images/compare_by_num_episodes.png +

Comparing Several Algorithms According to the Number of Episodes Played

+
+
+
+ + +
+ +
+ + +
+
+ +
+ +
+ + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/docs/dashboard/index.html b/docs/dashboard/index.html deleted file mode 100644 index 3befb06..0000000 --- a/docs/dashboard/index.html +++ /dev/null @@ -1,345 +0,0 @@ - - - - - - - - - - - Coach Dashboard - Reinforcement Learning Coach - - - - - - - - - - - - - - - - - -
- - - - -
- - - - - -
-
-
-
    -
  • Docs »
  • - - - -
  • Coach Dashboard
  • -
  • - -
  • -
-
-
-
-
- -

Reinforcement learning algorithms are neat. That is - when they work. But when they don't, RL algorithms are often quite tricky to debug.

-

Finding the root cause for why things break in RL is rather difficult. Moreover, different RL algorithms shine in some aspects, but then lack on other. Comparing the algorithms faithfully is also a hard task, which requires the right tools.

-

Coach Dashboard is a visualization tool which simplifies the analysis of the training process. Each run of Coach extracts a lot of information from within the algorithm and stores it in the experiment directory. This information is very valuable for debugging, analyzing and comparing different algorithms. But without a good visualization tool, this information can not be utilized. This is where Coach Dashboard takes place.

-

Visualizing Signals

-

Coach Dashboard exposes a convenient user interface for visualizing the training signals. The signals are dynamically updated - during the agent training. Additionaly, it allows selecting a subset of the available signals, and then overlaying them on top of each other.

-

- -Updating Dynamically - -

- -
    -
  • Holding the CTRL key, while selecting signals, will allow visualizing more than one signal.
  • -
  • Signals can be visualized, using either of the Y-axes, in order to visualize signals with different scales. To move a signal to the second Y-axis, select it and press the 'Toggle Second Axis' button.
  • -
-

Tracking Statistics

-

When running parallel algorithms, such as A3C, it often helps visualizing the learning of all the workers, at the same time. Coach Dashboard allows viewing multiple signals (and even smooth them out, if required) from multiple workers. In addition, it supports viewing the mean and standard deviation of the same signal, across different workers, using Bollinger bands.

-

- - - - - -
- Bollinger Bands - Displaying Bollinger Bands - - Separate Signals - Displaying All The Workers -
- - - - - -

- -

Comparing Runs

-

Reinforcement learning algorithms are notoriously known as unstable, and suffer from high run-to-run variance. This makes benchmarking and comparing different algorithms even harder. To ease this process, it is common to execute several runs of the same algorithm and average over them. This is easy to do with Coach Dashboard, by centralizing all the experiment directories in a single directory, and then loading them as a single group. Loading several groups of different algorithms then allows comparing the averaged signals, such as the total episode reward.

-

In RL, there are several interesting performance metrics to consider, and this is easy to do by controlling the X-axis units in Coach Dashboard. It is possible to switch between several options such as the total number of steps or the total training time.

-

- - - - - - - - - - -
- -Comparing By Time - - -Comparing Several Algorithms According to the Time Passed - - - - -Comparing By Number of Episodes - - -Comparing Several Algorithms According to the Number of Episodes Played - - -
- - - -

- -
-
- - -
-
- -
- -
- -
- - - - « Previous - - - Next » - - -
- - - - - - - - diff --git a/docs/design/control_flow.html b/docs/design/control_flow.html new file mode 100644 index 0000000..322c2ad --- /dev/null +++ b/docs/design/control_flow.html @@ -0,0 +1,325 @@ + + + + + + + + + + + Control Flow — Reinforcement Learning Coach 0.11.0 documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + +
+ + + + + +
+ +
+ + + + + + + + + + + + + + + + + +
+ + + + +
+
+
+
+ +
+

Control Flow

+

Coach is built in a modular way, encouraging module reuse and reducing the amount of boilerplate code needed +for developing new algorithms or integrating a new challenge as an environment. +On the other hand, it can be overwhelming for new users to ramp up on the code. +To help with that, here’s a short overview of the control flow.

+
+

Graph Manager

+

The main entry point for Coach is coach.py. +The main functionality of this script is to parse the command line arguments and invoke all the sub-processes needed +for the given experiment. +coach.py executes the given preset file which returns a GraphManager object.

+

A preset is a design pattern that is intended for concentrating the entire definition of an experiment in a single +file. This helps with experiment reproducibility, improves readability and prevents confusion. +The outcome of a preset is a GraphManager, which will usually be instantiated in the final lines of the preset.

+
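To make this concrete, a preset usually boils down to a few lines along these lines. The module paths and parameter classes below are assumptions that vary between Coach versions, so treat this as a sketch rather than a copy-paste preset.

```python
# hypothetical preset file - the imports below are illustrative, not guaranteed APIs
from rl_coach.agents.dqn_agent import DQNAgentParameters
from rl_coach.environments.gym_environment import GymVectorEnvironment
from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
from rl_coach.graph_managers.graph_manager import SimpleSchedule

agent_params = DQNAgentParameters()
env_params = GymVectorEnvironment(level='CartPole-v0')

# the final lines of a preset instantiate the GraphManager that coach.py will run
graph_manager = BasicRLGraphManager(agent_params=agent_params,
                                    env_params=env_params,
                                    schedule_params=SimpleSchedule())
```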

A GraphManager is an object that holds all the agents and environments of an experiment, and is mostly responsible +for scheduling their work. Why is it called a graph manager? Because agents and environments are structured into +a graph of interactions. For example, in hierarchical reinforcement learning schemes, there will often be a master +policy agent that controls a sub-policy agent, which in turn interacts with the environment. Other schemes can have +much more complex graphs of control, such as several hierarchy layers, each with multiple agents. +The graph manager’s main loop is the improve loop.

+../_images/improve.png +

The improve loop cycles between 3 main phases - heatup, training and evaluation (a rough pseudocode sketch follows the list below):

+
    +
  • Heatup - the goal of this phase is to collect initial data for populating the replay buffers. The heatup phase +takes place only at the beginning of the experiment, and the agents will act completely randomly during this phase. +Importantly, the agents do not train their networks during this phase. DQN, for example, uses 50k random steps in order +to initialize the replay buffers.
  • +
  • Training - the training phase is the main phase of the experiment. This phase can change between agent types, +but essentially consists of repeated cycles of acting, collecting data from the environment, and training the agent +networks. During this phase, the agent will use its exploration policy in training mode, which will add noise to its +actions in order to improve its knowledge about the environment state space.
  • +
  • Evaluation - the evaluation phase is intended for evaluating the current performance of the agent. The agents +will act greedily in order to exploit the knowledge aggregated so far, and the performance over multiple episodes of +evaluation will be averaged in order to reduce the effects of stochasticity across all the components.
  • +
+
+
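The scheduling described above can be summarized with a short pseudocode sketch of the improve loop; the method names are illustrative and do not correspond to the actual GraphManager implementation.

```python
def improve(graph_manager, schedule):
    # heatup: fill the replay buffers with random experience, without training
    graph_manager.act(steps=schedule.heatup_steps, random_actions=True)

    while not schedule.done():
        # training: interleave acting (with exploration noise) and network updates
        graph_manager.act(steps=schedule.steps_between_evaluations)
        graph_manager.train()

        # evaluation: act greedily and average the reward over several episodes
        graph_manager.evaluate(episodes=schedule.evaluation_episodes)
```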
+

Level Manager

+

In each of the 3 phases described above, the graph manager will invoke all the hierarchy levels in the graph in a +synchronized manner. In Coach, agents do not interact directly with the environment. Instead, they go through a +LevelManager, which is a proxy that manages their interaction. The level manager passes the current state and reward +from the environment to the agent, and the actions from the agent to the environment.

+

The motivation for having a level manager is to disentangle the code of the environment and the agent, so as to allow more +complex interactions. Each level can have multiple agents which interact with the environment. Which agent gets to choose the +action for each step is controlled by the level manager. +Additionally, each level manager can act as an environment for the hierarchy level above it, such that each hierarchy +level can be seen as an interaction between an agent and an environment, even if the environment is just more agents in +a lower hierarchy level.

+
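The key property is that a level manager exposes the same step-like interface as an environment, so hierarchy levels can be stacked. The following sketch uses hypothetical method names to illustrate that idea only.

```python
class LevelManagerSketch:
    """Hypothetical sketch: mediates between the agents of one hierarchy level and
    whatever acts as their environment (a real environment or a lower level)."""

    def __init__(self, agents, environment):
        self.agents = agents            # agents living at this hierarchy level
        self.environment = environment  # a real environment or another level manager

    def step(self, directive_from_above=None):
        # decide which agent acts, let it pick an action, and forward it downwards
        agent = self.agents[0]  # trivial placeholder selection policy
        action = agent.act(directive_from_above)
        response = self.environment.step(action)
        agent.observe(response)
        # returning the response lets the level above treat this level as an environment
        return response
```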
+
+

Agent

+

The base agent class has 3 main functions that will be used during those phases - observe, act and train (a combined sketch appears after the list below).

+
    +
  • Observe - this function gets the latest response from the environment as input, and updates the internal state +of the agent with the new information. The environment response will +first be passed through the agent’s InputFilter object, which will process the values in the response according +to the specific agent definition. The environment response will then be converted into a +Transition, which contains the information from a single step +\((s_{t}, a_{t}, r_{t}, s_{t+1}, \textrm{terminal signal})\), and stored in the memory.
  • +
+../_images/observe.png +
    +
  • Act - this function uses the current internal state of the agent in order to select the next action to take on +the environment. This function will call the per-agent custom function choose_action that will use the network +and the exploration policy in order to select an action. The action will be stored, together with any additional +information (like the action value for example) in an ActionInfo object. The ActionInfo object will then be +passed through the agent’s OutputFilter to allow any processing of the action (like discretization, +or shifting, for example), before passing it to the environment.
  • +
+../_images/act.png +
    +
  • Train - this function will sample a batch from the memory and train on it. The batch of transitions will be +first wrapped into a Batch object to allow efficient querying of the batch values. It will then be passed into +the agent specific learn_from_batch function, that will extract network target values from the batch and will +train the networks accordingly. Lastly, if there’s a target network defined for the agent, it will sync the target +network weights with the online network.
  • +
+../_images/train.png +
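Putting the three functions together, a single environment step seen from the agent's side looks roughly like the sketch below; the wiring is simplified and hypothetical, but the method names follow the description above.

```python
def agent_environment_step(agent, environment, last_response):
    # observe: filter the environment response, build a Transition, store it in memory
    agent.observe(last_response)

    # act: choose an action, pass it through the output filter, send it to the environment
    action_info = agent.act()
    next_response = environment.step(action_info.action)

    # train: sample a batch from memory and update the networks (and any target network)
    agent.train()

    return next_response
```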
+
+ + +
+ +
+ + +
+
+ +
+ +
+ + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/docs/design/control_flow/index.html b/docs/design/control_flow/index.html deleted file mode 100644 index 7a05c48..0000000 --- a/docs/design/control_flow/index.html +++ /dev/null @@ -1,367 +0,0 @@ - - - - - - - - - - - Control Flow - Reinforcement Learning Coach - - - - - - - - - - - - - - - - - -
- - - - -
- - - - - -
-
-
-
    -
  • Docs »
  • - - - -
  • Design »
  • - - - -
  • Control Flow
  • -
  • - -
  • -
-
-
-
-
- - - -

Coach Control Flow

-

Coach is built in a modular way, encouraging modules reuse and reducing the amount of boilerplate code needed -for developing new algorithms or integrating a new challenge as an environment. -On the other hand, it can be overwhelming for new users to ramp up on the code. -To help with that, here's a short overview of the control flow.

-

Graph Manager

-

The main entry point for Coach is coach.py. -The main functionality of this script is to parse the command line arguments and invoke all the sub-processes needed -for the given experiment. -coach.py executes the given preset file which returns a GraphManager object.

-

A preset is a design pattern that is intended for concentrating the entire definition of an experiment in a single -file. This helps with experiments reproducibility, improves readability and prevents confusion. -The outcome of a preset is a GraphManager which will usually be instantiated in the final lines of the preset.

-

A GraphManager is an object that holds all the agents and environments of an experiment, and is mostly responsible -for scheduling their work. Why is it called a graph manager? Because agents and environments are structured into -a graph of interactions. For example, in hierarchical reinforcement learning schemes, there will often be a master -policy agent, that will control a sub-policy agent, which will interact with the environment. Other schemes can have -much more complex graphs of control, such as several hierarchy layers, each with multiple agents. -The graph manager's main loop is the improve loop.

-

- -Improve loop - -

- -

The improve loop skips between 3 main phases - heatup, training and evaluation:

-
    -
  • -

    Heatup - the goal of this phase is to collect initial data for populating the replay buffers. The heatup phase - takes place only in the beginning of the experiment, and the agents will act completely randomly during this phase. - Importantly, the agents do not train their networks during this phase. DQN for example, uses 50k random steps in order - to initialize the replay buffers.

    -
  • -
  • -

    Training - the training phase is the main phase of the experiment. This phase can change between agent types, - but essentially consists of repeated cycles of acting, collecting data from the environment, and training the agent - networks. During this phase, the agent will use its exploration policy in training mode, which will add noise to its - actions in order to improve its knowledge about the environment state space.

    -
  • -
  • -

    Evaluation - the evaluation phase is intended for evaluating the current performance of the agent. The agents - will act greedily in order to exploit the knowledge aggregated so far and the performance over multiple episodes of - evaluation will be averaged in order to reduce the stochasticity effects of all the components.

    -
  • -
-

Level Manager

-

In each of the 3 phases described above, the graph manager will invoke all the hierarchy levels in the graph in a -synchronized manner. In Coach, agents do not interact directly with the environment. Instead, they go through a -LevelManager, which is a proxy that manages their interaction. The level manager passes the current state and reward -from the environment to the agent, and the actions from the agent to the environment.

-

The motivation for having a level manager is to disentangle the code of the environment and the agent, so to allow more -complex interactions. Each level can have multiple agents which interact with the environment. Who gets to choose the -action for each step is controlled by the level manager. -Additionally, each level manager can act as an environment for the hierarchy level above it, such that each hierarchy -level can be seen as an interaction between an agent and an environment, even if the environment is just more agents in -a lower hierarchy level.

-

Agent

-

The base agent class has 3 main function that will be used during those phases - observe, act and train.

-
    -
  • Observe - this function gets the latest response from the environment as input, and updates the internal state - of the agent with the new information. The environment response will - be first passed through the agent's InputFilter object, which will process the values in the response, according - to the specific agent definition. The environment response will then be converted into a - Transition which will contain the information from a single step - (), and store it in the memory.
  • -
-

Observe

-
    -
  • Act - this function uses the current internal state of the agent in order to select the next action to take on - the environment. This function will call the per-agent custom function choose_action that will use the network - and the exploration policy in order to select an action. The action will be stored, together with any additional - information (like the action value for example) in an ActionInfo object. The ActionInfo object will then be - passed through the agent's OutputFilter to allow any processing of the action (like discretization, - or shifting, for example), before passing it to the environment.
  • -
-

Act

-
    -
  • Train - this function will sample a batch from the memory and train on it. The batch of transitions will be - first wrapped into a Batch object to allow efficient querying of the batch values. It will then be passed into - the agent specific learn_from_batch function, that will extract network target values from the batch and will - train the networks accordingly. Lastly, if there's a target network defined for the agent, it will sync the target - network weights with the online network.
  • -
-

Train

- -
-
- - -
-
- -
- -
- -
- - - - « Previous - - - Next » - - -
- - - - - - - - diff --git a/docs/design/features/index.html b/docs/design/features/index.html deleted file mode 100644 index 4ff66a9..0000000 --- a/docs/design/features/index.html +++ /dev/null @@ -1,328 +0,0 @@ - - - - - - - - - - - Features - Reinforcement Learning Coach - - - - - - - - - - - - - - - - - -
- - - - -
- - - - - -
-
-
-
    -
  • Docs »
  • - - - -
  • Design »
  • - - - -
  • Features
  • -
  • - -
  • -
-
-
-
-
- -

Coach Features

-

Supported Algorithms

-

Coach supports many state-of-the-art reinforcement learning algorithms, which are separated into two main classes - -value optimization and policy optimization. A detailed description of those algorithms may be found in the algorithms -section.

-

- -Supported Algorithms - -

- -

Supported Environments

-

Coach supports a large number of environments which can be solved using reinforcement learning:

-
    -
  • -

    DeepMind Control Suite - a set of reinforcement learning environments - powered by the MuJoCo physics engine.

    -
  • -
  • -

    Blizzard Starcraft II - a popular strategy game which was wrapped with a - python interface by DeepMind.

    -
  • -
  • -

    ViZDoom - a Doom-based AI research platform for reinforcement learning - from raw visual information.

    -
  • -
  • -

    CARLA - an open-source simulator for autonomous driving research.

    -
  • -
  • -

    OpenAI Gym - a library which consists of a set of environments, from games to robotics. - Additionally, it can be extended using the API defined by the authors.

    -
  • -
-

In Coach, we support all the native environments in Gym, along with several extensions such as:

-
    -
  • -

    Roboschool - a set of environments powered by the PyBullet engine, - that offer a free alternative to MuJoCo.

    -
  • -
  • -

    Gym Extensions - a set of environments that extends Gym for - auxiliary tasks (multitask learning, transfer learning, inverse reinforcement learning, etc.)

    -
  • -
  • -

    PyBullet - a physics engine that - includes a set of robotics environments.

    -
  • -
- -
-
- - -
-
- -
- -
- -
- - - - « Previous - - - Next » - - -
- - - - - - - - diff --git a/docs/design/filters/index.html b/docs/design/filters/index.html deleted file mode 100644 index f5015af..0000000 --- a/docs/design/filters/index.html +++ /dev/null @@ -1,416 +0,0 @@ - - - - - - - - - - - Filters - Reinforcement Learning Coach - - - - - - - - - - - - - - - - - -
- - - - -
- - - - - -
-
-
-
    -
  • Docs »
  • - - - -
  • Design »
  • - - - -
  • Filters
  • -
  • - -
  • -
-
-
-
-
- -

Filters

-

Filters are a mechanism in Coach that allows doing pre-processing and post-processing of the internal agent information. -There are two filter categories -

-
    -
  • -

    Input filters - these are filters that process the information passed into the agent from the environment. - This information includes the observation and the reward. Input filters therefore allow rescaling observations, - normalizing rewards, stack observations, etc.

    -
  • -
  • -

    Output filters - these are filters that process the information going out of the agent into the environment. - This information includes the action the agent chooses to take. Output filters therefore allow conversion of - actions from one space into another. For example, the agent can take discrete actions, that will be mapped by - the output filter onto continuous actions.

    -
  • -
-

Filters can be stacked on top of each other in order to build complex processing flows of the inputs or outputs.

-

- -Filters mechanism - -

- -

Input Filters

-

The input filters are separated into two categories - observation filters and reward filters.

-

Observation Filters

-
    -
  • -

    ObservationClippingFilter - Clips the observation values to a given range of values. For example, if the - observation consists of measurements in an arbitrary range, and we want to control the minimum and maximum values - of these observations, we can define a range and clip the values of the measurements.

    -
  • -
  • -

    ObservationCropFilter - Crops the size of the observation to a given crop window. For example, in Atari, the - observations are images with a shape of 210x160. Usually, we will want to crop the size of the observation to a - square of 160x160 before rescaling them.

    -
  • -
  • -

    ObservationMoveAxisFilter - Reorders the axes of the observation. This can be useful when the observation is an - image, and we want to move the channel axis to be the last axis instead of the first axis.

    -
  • -
  • -

    ObservationNormalizationFilter - Normalizes the observation values with a running mean and standard deviation of - all the observations seen so far. The normalization is performed element-wise. Additionally, when working with - multiple workers, the statistics used for the normalization operation are accumulated over all the workers.

    -
  • -
  • -

    ObservationReductionBySubPartsNameFilter - Allows keeping only parts of the observation, by specifying their - name. For example, the CARLA environment extracts multiple measurements that can be used by the agent, such as - speed and location. If we want to only use the speed, it can be done using this filter.

    -
  • -
  • -

    ObservationRescaleSizeByFactorFilter - Rescales an image observation by some factor. For example, the image size - can be reduced by a factor of 2.

    -
  • -
  • -

    ObservationRescaleToSizeFilter - Rescales an image observation to a given size. The target size does not - necessarily keep the aspect ratio of the original observation.

    -
  • -
  • -

    ObservationRGBToYFilter - Converts a color image observation specified using the RGB encoding into a grayscale - image observation, by keeping only the luminance (Y) channel of the YUV encoding. This can be useful if the colors - in the original image are not relevant for solving the task at hand.

    -
  • -
  • -

    ObservationSqueezeFilter - Removes redundant axes from the observation, which are axes with a dimension of 1.

    -
  • -
  • -

    ObservationStackingFilter - Stacks several observations on top of each other. For image observation this will - create a 3D blob. The stacking is done in a lazy manner in order to reduce memory consumption. To achieve this, - a LazyStack object is used in order to wrap the observations in the stack. For this reason, the - ObservationStackingFilter must be the last filter in the inputs filters stack.

    -
  • -
  • -

    ObservationUint8Filter - Converts a floating point observation into an unsigned int 8 bit observation. This is - mostly useful for reducing memory consumption and is usually used for image observations. The filter will first - spread the observation values over the range 0-255 and then discretize them into integer values.

    -
  • -
-

Reward Filters

-
    -
  • -

    RewardClippingFilter - Clips the reward values into a given range. For example, in DQN, the Atari rewards are - clipped into the range -1 and 1 in order to control the scale of the returns.

    -
  • -
  • -

    RewardNormalizationFilter - Normalizes the reward values with a running mean and standard deviation of - all the rewards seen so far. When working with multiple workers, the statistics used for the normalization operation - are accumulated over all the workers.

    -
  • -
  • -

    RewardRescaleFilter - Rescales the reward by a given factor. Rescaling the rewards of the environment has been - observed to have a large effect (negative or positive) on the behavior of the learning process.

    -
  • -
-

Output Filters

-

The output filters only process the actions.

-

Action Filters

-
    -
  • -

    AttentionDiscretization - Discretizes an AttentionActionSpace. The attention action space defines the actions - as choosing sub-boxes in a given box. For example, consider an image of size 100x100, where the action is choosing - a crop window of size 20x20 to attend to in the image. AttentionDiscretization allows discretizing the possible crop - windows to choose into a finite number of options, and map a discrete action space into those crop windows.

    -
  • -
  • -

    BoxDiscretization - Discretizes a continuous action space into a discrete action space, allowing the usage of - agents such as DQN for continuous environments such as MuJoCo. Given the number of bins to discretize into, the - original continuous action space is uniformly separated into the given number of bins, each mapped to a discrete - action index. For example, if the original actions space is between -1 and 1 and 5 bins were selected, the new action - space will consist of 5 actions mapped to -1, -0.5, 0, 0.5 and 1.

    -
  • -
  • -

    BoxMasking - Masks part of the action space to enforce the agent to work in a defined space. For example, - if the original action space is between -1 and 1, then this filter can be used in order to constrain the agent actions - to the range 0 and 1 instead. This essentially masks the range -1 and 0 from the agent.

    -
  • -
  • -

    PartialDiscreteActionSpaceMap - Partial map of two countable action spaces. For example, consider an environment - with a MultiSelect action space (select multiple actions at the same time, such as jump and go right), with 8 actual - MultiSelect actions. If we want the agent to be able to select only 5 of those actions by their index (0-4), we can - map a discrete action space with 5 actions into the 5 selected MultiSelect actions. This will both allow the agent to - use regular discrete actions, and mask 3 of the actions from the agent.

    -
  • -
  • -

    FullDiscreteActionSpaceMap - Full map of two countable action spaces. This works in a similar way to the - PartialDiscreteActionSpaceMap, but maps the entire source action space into the entire target action space, without - masking any actions.

    -
  • -
  • -

    LinearBoxToBoxMap - A linear mapping of two box action spaces. For example, if the action space of the - environment consists of continuous actions between 0 and 1, and we want the agent to choose actions between -1 and 1, - the LinearBoxToBoxMap can be used to map the range -1 and 1 to the range 0 and 1 in a linear way. This means that the - action -1 will be mapped to 0, the action 1 will be mapped to 1, and the rest of the actions will be linearly mapped - between those values.

    -
  • -
- -
-
- - -
-
- -
- -
- -
- - - - « Previous - - - Next » - - -
- - - - - - - - diff --git a/docs/design/horizontal_scaling.html b/docs/design/horizontal_scaling.html new file mode 100644 index 0000000..add67eb --- /dev/null +++ b/docs/design/horizontal_scaling.html @@ -0,0 +1,394 @@ + + + + + + + + + + + <no title> — Reinforcement Learning Coach 0.11.0 documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + +
+ + + + + +
+ +
+ + + + + + + + + + + + + + + + + +
+ + + + +
+
+
+
+ +

# Scaling out rollout workers

+

This document contains some options for how we could implement horizontal scaling of rollout workers in coach, though most details are not specific to coach. A few options are laid out; my current suggestion would be to start with Option 1 and move on to Option 1a or Option 1b as required.

+

## Off Policy Algorithms

+

### Option 1 - master polls file system

+
    +
  • one master process samples memories and updates the policy

    +
  • +
  • many worker processes execute rollouts

    +
  • +
  • coordinate using a single shared networked file system: nfs, ceph, dat, s3fs, etc.

    +
  • +
  • policy sync communication method (see the sketch after this list): +- master process occasionally writes the policy to the shared file system +- worker processes occasionally read the policy from the shared file system +- prevent workers from reading a policy which has not been completely written to disk using either:

    +
    +
      +
    • redis lock
    • +
    • write to temporary files and then rename
    • +
    +
    +
  • +
  • rollout memories: +- sync communication method:

    +
    +
      +
    • worker processes write rollout memories as they are generated to shared filesystem
    • +
    • master process occasionally reads rollout memories from shared file system
    • +
    • master process must be resilient to corrupted or incompletely written memories
    • +
    +
    +
      +
    • sampling method: +- master process keeps all rollouts in memory utilizing existing coach memory classes
    • +
    +
  • +
  • control flow: +- master:

    +
    +
      +
    • run training updates interleaved with loading of any newly available rollouts in memory
    • +
    • periodically write policy to disk
    • +
    +
    +
      +
    • workers: +- periodically read policy from disk +- evaluate rollouts and write them to disk
    • +
    +
  • +
  • ops: +- kubernetes yaml, kml, docker compose, etc +- a default shared file system can be provided, while allowing the user to specify something else if desired +- a default method of launching the workers and master (in kubernetes, gce, aws, etc) can be provided

    +
  • +
+
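A minimal sketch of the write-then-rename trick mentioned in the list above, used to publish the policy safely on a shared file system; the paths and the serialization format are assumptions.

```python
import os
import pickle
import tempfile

SHARED_DIR = "/mnt/shared"                            # assumed mount point of the shared FS
POLICY_PATH = os.path.join(SHARED_DIR, "policy.pkl")  # assumed policy file name

def publish_policy(policy_weights):
    # write to a temporary file on the same file system, then rename atomically,
    # so workers can never observe a partially written policy
    fd, tmp_path = tempfile.mkstemp(dir=SHARED_DIR)
    with os.fdopen(fd, "wb") as f:
        pickle.dump(policy_weights, f)
    os.replace(tmp_path, POLICY_PATH)

def load_policy():
    with open(POLICY_PATH, "rb") as f:
        return pickle.load(f)
```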

#### Pros

+
    +
  • very simple to implement, infrastructure already available in ai-lab-kubernetes
  • +
  • fast enough for proof of concept and iteration of interface design
  • +
  • rollout memories are durable and can be easily reused in later off policy training
  • +
  • if designed properly, there is a clear path towards: +- decreasing latency using in-memory store (option 1a/b) +- increasing rollout memory size using distributed sampling methods (option 1c)
  • +
+

#### Cons

+
    +
  • file system interface incurs additional latency: rollout memories must be written to disk, and later read from disk, instead of going directly from memory to memory.
  • +
  • will require modifying the standard control flow; there will be an impact on algorithms which expect particular training regimens, specifically algorithms which are sensitive to the number of update steps between target/online network updates
  • +
  • will not be particularly efficient in strictly on policy algorithms where each rollout must use the most recent policy available
  • +
+

### Option 1a - master polls (redis) list

+
    +
  • instead of using a file system as in Option 1, redis lists can be used
  • +
  • policy is stored as a single key/value pair (locking no longer necessary)
  • +
  • rollout memory communication (see the sketch after this list): +- workers: redis list push +- master: redis list len, redis list range
  • +
  • note: many databases are interchangeable with redis protocol: google memorystore, aws elasticache, etc.
  • +
  • note: many databases can implement this interface with minimal glue: SQL, any objectstore, etc.
  • +
+
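A hedged sketch of the redis list variant: workers push pickled rollouts onto a list, and the master drains whatever is available between training steps. The key name, host and serialization are assumptions.

```python
import pickle

import redis

r = redis.Redis(host="redis", port=6379)  # assumed service name and port
ROLLOUT_KEY = "rollouts"                  # assumed key

def push_rollout(rollout):
    # worker side: append one finished rollout to the shared list
    r.rpush(ROLLOUT_KEY, pickle.dumps(rollout))

def drain_rollouts():
    # master side: read and remove every rollout currently on the list
    count = r.llen(ROLLOUT_KEY)
    if count == 0:
        return []
    raw = r.lrange(ROLLOUT_KEY, 0, count - 1)
    r.ltrim(ROLLOUT_KEY, count, -1)  # keep only items pushed after our read
    return [pickle.loads(item) for item in raw]
```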

#### Pros

+
    +
  • lower latency than disk since it is all in memory
  • +
  • clear path toward scaling to large number of workers
  • +
  • no concern about reading partially written rollouts
  • +
  • no synchronization or additional threads necessary, though an additional thread would be helpful for concurrent reads from redis and training
  • +
  • will be slightly more efficient in the case of strictly on policy algorithms
  • +
+

#### Cons

+
    +
  • more complex to set up, especially if you are concerned about rollout memory durability
  • +
+

### Option 1b - master subscribes to (redis) pub sub

+
    +
  • instead of using a file system as in Option 1, redis pub sub can be used
  • +
  • policy is stored as a single key/value pair (locking no longer necessary)
  • +
  • rollout memory communication: +- workers: redis publish +- master: redis subscribe
  • +
  • no synchronization necessary, though an additional thread may be required +- it looks like the python client might handle this already; would need further investigation
  • +
  • note: many possible pub sub systems could be used with different characteristics under specific contexts: kafka, google pub/sub, aws kinesis, etc
  • +
+

#### Pros

+
    +
  • lower latency than disk since it is all in memory
  • +
  • clear path toward scaling to large number of workers
  • +
  • no concern about reading partially written rollouts
  • +
  • will be slightly more efficient in the case of strictly on policy algorithms
  • +
+

#### Cons

+
    +
  • more complex to set up than a shared file system
  • +
  • on its own, does not persist worker rollouts for future off policy training
  • +
+

### Option 1c - distributed rollout memory sampling

+
    +
  • if rollout memories do not fit in the memory of a single machine, a distributed storage and sampling method would be necessary
  • +
  • for example: +- rollout memory store: redis set add +- rollout memory sample: redis set randmember
  • +
+

#### Pros

+
    +
  • capable of taking advantage of rollout memory larger than the available memory of a single machine
  • +
  • reduce resource constraints on training machine
  • +
+

#### Cons

+
    +
  • distributed versions of each memory type/sampling method need to be custom built
  • +
  • off-the-shelf implementations may not be available for complex memory types/sampling methods
  • +
+

### Option 2 - master listens to workers

+
    +
  • rollout memories: +- workers send memories directly to master via: mpi, 0mq, etc +- master policy thread listens for new memories and stores them in shared memory
  • +
  • policy sync communication method: +- master process occasionally sends policies directly to workers via: mpi, 0mq, etc +- master and workers must synchronize so that all workers are listening when the master is ready to send a new policy
  • +
+

#### Pros

+
    +
  • lower latency than option 1 (for a small number of workers)
  • +
  • will potentially be the optimal choice in the case of strictly on policy algorithms with a relatively small number of worker nodes (small enough that more complex communication topologies, such as rings or p2p, would not yet be necessary)
  • +
+

#### Cons

+
    +
  • much less robust and more difficult to debug, requiring lots of synchronization
  • +
  • much more difficult to be resilient to worker failure
  • +
  • more custom communication/synchronization code
  • +
  • as the number of workers scales up, a larger and larger fraction of time will be spent waiting and synchronizing
  • +
+

### Option 3 - Ray

+

#### Pros

+
    +
  • Ray would allow us to easily convert our current algorithms to distributed versions, with minimal change to our code.
  • +
+

#### Cons

+
    +
  • performance from naïve/simple use would be very similar to Option 2
  • +
  • nontrivial to replace with a higher performance system if desired. Additional performance will require significant code changes.
  • +
+

## On Policy Algorithms

+

TODO

+ + +
+ +
+ + +
+
+ +
+ +
+ + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/docs/design/network.html b/docs/design/network.html new file mode 100644 index 0000000..2bc43f9 --- /dev/null +++ b/docs/design/network.html @@ -0,0 +1,290 @@ + + + + + + + + + + + Network Design — Reinforcement Learning Coach 0.11.0 documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + +
+ + + + + +
+ +
+ + + + + + + + + + + + + + + + + +
+ + + + +
+
+
+
+ +
+

Network Design

+

Each agent has at least one neural network, used as the function approximator for choosing actions. +The network is designed in a modular way to allow reusability in different agents. +It is separated into three main parts (a small sketch follows the list below):

+
    +
  • Input Embedders - This is the first stage of the network, meant to convert the input into a feature vector representation. +It is possible to combine several instances of any of the supported embedders, in order to allow varied combinations of inputs.

    +
    +

    There are two main types of input embedders:

    +
      +
    1. Image embedder - Convolutional neural network.
    2. +
    3. Vector embedder - Multi-layer perceptron.
    4. +
    +
    +
  • +
  • Middlewares - The middleware gets the output of the input embedder, and processes it into a different representation domain, +before sending it through the output head. The goal of the middleware is to enable processing the combined outputs of +several input embedders, and passing them through some extra processing. +This might include, for instance, an LSTM or just a plain FC layer.

    +
  • +
  • Output Heads - The output head is used in order to predict the values required from the network. +These might include action-values, state-values or a policy. As with the input embedders, +it is possible to use several output heads in the same network. For example, the Actor Critic agent combines two +heads - a policy head and a state-value head. +In addition, each output head defines the loss function according to its head type.

    +

    +
  • +
+../_images/network.png +
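As a rough picture of how the three parts compose, here is a hedged Keras sketch of an embedder-middleware-heads stack for a two-headed actor-critic network. It illustrates the structure only and is not the Coach network implementation; all layer sizes are arbitrary.

```python
import tensorflow as tf
from tensorflow.keras import layers

# input embedders: one per input type, each ending in a flat feature vector
image_in = tf.keras.Input(shape=(84, 84, 4))
image_emb = layers.Flatten()(layers.Conv2D(32, 8, strides=4, activation="relu")(image_in))

vector_in = tf.keras.Input(shape=(10,))
vector_emb = layers.Dense(64, activation="relu")(vector_in)

# middleware: merge the embedders and process the combined representation
middleware = layers.Dense(256, activation="relu")(layers.Concatenate()([image_emb, vector_emb]))

# output heads: e.g. a policy head and a state-value head for an actor-critic agent
policy_head = layers.Dense(4, activation="softmax", name="policy")(middleware)
value_head = layers.Dense(1, name="state_value")(middleware)

model = tf.keras.Model(inputs=[image_in, vector_in], outputs=[policy_head, value_head])
```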
+

Keeping Network Copies in Sync

+

Most of the reinforcement learning agents include more than one copy of the neural network. +These copies serve as counterparts of the main network which are updated at different rates, +and are often synchronized either locally or between parallel workers. For easier synchronization of those copies, +a wrapper around them exposes a simplified API, which hides these complexities from the agent. +In this wrapper, 3 types of networks can be defined (a small update sketch follows the list below):

+
    +
  • online network - A mandatory network which is the main network the agent will use
  • +
  • global network - An optional network which is shared between workers in single-node multi-process distributed learning. +It is updated by all the workers directly, and holds the most up-to-date weights.
  • +
  • target network - An optional network which is local for each worker. It can be used in order to keep a copy of +the weights stable for a long period of time. This is used in different agents, like DQN for example, in order to +have stable targets for the online network while training it.
  • +
+../_images/distributed.png +
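To give a feel for what keeping the copies in sync means in practice, here is a minimal sketch of hard and soft target-network updates over plain lists of weight arrays; it assumes the weights are exposed as numpy arrays and is not the actual Coach wrapper.

```python
import numpy as np

def hard_update(online_weights):
    # copy the online weights into the target network (e.g. every N training steps, as in DQN)
    return [np.copy(w) for w in online_weights]

def soft_update(target_weights, online_weights, tau=0.001):
    # move the target weights a small step towards the online weights (DDPG-style)
    return [(1.0 - tau) * t + tau * o for t, o in zip(target_weights, online_weights)]
```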
+
+ + +
+ +
+ + +
+
+ +
+ +
+ + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/docs/design/network/index.html b/docs/design/network/index.html deleted file mode 100644 index daf030c..0000000 --- a/docs/design/network/index.html +++ /dev/null @@ -1,310 +0,0 @@ - - - - - - - - - - - Network - Reinforcement Learning Coach - - - - - - - - - - - - - - - - - -
- - - - -
- - - - - -
-
-
-
    -
  • Docs »
  • - - - -
  • Design »
  • - - - -
  • Network
  • -
  • - -
  • -
-
-
-
-
- -

Network Design

-

Each agent has at least one neural network, used as the function approximator, for choosing the actions. The network is designed in a modular way to allow reusability in different agents. It is separated into three main parts:

-
    -
  • -

    Input Embedders - This is the first stage of the network, meant to convert the input into a feature vector representation. It is possible to combine several instances of any of the supported embedders, in order to allow varied combinations of inputs.

    -

    There are two main types of input embedders:

    -
      -
    1. Image embedder - Convolutional neural network.
    2. -
    3. Vector embedder - Multi-layer perceptron.
    4. -
    -
  • -
  • -

    Middlewares - The middleware gets the output of the input embedder, and processes it into a different representation domain, before sending it through the output head. The goal of the middleware is to enable processing the combined outputs of several input embedders, and pass them through some extra processing. This, for instance, might include an LSTM or just a plain simple FC layer.

    -
  • -
  • -

    Output Heads - The output head is used in order to predict the values required from the network. These might include action-values, state-values or a policy. As with the input embedders, it is possible to use several output heads in the same network. For example, the Actor Critic agent combines two heads - a policy head and a state-value head. - In addition, the output heads defines the loss function according to the head type.

    -
  • -
-

-

- -Network Design - -

- -

Keeping Network Copies in Sync

-

Most of the reinforcement learning agents include more than one copy of the neural network. These copies serve as counterparts of the main network which are updated in different rates, and are often synchronized either locally or between parallel workers. For easier synchronization of those copies, a wrapper around these copies exposes a simplified API, which allows hiding these complexities from the agent.

-

- -Distributed Training - -

- -
-
- - -
-
- -
- -
- -
- - - - « Previous - - - Next » - - -
- - - - - - - - diff --git a/docs/diagrams.xml b/docs/diagrams.xml deleted file mode 100644 index 10c4dc6..0000000 --- a/docs/diagrams.xml +++ /dev/null @@ -1 +0,0 @@ -7V1td5tIsv41/hgfumnePjpOnNlzkkwyOXf37kcsYZsdSegi7CT76y8gGkFXG5DohpZcnjkzEhJI4qmqrnqquurKvl3/+pSG26cvyTJaXVFr+evK/nBFKSEBzf9XHPm9P+KR6sBjGi+rNx0O/Ij/G1UHreroc7yMdq03ZkmyyuJt++Ai2WyiRdY6FqZp8rP9todk1f7UbfgYgQM/FuEKHv1XvMye9kd9xzoc/yOKH5/4JxOreuU+XPz9mCbPm+rzrqj9UP7tX16H/FrV+3dP4TL52Thkf7yyb9MkyfaP1r9uo1Vxb/lt259398qr9fdOo0026ATb3p/yEq6eI/6dy2+W/eZ3o/w9UXEGubLf/3yKs+jHNlwUr/7M8c+PPWXrVfXy4yrcFXffyh8vknW8qB7vsjT5O7pNVklaXtV2F350/1C/wu+znR95iFerxjuXYeQ/LIrjySarhIVa1fPG+6zyLz8eruLHTX5sFT1kxdN0UZ3l5s/gLaru2kuUZtGvxqHqln2KknWUpb/zt/BXmV/h95vLbeDvD/w8iAvjQvHUEBXbre5vWInoY331A0z5gwopOWqurxG0Zbh7qs/rRPCjW/wzBMFKB/IPSsNlHB1Q2ySbSAWwnhpgc3xauFLLtSCuTIKry23DGFwDC3HNFCHptTW0sNEASVuGJGEqkCQAyTBfp6axrIH3wfI8xfjlK/jr+KkALGDXjgCZG1z79uHPAQDWGtMEUAl+tF8To83ypvA18meLApoCkCZe9dpe3KFVeB+t3tfeQeMmvi//kaM18Pbndz39/b/FJ+W3sHr67+qDX4UmC9PHKGuLa7Rs+UUQrMa9dyS3nh9Lo1WYxS9tb0qGR/UJ35I4/3a1LNQWlktC7Rjya+yS53QRVac1/RtwJUeUKtsSrrW/EeBaObbh78bbtsUbdsd8advq/G5O4I08gTjdJ4CvJJyQP9j/yoNO1FgPUhNvgJrggjVwwRJcD3vogkVFgT7F4BGdLuX04HWay5ZgKQ8OaFvn6qCxAaPryiKDQMW6hShqQZEEA1Fk4kp1kjK6bx5GRQGdLa6AEm20bZlRVeFFwtXxeRflBzZR9jNJ/y5ZreIXptEyLqIDK48R4mSTP1jGOQDx/fP+qalRw4Thge22gZStjtqiAciR/U+JY/ZU/Df6tc1/clght01W8eI3h3YXrrer4k0PabKuz6gAzyFEpPuR9iZEmgGkF2kUZgVo4aapoPHmocA3uf9PtNfcTQHmXr3jrIY/S9KDoNQn798cbsrwYrmMi8Ph6nDV8D55zsBpKBtANpg9oWzARbnW7rCIeXNUSx2vYX7exA9Jul79FvW/fseuBIzeFsdWyeax/CX5D8lfyb9fYR2ih/B51VoaKhkpv8YOpaIB7hwGA4ZNB4NR4J1LxS7m8rCLS4j3QpDf+4ybiGX5gNuZZf2eTfSzWGA2L4V8RbttsikMjKmQG0AiWm2yJ2ATigL3SHo43/ynZAJR2Lr3/K41bnJ1iJMOi/wGRfnx98WNiRfh6qZ6YR0vl8XHSIXgICbWUKJlKBtSvq/6hZIU5FiFphJfT06EKADxbRG/1ZpmCO9rO23giSsgOpj2tYQLiVG5Is5X/MJ9DC7xARktCOc4PpYMyOZfkPTSy5ReEghSIjKDM0mvY2mWXhh9pc9lTP0UloFVeazwqeJN6Um5uf+dy8rmfretRebsV9pOlVG90tpimtzywFJLpPyYirXWGYT3U+5PP28RbQXuMRmANtWFtgfQXjwlya6D4hgPaZWfu1RHWeRDqCT0rX0f5YDCZFNyv4vSFxiinqduzhnz+BIkfU1A8mucidf4K84aTmP+7N/VdzjJn9w7ZVfNaKjpYvL6WEN8TDGAYJ5zmo8p+n6uKEiKfEzxCzuk28cUv5fw/tE+Zq2Yb13WyRkIuxAHMZGlO1XYPVuPsIsB0uzCDsksIY1wEev0tBGTEwgVha6EnuRmWflS/bYIHl7ya4hBEvXVP7Us1Rb2jwSja1JbAnSklYC0S0/GCc3EADMhiAqFRkKWiFJiJCCv0i5E2KcVecIx3tQvLJ7T8jcfXnwJ0zi8X2HW8YisI7EsCLa2tCOFvEq4XNaQ5gYijTfFIlAo9M8wLV9K6tebGWqEeChPetjx2YRYlpRUAvGA4uwzX/WboQm3X61Uj2e0J0Cs02MTdu02/jzW9gwmylwSy+/ZSxJ0nzA6WLEvJDI/SsSNFWfvZHEWrjRV8rJXgMXkx/EngM30vb+dKlYRGP3lS2bhSIPKQrE+tV17ljxn27LClJ9u6so/oxfnTFk7ZsMY7HKNn2E0o0OEFbgtBqosoTuadBwR49swJBQshxjyCxYj3pyHwZg/VBDtiO9PaUckSfZGTB9tr6p4P9rGu2RZFlnwworH/U5WHtgg33Mc3yNyAI4LYCe6aGEbBohtxqdVh94gfNbROqk+CtV5YOQvaQmjLfK3B2zovRy/wDfKL2Ai8vTUgs7JgiIQpM8esnAlaAjwstThO1xlFKwysv3MnkQlVCwybLZWccbsStfUXEC2jVHeXUDJHlcYawqxwMFb2IFAYJu2d7BjQHB0esixZ+trxWAcWJbTX4gxnrNikzDWB6uuAk42oF+IQV7i2KoQhxrlJwIfillSQRhbJEJY0HndKfkkJtn3nGwK3XllEck/s+xscFX0yM0WxRZ3zk3jyjF85ZBLwBQrB1c6WfuDVVxuZU8eAOyttgdINhwNOPPmAxxSiRNBZ4y7r6gJlVgDT1wJhSTtQqWiJZwDKaRVFKabg3ZWBlm5B3jhm69qpryGVcL0E6qJ8nUhsfK8Xe6L9ipnqRGmockdbHK9+boOu2dVAvPqzW9tqapuiym+OgvaaJ9ewi1eKRAlYEp/nE8VOA/J0bmFrwLB6HgxNzCu6xPmOpbti86BT5REj2ILlMMoA92FWD7rzjk47tgTWE/faxiRK+577UIabZUkRXL9ISlo1M3z+qrZqKJMvRfxUBavJU3bzpNsmzTzAWTAlXDmujZUuLJQyF0VmC3jlxaW7v89F7N73heQvqvQuSlC5MIXrl/NH1XVFvur7LZFd8n9sVWSo1Adz79Y86XG4fJz+VEUp2PFSWycQHxZg2dN4uRB1x2Hg3TCRQMPjHGYz033oJsuT6Iggl0IMjofgjpT2W98boUjWZj1DVryYDIbbemxmuj482ki9KSj4nFJZF2EXzMtJykBt7faQFda2oNp6Wqd5B3QsT3uaSALob0t2VyurWuf5wHMCmKF/8AKjLbBbd61BrIHHshrEkFCkYAHmKDjmaowzTj3VX3B8thdXPzM6gMl7Fh+sHpL8XX/E2XZ7wrw8DlL8kNJmj0lj0X3/s9F/N8U2FeIsU5JeVU4mhQXX3CbFJc3dEvNYO5
+[non-human-readable encoded payload omitted]
\ No newline at end of file
diff --git a/docs/extra.css b/docs/extra.css
deleted file mode 100644
index df27985..0000000
--- a/docs/extra.css
+++ /dev/null
@@ -1,8 +0,0 @@
-.wy-side-nav-search {
-    background-color: #79a7a5;
-}
-
-.wy-nav-top {
-    background: #79a7a5;
-}
-
diff --git a/docs/features/algorithms.html b/docs/features/algorithms.html
new file mode 100644
index 0000000..b9cfd48
--- /dev/null
+++ b/docs/features/algorithms.html
@@ -0,0 +1,253 @@
+[Sphinx-generated HTML page; markup stripped during extraction, recoverable page text follows]
+Algorithms — Reinforcement Learning Coach 0.11.0 documentation
+
+Algorithms
+
+Coach supports many state-of-the-art reinforcement learning algorithms, which are separated into three main
+classes - value optimization, policy optimization and imitation learning.
+A detailed description of those algorithms may be found in the agents section.
+
+[image: ../_images/algorithms.png]
\ No newline at end of file
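Editorial note (not part of the diff above): the page reduces to a one-paragraph overview, so a minimal sketch of how one of these algorithms might actually be launched is shown here. It assumes the `coach` command-line entry point described in the project's README (installed via `pip install rl_coach`) and a preset named `CartPole_DQN`; treat the preset name and flags as assumptions to verify against your installed version.

    # Illustrative sketch only: run a single value-optimization preset through
    # Coach's CLI. Assumes rl_coach is installed and CartPole_DQN exists.
    import subprocess

    # `coach -p <preset>` trains the preset's agent on its environment; `-r` renders it.
    subprocess.run(["coach", "-p", "CartPole_DQN", "-r"], check=True)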
diff --git a/docs/features/benchmarks.html b/docs/features/benchmarks.html
new file mode 100644
index 0000000..d43e339
--- /dev/null
+++ b/docs/features/benchmarks.html
@@ -0,0 +1,266 @@
+[Sphinx-generated HTML page; markup stripped during extraction, recoverable page text follows]
+Benchmarks — Reinforcement Learning Coach 0.11.0 documentation
+
+Benchmarks
+
+Reinforcement learning is a developing field, and so far it has been particularly difficult to reproduce some of
+the results published in the original papers. Some reasons for this are:
+
+  • Reinforcement learning algorithms are notorious for having an unstable learning process. The data the neural
+    network trains on is dynamic, and it depends on the random seed defined for the environment (a short seed
+    sketch follows this page).
+  • Reinforcement learning algorithms have many moving parts. For some environments and agents, many “tricks”
+    are needed to reproduce the exact behavior the paper authors saw, and there are a lot of hyper-parameters
+    to set.
+
+For a reinforcement learning implementation to be useful for research or for data science, it must be shown to
+achieve the expected behavior. For this reason, we collected a set of benchmark results for most of the
+algorithms implemented in Coach. The algorithms were tested on a subset of the environments used in the original
+papers, with multiple seeds for each environment. Additionally, Coach uses strict testing mechanisms to help
+ensure that the results shown for these benchmarks stay intact as Coach continues to develop.
+
+To see the benchmark results, please visit the benchmarks page on GitHub.
\ No newline at end of file
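Editorial illustration (not part of the diff): the seed-dependence point above can be made concrete with a few lines against the classic OpenAI Gym API, which was current around Coach 0.11.0. Newer Gym/Gymnasium releases moved seeding into reset(seed=...), so adjust accordingly.

    # Illustrative sketch only: the data stream an agent trains on starts from a
    # seed-dependent initial state, one source of run-to-run variance in RL results.
    import gym

    env = gym.make("CartPole-v0")
    for seed in (0, 1, 2):
        env.seed(seed)                    # classic Gym API (pre-Gymnasium)
        first_observation = env.reset()
        # Different seeds yield different initial states, so trajectories - and
        # therefore the network's training data - differ between runs.
        print(seed, first_observation)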
diff --git a/docs/features/environments.html b/docs/features/environments.html
new file mode 100644
index 0000000..132f718
--- /dev/null
+++ b/docs/features/environments.html
@@ -0,0 +1,277 @@
+[Sphinx-generated HTML page; markup stripped during extraction, recoverable page text follows]
+Environments — Reinforcement Learning Coach 0.11.0 documentation
+
+Environments
+
+Coach supports a large number of environments which can be solved using reinforcement learning.
+For detailed documentation of the environments API, see the environments section.
+The supported environments are listed below; a short usage sketch follows the list.
+
+  • DeepMind Control Suite - a set of reinforcement learning environments powered by the MuJoCo physics engine.
+  • Blizzard Starcraft II - a popular strategy game, wrapped with a Python interface by DeepMind.
+  • ViZDoom - a Doom-based AI research platform for reinforcement learning from raw visual information.
+  • CARLA - an open-source simulator for autonomous driving research.
+  • OpenAI Gym - a library consisting of a set of environments, from games to robotics, which can also be
+    extended using the API defined by its authors. In Coach, we support all the native environments in Gym,
+    along with several extensions such as:
+      • Roboschool - a set of environments powered by the PyBullet engine that offers a free alternative to MuJoCo.
+      • Gym Extensions - a set of environments that extends Gym for auxiliary tasks (multitask learning,
+        transfer learning, inverse reinforcement learning, etc.).
+      • PyBullet - a physics engine that includes a set of robotics environments.
\ No newline at end of file
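Editorial illustration (not part of the diff): in Coach, the environment is chosen through the preset, and for families such as Atari a specific level can be selected on the command line. The sketch below shells out to the `coach` CLI; the `-lvl` flag and the preset/level names follow the project's README-style usage and should be treated as assumptions to verify against your installed version and environment backends.

    # Illustrative sketch only: pick an environment family via the preset and a
    # specific level with -lvl. Assumes rl_coach plus the relevant backends
    # (Gym, Atari, ViZDoom) are installed; names may differ across Coach versions.
    import subprocess

    runs = [
        ["coach", "-p", "CartPole_DQN"],                     # native Gym environment
        ["coach", "-p", "Atari_DQN", "-lvl", "breakout"],    # Gym Atari, level via -lvl
        ["coach", "-p", "Doom_Basic_DQN"],                   # ViZDoom
    ]

    for cmd in runs:
        print("Launching:", " ".join(cmd))
        subprocess.run(cmd, check=True)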
diff --git a/docs/features/index.html b/docs/features/index.html
new file mode 100644
index 0000000..a29a7e0
--- /dev/null
+++ b/docs/features/index.html
@@ -0,0 +1,255 @@
+[Sphinx-generated HTML page; markup stripped during extraction, recoverable page text follows]
+Features — Reinforcement Learning Coach 0.11.0 documentation
+
+Features
+
+Features (table of contents; links stripped during extraction)
\ No newline at end of file
diff --git a/docs/fonts/fontawesome-webfont.eot b/docs/fonts/fontawesome-webfont.eot
deleted file mode 100644
index 0662cb9..0000000
Binary files a/docs/fonts/fontawesome-webfont.eot and /dev/null differ
diff --git a/docs/fonts/fontawesome-webfont.svg b/docs/fonts/fontawesome-webfont.svg
deleted file mode 100644
index 2edb4ec..0000000
--- a/docs/fonts/fontawesome-webfont.svg
+++ /dev/null
@@ -1,399 +0,0 @@
-[399 lines of SVG font markup removed; glyph data omitted]
\ No newline at end of file
diff --git a/docs/fonts/fontawesome-webfont.ttf b/docs/fonts/fontawesome-webfont.ttf
deleted file mode 100644
index d365924..0000000
Binary files a/docs/fonts/fontawesome-webfont.ttf and /dev/null differ
diff --git a/docs/fonts/fontawesome-webfont.woff b/docs/fonts/fontawesome-webfont.woff
deleted file mode 100644
index b9bd17e..0000000
Binary files a/docs/fonts/fontawesome-webfont.woff and /dev/null differ
diff --git a/docs/genindex.html b/docs/genindex.html
new file mode 100644
index 0000000..322d454
--- /dev/null
+++ b/docs/genindex.html
@@ -0,0 +1,937 @@
+[Sphinx-generated general index; markup stripped during extraction, recoverable text follows]
+Index — Reinforcement Learning Coach 0.11.0 documentation
+
+Index
+
+A | B | C | D | E | F | G | H | I | L | M | N | O | P | Q | R | S | T | U | V
+[per-letter index entries stripped during extraction]
\ No newline at end of file
diff --git a/docs/img/algorithms.png b/docs/img/algorithms.png
deleted file mode 100644
index f83c1e6..0000000
Binary files a/docs/img/algorithms.png and /dev/null differ
diff --git a/docs/img/design.png b/docs/img/design.png
deleted file mode 100644
index c48f6ff..0000000
Binary files a/docs/img/design.png and /dev/null differ
diff --git a/docs/img/favicon.ico b/docs/img/favicon.ico
deleted file mode 100644
index e85006a..0000000
Binary files a/docs/img/favicon.ico and /dev/null differ
diff --git a/docs/index.html b/docs/index.html
index 7856d9a..c455a41 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -1,249 +1,250 @@
-Home - Reinforcement Learning Coach
+Reinforcement Learning Coach — Reinforcement Learning Coach 0.11.0 documentation