From a7206ed7029a06954a490de603fd26dffe9f9270 Mon Sep 17 00:00:00 2001
From: Itai Caspi <30383381+itaicaspi-intel@users.noreply.github.com>
Date: Mon, 26 Feb 2018 12:29:07 +0200
Subject: [PATCH] Multiple improvements and bug fixes (#66)

* Multiple improvements and bug fixes:
* Use lazy stacking to save memory when using a replay buffer
* Remove step counting for evaluation episodes
* Reset the game between heatup and training
* Fix major bugs in NEC (it now reproduces the paper results for Pong)
* Make image input rescaling to 0-1 optional
* Change the terminal title to the experiment name
* Make observation cropping for Atari optional
* Add a random number of no-op actions for Gym to match the DQN paper
* Fix a bug where evaluation episodes would not start with the maximum possible ALE lives
* Add a script for plotting the results of an experiment over all the Atari games
---
 .gitignore                                    |   5 +
 agents/agent.py                               |  61 ++++----
 agents/nec_agent.py                           | 103 +++++--------
 agents/value_optimization_agent.py            |   2 +-
 architectures/network_wrapper.py              |   5 +-
 .../tensorflow_components/general_network.py  |   5 +-
 architectures/tensorflow_components/heads.py  |  36 +++--
 benchmarks/README.md                          |   8 +
 benchmarks/img/Pong_NEC.png                   | Bin 0 -> 21568 bytes
 coach.py                                      |  12 +-
 configurations.py                             |  14 +-
 environments/gym_environment_wrapper.py       |  20 ++-
 logger.py                                     |  21 ++-
 memories/differentiable_neural_dictionary.py  |  22 ++-
 memories/episodic_experience_replay.py        |  15 +-
 memories/memory.py                            |  21 ++-
 plot_atari.py                                 | 105 +++++++++++++
 presets.py                                    | 139 ++++++++++++++++--
 run_test.py                                   |  11 ++
 utils.py                                      |  18 +++
 20 files changed, 465 insertions(+), 158 deletions(-)
 create mode 100644 benchmarks/img/Pong_NEC.png
 create mode 100644 plot_atari.py

diff --git a/.gitignore b/.gitignore
index 6fc79b4..bad8e2a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -15,3 +15,8 @@ roboschool
 *.orig
 docs/site
 coach_env
+build
+rl_coach.egg*
+contrib
+test_log_*
+dist
diff --git a/agents/agent.py b/agents/agent.py
index b88c662..006544e 100644
--- a/agents/agent.py
+++ b/agents/agent.py
@@ -24,6 +24,8 @@ except:
 import copy
 from renderer import Renderer
 from configurations import Preset
+from collections import deque
+from utils import LazyStack
 from collections import OrderedDict
 from utils import RunPhase, Signal, is_empty, RunningStat
 from architectures import *
@@ -214,6 +216,8 @@ class Agent(object):
             network.online_network.curr_rnn_c_in = network.online_network.middleware_embedder.c_init
             network.online_network.curr_rnn_h_in = network.online_network.middleware_embedder.h_init
 
+        self.prepare_initial_state()
+
     def preprocess_observation(self, observation):
         """
         Preprocesses the given observation.
@@ -291,9 +295,8 @@ class Agent(object): """ current_states = {} next_states = {} - - current_states['observation'] = np.array([transition.state['observation'] for transition in batch]) - next_states['observation'] = np.array([transition.next_state['observation'] for transition in batch]) + current_states['observation'] = np.array([np.array(transition.state['observation']) for transition in batch]) + next_states['observation'] = np.array([np.array(transition.next_state['observation']) for transition in batch]) actions = np.array([transition.action for transition in batch]) rewards = np.array([transition.reward for transition in batch]) game_overs = np.array([transition.game_over for transition in batch]) @@ -348,6 +351,23 @@ class Agent(object): for input_name in self.tp.agent.input_types.keys(): input_state[input_name] = np.expand_dims(np.array(curr_state[input_name]), 0) return input_state + + def prepare_initial_state(self): + """ + Create an initial state when starting a new episode + :return: None + """ + observation = self.preprocess_observation(self.env.state['observation']) + self.curr_stack = deque([observation]*self.tp.env.observation_stack_size, maxlen=self.tp.env.observation_stack_size) + observation = LazyStack(self.curr_stack, -1) + + self.curr_state = { + 'observation': observation + } + if self.tp.agent.use_measurements: + self.curr_state['measurements'] = self.env.measurements + if self.tp.agent.use_accumulated_reward_as_measurement: + self.curr_state['measurements'] = np.append(self.curr_state['measurements'], 0) def act(self, phase=RunPhase.TRAIN): """ @@ -356,34 +376,12 @@ class Agent(object): :return: A boolean value that signals an episode termination """ - self.total_steps_counter += 1 + if phase != RunPhase.TEST: + self.total_steps_counter += 1 self.current_episode_steps_counter += 1 # get new action - action_info = {"action_probability": 1.0 / self.env.action_space_size, "action_value": 0} - is_first_transition_in_episode = (self.curr_state == {}) - if is_first_transition_in_episode: - if not isinstance(self.env.state, dict): - raise ValueError(( - 'expected state to be a dictionary, found {}' - ).format(type(self.env.state))) - - state = self.env.state - # TODO: modify preprocess_observation to modify the entire state - # for now, only preprocess the observation - state['observation'] = self.preprocess_observation(state['observation']) - - # TODO: provide option to stack more than just the observation - # TODO: this should probably be happening in an environment wrapper anyway - state['observation'] = stack_observation([], state['observation'], self.tp.env.observation_stack_size) - - self.curr_state = state - if self.tp.agent.use_measurements: - # TODO: this should be handled in the environment - self.curr_state['measurements'] = self.env.measurements - - if self.tp.agent.use_accumulated_reward_as_measurement: - self.curr_state['measurements'] = np.append(self.curr_state['measurements'], 0) + action_info = {"action_probability": 1.0 / self.env.action_space_size, "action_value": 0, "max_action_value": 0} if phase == RunPhase.HEATUP and not self.tp.heatup_using_network_decisions: action = self.env.get_random_action() @@ -409,8 +407,10 @@ class Agent(object): # initialize the next state # TODO: provide option to stack more than just the observation - next_state['observation'] = stack_observation(self.curr_state['observation'], next_state['observation'], self.tp.env.observation_stack_size) + self.curr_stack.append(next_state['observation']) + observation = 
LazyStack(self.curr_stack, -1) + next_state['observation'] = observation if self.tp.agent.use_measurements and 'measurements' in result.keys(): next_state['measurements'] = result['state']['measurements'] if self.tp.agent.use_accumulated_reward_as_measurement: @@ -516,6 +516,7 @@ class Agent(object): self.exploration_policy.change_phase(RunPhase.TRAIN) training_start_time = time.time() model_snapshots_periods_passed = -1 + self.reset_game() while self.training_iteration < self.tp.num_training_iterations: # evaluate @@ -526,7 +527,7 @@ class Agent(object): self.training_iteration % self.tp.evaluate_every_x_training_iterations == 0) if evaluate_agent: - self.env.reset() + self.env.reset(force_environment_reset=True) self.last_episode_evaluation_ran = self.current_episode self.evaluate(self.tp.evaluation_episodes) diff --git a/agents/nec_agent.py b/agents/nec_agent.py index c123458..77520a4 100644 --- a/agents/nec_agent.py +++ b/agents/nec_agent.py @@ -27,10 +27,7 @@ class NECAgent(ValueOptimizationAgent): ValueOptimizationAgent.__init__(self, env, tuning_parameters, replicated_device, thread_id, create_target_network=False) self.current_episode_state_embeddings = [] - self.current_episode_actions = [] self.training_started = False - # if self.tp.checkpoint_restore_dir: - # self.load_dnd(self.tp.checkpoint_restore_dir) def learn_from_batch(self, batch): if not self.main_network.online_network.output_heads[0].DND.has_enough_entries(self.tp.agent.number_of_knn): @@ -41,83 +38,57 @@ class NECAgent(ValueOptimizationAgent): screen.log_title("Finished collecting initial entries in DND. Starting to train network...") current_states, next_states, actions, rewards, game_overs, total_return = self.extract_batch(batch) - result = self.main_network.train_and_sync_networks(current_states, total_return) + + TD_targets = self.main_network.online_network.predict(current_states) + + # only update the action that we have actually done in this transition + for i in range(self.tp.batch_size): + TD_targets[i, actions[i]] = total_return[i] + + # train the neural network + result = self.main_network.train_and_sync_networks(current_states, TD_targets) + total_loss = result[0] return total_loss - def choose_action(self, curr_state, phase=RunPhase.TRAIN): - """ - this method modifies the superclass's behavior in only 3 ways: + def act(self, phase=RunPhase.TRAIN): + if self.in_heatup: + # get embedding in heatup (otherwise we get it through choose_action) + embedding = self.main_network.online_network.predict( + self.tf_input_state(self.curr_state), + outputs=self.main_network.online_network.state_embedding) + self.current_episode_state_embeddings.append(embedding) - 1) the embedding is saved and stored in self.current_episode_state_embeddings - 2) the dnd output head is only called if it has a minimum number of entries in it - ideally, the dnd had would do this on its own, but in my attempt in encoding this - behavior in tensorflow, I ran into problems. 
Would definitely be worth - revisiting in the future - 3) during training, actions are saved and stored in self.current_episode_actions - if behaviors 1 and 2 were handled elsewhere, this could easily be implemented - as a wrapper around super instead of overriding this method entirelysearch - """ + return super().act(phase) - # get embedding - embedding = self.main_network.online_network.predict( + def get_prediction(self, curr_state): + # get the actions q values and the state embedding + embedding, actions_q_values = self.main_network.online_network.predict( self.tf_input_state(curr_state), - outputs=self.main_network.online_network.state_embedding) - self.current_episode_state_embeddings.append(embedding) + outputs=[self.main_network.online_network.state_embedding, + self.main_network.online_network.output_heads[0].output] + ) - # TODO: support additional heads. Right now all other heads are ignored - if self.main_network.online_network.output_heads[0].DND.has_enough_entries(self.tp.agent.number_of_knn): - # if there are enough entries in the DND then we can query it to get the action values - # actions_q_values = [] - feed_dict = { - self.main_network.online_network.state_embedding: [embedding], - } - actions_q_values = self.main_network.sess.run( - self.main_network.online_network.output_heads[0].output, feed_dict=feed_dict) - else: - # get only the embedding so we can insert it to the DND - actions_q_values = [0] * self.action_space_size - - # choose action according to the exploration policy and the current phase (evaluating or training the agent) - if phase == RunPhase.TRAIN: - action = self.exploration_policy.get_action(actions_q_values) - # NOTE: this next line is not in the parent implementation - # NOTE: it could be implemented as a wrapper around the parent since action is returned - self.current_episode_actions.append(action) - else: - action = np.argmax(actions_q_values) - - # store the q values statistics for logging - self.q_values.add_sample(actions_q_values) - - # store information for plotting interactively (actual plotting is done in agent) - if self.tp.visualization.plot_action_values_online: - for idx, action_name in enumerate(self.env.actions_description): - self.episode_running_info[action_name].append(actions_q_values[idx]) - - action_value = {"action_value": actions_q_values[action]} - return action, action_value + # store the state embedding for inserting it to the DND later + self.current_episode_state_embeddings.append(embedding.squeeze()) + actions_q_values = actions_q_values[0][0] + return actions_q_values def reset_game(self, do_not_reset_env=False): - ValueOptimizationAgent.reset_game(self, do_not_reset_env) + super().reset_game(do_not_reset_env) - # make sure we already have at least one episode - if self.memory.num_complete_episodes() >= 1 and not self.in_heatup: - # get the last full episode that we have collected - episode = self.memory.get(-2) - returns = [] - for i in range(episode.length()): - returns.append(episode.get_transition(i).total_return) - # Just to deal with the end of heatup where there might be a case where it ends in a middle - # of an episode, and thus when getting the episode out of the ER, it will be a complete one whereas - # the other statistics collected here, are collected only during training. 
- returns = returns[-len(self.current_episode_actions):] + # get the last full episode that we have collected + episode = self.memory.get_last_complete_episode() + if episode is not None: + # the indexing is only necessary because the heatup can end in the middle of an episode + # this won't be required after fixing this so that when the heatup is ended, the episode is closed + returns = episode.get_transitions_attribute('total_return')[:len(self.current_episode_state_embeddings)] + actions = episode.get_transitions_attribute('action')[:len(self.current_episode_state_embeddings)] self.main_network.online_network.output_heads[0].DND.add(self.current_episode_state_embeddings, - self.current_episode_actions, returns) + actions, returns) self.current_episode_state_embeddings = [] - self.current_episode_actions = [] def save_model(self, model_id): self.main_network.save_model(model_id) diff --git a/agents/value_optimization_agent.py b/agents/value_optimization_agent.py index 1ad300d..75708d7 100644 --- a/agents/value_optimization_agent.py +++ b/agents/value_optimization_agent.py @@ -73,5 +73,5 @@ class ValueOptimizationAgent(Agent): for idx, action_name in enumerate(self.env.actions_description): self.episode_running_info[action_name].append(actions_q_values[idx]) - action_value = {"action_value": actions_q_values[action]} + action_value = {"action_value": actions_q_values[action], "max_action_value": np.max(actions_q_values)} return action, action_value diff --git a/architectures/network_wrapper.py b/architectures/network_wrapper.py index 0867d8db..ef026e6 100644 --- a/architectures/network_wrapper.py +++ b/architectures/network_wrapper.py @@ -125,14 +125,15 @@ class NetworkWrapper(object): """ self.online_network.apply_gradients(self.online_network.accumulated_gradients) - def train_and_sync_networks(self, inputs, targets): + def train_and_sync_networks(self, inputs, targets, additional_fetches=[]): """ A generic training function that enables multi-threading training using a global network if necessary. :param inputs: The inputs for the network. 
:param targets: The targets corresponding to the given inputs + :param additional_fetches: Any additional tensor the user wants to fetch :return: The loss of the training iteration """ - result = self.online_network.accumulate_gradients(inputs, targets) + result = self.online_network.accumulate_gradients(inputs, targets, additional_fetches=additional_fetches) self.apply_gradients_and_sync_networks() return result diff --git a/architectures/tensorflow_components/general_network.py b/architectures/tensorflow_components/general_network.py index e8befc2..03bb2a9 100644 --- a/architectures/tensorflow_components/general_network.py +++ b/architectures/tensorflow_components/general_network.py @@ -56,7 +56,8 @@ class GeneralTensorFlowNetwork(TensorFlowArchitecture): # the observation can be either an image or a vector def get_observation_embedding(with_timestep=False): if self.input_height > 1: - return ImageEmbedder((self.input_height, self.input_width, self.input_depth), name="observation") + return ImageEmbedder((self.input_height, self.input_width, self.input_depth), name="observation", + input_rescaler=self.tp.agent.input_rescaler) else: return VectorEmbedder((self.input_width + int(with_timestep), self.input_depth), name="observation") @@ -191,7 +192,7 @@ class GeneralTensorFlowNetwork(TensorFlowArchitecture): if tuning_parameters.agent.optimizer_type == 'Adam': self.optimizer = tf.train.AdamOptimizer(learning_rate=tuning_parameters.learning_rate) elif tuning_parameters.agent.optimizer_type == 'RMSProp': - self.optimizer = tf.train.RMSPropOptimizer(self.tp.learning_rate, decay=0.9, epsilon=0.01) + self.optimizer = tf.train.RMSPropOptimizer(tuning_parameters.learning_rate, decay=0.9, epsilon=0.01) elif tuning_parameters.agent.optimizer_type == 'LBFGS': self.optimizer = tf.contrib.opt.ScipyOptimizerInterface(self.total_loss, method='L-BFGS-B', options={'maxiter': 25}) diff --git a/architectures/tensorflow_components/heads.py b/architectures/tensorflow_components/heads.py index 2653d59..616ab13 100644 --- a/architectures/tensorflow_components/heads.py +++ b/architectures/tensorflow_components/heads.py @@ -57,7 +57,8 @@ class Head(object): self.loss = force_list(self.loss) self.regularizations = force_list(self.regularizations) if self.is_local: - self.set_loss() + self.set_loss() + self._post_build() if self.is_local: return self.output, self.target, self.input @@ -76,6 +77,14 @@ class Head(object): """ pass + def _post_build(self): + """ + Optional function that allows adding any extra definitions after the head has been fully defined + For example, this allows doing additional calculations that are based on the loss + :return: None + """ + pass + def get_name(self): """ Get a formatted name for the module @@ -271,6 +280,9 @@ class DNDQHead(Head): else: self.loss_type = tf.losses.mean_squared_error self.tp = tuning_parameters + self.dnd_embeddings = [None]*self.num_actions + self.dnd_values = [None]*self.num_actions + self.dnd_indices = [None]*self.num_actions def _build_module(self, input_layer): # DND based Q head @@ -281,29 +293,29 @@ class DNDQHead(Head): else: self.DND = differentiable_neural_dictionary.QDND( self.DND_size, input_layer.get_shape()[-1], self.num_actions, self.new_value_shift_coefficient, - key_error_threshold=self.DND_key_error_threshold) + key_error_threshold=self.DND_key_error_threshold, learning_rate=self.tp.learning_rate) # Retrieve info from DND dictionary - # self.action = tf.placeholder(tf.int8, [None], name="action") - # self.input = self.action - self.output = [ + 
# We assume that all actions have enough entries in the DND
+        self.output = tf.transpose([
             self._q_value(input_layer, action)
             for action in range(self.num_actions)
-        ]
+        ])
 
     def _q_value(self, input_layer, action):
         result = tf.py_func(self.DND.query,
-                            [input_layer, [action], self.number_of_nn],
-                            [tf.float64, tf.float64])
-        dnd_embeddings = tf.to_float(result[0])
-        dnd_values = tf.to_float(result[1])
+                            [input_layer, action, self.number_of_nn],
+                            [tf.float64, tf.float64, tf.int64])
+        self.dnd_embeddings[action] = tf.to_float(result[0])
+        self.dnd_values[action] = tf.to_float(result[1])
+        self.dnd_indices[action] = result[2]
 
         # DND calculation
-        square_diff = tf.square(dnd_embeddings - tf.expand_dims(input_layer, 1))
+        square_diff = tf.square(self.dnd_embeddings[action] - tf.expand_dims(input_layer, 1))
         distances = tf.reduce_sum(square_diff, axis=2) + [self.l2_norm_added_delta]
         weights = 1.0 / distances
         normalised_weights = weights / tf.reduce_sum(weights, axis=1, keep_dims=True)
-        return tf.reduce_sum(dnd_values * normalised_weights, axis=1)
+        return tf.reduce_sum(self.dnd_values[action] * normalised_weights, axis=1)
 
 
 class NAFHead(Head):
diff --git a/benchmarks/README.md b/benchmarks/README.md
index d979cff..ba237e7 100644
--- a/benchmarks/README.md
+++ b/benchmarks/README.md
@@ -116,6 +116,14 @@ python3 coach.py -p Doom_Health_MMC -r
 
 ## NEC
 
+## Pong_NEC
+
+```bash
+python3 coach.py -p Pong_NEC -r
+```
+
+<img src="img/Pong_NEC.png" alt="Pong_NEC"/>
+
 ## Doom_Basic_NEC
 
 ```bash
diff --git a/benchmarks/img/Pong_NEC.png b/benchmarks/img/Pong_NEC.png
new file mode 100644
index 0000000000000000000000000000000000000000..4148669fb80804a678fb285e9aa7a2fe323c6964
GIT binary patch
literal 21568
[binary PNG data omitted]
diff --git a/coach.py b/coach.py
index 74882bb..8ba8cf3 100644
--- a/coach.py
+++ b/coach.py
@@ -34,11 +34,6 @@ import sys
 import subprocess
 from threading import Thread
 
-try:
-    from Queue import Queue, Empty
-except ImportError:
-    from queue import Queue, Empty  # for Python 3.x
-
 if len(set(failed_imports)) > 0:
     screen.warning("Warning: failed to import the following packages - {}".format(', '.join(set(failed_imports))))
 
@@ -258,7 +253,8 @@ if __name__ == "__main__":
     # dump documentation
     logger.set_dump_dir(run_dict['experiment_path'], add_timestamp=True)
     if not args.no_summary:
-        atexit.register(logger.print_summary)
+        atexit.register(logger.summarize_experiment)
+    screen.change_terminal_title(logger.experiment_name)
 
     # Single-threaded runs
     if run_dict['num_threads'] == 1:
@@ -300,7 +296,7 @@ if __name__ == "__main__":
                "--worker_hosts={}".format(worker_hosts),
                "--job_name=ps",
                ]
-        parameter_server = Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, bufsize=1)
+        parameter_server = Popen(cmd)
 
         screen.log_title("*** Distributed Training ***")
         time.sleep(1)
@@ -325,7 +321,7 @@ if __name__ == "__main__":
                             "--job_name=worker",
                             "--load_json={}".format(json_run_dict_path)]
 
-            p = Popen(workers_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, bufsize=1)
+            p = Popen(workers_args)
 
             if i != run_dict['num_threads']:
                 workers.append(p)
diff --git a/configurations.py b/configurations.py
index ddc5ce6..5e553d8 100644
--- a/configurations.py
+++ b/configurations.py
@@ -115,6 +115,7 @@ class AgentParameters(Parameters):
     replace_mse_with_huber_loss = False
     load_memory_from_file_path = None
     collect_new_data = True
+    input_rescaler = 255.0
 
     # PPO related params
     target_kl_divergence = 0.01
@@ -154,6 +155,8 @@ class EnvironmentParameters(Parameters):
     desired_observation_width = 76
     desired_observation_height = 60
     normalize_observation = False
+    crop_observation = False
+    random_initialization_steps = 0
     reward_scaling = 1.0
     reward_clipping_min = None
     reward_clipping_max = None
@@ -290,6 +293,8 @@ class Atari(EnvironmentParameters):
     desired_observation_width = 84
     reward_clipping_max = 1.0
     reward_clipping_min = -1.0
+    random_initialization_steps = 30
+    crop_observation = False  # in the original paper the observation is cropped but not in the Nature paper
 
 
 class Doom(EnvironmentParameters):
@@ -355,6 +360,7 @@ class DQN(AgentParameters):
 
 class DDQN(DQN):
     type = 'DDQNAgent'
+    num_steps_between_copying_online_weights_to_target = 30000
 
 
 class DuelingDQN(DQN):
@@ -384,17 +390,19 @@ class QuantileRegressionDQN(DQN):
 
 class NEC(AgentParameters):
     type = 'NECAgent'
-    optimizer_type = 'RMSProp'
+    optimizer_type = 'Adam'
     input_types = {'observation': InputTypes.Observation}
     output_types = [OutputTypes.DNDQ]
     loss_weights = [1.0]
     dnd_size = 500000
     l2_norm_added_delta = 0.001
-    new_value_shift_coefficient = 0.1
+    new_value_shift_coefficient = 0.1  # alpha
     number_of_knn = 50
     n_step = 100
     bootstrap_total_return_from_old_policy = True
-    DND_key_error_threshold = 0.1
+    DND_key_error_threshold = 0
+    input_rescaler = 1.0
+    num_consecutive_playing_steps = 4
 
 
 class ActorCritic(AgentParameters):
diff --git a/environments/gym_environment_wrapper.py b/environments/gym_environment_wrapper.py
index 6c94aaa..c821bf8 100644
---
a/environments/gym_environment_wrapper.py +++ b/environments/gym_environment_wrapper.py @@ -19,6 +19,7 @@ from logger import * import gym import numpy as np import time +import random try: import roboschool from OpenGL import GL @@ -59,7 +60,7 @@ class GymEnvironmentWrapper(EnvironmentWrapper): # self.env_spec = gym.spec(self.env_id) self.env.frameskip = self.frame_skip self.discrete_controls = type(self.env.action_space) != gym.spaces.box.Box - + self.random_initialization_steps = 0 self.state = self.reset(True)['state'] # render @@ -113,6 +114,7 @@ class GymEnvironmentWrapper(EnvironmentWrapper): else: self.timestep_limit = None self.measurements_size = len(self.step(0)['info'].keys()) + self.random_initialization_steps = self.tp.env.random_initialization_steps def _wrap_state(self, state): if isinstance(self.env.observation_space, gym.spaces.Dict): @@ -155,8 +157,9 @@ class GymEnvironmentWrapper(EnvironmentWrapper): def _preprocess_state(self, state): # TODO: move this into wrapper - if any(env in self.env_id for env in ["Breakout", "Pong"]): - # crop image + # crop image for atari games + # the image from the environment is 210x160 + if self.tp.env.crop_observation and hasattr(self.env, 'env') and hasattr(self.env.env, 'ale'): state['observation'] = state['observation'][34:195, :, :] return state @@ -170,7 +173,16 @@ class GymEnvironmentWrapper(EnvironmentWrapper): self.env.seed(self.seed) self.state = self._wrap_state(self.env.reset()) - while self.state is None: + + # initialize the number of lives + if hasattr(self.env, 'env') and hasattr(self.env.env, 'ale'): + self.current_ale_lives = self.env.env.ale.lives() + + # simulate a random initial environment state by stepping for a random number of times between 0 and 30 + step_count = 0 + random_initialization_steps = random.randint(0, self.random_initialization_steps) + while self.state is None or step_count < random_initialization_steps: + step_count += 1 self.step(0) return self.state diff --git a/logger.py b/logger.py index 08d026f..c070cc0 100644 --- a/logger.py +++ b/logger.py @@ -115,6 +115,14 @@ class ScreenLogger(object): if default is not None: return default + def change_terminal_title(self, title: str): + """ + Changes the title of the terminal window + :param title: The new title + :return: None + """ + print("\x1b]2;{}\x07".format(title)) + class BaseLogger(object): def __init__(self): @@ -157,6 +165,7 @@ class Logger(BaseLogger): self.time = None self.experiments_path = "" self.last_line_idx_written_to_csv = 0 + self.experiment_name = "" def set_current_time(self, time): self.time = time @@ -205,7 +214,9 @@ class Logger(BaseLogger): def signal_value_exists(self, time, signal_name): try: - self.get_signal_value(time, signal_name) + value = self.get_signal_value(time, signal_name) + if value != value: # value is nan + return False except: return False return True @@ -229,7 +240,8 @@ class Logger(BaseLogger): if self.start_time: self.create_signal_value('Wall-Clock Time', time.time() - self.start_time, time=episode) else: - self.create_signal_value('Wall-Clock Time', time.time(), time=episode) + self.create_signal_value('Wall-Clock Time', 0, time=episode) + self.start_time = time.time() def create_gif(self, images, fps=10, name="Gif"): output_file = '{}_{}.gif'.format(datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S'), name) @@ -243,7 +255,7 @@ class Logger(BaseLogger): def remove_experiment_dir(self): shutil.rmtree(self.experiments_path) - def print_summary(self): + def summarize_experiment(self): screen.separator() 
screen.log_title("Results stored at: {}".format(self.experiments_path)) screen.log_title("Total runtime: {}".format(datetime.datetime.now() - self.time_started)) @@ -273,7 +285,8 @@ class Logger(BaseLogger): screen.error('Experiment name must be composed only of alphanumeric letters, ' 'underscores and dashes and should not be longer than 100 characters.') - return match.group(0) + self.experiment_name = match.group(0) + return self.experiment_name def get_experiment_path(self, experiment_name, create_path=True): general_experiments_path = os.path.join('./experiments/', experiment_name) diff --git a/memories/differentiable_neural_dictionary.py b/memories/differentiable_neural_dictionary.py index 4904151..1a1fdc7 100644 --- a/memories/differentiable_neural_dictionary.py +++ b/memories/differentiable_neural_dictionary.py @@ -83,6 +83,11 @@ class AnnoyDictionary(object): # Returns the stored embeddings and values of the closest embeddings def query(self, keys, k): + if not self.has_enough_entries(k): + # this will only happen when the DND is not yet populated with enough entries, which is only during heatup + # these values won't be used and therefore they are meaningless + return [0.0], [0.0], [0] + _, indices = self._get_k_nearest_neighbors_indices(keys, k) embeddings = [] @@ -94,7 +99,7 @@ class AnnoyDictionary(object): self.current_timestamp += 1 - return embeddings, values + return embeddings, values, indices def has_enough_entries(self, k): return self.curr_size > k and (self.built_capacity > k) @@ -133,9 +138,11 @@ class AnnoyDictionary(object): class QDND: - def __init__(self, dict_size, key_width, num_actions, new_value_shift_coefficient=0.1, key_error_threshold=0.01): + def __init__(self, dict_size, key_width, num_actions, new_value_shift_coefficient=0.1, key_error_threshold=0.01, + learning_rate=0.01): self.num_actions = num_actions self.dicts = [] + self.learning_rate = learning_rate # create a dict for each action for a in range(num_actions): @@ -155,16 +162,18 @@ class QDND: self.dicts[a].add(curr_action_embeddings, curr_action_values) return True - def query(self, embeddings, actions, k): + def query(self, embeddings, action, k): # query for nearest neighbors to the given embeddings dnd_embeddings = [] dnd_values = [] - for i, action in enumerate(actions): - embedding, value = self.dicts[action].query([embeddings[i]], k) + dnd_indices = [] + for i in range(len(embeddings)): + embedding, value, indices = self.dicts[action].query([embeddings[i]], k) dnd_embeddings.append(embedding[0]) dnd_values.append(value[0]) + dnd_indices.append(indices[0]) - return dnd_embeddings, dnd_values + return dnd_embeddings, dnd_values, dnd_indices def has_enough_entries(self, k): # check if each of the action dictionaries has at least k entries @@ -193,4 +202,5 @@ def load_dnd(model_dir): DND.dicts[a].index.add_item(idx, key) DND.dicts[a].index.build(50) + return DND diff --git a/memories/episodic_experience_replay.py b/memories/episodic_experience_replay.py index 1d7e61f..5930d78 100644 --- a/memories/episodic_experience_replay.py +++ b/memories/episodic_experience_replay.py @@ -16,6 +16,7 @@ from memories.memory import * import threading +from typing import Union class EpisodicExperienceReplay(Memory): @@ -103,7 +104,8 @@ class EpisodicExperienceReplay(Memory): if transition.game_over: self._num_transitions_in_complete_episodes += last_episode.length() self._length += 1 - self.buffer[-1].update_returns(self.discount, is_bootstrapped=self.return_is_bootstrapped, + 
self.buffer[-1].update_returns(self.discount, + is_bootstrapped=self.tp.agent.bootstrap_total_return_from_old_policy, n_step_return=self.tp.agent.n_step) self.buffer[-1].update_measurements_targets(self.tp.agent.num_predicted_steps_ahead) # self.buffer[-1].update_actions_probabilities() # used for off-policy policy optimization @@ -146,6 +148,17 @@ class EpisodicExperienceReplay(Memory): def get(self, index): return self.get_episode(index) + def get_last_complete_episode(self) -> Union[None, Episode]: + """ + Returns the last complete episode in the memory or None if there are no complete episodes + :return: None or the last complete episode + """ + last_complete_episode_index = self.num_complete_episodes()-1 + if last_complete_episode_index >= 0: + return self.get(last_complete_episode_index) + else: + return None + def update_last_transition_info(self, info): episode = self.buffer[-1] if episode.length() == 0: diff --git a/memories/memory.py b/memories/memory.py index 4c479e3..f4c6a87 100644 --- a/memories/memory.py +++ b/memories/memory.py @@ -80,9 +80,12 @@ class Episode(object): total_return += current_discount * np.pad(rewards[i:], (0, i), 'constant', constant_values=0) current_discount *= discount + # calculate the bootstrapped returns + bootstraps = np.array([np.squeeze(t.info['max_action_value']) for t in self.transitions[n_step_return:]]) + bootstrapped_return = total_return + current_discount * np.pad(bootstraps, (0, n_step_return), 'constant', + constant_values=0) if is_bootstrapped: - bootstraps = np.array([np.squeeze(t.info['action_value']) for t in self.transitions[n_step_return:]]) - total_return += current_discount * np.pad(bootstraps, (0, n_step_return), 'constant', constant_values=0) + total_return = bootstrapped_return for transition_idx in range(self.length()): self.transitions[transition_idx].total_return = total_return[transition_idx] @@ -114,7 +117,13 @@ class Episode(object): return self.returns_table def get_returns(self): - return [t.total_return for t in self.transitions] + return self.get_transitions_attribute('total_return') + + def get_transitions_attribute(self, attribute_name): + if hasattr(self.transitions[0], attribute_name): + return [t.__dict__[attribute_name] for t in self.transitions] + else: + raise ValueError("The transitions have no such attribute name") def to_batch(self): batch = [] @@ -141,14 +150,12 @@ class Transition(object): :param game_over: A boolean which should be True if the episode terminated after the execution of the action. 
""" - self.state = copy.deepcopy(state) - self.state['observation'] = np.array(self.state['observation'], copy=False) + self.state = state self.action = action self.reward = reward self.total_return = None if not next_state: next_state = state - self.next_state = copy.deepcopy(next_state) - self.next_state['observation'] = np.array(self.next_state['observation'], copy=False) + self.next_state = next_state self.game_over = game_over self.info = {} diff --git a/plot_atari.py b/plot_atari.py new file mode 100644 index 0000000..8732fb0 --- /dev/null +++ b/plot_atari.py @@ -0,0 +1,105 @@ +import argparse +import matplotlib +import matplotlib.pyplot as plt +from dashboard import SignalsFile +import os + + +class FigureMaker(object): + def __init__(self, path, cols, smoothness, signal_to_plot, x_axis): + self.experiments_path = path + self.environments = self.list_environments() + self.cols = cols + self.rows = int((len(self.environments) + cols - 1) / cols) + self.smoothness = smoothness + self.signal_to_plot = signal_to_plot + self.x_axis = x_axis + + params = { + 'axes.labelsize': 8, + 'font.size': 10, + 'legend.fontsize': 14, + 'xtick.labelsize': 8, + 'ytick.labelsize': 8, + 'text.usetex': False, + 'figure.figsize': [16, 30] + } + matplotlib.rcParams.update(params) + + def list_environments(self): + environments = sorted([e.name for e in os.scandir(args.path) if e.is_dir()]) + filtered_environments = self.filter_environments(environments) + return filtered_environments + + def filter_environments(self, environments): + filtered_environments = [] + for idx, environment in enumerate(environments): + path = os.path.join(args.path, environment) + experiments = [e.name for e in os.scandir(path) if e.is_dir()] + + # take only the last updated experiment directory + last_experiment_dir = max([os.path.join(path, root) for root in experiments], key=os.path.getctime) + + # make sure there is a csv file inside it + for file_path in os.listdir(last_experiment_dir): + full_file_path = os.path.join(last_experiment_dir, file_path) + if os.path.isfile(full_file_path) and file_path.endswith('.csv'): + filtered_environments.append((environment, full_file_path)) + + return filtered_environments + + def plot_figures(self): + for idx, (environment, full_file_path) in enumerate(self.environments): + print(environment) + axis = plt.subplot(self.rows, self.cols, idx + 1) + signals = SignalsFile(full_file_path) + signals.change_averaging_window(self.smoothness, force=True, signals=[self.signal_to_plot]) + steps = signals.bokeh_source.data[self.x_axis] + rewards = signals.bokeh_source.data[self.signal_to_plot] + + yloc = plt.MaxNLocator(4) + axis.yaxis.set_major_locator(yloc) + axis.ticklabel_format(style='sci', axis='x', scilimits=(0, 0)) + plt.title(environment, fontsize=10, y=1.08) + plt.plot(steps, rewards, linewidth=0.8) + plt.subplots_adjust(hspace=2.0, wspace=0.4) + + def save_pdf(self, name): + plt.savefig(name + ".pdf", bbox_inches='tight') + + def show_figures(self): + plt.show() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument('-p', '--path', + help="(string) Root directory of the experiments", + default=None, + type=str) + parser.add_argument('-c', '--cols', + help="(int) Number of plot columns", + default=6, + type=int) + parser.add_argument('-s', '--smoothness', + help="(int) Number of consequent episodes to average over", + default=200, + type=int) + parser.add_argument('-sig', '--signal', + help="(str) The name of the signal to plot", + default='Evaluation 
Reward', + type=str) + parser.add_argument('-x', '--x_axis', + help="(str) The meaning of the x axis", + default='Total steps', + type=str) + parser.add_argument('-pdf', '--pdf', + help="(str) A name of a pdf to save to", + default='atari', + type=str) + args = parser.parse_args() + + maker = FigureMaker(args.path, cols=args.cols, smoothness=args.smoothness, signal_to_plot=args.signal, x_axis=args.x_axis) + maker.plot_figures() + maker.save_pdf(args.pdf) + maker.show_figures() diff --git a/presets.py b/presets.py index c0225ad..dcfb765 100644 --- a/presets.py +++ b/presets.py @@ -89,7 +89,6 @@ class Doom_Basic_QRDQN(Preset): self.num_heatup_steps = 1000 - class Doom_Basic_OneStepQ(Preset): def __init__(self): Preset.__init__(self, NStepQ, Doom, ExplorationParameters) @@ -408,8 +407,67 @@ class Breakout_DQN(Preset): self.exploration.evaluation_policy = 'EGreedy' self.exploration.evaluation_epsilon = 0.05 self.num_heatup_steps = 50000 + self.agent.num_consecutive_playing_steps = 4 self.evaluation_episodes = 1 - self.evaluate_every_x_episodes = 100 + self.evaluate_every_x_episodes = 25 + self.agent.replace_mse_with_huber_loss = True + # self.env.crop_observation = True # TODO: remove + # self.rescaling_interpolation_type = 'nearest' # TODO: remove + + +class Breakout_DDQN(Preset): + def __init__(self): + Preset.__init__(self, DDQN, Atari, ExplorationParameters) + self.env.level = 'BreakoutDeterministic-v4' + self.agent.num_steps_between_copying_online_weights_to_target = 30000 + self.learning_rate = 0.00025 + self.agent.num_transitions_in_experience_replay = 1000000 + self.exploration.initial_epsilon = 1.0 + self.exploration.final_epsilon = 0.01 + self.exploration.epsilon_decay_steps = 1000000 + self.exploration.evaluation_policy = 'EGreedy' + self.exploration.evaluation_epsilon = 0.001 + self.num_heatup_steps = 50000 + self.agent.num_consecutive_playing_steps = 4 + self.evaluation_episodes = 1 + self.evaluate_every_x_episodes = 25 + self.agent.replace_mse_with_huber_loss = True + + +class Breakout_Dueling_DDQN(Preset): + def __init__(self): + Preset.__init__(self, DDQN, Atari, ExplorationParameters) + self.env.level = 'BreakoutDeterministic-v4' + self.agent.output_types = [OutputTypes.DuelingQ] + self.agent.num_steps_between_copying_online_weights_to_target = 30000 + self.learning_rate = 0.00025 + self.agent.num_transitions_in_experience_replay = 1000000 + self.exploration.initial_epsilon = 1.0 + self.exploration.final_epsilon = 0.01 + self.exploration.epsilon_decay_steps = 1000000 + self.exploration.evaluation_policy = 'EGreedy' + self.exploration.evaluation_epsilon = 0.001 + self.num_heatup_steps = 50000 + self.agent.num_consecutive_playing_steps = 4 + self.evaluation_episodes = 1 + self.evaluate_every_x_episodes = 25 + self.agent.replace_mse_with_huber_loss = True + +class Alien_DQN(Preset): + def __init__(self): + Preset.__init__(self, DQN, Atari, ExplorationParameters) + self.env.level = 'AlienDeterministic-v4' + self.agent.num_steps_between_copying_online_weights_to_target = 10000 + self.learning_rate = 0.00025 + self.agent.num_transitions_in_experience_replay = 1000000 + self.exploration.initial_epsilon = 1.0 + self.exploration.final_epsilon = 0.1 + self.exploration.epsilon_decay_steps = 1000000 + self.exploration.evaluation_policy = 'EGreedy' + self.exploration.evaluation_epsilon = 0.05 + self.num_heatup_steps = 50000 + self.evaluation_episodes = 1 + self.evaluate_every_x_episodes = 5 class Breakout_C51(Preset): @@ -846,7 +904,8 @@ class CartPole_NEC(Preset): self.num_heatup_steps = 
1000 self.exploration.epsilon_decay_steps = 1000 self.exploration.final_epsilon = 0.1 - self.agent.discount = 1.0 + self.agent.discount = 0.99 + self.seed = 0 self.test = True self.test_max_step_threshold = 200 @@ -857,10 +916,16 @@ class Doom_Basic_NEC(Preset): def __init__(self): Preset.__init__(self, NEC, Doom, ExplorationParameters) self.env.level = 'basic' - self.agent.num_episodes_in_experience_replay = 200 - self.learning_rate = 0.00025 - self.num_heatup_steps = 1000 - self.agent.num_playing_steps_between_two_training_steps = 1 + self.learning_rate = 0.00001 + self.agent.num_transitions_in_experience_replay = 100000 + # self.exploration.initial_epsilon = 0.1 # TODO: try exploration + # self.exploration.final_epsilon = 0.1 + # self.exploration.epsilon_decay_steps = 1000000 + self.num_heatup_steps = 200 + self.evaluation_episodes = 1 + self.evaluate_every_x_episodes = 5 + self.seed = 123 + class Montezuma_NEC(Preset): @@ -877,12 +942,20 @@ class Breakout_NEC(Preset): def __init__(self): Preset.__init__(self, NEC, Atari, ExplorationParameters) self.env.level = 'BreakoutDeterministic-v4' - self.learning_rate = 0.00025 + self.agent.num_steps_between_copying_online_weights_to_target = 10000 + self.learning_rate = 0.00001 self.agent.num_transitions_in_experience_replay = 1000000 - self.exploration.initial_epsilon = 1.0 + self.exploration.initial_epsilon = 0.1 self.exploration.final_epsilon = 0.1 self.exploration.epsilon_decay_steps = 1000000 - self.num_heatup_steps = 50000 + self.exploration.evaluation_policy = 'EGreedy' + self.exploration.evaluation_epsilon = 0.05 + self.num_heatup_steps = 1000 + self.env.reward_clipping_max = None + self.env.reward_clipping_min = None + self.evaluation_episodes = 1 + self.evaluate_every_x_episodes = 25 + self.seed = 123 class Doom_Health_NEC(Preset): @@ -924,12 +997,54 @@ class Pong_NEC(Preset): def __init__(self): Preset.__init__(self, NEC, Atari, ExplorationParameters) self.env.level = 'PongDeterministic-v4' - self.learning_rate = 0.001 + self.learning_rate = 0.00001 self.agent.num_transitions_in_experience_replay = 100000 - self.exploration.initial_epsilon = 0.5 + self.exploration.initial_epsilon = 0.1 # TODO: try exploration self.exploration.final_epsilon = 0.1 self.exploration.epsilon_decay_steps = 1000000 + self.num_heatup_steps = 2000 + self.env.reward_clipping_max = None + self.env.reward_clipping_min = None + self.evaluation_episodes = 1 + self.evaluate_every_x_episodes = 5 + self.env.crop_observation = True # TODO: remove + self.env.random_initialization_steps = 1 # TODO: remove + # self.seed = 123 + + +class Alien_NEC(Preset): + def __init__(self): + Preset.__init__(self, NEC, Atari, ExplorationParameters) + self.env.level = 'AlienDeterministic-v4' + self.learning_rate = 0.0001 + self.agent.num_transitions_in_experience_replay = 100000 + self.exploration.initial_epsilon = 0.1 # TODO: try exploration + self.exploration.final_epsilon = 0.1 + self.exploration.epsilon_decay_steps = 1000000 + self.num_heatup_steps = 3000 + self.env.reward_clipping_max = None + self.env.reward_clipping_min = None + self.evaluation_episodes = 1 + self.evaluate_every_x_episodes = 5 + self.seed = 123 + + +class Pong_DQN(Preset): + def __init__(self): + Preset.__init__(self, DQN, Atari, ExplorationParameters) + self.env.level = 'PongDeterministic-v4' + self.agent.num_steps_between_copying_online_weights_to_target = 10000 + self.learning_rate = 0.00025 + self.agent.num_transitions_in_experience_replay = 1000000 + self.exploration.initial_epsilon = 1.0 + 
self.exploration.final_epsilon = 0.1 + self.exploration.epsilon_decay_steps = 1000000 + self.exploration.evaluation_policy = 'EGreedy' + self.exploration.evaluation_epsilon = 0.05 self.num_heatup_steps = 50000 + self.evaluation_episodes = 1 + self.evaluate_every_x_episodes = 5 + self.seed = 123 class CartPole_A3C(Preset): diff --git a/run_test.py b/run_test.py index 7e8f6ff..196056e 100644 --- a/run_test.py +++ b/run_test.py @@ -50,6 +50,9 @@ if __name__ == '__main__': parser.add_argument('-v', '--verbose', help="(flag) display verbose logs in the event of an error", action='store_true') + parser.add_argument('-l', '--list_presets', + help="(flag) list all the presets that are tested", + action='store_true') parser.add_argument('--stop_after_first_failure', help="(flag) stop executing tests after the first error", action='store_true') @@ -73,6 +76,14 @@ if __name__ == '__main__': presets_to_ignore = args.ignore_presets.split(',') else: presets_to_ignore = [] + + if args.list_presets: + for idx, preset_name in enumerate(presets_lists): + preset = eval('presets.{}()'.format(preset_name)) + if preset.test and preset_name not in presets_to_ignore: + print(preset_name) + exit(0) + for idx, preset_name in enumerate(presets_lists): preset = eval('presets.{}()'.format(preset_name)) if preset.test and preset_name not in presets_to_ignore: diff --git a/utils.py b/utils.py index 4eecbce..7f75ac5 100644 --- a/utils.py +++ b/utils.py @@ -21,6 +21,7 @@ import numpy as np import threading from subprocess import call, Popen import signal +import copy killed_processes = [] @@ -333,6 +334,23 @@ def switch_axes_order(observation, from_type='channels_first', to_type='channels return np.transpose(observation, (1, 0)) +class LazyStack(object): + """ + A lazy version of np.stack which avoids copying the memory until it is + needed. + """ + + def __init__(self, history, axis=None): + self.history = copy.copy(history) + self.axis = axis + + def __array__(self, dtype=None): + array = np.stack(self.history, axis=self.axis) + if dtype is not None: + array = array.astype(dtype) + return array + + def stack_observation(curr_stack, observation, stack_size): """ Adds a new observation to an existing stack of observations from previous time-steps.
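
For reference, the lazy stacking this patch introduces in `utils.py` and wires into `agents/agent.py` can be exercised on its own. The sketch below mirrors the `LazyStack` class added above; the 84x84 frame size, the 4-frame stack, and the toy usage harness are illustrative assumptions rather than values taken from the patch.

```python
import copy
from collections import deque

import numpy as np


class LazyStack(object):
    """Defers np.stack over a history of frames until an array is actually required."""

    def __init__(self, history, axis=None):
        # shallow copy of the deque, so frames appended later do not leak into this snapshot
        self.history = copy.copy(history)
        self.axis = axis

    def __array__(self, dtype=None):
        # np.array(lazy_stack) lands here: the frames are stacked (and optionally cast) only now
        array = np.stack(self.history, axis=self.axis)
        if dtype is not None:
            array = array.astype(dtype)
        return array


# Illustrative usage with an assumed 4-frame stack of 84x84 uint8 frames
frames = deque(maxlen=4)
for step in range(4):
    frames.append(np.full((84, 84), step, dtype=np.uint8))

observation = LazyStack(frames, axis=-1)   # no copy yet, just a reference to the deque contents
batch = np.array([np.array(observation)])  # stacking happens here, when the batch is built
print(batch.shape)                         # -> (1, 84, 84, 4)
```

The point of the design is that transitions in the replay buffer hold `LazyStack` references to shared frames instead of materialized copies, and the `np.array(transition.state['observation'])` calls in `extract_batch` perform the actual stacking only when a training batch is assembled.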