From bf0a65eefd87b62eb0d6d9bde35279bcd6567643 Mon Sep 17 00:00:00 2001
From: Cody Hsieh
Date: Thu, 17 Jan 2019 14:08:05 -0800
Subject: [PATCH] remove redundant observe ignore (#168)

---
 rl_coach/agents/agent.py | 41 +++++++++++++++++-----------------------
 1 file changed, 17 insertions(+), 24 deletions(-)

diff --git a/rl_coach/agents/agent.py b/rl_coach/agents/agent.py
index 7d6bf92..4a1fbf1 100644
--- a/rl_coach/agents/agent.py
+++ b/rl_coach/agents/agent.py
@@ -977,32 +977,25 @@ class Agent(AgentInterface):
         :return:
         """
-        # if we are in the first step in the episode, then we don't have a a next state and a reward and thus no
-        # transition yet, and therefore we don't need to store anything in the memory.
-        # also we did not reach the goal yet.
-        if self.current_episode_steps_counter == 0:
-            # initialize the current state
-            return transition.game_over
-        else:
-            # sum up the total shaped reward
-            self.total_shaped_reward_in_current_episode += transition.reward
-            self.total_reward_in_current_episode += transition.reward
-            self.shaped_reward.add_sample(transition.reward)
-            self.reward.add_sample(transition.reward)
+        # sum up the total shaped reward
+        self.total_shaped_reward_in_current_episode += transition.reward
+        self.total_reward_in_current_episode += transition.reward
+        self.shaped_reward.add_sample(transition.reward)
+        self.reward.add_sample(transition.reward)
+
+        # create and store the transition
+        if self.phase in [RunPhase.TRAIN, RunPhase.HEATUP]:
+            # for episodic memories we keep the transitions in a local buffer until the episode is ended.
+            # for regular memories we insert the transitions directly to the memory
+            self.current_episode_buffer.insert(transition)
+            if not isinstance(self.memory, EpisodicExperienceReplay) \
+                    and not self.ap.algorithm.store_transitions_only_when_episodes_are_terminated:
+                self.call_memory('store', transition)
-            # create and store the transition
-            if self.phase in [RunPhase.TRAIN, RunPhase.HEATUP]:
-                # for episodic memories we keep the transitions in a local buffer until the episode is ended.
-                # for regular memories we insert the transitions directly to the memory
-                self.current_episode_buffer.insert(transition)
-                if not isinstance(self.memory, EpisodicExperienceReplay) \
-                        and not self.ap.algorithm.store_transitions_only_when_episodes_are_terminated:
-                    self.call_memory('store', transition)
+        if self.ap.visualization.dump_in_episode_signals:
+            self.update_step_in_episode_log()
-            if self.ap.visualization.dump_in_episode_signals:
-                self.update_step_in_episode_log()
-
-            return transition.game_over
+        return transition.game_over
 
     # TODO-remove - this is a temporary flow, used by the trainer worker, duplicated from observe() - need to create
     # an external trainer flow reusing the existing flow and methods [e.g. observe(), step(), act()]
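
The behavioral change here is easy to miss: before this patch, the method returned early on the first step of an episode (current_episode_steps_counter == 0) and recorded nothing; after it, every observed transition updates the reward signals and, during TRAIN or HEATUP, is buffered or stored. A minimal sketch of the post-patch flow, using a stripped-down Transition and a hypothetical MiniAgent rather than rl_coach's real classes:

    from dataclasses import dataclass

    @dataclass
    class Transition:
        # only the two fields this hunk touches; rl_coach's real Transition carries much more
        reward: float
        game_over: bool

    class MiniAgent:
        # hypothetical reduction of the patched method: no step-0 early
        # return, so rewards accumulate on every call
        def __init__(self) -> None:
            self.total_reward_in_current_episode = 0.0

        def observe(self, transition: Transition) -> bool:
            self.total_reward_in_current_episode += transition.reward
            return transition.game_over

    agent = MiniAgent()
    agent.observe(Transition(reward=1.0, game_over=False))
    assert agent.total_reward_in_current_episode == 1.0  # first-step reward is no longer dropped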
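
The storage branch that the hunk re-indents encodes a design split worth keeping in mind when reviewing: episodic memories accumulate transitions in current_episode_buffer and are flushed when the episode ends, while flat memories take each transition immediately unless store_transitions_only_when_episodes_are_terminated is set. A paraphrase of that predicate, with EpisodicExperienceReplay stubbed out rather than imported from rl_coach:

    class EpisodicExperienceReplay:
        # stand-in for rl_coach's episodic memory class; used here only for isinstance()
        pass

    def store_immediately(memory: object, store_only_at_episode_end: bool) -> bool:
        # mirrors the condition guarding self.call_memory('store', transition)
        return (not isinstance(memory, EpisodicExperienceReplay)
                and not store_only_at_episode_end)

    assert store_immediately(object(), store_only_at_episode_end=False) is True
    assert store_immediately(EpisodicExperienceReplay(), store_only_at_episode_end=False) is False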