From a0f25034c3d0e2c733fd7c53fab0e40cfb0977b8 Mon Sep 17 00:00:00 2001 From: Thom Lane Date: Fri, 16 Nov 2018 08:22:00 -0800 Subject: [PATCH] Added average total reward to logging after evaluation phase completes. (#93) --- rl_coach/agents/agent.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/rl_coach/agents/agent.py b/rl_coach/agents/agent.py index fac4325..bd1fc71 100644 --- a/rl_coach/agents/agent.py +++ b/rl_coach/agents/agent.py @@ -367,9 +367,9 @@ class Agent(AgentInterface): # we write to the next episode, because it could be that the current episode was already written # to disk and then we won't write it again self.agent_logger.set_current_time(self.current_episode + 1) + evaluation_reward = self.accumulated_rewards_across_evaluation_episodes / self.num_evaluation_episodes_completed self.agent_logger.create_signal_value( - 'Evaluation Reward', - self.accumulated_rewards_across_evaluation_episodes / self.num_evaluation_episodes_completed) + 'Evaluation Reward', evaluation_reward) self.agent_logger.create_signal_value( 'Shaped Evaluation Reward', self.accumulated_shaped_rewards_across_evaluation_episodes / self.num_evaluation_episodes_completed) @@ -379,8 +379,8 @@ class Agent(AgentInterface): success_rate ) if self.ap.is_a_highest_level_agent or self.ap.task_parameters.verbosity == "high": - screen.log_title("{}: Finished evaluation phase. Success rate = {}" - .format(self.name, np.round(success_rate, 2))) + screen.log_title("{}: Finished evaluation phase. Success rate = {}, Avg Total Reward = {}" + .format(self.name, np.round(success_rate, 2), np.round(evaluation_reward, 2))) def call_memory(self, func, args=()): """