mirror of
https://github.com/gryf/coach.git
synced 2025-12-18 11:40:18 +01:00
Added average total reward to logging after evaluation phase completes. (#93)
This commit is contained in:
committed by
Scott Leishman
parent
81bac050d7
commit
a0f25034c3
@@ -367,9 +367,9 @@ class Agent(AgentInterface):
|
|||||||
# we write to the next episode, because it could be that the current episode was already written
|
# we write to the next episode, because it could be that the current episode was already written
|
||||||
# to disk and then we won't write it again
|
# to disk and then we won't write it again
|
||||||
self.agent_logger.set_current_time(self.current_episode + 1)
|
self.agent_logger.set_current_time(self.current_episode + 1)
|
||||||
|
evaluation_reward = self.accumulated_rewards_across_evaluation_episodes / self.num_evaluation_episodes_completed
|
||||||
self.agent_logger.create_signal_value(
|
self.agent_logger.create_signal_value(
|
||||||
'Evaluation Reward',
|
'Evaluation Reward', evaluation_reward)
|
||||||
self.accumulated_rewards_across_evaluation_episodes / self.num_evaluation_episodes_completed)
|
|
||||||
self.agent_logger.create_signal_value(
|
self.agent_logger.create_signal_value(
|
||||||
'Shaped Evaluation Reward',
|
'Shaped Evaluation Reward',
|
||||||
self.accumulated_shaped_rewards_across_evaluation_episodes / self.num_evaluation_episodes_completed)
|
self.accumulated_shaped_rewards_across_evaluation_episodes / self.num_evaluation_episodes_completed)
|
||||||
@@ -379,8 +379,8 @@ class Agent(AgentInterface):
|
|||||||
success_rate
|
success_rate
|
||||||
)
|
)
|
||||||
if self.ap.is_a_highest_level_agent or self.ap.task_parameters.verbosity == "high":
|
if self.ap.is_a_highest_level_agent or self.ap.task_parameters.verbosity == "high":
|
||||||
screen.log_title("{}: Finished evaluation phase. Success rate = {}"
|
screen.log_title("{}: Finished evaluation phase. Success rate = {}, Avg Total Reward = {}"
|
||||||
.format(self.name, np.round(success_rate, 2)))
|
.format(self.name, np.round(success_rate, 2), np.round(evaluation_reward, 2)))
|
||||||
|
|
||||||
def call_memory(self, func, args=()):
|
def call_memory(self, func, args=()):
|
||||||
"""
|
"""
|
||||||
|
|||||||
Reference in New Issue
Block a user