mirror of https://github.com/gryf/coach.git synced 2025-12-18 03:30:19 +01:00

Adding support for evaluation only mode with predefined number of steps (#225)

Gal Novik
2019-03-03 10:03:45 +02:00
committed by Gal Leibovich
parent 2c1a9dbf20
commit 10220be9be
3 changed files with 24 additions and 16 deletions
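
For context, the commit title describes an evaluation-only run that stops after a predefined number of environment steps rather than running evaluation episodes indefinitely. A minimal sketch of that idea is below; `evaluate_only`, `agent.act`, `env.step`, and the `success` info key are illustrative stand-ins, not Coach's actual API.

import numpy as np

def evaluate_only(agent, env, num_steps):
    """Run a pure evaluation phase for a fixed budget of environment steps."""
    episode_rewards, successes, episode_reward = [], 0, 0.0
    obs = env.reset()
    for _ in range(num_steps):                 # fixed step budget instead of "run forever"
        action = agent.act(obs)                # deterministic/greedy action during evaluation
        obs, reward, done, info = env.step(action)
        episode_reward += reward
        if done:
            episode_rewards.append(episode_reward)
            successes += int(info.get("success", False))
            episode_reward = 0.0
            obs = env.reset()
    # Aggregate statistics reported at the end of the evaluation phase
    avg_reward = np.mean(episode_rewards) if episode_rewards else np.nan
    success_rate = successes / max(len(episode_rewards), 1)
    return avg_reward, success_rate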


@@ -397,8 +397,7 @@ class Agent(AgentInterface):
             success_rate = self.num_successes_across_evaluation_episodes / self.num_evaluation_episodes_completed
             self.agent_logger.create_signal_value(
                 "Success Rate",
-                success_rate
-            )
+                success_rate)
             if self.ap.is_a_highest_level_agent or self.ap.task_parameters.verbosity == "high":
                 screen.log_title("{}: Finished evaluation phase. Success rate = {}, Avg Total Reward = {}"
                                  .format(self.name, np.round(success_rate, 2), np.round(evaluation_reward, 2)))
@@ -488,10 +487,11 @@ class Agent(AgentInterface):
         self.agent_logger.create_signal_value('Update Target Network', 0, overwrite=False)
         self.agent_logger.update_wall_clock_time(self.current_episode)

-        if self._phase != RunPhase.TEST:
-            self.agent_logger.create_signal_value('Evaluation Reward', np.nan, overwrite=False)
-            self.agent_logger.create_signal_value('Shaped Evaluation Reward', np.nan, overwrite=False)
-            self.agent_logger.create_signal_value('Success Rate', np.nan, overwrite=False)
+        # The following signals are created with meaningful values only when an evaluation phase is completed.
+        # Creating with default NaNs for any HEATUP/TRAIN/TEST episode which is not the last in an evaluation phase
+        self.agent_logger.create_signal_value('Evaluation Reward', np.nan, overwrite=False)
+        self.agent_logger.create_signal_value('Shaped Evaluation Reward', np.nan, overwrite=False)
+        self.agent_logger.create_signal_value('Success Rate', np.nan, overwrite=False)

         for signal in self.episode_signals:
             self.agent_logger.create_signal_value("{}/Mean".format(signal.name), signal.get_mean())