mirror of
https://github.com/gryf/coach.git
synced 2025-12-18 03:30:19 +01:00
Adding support for evaluation-only mode with a predefined number of steps (#225)
This commit is contained in:
@@ -397,8 +397,7 @@ class Agent(AgentInterface):
|
||||
success_rate = self.num_successes_across_evaluation_episodes / self.num_evaluation_episodes_completed
|
||||
self.agent_logger.create_signal_value(
|
||||
"Success Rate",
|
||||
success_rate
|
||||
)
|
||||
success_rate)
|
||||
if self.ap.is_a_highest_level_agent or self.ap.task_parameters.verbosity == "high":
|
||||
screen.log_title("{}: Finished evaluation phase. Success rate = {}, Avg Total Reward = {}"
|
||||
.format(self.name, np.round(success_rate, 2), np.round(evaluation_reward, 2)))
|
||||
@@ -488,10 +487,11 @@ class Agent(AgentInterface):
|
||||
self.agent_logger.create_signal_value('Update Target Network', 0, overwrite=False)
|
||||
self.agent_logger.update_wall_clock_time(self.current_episode)
|
||||
|
||||
if self._phase != RunPhase.TEST:
|
||||
self.agent_logger.create_signal_value('Evaluation Reward', np.nan, overwrite=False)
|
||||
self.agent_logger.create_signal_value('Shaped Evaluation Reward', np.nan, overwrite=False)
|
||||
self.agent_logger.create_signal_value('Success Rate', np.nan, overwrite=False)
|
||||
# The following signals are created with meaningful values only when an evaluation phase is completed.
|
||||
# Creating with default NaNs for any HEATUP/TRAIN/TEST episode which is not the last in an evaluation phase
|
||||
self.agent_logger.create_signal_value('Evaluation Reward', np.nan, overwrite=False)
|
||||
self.agent_logger.create_signal_value('Shaped Evaluation Reward', np.nan, overwrite=False)
|
||||
self.agent_logger.create_signal_value('Success Rate', np.nan, overwrite=False)
|
||||
|
||||
for signal in self.episode_signals:
|
||||
self.agent_logger.create_signal_value("{}/Mean".format(signal.name), signal.get_mean())
|
||||
|
||||
Reference in New Issue
Block a user