Mirror of https://github.com/gryf/coach.git, synced 2025-12-18 03:30:19 +01:00
Adding support for evaluation only mode with predefined number of steps (#225)
@@ -397,8 +397,7 @@ class Agent(AgentInterface):
         success_rate = self.num_successes_across_evaluation_episodes / self.num_evaluation_episodes_completed
         self.agent_logger.create_signal_value(
             "Success Rate",
-            success_rate
-        )
+            success_rate)
         if self.ap.is_a_highest_level_agent or self.ap.task_parameters.verbosity == "high":
             screen.log_title("{}: Finished evaluation phase. Success rate = {}, Avg Total Reward = {}"
                              .format(self.name, np.round(success_rate, 2), np.round(evaluation_reward, 2)))
@@ -488,7 +487,8 @@ class Agent(AgentInterface):
         self.agent_logger.create_signal_value('Update Target Network', 0, overwrite=False)
         self.agent_logger.update_wall_clock_time(self.current_episode)

-        if self._phase != RunPhase.TEST:
-            self.agent_logger.create_signal_value('Evaluation Reward', np.nan, overwrite=False)
-            self.agent_logger.create_signal_value('Shaped Evaluation Reward', np.nan, overwrite=False)
-            self.agent_logger.create_signal_value('Success Rate', np.nan, overwrite=False)
+        # The following signals are created with meaningful values only when an evaluation phase is completed.
+        # Creating with default NaNs for any HEATUP/TRAIN/TEST episode which is not the last in an evaluation phase
+        self.agent_logger.create_signal_value('Evaluation Reward', np.nan, overwrite=False)
+        self.agent_logger.create_signal_value('Shaped Evaluation Reward', np.nan, overwrite=False)
+        self.agent_logger.create_signal_value('Success Rate', np.nan, overwrite=False)
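Note: the NaN placeholders exist so that every logged row contains every signal, keeping the experiment's CSV columns aligned whether or not an evaluation phase just ended. A minimal sketch of the pattern, using a hypothetical signals dict in place of Coach's agent_logger:

import numpy as np

signals = {}

def create_signal_value(name, value, overwrite=True):
    # overwrite=False mirrors the calls above: a NaN placeholder must not
    # clobber a real value that was already logged for this episode.
    if overwrite or name not in signals:
        signals[name] = value

create_signal_value('Evaluation Reward', 0.93)                      # real value from an evaluation phase
create_signal_value('Evaluation Reward', np.nan, overwrite=False)   # placeholder is ignored
create_signal_value('Success Rate', np.nan, overwrite=False)        # nothing logged yet -> NaN
print(signals)  # {'Evaluation Reward': 0.93, 'Success Rate': nan}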
@@ -550,13 +550,14 @@ class AgentParameters(Parameters):


 class TaskParameters(Parameters):
-    def __init__(self, framework_type: Frameworks=Frameworks.tensorflow, evaluate_only: bool=False, use_cpu: bool=False,
+    def __init__(self, framework_type: Frameworks=Frameworks.tensorflow, evaluate_only: int=None, use_cpu: bool=False,
                  experiment_path='/tmp', seed=None, checkpoint_save_secs=None, checkpoint_restore_dir=None,
                  checkpoint_save_dir=None, export_onnx_graph: bool=False, apply_stop_condition: bool=False,
                  num_gpu: int=1):
         """
         :param framework_type: deep learning framework type. currently only tensorflow is supported
-        :param evaluate_only: the task will be used only for evaluating the model
+        :param evaluate_only: if not None, the task will be used only for evaluating the model for the given number of steps.
+                              A value of 0 means that task will be evaluated for an infinite number of steps.
         :param use_cpu: use the cpu for this task
         :param experiment_path: the path to the directory which will store all the experiment outputs
         :param seed: a seed to use for the random numbers generator
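For callers, evaluate_only changes from a boolean flag to an optional step count. A sketch of the new contract (assuming the rl_coach.base_parameters import path):

from rl_coach.base_parameters import TaskParameters

# evaluate_only=None  -> train as usual
# evaluate_only=0     -> evaluate for an unbounded number of steps
# evaluate_only=N > 0 -> evaluate for (at least) N environment steps
train_params = TaskParameters(evaluate_only=None)
eval_forever = TaskParameters(evaluate_only=0)
eval_100k = TaskParameters(evaluate_only=100000)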
@@ -583,13 +584,14 @@ class TaskParameters(Parameters):

 class DistributedTaskParameters(TaskParameters):
     def __init__(self, framework_type: Frameworks, parameters_server_hosts: str, worker_hosts: str, job_type: str,
-                 task_index: int, evaluate_only: bool=False, num_tasks: int=None,
+                 task_index: int, evaluate_only: int=None, num_tasks: int=None,
                  num_training_tasks: int=None, use_cpu: bool=False, experiment_path=None, dnd=None,
                  shared_memory_scratchpad=None, seed=None, checkpoint_save_secs=None, checkpoint_restore_dir=None,
                  checkpoint_save_dir=None, export_onnx_graph: bool=False, apply_stop_condition: bool=False):
         """
         :param framework_type: deep learning framework type. currently only tensorflow is supported
-        :param evaluate_only: the task will be used only for evaluating the model
+        :param evaluate_only: if not None, the task will be used only for evaluating the model for the given number of steps.
+                              A value of 0 means that task will be evaluated for an infinite number of steps.
         :param parameters_server_hosts: comma-separated list of hostname:port pairs to which the parameter servers are
                                         assigned
         :param worker_hosts: comma-separated list of hostname:port pairs to which the workers are assigned
@@ -76,8 +76,10 @@ def start_graph(graph_manager: 'GraphManager', task_parameters: 'TaskParameters'
     graph_manager.create_graph(task_parameters)

     # let the adventure begin
-    if task_parameters.evaluate_only:
-        graph_manager.evaluate(EnvironmentSteps(sys.maxsize))
+    if task_parameters.evaluate_only is not None:
+        steps_to_evaluate = task_parameters.evaluate_only if task_parameters.evaluate_only > 0 \
+            else sys.maxsize
+        graph_manager.evaluate(EnvironmentSteps(steps_to_evaluate))
     else:
         graph_manager.improve()
     graph_manager.close()
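start_graph now dispatches on three possible evaluate_only values. A self-contained sketch of the same three-way dispatch (hypothetical stubs, not Coach's GraphManager):

import sys

def dispatch(evaluate_only):
    # None -> normal training; 0 -> evaluate "forever" (sys.maxsize steps);
    # N > 0 -> evaluate for N environment steps.
    if evaluate_only is not None:
        return ('evaluate', evaluate_only if evaluate_only > 0 else sys.maxsize)
    return ('improve', None)

assert dispatch(None) == ('improve', None)
assert dispatch(0) == ('evaluate', sys.maxsize)
assert dispatch(50000) == ('evaluate', 50000)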
@@ -466,9 +468,13 @@ class CoachLauncher(object):
                                  "This option will save a replay buffer with the game play.",
                             action='store_true')
         parser.add_argument('--evaluate',
-                            help="(flag) Run evaluation only. This is a convenient way to disable "
-                                 "training in order to evaluate an existing checkpoint.",
-                            action='store_true')
+                            help="(int) Run evaluation only, for at least the given number of steps (note that complete "
+                                 "episodes are evaluated). This is a convenient way to disable training in order "
+                                 "to evaluate an existing checkpoint. If value is 0, or no value is provided, "
+                                 "evaluation will run for an infinite number of steps.",
+                            nargs='?',
+                            const=0,
+                            type=int)
         parser.add_argument('-v', '--verbosity',
                             help="(flag) Sets the verbosity level of Coach print outs. Can be either low or high.",
                             default="low",
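The nargs='?' plus const=0 pair is what lets the bare --evaluate flag keep working while also accepting a step count. A standalone check of that argparse behaviour:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--evaluate', nargs='?', const=0, type=int)

assert parser.parse_args([]).evaluate is None                    # flag absent -> train
assert parser.parse_args(['--evaluate']).evaluate == 0           # bare flag -> evaluate forever
assert parser.parse_args(['--evaluate', '100']).evaluate == 100  # explicit step count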
@@ -659,7 +665,7 @@ class CoachLauncher(object):
                 worker_hosts=worker_hosts,
                 job_type=job_type,
                 task_index=task_index,
-                evaluate_only=evaluation_worker,
+                evaluate_only=0 if evaluation_worker else None,  # 0 value for evaluation worker as it should run infinitely
                 use_cpu=args.use_cpu,
                 num_tasks=total_tasks,  # training tasks + 1 evaluation task
                 num_training_tasks=args.num_workers,
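In the distributed setup only the dedicated evaluation worker evaluates, and it should never stop on its own, hence the fixed 0. A trivial sketch of the mapping (hypothetical helper, not part of the diff):

def evaluate_only_for(evaluation_worker: bool):
    # Training workers train (None); the evaluation worker evaluates
    # for an unbounded number of steps (0).
    return 0 if evaluation_worker else None

assert evaluate_only_for(True) == 0
assert evaluate_only_for(False) is None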