Adding support for evaluation-only mode with a predefined number of steps (#225)

2026-03-11 12:05:47 +01:00 · 2019-03-03 10:03:45 +02:00
parent 2c1a9dbf20
commit 10220be9be
3 changed files with 24 additions and 16 deletions
--- a/rl_coach/coach.py
+++ b/rl_coach/coach.py
@@ -76,8 +76,10 @@ def start_graph(graph_manager: 'GraphManager', task_parameters: 'TaskParameters'
    graph_manager.create_graph(task_parameters)

    # let the adventure begin
-    if task_parameters.evaluate_only:
-        graph_manager.evaluate(EnvironmentSteps(sys.maxsize))
+    if task_parameters.evaluate_only is not None:
+        steps_to_evaluate = task_parameters.evaluate_only if task_parameters.evaluate_only > 0 \
+            else sys.maxsize
+        graph_manager.evaluate(EnvironmentSteps(steps_to_evaluate))
    else:
        graph_manager.improve()
    graph_manager.close()
@@ -466,9 +468,13 @@ class CoachLauncher(object):
                                 "This option will save a replay buffer with the game play.",
                            action='store_true')
        parser.add_argument('--evaluate',
-                            help="(flag) Run evaluation only. This is a convenient way to disable "
-                                 "training in order to evaluate an existing checkpoint.",
-                            action='store_true')
+                            help="(int) Run evaluation only, for at least the given number of steps (note that complete "
+                                "episodes are evaluated). This is a convenient way to disable training in order "
+                                "to evaluate an existing checkpoint. If value is 0, or no value is provided, "
+                                "evaluation will run for an infinite number of steps.",
+                            nargs='?',
+                            const=0,
+                            type=int)
        parser.add_argument('-v', '--verbosity',
                            help="(flag) Sets the verbosity level of Coach print outs. Can be either low or high.",
                            default="low",
@@ -659,7 +665,7 @@ class CoachLauncher(object):
                worker_hosts=worker_hosts,
                job_type=job_type,
                task_index=task_index,
-                evaluate_only=evaluation_worker,
+                evaluate_only=0 if evaluation_worker else None, # 0 value for evaluation worker as it should run infinitely
                use_cpu=args.use_cpu,
                num_tasks=total_tasks,  # training tasks + 1 evaluation task
                num_training_tasks=args.num_workers,