1
0
mirror of https://github.com/gryf/coach.git synced 2025-12-18 03:30:19 +01:00

Adding support for evaluation only mode with predefined number of steps (#225)

This commit is contained in:
Gal Novik
2019-03-03 10:03:45 +02:00
committed by Gal Leibovich
parent 2c1a9dbf20
commit 10220be9be
3 changed files with 24 additions and 16 deletions

View File

@@ -397,8 +397,7 @@ class Agent(AgentInterface):
success_rate = self.num_successes_across_evaluation_episodes / self.num_evaluation_episodes_completed
self.agent_logger.create_signal_value(
"Success Rate",
success_rate
)
success_rate)
if self.ap.is_a_highest_level_agent or self.ap.task_parameters.verbosity == "high":
screen.log_title("{}: Finished evaluation phase. Success rate = {}, Avg Total Reward = {}"
.format(self.name, np.round(success_rate, 2), np.round(evaluation_reward, 2)))
@@ -488,7 +487,8 @@ class Agent(AgentInterface):
self.agent_logger.create_signal_value('Update Target Network', 0, overwrite=False)
self.agent_logger.update_wall_clock_time(self.current_episode)
if self._phase != RunPhase.TEST:
# The following signals are created with meaningful values only when an evaluation phase is completed.
# Creating with default NaNs for any HEATUP/TRAIN/TEST episode which is not the last in an evaluation phase
self.agent_logger.create_signal_value('Evaluation Reward', np.nan, overwrite=False)
self.agent_logger.create_signal_value('Shaped Evaluation Reward', np.nan, overwrite=False)
self.agent_logger.create_signal_value('Success Rate', np.nan, overwrite=False)

View File

@@ -550,13 +550,14 @@ class AgentParameters(Parameters):
class TaskParameters(Parameters):
def __init__(self, framework_type: Frameworks=Frameworks.tensorflow, evaluate_only: bool=False, use_cpu: bool=False,
def __init__(self, framework_type: Frameworks=Frameworks.tensorflow, evaluate_only: int=None, use_cpu: bool=False,
experiment_path='/tmp', seed=None, checkpoint_save_secs=None, checkpoint_restore_dir=None,
checkpoint_save_dir=None, export_onnx_graph: bool=False, apply_stop_condition: bool=False,
num_gpu: int=1):
"""
:param framework_type: deep learning framework type. currently only tensorflow is supported
:param evaluate_only: the task will be used only for evaluating the model
:param evaluate_only: if not None, the task will be used only for evaluating the model for the given number of steps.
A value of 0 means that task will be evaluated for an infinite number of steps.
:param use_cpu: use the cpu for this task
:param experiment_path: the path to the directory which will store all the experiment outputs
:param seed: a seed to use for the random numbers generator
@@ -583,13 +584,14 @@ class TaskParameters(Parameters):
class DistributedTaskParameters(TaskParameters):
def __init__(self, framework_type: Frameworks, parameters_server_hosts: str, worker_hosts: str, job_type: str,
task_index: int, evaluate_only: bool=False, num_tasks: int=None,
task_index: int, evaluate_only: int=None, num_tasks: int=None,
num_training_tasks: int=None, use_cpu: bool=False, experiment_path=None, dnd=None,
shared_memory_scratchpad=None, seed=None, checkpoint_save_secs=None, checkpoint_restore_dir=None,
checkpoint_save_dir=None, export_onnx_graph: bool=False, apply_stop_condition: bool=False):
"""
:param framework_type: deep learning framework type. currently only tensorflow is supported
:param evaluate_only: the task will be used only for evaluating the model
:param evaluate_only: if not None, the task will be used only for evaluating the model for the given number of steps.
A value of 0 means that task will be evaluated for an infinite number of steps.
:param parameters_server_hosts: comma-separated list of hostname:port pairs to which the parameter servers are
assigned
:param worker_hosts: comma-separated list of hostname:port pairs to which the workers are assigned

View File

@@ -76,8 +76,10 @@ def start_graph(graph_manager: 'GraphManager', task_parameters: 'TaskParameters'
graph_manager.create_graph(task_parameters)
# let the adventure begin
if task_parameters.evaluate_only:
graph_manager.evaluate(EnvironmentSteps(sys.maxsize))
if task_parameters.evaluate_only is not None:
steps_to_evaluate = task_parameters.evaluate_only if task_parameters.evaluate_only > 0 \
else sys.maxsize
graph_manager.evaluate(EnvironmentSteps(steps_to_evaluate))
else:
graph_manager.improve()
graph_manager.close()
@@ -466,9 +468,13 @@ class CoachLauncher(object):
"This option will save a replay buffer with the game play.",
action='store_true')
parser.add_argument('--evaluate',
help="(flag) Run evaluation only. This is a convenient way to disable "
"training in order to evaluate an existing checkpoint.",
action='store_true')
help="(int) Run evaluation only, for at least the given number of steps (note that complete "
"episodes are evaluated). This is a convenient way to disable training in order "
"to evaluate an existing checkpoint. If value is 0, or no value is provided, "
"evaluation will run for an infinite number of steps.",
nargs='?',
const=0,
type=int)
parser.add_argument('-v', '--verbosity',
help="(flag) Sets the verbosity level of Coach print outs. Can be either low or high.",
default="low",
@@ -659,7 +665,7 @@ class CoachLauncher(object):
worker_hosts=worker_hosts,
job_type=job_type,
task_index=task_index,
evaluate_only=evaluation_worker,
evaluate_only=0 if evaluation_worker else None, # 0 value for evaluation worker as it should run infinitely
use_cpu=args.use_cpu,
num_tasks=total_tasks, # training tasks + 1 evaluation task
num_training_tasks=args.num_workers,