From 10220be9befb142853ac44b4c06445c1007747c7 Mon Sep 17 00:00:00 2001
From: Gal Novik
Date: Sun, 3 Mar 2019 10:03:45 +0200
Subject: [PATCH] Adding support for evaluation only mode with predefined number of steps (#225)

---
 rl_coach/agents/agent.py    | 12 ++++++------
 rl_coach/base_parameters.py | 10 ++++++----
 rl_coach/coach.py           | 18 ++++++++++++------
 3 files changed, 24 insertions(+), 16 deletions(-)

diff --git a/rl_coach/agents/agent.py b/rl_coach/agents/agent.py
index dd0f3da..e7706c7 100644
--- a/rl_coach/agents/agent.py
+++ b/rl_coach/agents/agent.py
@@ -397,8 +397,7 @@ class Agent(AgentInterface):
             success_rate = self.num_successes_across_evaluation_episodes / self.num_evaluation_episodes_completed
             self.agent_logger.create_signal_value(
                 "Success Rate",
-                success_rate
-            )
+                success_rate)
             if self.ap.is_a_highest_level_agent or self.ap.task_parameters.verbosity == "high":
                 screen.log_title("{}: Finished evaluation phase. Success rate = {}, Avg Total Reward = {}"
                                  .format(self.name, np.round(success_rate, 2), np.round(evaluation_reward, 2)))
@@ -488,10 +487,11 @@ class Agent(AgentInterface):
         self.agent_logger.create_signal_value('Update Target Network', 0, overwrite=False)
         self.agent_logger.update_wall_clock_time(self.current_episode)
 
-        if self._phase != RunPhase.TEST:
-            self.agent_logger.create_signal_value('Evaluation Reward', np.nan, overwrite=False)
-            self.agent_logger.create_signal_value('Shaped Evaluation Reward', np.nan, overwrite=False)
-            self.agent_logger.create_signal_value('Success Rate', np.nan, overwrite=False)
+        # The following signals are created with meaningful values only when an evaluation phase is completed.
+        # Creating with default NaNs for any HEATUP/TRAIN/TEST episode which is not the last in an evaluation phase
+        self.agent_logger.create_signal_value('Evaluation Reward', np.nan, overwrite=False)
+        self.agent_logger.create_signal_value('Shaped Evaluation Reward', np.nan, overwrite=False)
+        self.agent_logger.create_signal_value('Success Rate', np.nan, overwrite=False)
 
         for signal in self.episode_signals:
             self.agent_logger.create_signal_value("{}/Mean".format(signal.name), signal.get_mean())

diff --git a/rl_coach/base_parameters.py b/rl_coach/base_parameters.py
index da368c3..3c03de8 100644
--- a/rl_coach/base_parameters.py
+++ b/rl_coach/base_parameters.py
@@ -550,13 +550,14 @@ class AgentParameters(Parameters):
 
 
 class TaskParameters(Parameters):
-    def __init__(self, framework_type: Frameworks=Frameworks.tensorflow, evaluate_only: bool=False, use_cpu: bool=False,
+    def __init__(self, framework_type: Frameworks=Frameworks.tensorflow, evaluate_only: int=None, use_cpu: bool=False,
                  experiment_path='/tmp', seed=None, checkpoint_save_secs=None, checkpoint_restore_dir=None,
                  checkpoint_save_dir=None, export_onnx_graph: bool=False, apply_stop_condition: bool=False,
                  num_gpu: int=1):
         """
         :param framework_type: deep learning framework type. currently only tensorflow is supported
-        :param evaluate_only: the task will be used only for evaluating the model
+        :param evaluate_only: if not None, the task will be used only for evaluating the model for the given number of steps.
+            A value of 0 means that the task will be evaluated for an infinite number of steps.
         :param use_cpu: use the cpu for this task
         :param experiment_path: the path to the directory which will store all the experiment outputs
         :param seed: a seed to use for the random numbers generator
@@ -583,13 +584,14 @@ class TaskParameters(Parameters):
 
 class DistributedTaskParameters(TaskParameters):
     def __init__(self, framework_type: Frameworks, parameters_server_hosts: str, worker_hosts: str, job_type: str,
-                 task_index: int, evaluate_only: bool=False, num_tasks: int=None,
+                 task_index: int, evaluate_only: int=None, num_tasks: int=None,
                  num_training_tasks: int=None, use_cpu: bool=False, experiment_path=None, dnd=None,
                  shared_memory_scratchpad=None, seed=None, checkpoint_save_secs=None, checkpoint_restore_dir=None,
                  checkpoint_save_dir=None, export_onnx_graph: bool=False, apply_stop_condition: bool=False):
         """
         :param framework_type: deep learning framework type. currently only tensorflow is supported
-        :param evaluate_only: the task will be used only for evaluating the model
+        :param evaluate_only: if not None, the task will be used only for evaluating the model for the given number of steps.
+            A value of 0 means that the task will be evaluated for an infinite number of steps.
         :param parameters_server_hosts: comma-separated list of hostname:port pairs to which the parameter servers are
             assigned
         :param worker_hosts: comma-separated list of hostname:port pairs to which the workers are assigned

diff --git a/rl_coach/coach.py b/rl_coach/coach.py
index 944bd16..304cf83 100644
--- a/rl_coach/coach.py
+++ b/rl_coach/coach.py
@@ -76,8 +76,10 @@ def start_graph(graph_manager: 'GraphManager', task_parameters: 'TaskParameters'
     graph_manager.create_graph(task_parameters)
 
     # let the adventure begin
-    if task_parameters.evaluate_only:
-        graph_manager.evaluate(EnvironmentSteps(sys.maxsize))
+    if task_parameters.evaluate_only is not None:
+        steps_to_evaluate = task_parameters.evaluate_only if task_parameters.evaluate_only > 0 \
+            else sys.maxsize
+        graph_manager.evaluate(EnvironmentSteps(steps_to_evaluate))
     else:
         graph_manager.improve()
     graph_manager.close()
@@ -466,9 +468,13 @@ class CoachLauncher(object):
                                  "This option will save a replay buffer with the game play.",
                             action='store_true')
         parser.add_argument('--evaluate',
-                            help="(flag) Run evaluation only. This is a convenient way to disable "
-                                 "training in order to evaluate an existing checkpoint.",
-                            action='store_true')
+                            help="(int) Run evaluation only, for at least the given number of steps (note that complete "
+                                 "episodes are evaluated). This is a convenient way to disable training in order "
+                                 "to evaluate an existing checkpoint. If the value is 0, or no value is provided, "
+                                 "evaluation will run for an infinite number of steps.",
+                            nargs='?',
+                            const=0,
+                            type=int)
         parser.add_argument('-v', '--verbosity',
                             help="(flag) Sets the verbosity level of Coach print outs. Can be either low or high.",
                             default="low",
@@ -659,7 +665,7 @@ class CoachLauncher(object):
                 worker_hosts=worker_hosts,
                 job_type=job_type,
                 task_index=task_index,
-                evaluate_only=evaluation_worker,
+                evaluate_only=0 if evaluation_worker else None,  # 0 value for evaluation worker as it should run infinitely
                 use_cpu=args.use_cpu,
                 num_tasks=total_tasks,  # training tasks + 1 evaluation task
                 num_training_tasks=args.num_workers,
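
Usage note (not part of the patch): a minimal sketch of how the reworked evaluate_only parameter behaves after this change, assuming a graph manager already created from a preset. The run() wrapper and the checkpoint path below are illustrative assumptions; the TaskParameters signature and the dispatch logic mirror the start_graph() hunk above.

import sys

from rl_coach.base_parameters import TaskParameters
from rl_coach.core_types import EnvironmentSteps

def run(graph_manager, task_parameters):
    # evaluate_only semantics introduced by this patch:
    #   None (default) -> regular training via graph_manager.improve()
    #   0              -> evaluate indefinitely (sys.maxsize environment steps)
    #   N > 0          -> evaluate for at least N environment steps (complete episodes)
    if task_parameters.evaluate_only is not None:
        steps = task_parameters.evaluate_only if task_parameters.evaluate_only > 0 else sys.maxsize
        graph_manager.evaluate(EnvironmentSteps(steps))
    else:
        graph_manager.improve()

# Example: evaluate a restored checkpoint for at least 5000 steps.
# The checkpoint directory is a hypothetical placeholder.
task_parameters = TaskParameters(evaluate_only=5000,
                                 checkpoint_restore_dir='/tmp/my_experiment/checkpoint')

From the command line, the same behavior is exposed through the reworked flag: coach -p <preset> --evaluate 5000 evaluates for at least 5000 steps, while a bare --evaluate (or --evaluate 0) evaluates until interrupted.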