Mirror of https://github.com/gryf/coach.git, synced 2025-12-18 03:30:19 +01:00
Adding support for evaluation only mode with predefined number of steps (#225)
@@ -397,8 +397,7 @@ class Agent(AgentInterface):
         success_rate = self.num_successes_across_evaluation_episodes / self.num_evaluation_episodes_completed
         self.agent_logger.create_signal_value(
             "Success Rate",
-            success_rate
-        )
+            success_rate)
         if self.ap.is_a_highest_level_agent or self.ap.task_parameters.verbosity == "high":
             screen.log_title("{}: Finished evaluation phase. Success rate = {}, Avg Total Reward = {}"
                              .format(self.name, np.round(success_rate, 2), np.round(evaluation_reward, 2)))
@@ -488,7 +487,8 @@ class Agent(AgentInterface):
         self.agent_logger.create_signal_value('Update Target Network', 0, overwrite=False)
         self.agent_logger.update_wall_clock_time(self.current_episode)

-        if self._phase != RunPhase.TEST:
-            self.agent_logger.create_signal_value('Evaluation Reward', np.nan, overwrite=False)
-            self.agent_logger.create_signal_value('Shaped Evaluation Reward', np.nan, overwrite=False)
-            self.agent_logger.create_signal_value('Success Rate', np.nan, overwrite=False)
+        # The following signals are created with meaningful values only when an evaluation phase is completed.
+        # Creating with default NaNs for any HEATUP/TRAIN/TEST episode which is not the last in an evaluation phase
+        self.agent_logger.create_signal_value('Evaluation Reward', np.nan, overwrite=False)
+        self.agent_logger.create_signal_value('Shaped Evaluation Reward', np.nan, overwrite=False)
+        self.agent_logger.create_signal_value('Success Rate', np.nan, overwrite=False)
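Note: the NaN placeholders exist so that every logged row contains every signal, keeping the experiment's CSV columns aligned whether or not an evaluation phase just ended. A minimal sketch of the pattern, using a hypothetical signals dict in place of Coach's agent_logger:

import numpy as np

signals = {}

def create_signal_value(name, value, overwrite=True):
    # overwrite=False mirrors the calls above: a NaN placeholder must not
    # clobber a real value that was already logged for this episode.
    if overwrite or name not in signals:
        signals[name] = value

create_signal_value('Evaluation Reward', 0.93)                      # real value from an evaluation phase
create_signal_value('Evaluation Reward', np.nan, overwrite=False)   # placeholder is ignored
create_signal_value('Success Rate', np.nan, overwrite=False)        # nothing logged yet -> NaN
print(signals)  # {'Evaluation Reward': 0.93, 'Success Rate': nan}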
@@ -550,13 +550,14 @@ class AgentParameters(Parameters):


 class TaskParameters(Parameters):
-    def __init__(self, framework_type: Frameworks=Frameworks.tensorflow, evaluate_only: bool=False, use_cpu: bool=False,
+    def __init__(self, framework_type: Frameworks=Frameworks.tensorflow, evaluate_only: int=None, use_cpu: bool=False,
                  experiment_path='/tmp', seed=None, checkpoint_save_secs=None, checkpoint_restore_dir=None,
                  checkpoint_save_dir=None, export_onnx_graph: bool=False, apply_stop_condition: bool=False,
                  num_gpu: int=1):
         """
         :param framework_type: deep learning framework type. currently only tensorflow is supported
-        :param evaluate_only: the task will be used only for evaluating the model
+        :param evaluate_only: if not None, the task will be used only for evaluating the model for the given number of steps.
+                              A value of 0 means that task will be evaluated for an infinite number of steps.
         :param use_cpu: use the cpu for this task
         :param experiment_path: the path to the directory which will store all the experiment outputs
         :param seed: a seed to use for the random numbers generator
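For callers, evaluate_only changes from a boolean flag to an optional step count. A sketch of the new contract (assuming the rl_coach.base_parameters import path):

from rl_coach.base_parameters import TaskParameters

# evaluate_only=None  -> train as usual
# evaluate_only=0     -> evaluate for an unbounded number of steps
# evaluate_only=N > 0 -> evaluate for (at least) N environment steps
train_params = TaskParameters(evaluate_only=None)
eval_forever = TaskParameters(evaluate_only=0)
eval_100k = TaskParameters(evaluate_only=100000)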
@@ -583,13 +584,14 @@ class TaskParameters(Parameters):

 class DistributedTaskParameters(TaskParameters):
     def __init__(self, framework_type: Frameworks, parameters_server_hosts: str, worker_hosts: str, job_type: str,
-                 task_index: int, evaluate_only: bool=False, num_tasks: int=None,
+                 task_index: int, evaluate_only: int=None, num_tasks: int=None,
                  num_training_tasks: int=None, use_cpu: bool=False, experiment_path=None, dnd=None,
                  shared_memory_scratchpad=None, seed=None, checkpoint_save_secs=None, checkpoint_restore_dir=None,
                  checkpoint_save_dir=None, export_onnx_graph: bool=False, apply_stop_condition: bool=False):
         """
         :param framework_type: deep learning framework type. currently only tensorflow is supported
-        :param evaluate_only: the task will be used only for evaluating the model
+        :param evaluate_only: if not None, the task will be used only for evaluating the model for the given number of steps.
+                              A value of 0 means that task will be evaluated for an infinite number of steps.
         :param parameters_server_hosts: comma-separated list of hostname:port pairs to which the parameter servers are
                                         assigned
         :param worker_hosts: comma-separated list of hostname:port pairs to which the workers are assigned
@@ -76,8 +76,10 @@ def start_graph(graph_manager: 'GraphManager', task_parameters: 'TaskParameters'
     graph_manager.create_graph(task_parameters)

     # let the adventure begin
-    if task_parameters.evaluate_only:
-        graph_manager.evaluate(EnvironmentSteps(sys.maxsize))
+    if task_parameters.evaluate_only is not None:
+        steps_to_evaluate = task_parameters.evaluate_only if task_parameters.evaluate_only > 0 \
+            else sys.maxsize
+        graph_manager.evaluate(EnvironmentSteps(steps_to_evaluate))
     else:
         graph_manager.improve()
     graph_manager.close()
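start_graph now dispatches on three possible evaluate_only values. A self-contained sketch of the same three-way dispatch (hypothetical stubs, not Coach's GraphManager):

import sys

def dispatch(evaluate_only):
    # None -> normal training; 0 -> evaluate "forever" (sys.maxsize steps);
    # N > 0 -> evaluate for N environment steps.
    if evaluate_only is not None:
        return ('evaluate', evaluate_only if evaluate_only > 0 else sys.maxsize)
    return ('improve', None)

assert dispatch(None) == ('improve', None)
assert dispatch(0) == ('evaluate', sys.maxsize)
assert dispatch(50000) == ('evaluate', 50000)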
@@ -466,9 +468,13 @@ class CoachLauncher(object):
                                  "This option will save a replay buffer with the game play.",
                             action='store_true')
         parser.add_argument('--evaluate',
-                            help="(flag) Run evaluation only. This is a convenient way to disable "
-                                 "training in order to evaluate an existing checkpoint.",
-                            action='store_true')
+                            help="(int) Run evaluation only, for at least the given number of steps (note that complete "
+                                 "episodes are evaluated). This is a convenient way to disable training in order "
+                                 "to evaluate an existing checkpoint. If value is 0, or no value is provided, "
+                                 "evaluation will run for an infinite number of steps.",
+                            nargs='?',
+                            const=0,
+                            type=int)
         parser.add_argument('-v', '--verbosity',
                             help="(flag) Sets the verbosity level of Coach print outs. Can be either low or high.",
                             default="low",
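The nargs='?' plus const=0 pair is what lets the bare --evaluate flag keep working while also accepting a step count. A standalone check of that argparse behaviour:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--evaluate', nargs='?', const=0, type=int)

assert parser.parse_args([]).evaluate is None                    # flag absent -> train
assert parser.parse_args(['--evaluate']).evaluate == 0           # bare flag -> evaluate forever
assert parser.parse_args(['--evaluate', '100']).evaluate == 100  # explicit step count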
@@ -659,7 +665,7 @@ class CoachLauncher(object):
                 worker_hosts=worker_hosts,
                 job_type=job_type,
                 task_index=task_index,
-                evaluate_only=evaluation_worker,
+                evaluate_only=0 if evaluation_worker else None,  # 0 value for evaluation worker as it should run infinitely
                 use_cpu=args.use_cpu,
                 num_tasks=total_tasks,  # training tasks + 1 evaluation task
                 num_training_tasks=args.num_workers,
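In the distributed setup only the dedicated evaluation worker evaluates, and it should never stop on its own, hence the fixed 0. A trivial sketch of the mapping (hypothetical helper, not part of the diff):

def evaluate_only_for(evaluation_worker: bool):
    # Training workers train (None); the evaluation worker evaluates
    # for an unbounded number of steps (0).
    return 0 if evaluation_worker else None

assert evaluate_only_for(True) == 0
assert evaluate_only_for(False) is None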