mirror of
https://github.com/gryf/coach.git
synced 2025-12-17 11:10:20 +01:00
Until now, most modules imported all of another module's objects (variables, classes, functions, other imports) into their own namespace, which could lead (and did lead) to unintentional use of indirectly imported classes and methods. With this patch, all star imports are replaced with imports of the top-level module that provides the desired class or function. In addition, all imports are sorted (where possible) as PEP 8[1] suggests: first imports from the standard library, then third-party imports (numpy, tensorflow, etc.), and finally coach modules, with the groups separated by a blank line. [1] https://www.python.org/dev/peps/pep-0008/#imports
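For illustration, the resulting grouping looks like this (a minimal sketch; the module names are examples, not the full import list from the patch):

import os
import sys

import numpy as np

import logger
import utils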
331 lines
14 KiB
Python
#
# Copyright (c) 2017 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import argparse
import atexit
import json
import os
import re
import subprocess
import sys
import time

import agents
import configurations as conf
import environments
import logger
import presets
import utils


if len(set(logger.failed_imports)) > 0:
    logger.screen.warning("Warning: failed to import the following packages - {}".format(', '.join(set(logger.failed_imports))))


def set_framework(framework_type):
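    """Configure the chosen deep learning framework and return its session.

    Returns a tf.Session for TensorFlow or an ngraph transformer for Neon,
    or None if the framework type is not recognized.
    """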
    # choosing neural network framework
    framework = conf.Frameworks().get(framework_type)
    sess = None
    if framework == conf.Frameworks.TensorFlow:
        import tensorflow as tf
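        # share the GPU between processes: allow soft device placement and
        # cap each process at 20% of GPU memory, growing allocation on demand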
        config = tf.ConfigProto()
        config.allow_soft_placement = True
        config.gpu_options.allow_growth = True
        config.gpu_options.per_process_gpu_memory_fraction = 0.2
        sess = tf.Session(config=config)
    elif framework == conf.Frameworks.Neon:
        import ngraph as ng
        sess = ng.transformers.make_transformer()
    logger.screen.log_title("Using {} framework".format(conf.Frameworks().to_string(framework)))
    return sess


def check_input_and_fill_run_dict(parser):
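    """Validate the command-line arguments and build the run configuration.

    Exits with an error message on invalid input; otherwise returns the parsed
    args together with a run_dict describing the requested experiment.
    """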
    args = parser.parse_args()

    # if no arg is given
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)

    # list available presets
    if args.list:
        presets_lists = utils.list_all_classes_in_module(presets)
        logger.screen.log_title("Available Presets:")
        for preset in presets_lists:
            print(preset)
        sys.exit(0)

    # check inputs
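    # num_workers must be a non-negative integer; the regex (unlike a bare
    # int() call) also rejects values such as "-1"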
    try:
        num_workers = int(re.match(r"^\d+$", args.num_workers).group(0))
    except (ValueError, AttributeError):
        # re.match() returns None (-> AttributeError) when the value is not
        # a plain unsigned integer
        logger.screen.error("Parameter num_workers should be an integer.")

    preset_names = utils.list_all_classes_in_module(presets)
    if args.preset is not None and args.preset not in preset_names:
        logger.screen.error("A non-existing preset was selected.")

    if args.checkpoint_restore_dir is not None and not os.path.exists(args.checkpoint_restore_dir):
        logger.screen.error("The requested checkpoint folder to load from does not exist.")

    if args.save_model_sec is not None:
        try:
            args.save_model_sec = int(args.save_model_sec)
        except ValueError:
            logger.screen.error("Parameter save_model_sec should be an integer.")

    if args.preset is None and (args.agent_type is None or args.environment_type is None
                                or args.exploration_policy_type is None) and not args.play:
        logger.screen.error('When no preset is given for Coach to run, the user is expected to input the desired '
                            'agent_type, environment_type and exploration_policy_type to assemble a preset. '
                            '\nAt least one of these parameters was not given.')
    elif args.preset is None and args.play and args.environment_type is None:
        logger.screen.error('When no preset is given for Coach to run, and the user requests human control over '
                            'the environment, the user is expected to input the desired environment_type and level.'
                            '\nAt least one of these parameters was not given.')
    elif args.preset is None and args.play and args.environment_type:
        args.agent_type = 'Human'
        args.exploration_policy_type = 'ExplorationParameters'

    # get experiment name and path
    experiment_name = logger.logger.get_experiment_name(args.experiment_name)
    experiment_path = logger.logger.get_experiment_path(experiment_name)

    if args.play and num_workers > 1:
        logger.screen.warning("Playing the game as a human is only available with a single worker. "
                              "The number of workers will be reduced to 1")
        num_workers = 1

    # fill run_dict
    run_dict = dict()
    run_dict['agent_type'] = args.agent_type
    run_dict['environment_type'] = args.environment_type
    run_dict['exploration_policy_type'] = args.exploration_policy_type
    run_dict['level'] = args.level
    run_dict['preset'] = args.preset
    run_dict['custom_parameter'] = args.custom_parameter
    run_dict['experiment_path'] = experiment_path
    run_dict['framework'] = conf.Frameworks().get(args.framework)
    run_dict['play'] = args.play
    run_dict['evaluate'] = args.evaluate  # or args.play

    # multi-threading parameters
    run_dict['num_threads'] = num_workers

    # checkpoints
    run_dict['save_model_sec'] = args.save_model_sec
    run_dict['save_model_dir'] = experiment_path if args.save_model_sec is not None else None
    run_dict['checkpoint_restore_dir'] = args.checkpoint_restore_dir

    # visualization
    run_dict['visualization.dump_gifs'] = args.dump_gifs
    run_dict['visualization.render'] = args.render
    run_dict['visualization.tensorboard'] = args.tensorboard

    return args, run_dict


def run_dict_to_json(_run_dict, task_id=''):
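    """Serialize a run dictionary to a JSON file under the experiment path.

    When a task_id is given, the file name is suffixed with the worker id so
    that each worker gets its own configuration file. Returns the file path.
    """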
    if task_id != '':
        json_path = os.path.join(_run_dict['experiment_path'], 'run_dict_worker{}.json'.format(task_id))
    else:
        json_path = os.path.join(_run_dict['experiment_path'], 'run_dict.json')

    with open(json_path, 'w') as outfile:
        json.dump(_run_dict, outfile, indent=2)

    return json_path


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
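    # a typical invocation, assuming this file is coach.py (the preset name
    # below is only a placeholder):
    #     python3 coach.py -p <preset_name> -n 4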
    parser.add_argument('-p', '--preset',
                        help="(string) Name of a preset to run (as configured in presets.py)",
                        default=None,
                        type=str)
    parser.add_argument('-l', '--list',
                        help="(flag) List all available presets",
                        action='store_true')
    parser.add_argument('-e', '--experiment_name',
                        help="(string) Experiment name to be used to store the results.",
                        default='',
                        type=str)
    parser.add_argument('-r', '--render',
                        help="(flag) Render environment",
                        action='store_true')
    parser.add_argument('-f', '--framework',
                        help="(string) Neural network framework. Available values: tensorflow, neon",
                        default='tensorflow',
                        type=str)
    parser.add_argument('-n', '--num_workers',
                        help="(int) Number of workers for multi-process based agents, e.g. A3C",
                        default='1',
                        type=str)
    parser.add_argument('--play',
                        help="(flag) Play as a human by controlling the game with the keyboard. "
                             "This option will save a replay buffer with the game play.",
                        action='store_true')
    parser.add_argument('--evaluate',
                        help="(flag) Run evaluation only. This is a convenient way to disable "
                             "training in order to evaluate an existing checkpoint.",
                        action='store_true')
    parser.add_argument('-v', '--verbose',
                        help="(flag) Don't suppress TensorFlow debug prints.",
                        action='store_true')
    parser.add_argument('-s', '--save_model_sec',
                        help="(int) Time in seconds between saving checkpoints of the model.",
                        default=None,
                        type=int)
    parser.add_argument('-crd', '--checkpoint_restore_dir',
                        help='(string) Path to a folder containing a checkpoint to restore the model from.',
                        type=str)
    parser.add_argument('-dg', '--dump_gifs',
                        help="(flag) Enable the gif saving functionality.",
                        action='store_true')
    parser.add_argument('-at', '--agent_type',
                        help="(string) Choose an agent type class to override on top of the selected preset. "
                             "If no preset is defined, a preset can be set from the command-line by combining "
                             "settings which are set by using --agent_type, --environment_type, "
                             "--exploration_policy_type",
                        default=None,
                        type=str)
    parser.add_argument('-et', '--environment_type',
                        help="(string) Choose an environment type class to override on top of the selected preset. "
                             "If no preset is defined, a preset can be set from the command-line by combining "
                             "settings which are set by using --agent_type, --environment_type, "
                             "--exploration_policy_type",
                        default=None,
                        type=str)
    parser.add_argument('-ept', '--exploration_policy_type',
                        help="(string) Choose an exploration policy type class to override on top of the selected "
                             "preset. If no preset is defined, a preset can be set from the command-line by "
                             "combining settings which are set by using --agent_type, --environment_type, "
                             "--exploration_policy_type",
                        default=None,
                        type=str)
    parser.add_argument('-lvl', '--level',
                        help="(string) Choose the level that will be played in the environment that was selected. "
                             "This value will override the level parameter in the environment class.",
                        default=None,
                        type=str)
    parser.add_argument('-cp', '--custom_parameter',
                        help="(string) Semicolon separated parameters used to override specific parameters on top of"
                             " the selected preset (or on top of the command-line assembled one). "
                             "Whenever a parameter value is a string, it should be inputted as '\\\"string\\\"'. "
                             "For ex.: "
                             "\"visualization.render=False; num_training_iterations=500; optimizer='rmsprop'\"",
                        default=None,
                        type=str)
    parser.add_argument('--print_parameters',
                        help="(flag) Print tuning_parameters to stdout",
                        action='store_true')
    parser.add_argument('-tb', '--tensorboard',
                        help="(flag) When using the TensorFlow backend, enable TensorBoard log dumps.",
                        action='store_true')
    parser.add_argument('-ns', '--no_summary',
                        help="(flag) Prevent Coach from printing a summary and asking questions at the end of runs",
                        action='store_true')

    args, run_dict = check_input_and_fill_run_dict(parser)

    # turn TF debug prints off
    if not args.verbose and args.framework.lower() == 'tensorflow':
        os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

    # dump documentation
    logger.logger.set_dump_dir(run_dict['experiment_path'], add_timestamp=True)
    if not args.no_summary:
        atexit.register(logger.logger.summarize_experiment)
        logger.screen.change_terminal_title(logger.logger.experiment_name)

    # Single-threaded runs
    if run_dict['num_threads'] == 1:
        # set tuning parameters
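        # the run configuration is round-tripped through a JSON file so that
        # single- and multi-threaded runs share the same loading code path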
        json_run_dict_path = run_dict_to_json(run_dict)
        tuning_parameters = presets.json_to_preset(json_run_dict_path)
        tuning_parameters.sess = set_framework(args.framework)

        if args.print_parameters:
            print('tuning_parameters', tuning_parameters)

        tuning_parameters.task_index = 0
        env_instance = environments.create_environment(tuning_parameters)
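        # look up the agent class by name and instantiate it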
        agent = getattr(agents, tuning_parameters.agent.type)(env_instance, tuning_parameters)

        # Start the training or evaluation
        if tuning_parameters.evaluate:
            agent.evaluate(sys.maxsize, keep_networks_synced=True)  # evaluate forever
        else:
            agent.improve()

    # Multi-threaded runs
    else:
        assert args.framework.lower() == 'tensorflow', "Distributed training works only with TensorFlow"
        os.environ["OMP_NUM_THREADS"] = "1"

        # set parameter server and workers addresses
        ps_hosts = "localhost:{}".format(utils.get_open_port())
        worker_hosts = ",".join(["localhost:{}".format(utils.get_open_port())
                                 for _ in range(run_dict['num_threads'] + 1)])
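        # num_threads + 1 addresses: one per training worker plus one for the
        # evaluation worker that is started at the end of the loop below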

        # Make sure to disable GPU so that all the workers will use the CPU
        utils.set_cpu()

        # create a parameter server
        cmd = [
            "python3",
            "./parallel_actor.py",
            "--ps_hosts={}".format(ps_hosts),
            "--worker_hosts={}".format(worker_hosts),
            "--job_name=ps",
        ]
        parameter_server = subprocess.Popen(cmd)

        logger.screen.log_title("*** Distributed Training ***")
        time.sleep(1)

        # create N training workers and 1 evaluating worker
        workers = []

        for i in range(run_dict['num_threads'] + 1):
            run_dict['task_id'] = i
            # the last worker is the evaluation worker
            if i == run_dict['num_threads']:
                run_dict['evaluate_only'] = True
                run_dict['visualization.render'] = args.render
            else:
                run_dict['evaluate_only'] = False
                run_dict['visualization.render'] = False  # in a parallel setting, only the evaluation agent renders

            json_run_dict_path = run_dict_to_json(run_dict, i)
            workers_args = ["python3", "./parallel_actor.py",
                            "--ps_hosts={}".format(ps_hosts),
                            "--worker_hosts={}".format(worker_hosts),
                            "--job_name=worker",
                            "--load_json={}".format(json_run_dict_path)]

            p = subprocess.Popen(workers_args)

            if i != run_dict['num_threads']:
                workers.append(p)
            else:
                evaluation_worker = p

        # wait for all workers
        for w in workers:
            w.wait()
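        # the evaluation worker evaluates forever, so terminate it explicitly
        # once all the training workers have finished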
        evaluation_worker.kill()