diff --git a/coach/agents/__init__.py b/coach/agents/__init__.py
index da6f38e..662bb2c 100644
--- a/coach/agents/__init__.py
+++ b/coach/agents/__init__.py
@@ -23,7 +23,6 @@ from coach.agents.ddpg_agent import DDPGAgent
 from coach.agents.ddqn_agent import DDQNAgent
 from coach.agents.dfp_agent import DFPAgent
 from coach.agents.dqn_agent import DQNAgent
-from coach.agents.human_agent import HumanAgent
 from coach.agents.imitation_agent import ImitationAgent
 from coach.agents.mmc_agent import MixedMonteCarloAgent
 from coach.agents.n_step_q_agent import NStepQAgent
@@ -46,7 +45,6 @@ __all__ = [ActorCriticAgent,
            DDQNAgent,
            DFPAgent,
            DQNAgent,
-           HumanAgent,
            ImitationAgent,
            MixedMonteCarloAgent,
            NAFAgent,
diff --git a/coach/agents/human_agent.py b/coach/agents/human_agent.py
deleted file mode 100644
index c9c3e5f..0000000
--- a/coach/agents/human_agent.py
+++ /dev/null
@@ -1,73 +0,0 @@
-#
-# Copyright (c) 2017 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-import collections
-import os
-
-import pygame
-from pandas.io import pickle
-
-from coach.agents import agent
-from coach import logger
-from coach import utils
-
-
-class HumanAgent(agent.Agent):
-    def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0):
-        agent.Agent.__init__(self, env, tuning_parameters, replicated_device, thread_id)
-
-        self.clock = pygame.time.Clock()
-        self.max_fps = int(self.tp.visualization.max_fps_for_human_control)
-
-        logger.screen.log_title("Human Control Mode")
-        available_keys = self.env.get_available_keys()
-        if available_keys:
-            logger.screen.log("Use keyboard keys to move. Press escape to quit. Available keys:")
-            logger.screen.log("")
-            for action, key in self.env.get_available_keys():
-                logger.screen.log("\t- {}: {}".format(action, key))
-            logger.screen.separator()
-
-    def train(self):
-        return 0
-
-    def choose_action(self, curr_state, phase=utils.RunPhase.TRAIN):
-        action = self.env.get_action_from_user()
-
-        # keep constant fps
-        self.clock.tick(self.max_fps)
-
-        if not self.env.renderer.is_open:
-            self.save_replay_buffer_and_exit()
-
-        return action, {"action_value": 0}
-
-    def save_replay_buffer_and_exit(self):
-        replay_buffer_path = os.path.join(logger.logger.experiments_path, 'replay_buffer.p')
-        self.memory.tp = None
-        pickle.to_pickle(self.memory, replay_buffer_path)
-        logger.screen.log_title("Replay buffer was stored in {}".format(replay_buffer_path))
-        exit()
-
-    def log_to_screen(self, phase):
-        # log to logger.screen
-        logger.screen.log_dict(
-            collections.OrderedDict([
-                ("Episode", self.current_episode),
-                ("total reward", self.total_reward_in_current_episode),
-                ("steps", self.total_steps_counter)
-            ]),
-            prefix="Recording"
-        )
diff --git a/coach/architectures/network_wrapper.py b/coach/architectures/network_wrapper.py
index 7e5d045..b4790de 100644
--- a/coach/architectures/network_wrapper.py
+++ b/coach/architectures/network_wrapper.py
@@ -16,18 +16,10 @@
 import os
 import collections
 
-from coach import configurations as conf
-from coach import logger
-try:
-    import tensorflow as tf
-    from coach.architectures.tensorflow_components import general_network as tf_net
-except ImportError:
-    logger.failed_imports.append("TensorFlow")
+import tensorflow as tf
 
-try:
-    from coach.architectures.neon_components import general_network as neon_net
-except ImportError:
-    logger.failed_imports.append("Neon")
+from coach.architectures.tensorflow_components import general_network as tf_net
+from coach import logger
 
 
 class NetworkWrapper(object):
@@ -50,13 +42,7 @@ class NetworkWrapper(object):
         self.has_global = has_global
         self.name = name
         self.sess = tuning_parameters.sess
-
-        if self.tp.framework == conf.Frameworks.TensorFlow:
-            general_network = tf_net.GeneralTensorFlowNetwork
-        elif self.tp.framework == conf.Frameworks.Neon:
-            general_network = neon_net.GeneralNeonNetwork
-        else:
-            raise Exception("{} Framework is not supported".format(conf.Frameworks().to_string(self.tp.framework)))
+        general_network = tf_net.GeneralTensorFlowNetwork
 
         # Global network - the main network shared between threads
         self.global_network = None
@@ -78,7 +64,7 @@
             self.target_network = general_network(tuning_parameters, '{}/target'.format(name),
                                                    network_is_local=True)
 
-        if not self.tp.distributed and self.tp.framework == conf.Frameworks.TensorFlow:
+        if not self.tp.distributed:
             variables_to_restore = tf.global_variables()
             variables_to_restore = [v for v in variables_to_restore if '/online' in v.name]
             self.model_saver = tf.train.Saver(variables_to_restore)
diff --git a/coach/cli.py b/coach/cli.py
index 71dee80..7d75287 100644
--- a/coach/cli.py
+++ b/coach/cli.py
@@ -22,6 +22,8 @@ import subprocess
 import sys
 import time
 
+import tensorflow as tf
+
 from coach import agents  # noqa
 from coach import configurations as conf
 from coach import environments
@@ -30,28 +32,6 @@
 from coach import presets
 from coach import utils
 
 
-if len(set(logger.failed_imports)) > 0:
-    logger.screen.warning("Warning: failed to import the following packages - {}".format(', '.join(set(logger.failed_imports))))
-
-
-def set_framework(framework_type):
-    # choosing neural network framework
-    framework = conf.Frameworks().get(framework_type)
-    sess = None
-    if framework == conf.Frameworks.TensorFlow:
-        import tensorflow as tf
-        config = tf.ConfigProto()
-        config.allow_soft_placement = True
-        config.gpu_options.allow_growth = True
-        config.gpu_options.per_process_gpu_memory_fraction = 0.2
-        sess = tf.Session(config=config)
-    elif framework == conf.Frameworks.Neon:
-        import ngraph as ng
-        sess = ng.transformers.make_transformer()
-    logger.screen.log_title("Using {} framework".format(conf.Frameworks().to_string(framework)))
-    return sess
-
-
 def check_input_and_fill_run_dict(parser):
     args = parser.parse_args()
@@ -68,48 +48,28 @@ def check_input_and_fill_run_dict(parser):
             print(preset)
         sys.exit(0)
 
-    # check inputs
-    try:
-        # num_workers = int(args.num_workers)
-        num_workers = int(re.match("^\d+$", args.num_workers).group(0))
-    except ValueError:
-        logger.screen.error("Parameter num_workers should be an integer.")
-
     preset_names = utils.list_all_classes_in_module(presets)
     if args.preset is not None and args.preset not in preset_names:
         logger.screen.error("A non-existing preset was selected. ")
 
-    if args.checkpoint_restore_dir is not None and not os.path.exists(args.checkpoint_restore_dir):
-        logger.screen.error("The requested checkpoint folder to load from does not exist. ")
+    if (args.checkpoint_restore_dir is not None and not
+            os.path.exists(args.checkpoint_restore_dir)):
+        logger.screen.error("The requested checkpoint folder to load from "
+                            "does not exist. ")
 
-    if args.save_model_sec is not None:
-        try:
-            args.save_model_sec = int(args.save_model_sec)
-        except ValueError:
-            logger.screen.error("Parameter save_model_sec should be an integer.")
-
-    if args.preset is None and (args.agent_type is None or args.environment_type is None
-                                or args.exploration_policy_type is None) and not args.play:
-        logger.screen.error('When no preset is given for Coach to run, the user is expected to input the desired agent_type,'
-                            ' environment_type and exploration_policy_type to assemble a preset. '
-                            '\nAt least one of these parameters was not given.')
-    elif args.preset is None and args.play and args.environment_type is None:
-        logger.screen.error('When no preset is given for Coach to run, and the user requests human control over the environment,'
-                            ' the user is expected to input the desired environment_type and level.'
-                            '\nAt least one of these parameters was not given.')
-    elif args.preset is None and args.play and args.environment_type:
-        args.agent_type = 'Human'
-        args.exploration_policy_type = 'ExplorationParameters'
+    if (not args.preset and not
+            all([args.agent_type, args.environment_type,
+                 args.exploration_policy_type])):
+        logger.screen.error('When no preset is given for Coach to run, the '
+                            'user is expected to input the desired agent_type,'
+                            ' environment_type and exploration_policy_type to'
+                            ' assemble a preset.\nAt least one of these '
+                            'parameters was not given.')
 
     # get experiment name and path
     experiment_name = logger.logger.get_experiment_name(args.experiment_name)
     experiment_path = logger.logger.get_experiment_path(experiment_name)
 
-    if args.play and num_workers > 1:
-        logger.screen.warning("Playing the game as a human is only available with a single worker. "
-                              "The number of workers will be reduced to 1")
-        num_workers = 1
-
     # fill run_dict
     run_dict = dict()
     run_dict['agent_type'] = args.agent_type
@@ -119,16 +79,16 @@ def check_input_and_fill_run_dict(parser):
     run_dict['preset'] = args.preset
     run_dict['custom_parameter'] = args.custom_parameter
     run_dict['experiment_path'] = experiment_path
-    run_dict['framework'] = conf.Frameworks().get(args.framework)
-    run_dict['play'] = args.play
-    run_dict['evaluate'] = args.evaluate# or args.play
+    run_dict['evaluate'] = args.evaluate
 
     # multi-threading parameters
-    run_dict['num_threads'] = num_workers
+    run_dict['num_threads'] = args.num_workers
 
     # checkpoints
     run_dict['save_model_sec'] = args.save_model_sec
-    run_dict['save_model_dir'] = experiment_path if args.save_model_sec is not None else None
+    run_dict['save_model_dir'] = None
+    if args.save_model_sec:
+        run_dict['save_model_dir'] = experiment_path
     run_dict['checkpoint_restore_dir'] = args.checkpoint_restore_dir
 
     # visualization
@@ -141,7 +101,8 @@ def check_input_and_fill_run_dict(parser):
 
 def run_dict_to_json(_run_dict, task_id=''):
     if task_id != '':
-        json_path = os.path.join(_run_dict['experiment_path'], 'run_dict_worker{}.json'.format(task_id))
+        json_path = os.path.join(_run_dict['experiment_path'],
+                                 'run_dict_worker{}.json'.format(task_id))
     else:
         json_path = os.path.join(_run_dict['experiment_path'], 'run_dict.json')
 
@@ -153,97 +114,82 @@
 
 def main():
     parser = argparse.ArgumentParser()
-    parser.add_argument('-p', '--preset',
-                        help="(string) Name of a preset to run (as configured in presets.py)",
-                        default=None,
-                        type=str)
-    parser.add_argument('-l', '--list',
-                        help="(flag) List all available presets",
-                        action='store_true')
-    parser.add_argument('-e', '--experiment_name',
-                        help="(string) Experiment name to be used to store the results.",
-                        default='',
-                        type=str)
-    parser.add_argument('-r', '--render',
-                        help="(flag) Render environment",
-                        action='store_true')
-    parser.add_argument('-f', '--framework',
-                        help="(string) Neural network framework. Available values: tensorflow, neon",
-                        default='tensorflow',
-                        type=str)
-    parser.add_argument('-n', '--num_workers',
-                        help="(int) Number of workers for multi-process based agents, e.g. A3C",
-                        default='1',
-                        type=str)
-    parser.add_argument('--play',
-                        help="(flag) Play as a human by controlling the game with the keyboard. "
-                             "This option will save a replay buffer with the game play.",
-                        action='store_true')
-    parser.add_argument('--evaluate',
-                        help="(flag) Run evaluation only. This is a convenient way to disable "
-                             "training in order to evaluate an existing checkpoint.",
-                        action='store_true')
-    parser.add_argument('-v', '--verbose',
-                        help="(flag) Don't suppress TensorFlow debug prints.",
-                        action='store_true')
-    parser.add_argument('-s', '--save_model_sec',
-                        help="(int) Time in seconds between saving checkpoints of the model.",
-                        default=None,
-                        type=int)
+    parser.add_argument('-p', '--preset', default=None,
+                        help='(string) Name of a preset to run (as configured '
+                             'in presets.py)')
+    parser.add_argument('-l', '--list', action='store_true',
+                        help='(flag) List all available presets')
+    parser.add_argument('-e', '--experiment_name', default='',
+                        help='(string) Experiment name to be used to store '
+                             'the results.')
+    parser.add_argument('-r', '--render', action='store_true',
+                        help='(flag) Render environment')
+    parser.add_argument('-n', '--num_workers', default=1, type=int,
+                        help='(int) Number of workers for multi-process based '
+                             'agents, e.g. A3C')
+    parser.add_argument('--evaluate', action='store_true',
+                        help='(flag) Run evaluation only. This is a '
+                             'convenient way to disable training in order to '
+                             'evaluate an existing checkpoint.')
+    parser.add_argument('-v', '--verbose', action='store_true',
+                        help='(flag) Don\'t suppress TensorFlow debug prints.')
+    parser.add_argument('-s', '--save_model_sec', default=None, type=int,
+                        help='(int) Time in seconds between saving checkpoints'
+                             ' of the model.')
     parser.add_argument('-crd', '--checkpoint_restore_dir',
-                        help='(string) Path to a folder containing a checkpoint to restore the model from.',
-                        type=str)
-    parser.add_argument('-dg', '--dump_gifs',
-                        help="(flag) Enable the gif saving functionality.",
-                        action='store_true')
-    parser.add_argument('-at', '--agent_type',
-                        help="(string) Choose an agent type class to override on top of the selected preset. "
-                             "If no preset is defined, a preset can be set from the command-line by combining settings "
-                             "which are set by using --agent_type, --experiment_type, --environemnt_type",
-                        default=None,
-                        type=str)
-    parser.add_argument('-et', '--environment_type',
-                        help="(string) Choose an environment type class to override on top of the selected preset."
-                             "If no preset is defined, a preset can be set from the command-line by combining settings "
-                             "which are set by using --agent_type, --experiment_type, --environemnt_type",
-                        default=None,
-                        type=str)
-    parser.add_argument('-ept', '--exploration_policy_type',
-                        help="(string) Choose an exploration policy type class to override on top of the selected "
-                             "preset."
-                             "If no preset is defined, a preset can be set from the command-line by combining settings "
-                             "which are set by using --agent_type, --experiment_type, --environemnt_type"
-                        ,
-                        default=None,
-                        type=str)
-    parser.add_argument('-lvl', '--level',
-                        help="(string) Choose the level that will be played in the environment that was selected."
-                             "This value will override the level parameter in the environment class."
-                        ,
-                        default=None,
-                        type=str)
-    parser.add_argument('-cp', '--custom_parameter',
-                        help="(string) Semicolon separated parameters used to override specific parameters on top of"
-                             " the selected preset (or on top of the command-line assembled one). "
-                             "Whenever a parameter value is a string, it should be inputted as '\\\"string\\\"'. "
-                             "For ex.: "
-                             "\"visualization.render=False; num_training_iterations=500; optimizer='rmsprop'\"",
-                        default=None,
-                        type=str)
-    parser.add_argument('--print_parameters',
-                        help="(flag) Print tuning_parameters to stdout",
-                        action='store_true')
-    parser.add_argument('-tb', '--tensorboard',
-                        help="(flag) When using the TensorFlow backend, enable TensorBoard log dumps. ",
-                        action='store_true')
-    parser.add_argument('-ns', '--no_summary',
-                        help="(flag) Prevent Coach from printing a summary and asking questions at the end of runs",
-                        action='store_true')
+                        help='(string) Path to a folder containing a '
+                             'checkpoint to restore the model from.')
+    parser.add_argument('-dg', '--dump_gifs', action='store_true',
+                        help='(flag) Enable the gif saving functionality.')
+    parser.add_argument('-at', '--agent_type', default=None,
+                        help='(string) Choose an agent type class to override'
+                             ' on top of the selected preset. If no preset is '
+                             'defined, a preset can be set from the command-line '
+                             'by combining settings which are set by using '
+                             '--agent_type, --experiment_type, --environemnt_type')
+    parser.add_argument('-et', '--environment_type', default=None,
+                        help='(string) Choose an environment type class to '
+                             'override on top of the selected preset. If no preset'
+                             ' is defined, a preset can be set from the '
+                             'command-line by combining settings which are set by '
+                             'using --agent_type, --experiment_type, '
+                             '--environemnt_type')
+    parser.add_argument('-ept', '--exploration_policy_type', default=None,
+                        help='(string) Choose an exploration policy type '
+                             'class to override on top of the selected preset. If '
+                             'no preset is defined, a preset can be set from the '
+                             'command-line by combining settings which are set by '
+                             'using --agent_type, --experiment_type, '
+                             '--environemnt_type')
+    parser.add_argument('-lvl', '--level', default=None,
+                        help='(string) Choose the level that will be played '
+                             'in the environment that was selected. This value '
+                             'will override the level parameter in the environment '
+                             'class.')
+    parser.add_argument('-cp', '--custom_parameter', default=None,
+                        help='(string) Semicolon separated parameters used to '
+                             'override specific parameters on top of the selected '
+                             'preset (or on top of the command-line assembled '
+                             'one). Whenever a parameter value is a string, it '
+                             'should be inputted as "string". For ex.: '
+                             '"visualization.render=False; '
+                             'num_training_iterations=500; optimizer=\'rmsprop\'"')
+    parser.add_argument('-pf', '--parameters_file', default=None,
+                        help='YAML file with customized parameters, just like '
+                             '\'--custom-parameter\' bit in a file for convenience')
+    parser.add_argument('--print_parameters', action='store_true',
+                        help='(flag) Print tuning_parameters to stdout')
+    parser.add_argument('-tb', '--tensorboard', action='store_true',
+                        help='(flag) When using the TensorFlow backend, '
+                             'enable TensorBoard log dumps. ')
+    parser.add_argument('-ns', '--no_summary', action='store_true',
+                        help='(flag) Prevent Coach from printing a summary '
+                             'and asking questions at the end of runs')
 
     args, run_dict = check_input_and_fill_run_dict(parser)
 
     # turn TF debug prints off
-    if not args.verbose and args.framework.lower() == 'tensorflow':
+    if not args.verbose:
         os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
 
     # dump documentation
@@ -257,43 +203,46 @@ def main():
     # set tuning parameters
     json_run_dict_path = run_dict_to_json(run_dict)
     tuning_parameters = presets.json_to_preset(json_run_dict_path)
-    tuning_parameters.sess = set_framework(args.framework)
+    config = tf.ConfigProto()
+    config.allow_soft_placement = True
+    config.gpu_options.allow_growth = True
+    config.gpu_options.per_process_gpu_memory_fraction = 0.2
+    tuning_parameters.sess = tf.Session(config=config)
 
     if args.print_parameters:
         print('tuning_parameters', tuning_parameters)
 
     # Single-thread runs
         tuning_parameters.task_index = 0
-        env_instance = environments.create_environment(tuning_parameters)
+        env_instance = environments.create_environment(tuning_parameters)  # noqa
         agent = eval('agents.' + tuning_parameters.agent.type + '(env_instance, tuning_parameters)')
 
         # Start the training or evaluation
         if tuning_parameters.evaluate:
-            agent.evaluate(sys.maxsize, keep_networks_synced=True)  # evaluate forever
+            # evaluate forever
+            agent.evaluate(sys.maxsize, keep_networks_synced=True)
         else:
            agent.improve()
 
    # Multi-threaded runs
    else:
-        assert args.framework.lower() == 'tensorflow', "Distributed training works only with TensorFlow"
-        os.environ["OMP_NUM_THREADS"]="1"
+        os.environ['OMP_NUM_THREADS'] = '1'
 
         # set parameter server and workers addresses
-        ps_hosts = "localhost:{}".format(utils.get_open_port())
-        worker_hosts = ",".join(["localhost:{}".format(utils.get_open_port()) for i in range(run_dict['num_threads'] + 1)])
+        ps_hosts = 'localhost:{}'.format(utils.get_open_port())
+        worker_hosts = ','.join(['localhost:{}'.format(utils.get_open_port())
+                                 for i in range(run_dict['num_threads'] + 1)])
 
         # Make sure to disable GPU so that all the workers will use the CPU
         utils.set_cpu()
 
         # create a parameter server
-        cmd = [
-            "python3",
-            "./parallel_actor.py",
-            "--ps_hosts={}".format(ps_hosts),
-            "--worker_hosts={}".format(worker_hosts),
-            "--job_name=ps",
-        ]
-        parameter_server = subprocess.Popen(cmd)
+        cmd = ["python3",
+               "./parallel_actor.py",
+               "--ps_hosts={}".format(ps_hosts),
+               "--worker_hosts={}".format(worker_hosts),
+               "--job_name=ps"]
+        subprocess.Popen(cmd)
 
         logger.screen.log_title("*** Distributed Training ***")
         time.sleep(1)
@@ -309,7 +258,8 @@
             run_dict['visualization.render'] = args.render
         else:
             run_dict['evaluate_only'] = False
-            run_dict['visualization.render'] = False # #In a parallel setting, only the evaluation agent renders
+            # In a parallel setting, only the evaluation agent renders
+            run_dict['visualization.render'] = False
 
             json_run_dict_path = run_dict_to_json(run_dict, i)
             workers_args = ["python3", "./parallel_actor.py",
diff --git a/coach/configurations.py b/coach/configurations.py
index 90e7b8f..a03c319 100644
--- a/coach/configurations.py
+++ b/coach/configurations.py
@@ -160,7 +160,6 @@ class EnvironmentParameters(Parameters):
     reward_scaling = 1.0
     reward_clipping_min = None
     reward_clipping_max = None
-    human_control = False
 
 
 class ExplorationParameters(Parameters):
@@ -257,7 +256,6 @@ class VisualizationParameters(Parameters):
     dump_signals_to_csv_every_x_episodes = 5
     render = False
     dump_gifs = True
-    max_fps_for_human_control = 10
     tensorboard = False
 
 
@@ -325,11 +323,6 @@ class Carla(EnvironmentParameters):
     allow_braking = False
 
 
-class Human(AgentParameters):
-    type = 'HumanAgent'
-    num_episodes_in_experience_replay = 10000000
-
-
 class NStepQ(AgentParameters):
     type = 'NStepQAgent'
     input_types = {'observation': InputTypes.Observation}
diff --git a/coach/environments/doom_environment_wrapper.py b/coach/environments/doom_environment_wrapper.py
index 0f88316..8f9117f 100644
--- a/coach/environments/doom_environment_wrapper.py
+++ b/coach/environments/doom_environment_wrapper.py
@@ -91,11 +91,7 @@ class DoomEnvironmentWrapper(ew.EnvironmentWrapper):
         self.game.set_window_visible(False)
         self.game.add_game_args("+vid_forcesurface 1")
 
-        self.wait_for_explicit_human_action = True
-        if self.human_control:
-            self.game.set_screen_resolution(vizdoom.ScreenResolution.RES_640X480)
-            self.renderer.create_screen(640, 480)
-        elif self.is_rendered:
+        if self.is_rendered:
             self.game.set_screen_resolution(vizdoom.ScreenResolution.RES_320X240)
             self.renderer.create_screen(320, 240)
         else:
diff --git a/coach/environments/environment_wrapper.py b/coach/environments/environment_wrapper.py
index e201a51..1ec0dfb 100644
--- a/coach/environments/environment_wrapper.py
+++ b/coach/environments/environment_wrapper.py
@@ -57,9 +57,6 @@ class EnvironmentWrapper(object):
         self.is_rendered = self.tp.visualization.render
         self.seed = self.tp.seed
         self.frame_skip = self.tp.env.frame_skip
-        self.human_control = self.tp.env.human_control
-        self.wait_for_explicit_human_action = False
-        self.is_rendered = self.is_rendered or self.human_control
         self.game_is_open = True
 
     @property
diff --git a/coach/environments/gym_environment_wrapper.py b/coach/environments/gym_environment_wrapper.py
index 0b301fe..270da51 100644
--- a/coach/environments/gym_environment_wrapper.py
+++ b/coach/environments/gym_environment_wrapper.py
@@ -45,8 +45,6 @@ class GymEnvironmentWrapper(ew.EnvironmentWrapper):
         if self.is_rendered:
             image = self.get_rendered_image()
             scale = 1
-            if self.human_control:
-                scale = 2
             self.renderer.create_screen(image.shape[1]*scale, image.shape[0]*scale)
 
         if isinstance(self.env.observation_space, gym.spaces.Dict):
diff --git a/coach/logger.py b/coach/logger.py
index 4fca581..a65b726 100644
--- a/coach/logger.py
+++ b/coach/logger.py
@@ -221,12 +221,12 @@ class Logger(BaseLogger):
     def get_signal_value(self, time, signal_name):
         return self.data.loc[time, signal_name]
 
-    def dump_output_csv(self, append=True):
+    def dump_output_csv(self):
         self.data.index.name = "Episode #"
 
         if len(self.data.index) == 1:
             self.start_time = time.time()
-        if os.path.exists(self.csv_path) and append:
+        if os.path.exists(self.csv_path):
             self.data[self.last_line_idx_written_to_csv:].to_csv(self.csv_path, mode='a', header=False)
         else:
             self.data.to_csv(self.csv_path)
diff --git a/coach/presets.py b/coach/presets.py
index 07f2356..5d3cce5 100644
--- a/coach/presets.py
+++ b/coach/presets.py
@@ -44,12 +44,6 @@ def json_to_preset(json_path):
     if run_dict['exploration_policy_type'] is not None:
         tuning_parameters.exploration = eval('ep.' + run_dict['exploration_policy_type'])()
 
-    # human control
-    if run_dict['play']:
-        tuning_parameters.agent.type = 'HumanAgent'
-        tuning_parameters.env.human_control = True
-        tuning_parameters.num_heatup_steps = 0
-
     if run_dict['level']:
         tuning_parameters.env.level = run_dict['level']
 
diff --git a/scripts/coach b/scripts/coach
old mode 100644
new mode 100755