Coach as a library (#348)
* CoachInterface + tutorial
* Some improvements and typo fixes
* merge tutorial 0 and 4
* typo fix + additional tutorial changes
* tutorial changes
* added reading signals and experiment path argument
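For orientation before the diff: a minimal sketch of the library-style entry point this commit introduces, assembled from the tutorial cells changed below (the preset name and the `custom_parameter` string are the ones used there):

    from rl_coach.coach import CoachInterface

    # CoachInterface accepts the same options as the CLI, passed as keyword
    # arguments; custom_parameter overrides preset values, as the updated
    # tutorial below demonstrates.
    coach = CoachInterface(preset='CartPole_ClippedPPO',
                           custom_parameter='heatup_steps=EnvironmentSteps(5);'
                                            'improve_steps=TrainingSteps(3)')
    coach.run()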
@@ -18,7 +18,6 @@ sys.path.append('.')
 
 import copy
 from configparser import ConfigParser, Error
-from rl_coach.core_types import EnvironmentSteps
 import os
 from rl_coach import logger
 import traceback

@@ -30,6 +29,8 @@ import sys
 import json
 from rl_coach.base_parameters import Frameworks, VisualizationParameters, TaskParameters, DistributedTaskParameters, \
     RunType, DistributedCoachSynchronizationType
+from rl_coach.core_types import TotalStepsCounter, RunPhase, PlayingStepsType, TrainingSteps, EnvironmentEpisodes, \
+    EnvironmentSteps, StepMethod, Transition
 from multiprocessing import Process
 from multiprocessing.managers import BaseManager
 import subprocess

@@ -316,7 +317,7 @@ class CoachLauncher(object):
 
         return preset
 
-    def get_config_args(self, parser: argparse.ArgumentParser) -> argparse.Namespace:
+    def get_config_args(self, parser: argparse.ArgumentParser, arguments=None) -> argparse.Namespace:
         """
         Returns a Namespace object with all the user-specified configuration options needed to launch.
         This implementation uses argparse to take arguments from the CLI, but this can be overridden by
@@ -329,15 +330,19 @@ class CoachLauncher(object):
 
         :param parser: a parser object which implicitly defines the format of the Namespace that
                        is expected to be returned.
+        :param arguments: command line arguments
         :return: the parsed arguments as a Namespace
         """
-        args = parser.parse_args()
+        if arguments is None:
+            args = parser.parse_args()
+        else:
+            args = parser.parse_args(arguments)
 
         if args.nocolor:
             screen.set_use_colors(False)
 
         # if no arg is given
-        if len(sys.argv) == 1:
+        if (len(sys.argv) == 1 and arguments is None) or (arguments is not None and len(arguments) <= 2):
            parser.print_help()
            sys.exit(1)
 
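The key to the change above is that `argparse` can parse an explicit list instead of falling back to `sys.argv`; that is what lets a caller drive the launcher programmatically. A small self-contained sketch (the parser here is illustrative, not Coach's full parser):

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('-p', '--preset', type=str)

    # With no list, argparse reads sys.argv; with a list, it parses that instead.
    args = parser.parse_args(['--preset', 'CartPole_ClippedPPO'])
    print(args.preset)  # -> CartPole_ClippedPPO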
@@ -417,7 +422,7 @@ class CoachLauncher(object):
 
         # get experiment name and path
         args.experiment_name = logger.get_experiment_name(args.experiment_name)
-        args.experiment_path = logger.get_experiment_path(args.experiment_name)
+        args.experiment_path = logger.get_experiment_path(args.experiment_name, args.experiment_path)
 
         if args.play and args.num_workers > 1:
             screen.warning("Playing the game as a human is only available with a single worker. "

@@ -450,7 +455,11 @@ class CoachLauncher(object):
                             action='store_true')
         parser.add_argument('-e', '--experiment_name',
                             help="(string) Experiment name to be used to store the results.",
-                            default='',
+                            default=None,
+                            type=str)
+        parser.add_argument('-ep', '--experiment_path',
+                            help="(string) Path to experiments folder.",
+                            default=None,
                             type=str)
         parser.add_argument('-r', '--render',
                             help="(flag) Render environment",

@@ -526,7 +535,8 @@ class CoachLauncher(object):
                                  " the selected preset (or on top of the command-line assembled one). "
                                  "Whenever a parameter value is a string, it should be inputted as '\\\"string\\\"'. "
                                  "For ex.: "
-                                 "\"visualization.render=False; num_training_iterations=500; optimizer='rmsprop'\"",
+                                 "\"visualization_parameters.render=False; heatup_steps=EnvironmentSteps(1000);"
+                                 "improve_steps=TrainingSteps(100000); optimizer='rmsprop'\"",
                             default=None,
                             type=str)
         parser.add_argument('--print_networks_summary',

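The reworked help text documents the `custom_parameter` format: semicolon-separated assignments evaluated on top of the preset, with string values escaped as '\"string\"'. A hedged example of passing such a value through the new library interface (values taken from the help string above):

    from rl_coach.coach import CoachInterface

    # Each assignment overrides the corresponding preset attribute.
    coach = CoachInterface(preset='CartPole_ClippedPPO',
                           custom_parameter="visualization_parameters.render=False; "
                                            "heatup_steps=EnvironmentSteps(1000); "
                                            "improve_steps=TrainingSteps(100000)")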
@@ -589,14 +599,31 @@ class CoachLauncher(object):
         return parser
 
     def run_graph_manager(self, graph_manager: 'GraphManager', args: argparse.Namespace):
+        task_parameters = self.create_task_parameters(graph_manager, args)
+
+        if args.distributed_coach and args.distributed_coach_run_type != RunType.ORCHESTRATOR:
+            handle_distributed_coach_tasks(graph_manager, args, task_parameters)
+            return
+
+        # Single-threaded runs
+        if args.num_workers == 1:
+            self.start_single_threaded(task_parameters, graph_manager, args)
+        else:
+            self.start_multi_threaded(graph_manager, args)
+
+    @staticmethod
+    def create_task_parameters(graph_manager: 'GraphManager', args: argparse.Namespace):
         if args.distributed_coach and not graph_manager.agent_params.algorithm.distributed_coach_synchronization_type:
-            screen.error("{} algorithm is not supported using distributed Coach.".format(graph_manager.agent_params.algorithm))
+            screen.error(
+                "{} algorithm is not supported using distributed Coach.".format(graph_manager.agent_params.algorithm))
 
         if args.distributed_coach and args.checkpoint_save_secs and graph_manager.agent_params.algorithm.distributed_coach_synchronization_type == DistributedCoachSynchronizationType.SYNC:
-            screen.warning("The --checkpoint_save_secs or -s argument will be ignored as SYNC distributed coach sync type is used. Checkpoint will be saved every training iteration.")
+            screen.warning(
+                "The --checkpoint_save_secs or -s argument will be ignored as SYNC distributed coach sync type is used. Checkpoint will be saved every training iteration.")
 
         if args.distributed_coach and not args.checkpoint_save_secs and graph_manager.agent_params.algorithm.distributed_coach_synchronization_type == DistributedCoachSynchronizationType.ASYNC:
-            screen.error("Distributed coach with ASYNC distributed coach sync type requires --checkpoint_save_secs or -s.")
+            screen.error(
+                "Distributed coach with ASYNC distributed coach sync type requires --checkpoint_save_secs or -s.")
 
         # Intel optimized TF seems to run significantly faster when limiting to a single OMP thread.
         # This will not affect GPU runs.

@@ -617,6 +644,13 @@ class CoachLauncher(object):
         checkpoint_restore_path = args.checkpoint_restore_dir if args.checkpoint_restore_dir \
             else args.checkpoint_restore_file
 
+        # open dashboard
+        if args.open_dashboard:
+            open_dashboard(args.experiment_path)
+
+        if args.distributed_coach and args.distributed_coach_run_type == RunType.ORCHESTRATOR:
+            exit(handle_distributed_coach_orchestrator(args))
+
         task_parameters = TaskParameters(
             framework_type=args.framework,
             evaluate_only=args.evaluate,

@@ -630,22 +664,7 @@ class CoachLauncher(object):
             apply_stop_condition=args.apply_stop_condition
         )
 
-        # open dashboard
-        if args.open_dashboard:
-            open_dashboard(args.experiment_path)
-
-        if args.distributed_coach and args.distributed_coach_run_type != RunType.ORCHESTRATOR:
-            handle_distributed_coach_tasks(graph_manager, args, task_parameters)
-            return
-
-        if args.distributed_coach and args.distributed_coach_run_type == RunType.ORCHESTRATOR:
-            exit(handle_distributed_coach_orchestrator(args))
-
-        # Single-threaded runs
-        if args.num_workers == 1:
-            self.start_single_threaded(task_parameters, graph_manager, args)
-        else:
-            self.start_multi_threaded(graph_manager, args)
+        return task_parameters
 
     @staticmethod
     def start_single_threaded(task_parameters, graph_manager: 'GraphManager', args: argparse.Namespace):

@@ -708,6 +727,7 @@ class CoachLauncher(object):
         # wait a bit before spawning the non chief workers in order to make sure the session is already created
         workers = []
+        workers.append(start_distributed_task("worker", 0))
 
         time.sleep(2)
         for task_index in range(1, args.num_workers):
             workers.append(start_distributed_task("worker", task_index))

@@ -722,6 +742,34 @@ class CoachLauncher(object):
             evaluation_worker.terminate()
 
 
+class CoachInterface(CoachLauncher):
+    """
+    This class is used as an interface for using Coach as a library. It can take any of the command line arguments
+    (with their respective names) as keyword arguments to the class.
+    """
+    def __init__(self, **kwargs):
+        parser = self.get_argument_parser()
+
+        arguments = []
+        for key in kwargs:
+            arguments.append('--' + key)
+            arguments.append(str(kwargs[key]))
+
+        if '--experiment_name' not in arguments:
+            arguments.append('--experiment_name')
+            arguments.append('')
+        self.args = self.get_config_args(parser, arguments)
+
+        self.graph_manager = self.get_graph_manager_from_args(self.args)
+
+        if self.args.num_workers == 1:
+            task_parameters = self.create_task_parameters(self.graph_manager, self.args)
+            self.graph_manager.create_graph(task_parameters)
+
+    def run(self):
+        self.run_graph_manager(self.graph_manager, self.args)
+
+
 def main():
     launcher = CoachLauncher()
     launcher.launch()

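Because `__init__` already calls `create_graph` for single-worker runs, the wrapped graph manager is ready for phase-level calls without `run()`. A sketch following the tutorial cells in this commit:

    from rl_coach.coach import CoachInterface
    from rl_coach.core_types import EnvironmentSteps

    coach = CoachInterface(preset='CartPole_ClippedPPO')

    # Drive the schedule manually instead of calling coach.run()
    coach.graph_manager.heatup(EnvironmentSteps(100))
    for _ in range(10):
        coach.graph_manager.train_and_act(EnvironmentSteps(50))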
@@ -30,7 +30,7 @@ except ImportError:
     failed_imports.append("RoboSchool")
 
 try:
-    from rl_coach.gym_extensions.continuous import mujoco
+    from gym_extensions.continuous import mujoco
 except:
     from rl_coach.logger import failed_imports
     failed_imports.append("GymExtensions")

@@ -35,6 +35,7 @@ class BasicRLGraphManager(GraphManager):
                  preset_validation_params: PresetValidationParameters = PresetValidationParameters(),
                  name='simple_rl_graph'):
         super().__init__(name, schedule_params, vis_params)
+
         self.agent_params = agent_params
         self.env_params = env_params
         self.preset_validation_params = preset_validation_params

@@ -71,3 +72,13 @@ class BasicRLGraphManager(GraphManager):
         level_manager = LevelManager(agents=agent, environment=env, name="main_level")
 
         return [level_manager], [env]
+
+    def log_signal(self, signal_name, value):
+        self.level_managers[0].agents['agent'].agent_logger.create_signal_value(signal_name, value)
+
+    def get_signal_value(self, signal_name):
+        return self.level_managers[0].agents['agent'].agent_logger.get_signal_value(signal_name)
+
+    def get_agent(self):
+        return self.level_managers[0].agents['agent']

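The three helpers above expose the single agent's logger and policy. A sketch of the signal round-trip, taken from the updated tutorial (assumes `coach` is a `CoachInterface` over a single-agent preset; 'Training Reward' is the signal name the tutorial reads):

    from rl_coach.core_types import EnvironmentSteps

    for it in range(10):
        coach.graph_manager.log_signal('iteration', it)                   # write a custom signal
        coach.graph_manager.train_and_act(EnvironmentSteps(100))
        reward = coach.graph_manager.get_signal_value('Training Reward')  # read a built-in signal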
@@ -23,12 +23,10 @@ from typing import List, Tuple
 import contextlib
 
 from rl_coach.base_parameters import iterable_to_items, TaskParameters, DistributedTaskParameters, Frameworks, \
-    VisualizationParameters, \
-    Parameters, PresetValidationParameters, RunType
+    VisualizationParameters, Parameters, PresetValidationParameters, RunType
 from rl_coach.checkpoint import CheckpointStateUpdater, get_checkpoint_state, SingleCheckpoint, CheckpointState
 from rl_coach.core_types import TotalStepsCounter, RunPhase, PlayingStepsType, TrainingSteps, EnvironmentEpisodes, \
-    EnvironmentSteps, \
-    StepMethod, Transition
+    EnvironmentSteps, StepMethod, Transition
 from rl_coach.environments.environment import Environment
 from rl_coach.level_manager import LevelManager
 from rl_coach.logger import screen, Logger

@@ -123,6 +121,10 @@ class GraphManager(object):
         self.time_metric = TimeTypes.EpisodeNumber
 
     def create_graph(self, task_parameters: TaskParameters=TaskParameters()):
+        # check if create_graph has already been called
+        if self.graph_creation_time is not None:
+            return self
+
         self.graph_creation_time = time.time()
         self.task_parameters = task_parameters

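The new guard makes `create_graph` idempotent, which matters because `CoachInterface.__init__` may already have created the graph before `run_graph_manager` runs. A hedged sketch of the resulting behavior, assuming a `coach` built by `CoachInterface` with `num_workers=1`:

    gm = coach.graph_manager        # graph already created by CoachInterface
    assert gm.create_graph() is gm  # second call is a no-op returning the same manager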
@@ -206,7 +206,7 @@ class BaseLogger(object):
             return True
         return False
 
-    def signal_value_exists(self, time, signal_name):
+    def signal_value_exists(self, signal_name, time):
         try:
             value = self.get_signal_value(time, signal_name)
             if value != value:  # value is nan

@@ -215,7 +215,9 @@ class BaseLogger(object):
             return False
         return True
 
-    def get_signal_value(self, time, signal_name):
+    def get_signal_value(self, signal_name, time=None):
+        if not time:
+            time = self.time
         return self.data.loc[time, signal_name]
 
     def dump_output_csv(self, append=True):

@@ -382,12 +384,12 @@ def summarize_experiment():
     screen.log_title("Results moved to: {}".format(new_path))
 
 
-def get_experiment_name(initial_experiment_name=''):
+def get_experiment_name(initial_experiment_name=None):
     global experiment_name
 
     match = None
     while match is None:
-        if initial_experiment_name == '':
+        if initial_experiment_name is None:
             msg_if_timeout = "Timeout waiting for experiment name."
             experiment_name = screen.ask_input_with_timeout("Please enter an experiment name: ", 60, msg_if_timeout)
         else:

@@ -407,10 +409,12 @@ def get_experiment_name(initial_experiment_name=None):
     return experiment_name
 
 
-def get_experiment_path(experiment_name, create_path=True):
+def get_experiment_path(experiment_name, initial_experiment_path=None, create_path=True):
     global experiment_path
 
-    general_experiments_path = os.path.join('./experiments/', experiment_name)
+    if not initial_experiment_path:
+        initial_experiment_path = './experiments/'
+    general_experiments_path = os.path.join(initial_experiment_path, experiment_name)
 
     cur_date = time_started.date()
     cur_time = time_started.time()

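A hedged sketch of the new path resolution (the `resolve_root` helper is hypothetical, mirroring only the lines added above; the date/time subfolders appended later in the function are omitted):

    import os

    def resolve_root(experiment_name, initial_experiment_path=None):
        # Fall back to the historical default when no root is supplied
        if not initial_experiment_path:
            initial_experiment_path = './experiments/'
        return os.path.join(initial_experiment_path, experiment_name)

    print(resolve_root('cartpole'))               # ./experiments/cartpole
    print(resolve_root('cartpole', '/tmp/runs'))  # /tmp/runs/cartpole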
@@ -54,6 +54,8 @@
    "source": [
     "Alternatively, Coach can be used as a library directly from Python. As described above, Coach uses the presets mechanism to define the experiments. A preset is essentially a Python module which instantiates a `GraphManager` object. The graph manager is a container that holds the agents and the environments, and has some additional parameters for running the experiment, such as visualization parameters. The graph manager acts as the scheduler which orchestrates the experiment.\n",
     "\n",
+    "Running Coach directly from Python is done through a `CoachInterface` object, which uses the same arguments as the command-line invocation but allows for more flexibility and additional control over the training/inference process.\n",
+    "\n",
     "Let's start with some examples.\n",
     "\n",
     "Creating a very simple graph containing a single Clipped PPO agent running with the CartPole-v0 Gym environment:"

@@ -65,16 +67,24 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from rl_coach.agents.clipped_ppo_agent import ClippedPPOAgentParameters\n",
-    "from rl_coach.environments.gym_environment import GymVectorEnvironment\n",
-    "from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager\n",
-    "from rl_coach.graph_managers.graph_manager import SimpleSchedule\n",
-    "\n",
-    "graph_manager = BasicRLGraphManager(\n",
-    "    agent_params=ClippedPPOAgentParameters(),\n",
-    "    env_params=GymVectorEnvironment(level='CartPole-v0'),\n",
-    "    schedule_params=SimpleSchedule()\n",
-    ")"
+    "# Adding module path to sys path if not there, so rl_coach submodules can be imported\n",
+    "import os\n",
+    "import sys\n",
+    "module_path = os.path.abspath(os.path.join('..'))\n",
+    "if module_path not in sys.path:\n",
+    "    sys.path.append(module_path)\n",
+    "\n",
+    "from rl_coach.coach import CoachInterface"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "coach = CoachInterface(preset='CartPole_ClippedPPO',\n",
+    "                       custom_parameter='heatup_steps=EnvironmentSteps(5);improve_steps=TrainingSteps(3)')"
+   ]
+  },
   {

@@ -90,7 +100,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "graph_manager.improve()"
+    "coach.run()"
    ]
   },
   {

@@ -104,7 +114,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "The graph manager simplifies the scheduling process by encapsulating the calls to each of the training phases. Sometimes, it can be beneficial to have more fine-grained control over the scheduling process. This can easily be done by calling the individual phase functions directly:"
+    "The graph manager (which was instantiated in the preset) can be accessed from the `CoachInterface` object. The graph manager simplifies the scheduling process by encapsulating the calls to each of the training phases. Sometimes, it can be beneficial to have more fine-grained control over the scheduling process. This can easily be done by calling the individual phase functions directly:"
    ]
   },
   {

@@ -115,23 +125,18 @@
    "source": [
     "from rl_coach.core_types import EnvironmentSteps\n",
     "\n",
-    "graph_manager = BasicRLGraphManager(\n",
-    "    agent_params=ClippedPPOAgentParameters(),\n",
-    "    env_params=GymVectorEnvironment(level='CartPole-v0'),\n",
-    "    schedule_params=SimpleSchedule()\n",
-    ")\n",
-    "\n",
-    "graph_manager.heatup(EnvironmentSteps(100))\n",
-    "graph_manager.train_and_act(EnvironmentSteps(100))"
+    "coach.graph_manager.heatup(EnvironmentSteps(100))\n",
+    "for _ in range(10):\n",
+    "    coach.graph_manager.train_and_act(EnvironmentSteps(50))"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### Changing the default parameters\n",
+    "### Additional functionality\n",
     "\n",
-    "Agents in Coach are defined along with some default parameters that follow the published paper definition. This may be sufficient when running the exact same experiments as in the paper, but otherwise, there would probably need to be some changes made to the algorithm parameters. Again, this is easily modifiable, and all the internal parameters are accessible from within the preset:"
+    "`CoachInterface` allows for easy access to functionality such as multi-threading and saving checkpoints:"
    ]
   },
   {

@@ -140,39 +145,63 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from rl_coach.agents.clipped_ppo_agent import ClippedPPOAgentParameters\n",
-    "from rl_coach.environments.gym_environment import GymVectorEnvironment\n",
-    "from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager\n",
-    "from rl_coach.graph_managers.graph_manager import SimpleSchedule\n",
-    "from rl_coach.graph_managers.graph_manager import ScheduleParameters\n",
-    "from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps\n",
-    "\n",
-    "# schedule\n",
-    "schedule_params = ScheduleParameters()\n",
-    "schedule_params.improve_steps = TrainingSteps(10000000)\n",
-    "schedule_params.steps_between_evaluation_periods = EnvironmentSteps(2048)\n",
-    "schedule_params.evaluation_steps = EnvironmentEpisodes(5)\n",
-    "schedule_params.heatup_steps = EnvironmentSteps(0)\n",
-    "\n",
-    "# agent parameters\n",
-    "agent_params = ClippedPPOAgentParameters()\n",
-    "agent_params.algorithm.discount = 1.0\n",
-    "\n",
-    "graph_manager = BasicRLGraphManager(\n",
-    "    agent_params=agent_params,\n",
-    "    env_params=GymVectorEnvironment(level='CartPole-v0'),\n",
-    "    schedule_params=schedule_params\n",
-    ")\n",
-    "\n",
-    "graph_manager.improve()\n"
+    "coach = CoachInterface(preset='CartPole_ClippedPPO', num_workers=2, checkpoint_save_secs=10)\n",
+    "coach.run()"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### Using a custom gym environment\n",
+    "### Agent functionality\n",
     "\n",
+    "When using `CoachInterface` (a single agent with one level of hierarchy), it is also possible to use the `Agent` object's functionality directly, such as logging and reading signals, or applying the policy the agent has learned to a given state:"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from rl_coach.environments.gym_environment import GymEnvironment, GymVectorEnvironment\n",
+    "from rl_coach.base_parameters import VisualizationParameters\n",
+    "from rl_coach.core_types import EnvironmentSteps\n",
+    "\n",
+    "coach = CoachInterface(preset='CartPole_ClippedPPO')\n",
+    "\n",
+    "# training\n",
+    "for it in range(10):\n",
+    "    coach.graph_manager.log_signal('iteration', it)\n",
+    "    coach.graph_manager.train_and_act(EnvironmentSteps(100))\n",
+    "    training_reward = coach.graph_manager.get_signal_value('Training Reward')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# inference\n",
+    "env_params = GymVectorEnvironment(level='CartPole-v0')\n",
+    "env = GymEnvironment(**env_params.__dict__, visualization_parameters=VisualizationParameters())\n",
+    "\n",
+    "for it in range(10):\n",
+    "    action_info = coach.graph_manager.get_agent().choose_action(env.state)\n",
+    "    print(\"State:{}, Action:{}\".format(env.state, action_info.action))\n",
+    "    env.step(action_info.action)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Using GraphManager Directly\n",
+    "\n",
+    "It is also possible to invoke Coach directly from Python code without defining a preset (which is necessary for `CoachInterface`) by using the `GraphManager` object directly. Using Coach this way won't give you access to functionality such as multi-threading, but it might be convenient if you don't want to define a preset file.\n",
+    "\n",
+    "Here we show an example of how to do so with a custom environment.\n",
     "We can use a custom gym environment without registering it. \n",
     "We just need the path to the environment module.\n",
     "We can also pass custom parameters for the environment's `__init__` function as `additional_simulator_parameters`."

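For the preset-free path the tutorial describes, a plain-Python sketch assembled from the cells this commit removes plus the custom-level usage it explains; the environment path 'my_env.py:MyEnv' and the `time_limit` parameter are hypothetical placeholders:

    from rl_coach.agents.clipped_ppo_agent import ClippedPPOAgentParameters
    from rl_coach.environments.gym_environment import GymVectorEnvironment
    from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
    from rl_coach.graph_managers.graph_manager import SimpleSchedule

    graph_manager = BasicRLGraphManager(
        agent_params=ClippedPPOAgentParameters(),
        # level points at an unregistered environment module and class;
        # additional_simulator_parameters are forwarded to its __init__
        env_params=GymVectorEnvironment(level='my_env.py:MyEnv',
                                        additional_simulator_parameters={'time_limit': 500}),
        schedule_params=SimpleSchedule()
    )
    graph_manager.improve()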
@@ -244,7 +273,7 @@
   "language_info": {
    "codemirror_mode": {
     "name": "ipython",
-    "version": 3
+    "version": 3.0
    },
    "file_extension": ".py",
    "mimetype": "text/x-python",

@@ -255,5 +284,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 2
+ "nbformat_minor": 0
 }