create per environment Dockerfiles. (#70)

* Create per-environment Dockerfiles. Adjust CI setup to better parallelize runs. Fix a couple of issues in golden and trace tests. Update a few of the docs.
* Bugfix in mmc agent. Also install kubectl for CI, update badge branch.
* Remove integration test parallelism.
2026-02-17 23:05:51 +01:00 · 2018-11-14 07:40:22 -08:00
parent a849c17e46
commit 524f8436a2
20 changed files with 448 additions and 139 deletions
--- a/rl_coach/agents/mmc_agent.py
+++ b/rl_coach/agents/mmc_agent.py
@@ -64,7 +64,7 @@ class MixedMonteCarloAgent(ValueOptimizationAgent):
            one_step_target = batch.rewards()[i] + \
                              (1.0 - batch.game_overs()[i]) * self.ap.algorithm.discount * \
                              q_st_plus_1[i][selected_actions[i]]
-            monte_carlo_target = total_returns()[i]
+            monte_carlo_target = total_returns[i]
            TD_targets[i, batch.actions()[i]] = (1 - self.mixing_rate) * one_step_target + \
                                                self.mixing_rate * monte_carlo_target

--- a/rl_coach/environments/doom_environment.py
+++ b/rl_coach/environments/doom_environment.py
@@ -132,8 +132,12 @@ class DoomEnvironment(Environment):
        # load the emulator with the required level
        self.level = DoomLevel[level.upper()]
        local_scenarios_path = path.join(os.path.dirname(os.path.realpath(__file__)), 'doom')
-        self.scenarios_dir = local_scenarios_path if 'COACH_LOCAL' in level \
-            else path.join(environ.get('VIZDOOM_ROOT'), 'scenarios')
+        if 'COACH_LOCAL' in level:
+            self.scenarios_dir = local_scenarios_path
+        elif 'VIZDOOM_ROOT' in environ:
+            self.scenarios_dir = path.join(environ.get('VIZDOOM_ROOT'), 'scenarios')
+        else:
+            self.scenarios_dir = path.join(os.path.dirname(os.path.realpath(vizdoom.__file__)), 'scenarios')

        self.game = vizdoom.DoomGame()
        self.game.load_config(path.join(self.scenarios_dir, self.level.value))
--- a/rl_coach/tests/README.md
+++ b/rl_coach/tests/README.md
@@ -9,11 +9,12 @@ several parts, each testing the framework in different areas and strictness.
 * **Docker** -
    
    The docker image we supply checks Coach in terms of installation process, and verifies that all the components
-    are installed correctly. To build the Docke, use the command:
+    are installed correctly. To build the Docker image, use the command:
    
    ```
-    docker build . -t coach
-    docker run -it coach /bin/bash
+    cd docker
+    make build_base && make build
+    make run
    ```
    

@@ -45,7 +46,7 @@ several parts, each testing the framework in different areas and strictness.
    The golden tests can be run using the following command:
    
    ```
-    python3 rl_coach/tests/golden_tests.py
+    python3 rl_coach/tests/test_golden.py
    ```

 * **Trace tests** -
--- a/rl_coach/tests/test_eks.py
+++ b/rl_coach/tests/test_eks.py
@@ -37,6 +37,7 @@ class EKSHandler():
        container = client.V1Container(
            name=self.test_name,
            image=self.image,
+            command=['/bin/bash', '-c'],
            args=[self.test_command],
            image_pull_policy='Always',
            working_dir=self.working_dir,
--- a/rl_coach/tests/test_golden.py
+++ b/rl_coach/tests/test_golden.py
@@ -94,14 +94,13 @@ def collect_presets():
            yield preset_name


-print(list(collect_presets()))
@pytest.fixture(params=list(collect_presets()))
 def preset_name(request):
    return request.param


@pytest.mark.golden_test
-def test_preset_reward(preset_name, no_progress_bar=False, time_limit=60 * 60):
+def test_preset_reward(preset_name, no_progress_bar=False, time_limit=60 * 60, verbose=False):
    preset_validation_params = validation_params(preset_name)

    win_size = 10
@@ -200,12 +199,12 @@ def test_preset_reward(preset_name, no_progress_bar=False, time_limit=60 * 60):
    else:
        if time.time() - start_time > time_limit:
            screen.error("Failed due to exceeding time limit", crash=False)
-            if args.verbose:
+            if verbose:
                screen.error("command exitcode: {}".format(p.returncode), crash=False)
                screen.error(open(log_file_name).read(), crash=False)
        elif csv_paths:
            screen.error("Failed due to insufficient reward", crash=False)
-            if args.verbose:
+            if verbose:
                screen.error("command exitcode: {}".format(p.returncode), crash=False)
                screen.error(open(log_file_name).read(), crash=False)
            screen.error("preset_validation_params.max_episodes_to_achieve_reward: {}".format(
@@ -216,7 +215,7 @@ def test_preset_reward(preset_name, no_progress_bar=False, time_limit=60 * 60):
            screen.error("episode number: {}".format(csv['Episode #'].values[-1]), crash=False)
        else:
            screen.error("csv file never found", crash=False)
-            if args.verbose:
+            if verbose:
                screen.error("command exitcode: {}".format(p.returncode), crash=False)
                screen.error(open(log_file_name).read(), crash=False)

@@ -227,12 +226,12 @@ def test_preset_reward(preset_name, no_progress_bar=False, time_limit=60 * 60):

 def main():
    parser = argparse.ArgumentParser()
-    parser.add_argument('-p', '--preset',
-                        help="(string) Name of a preset to run (as configured in presets.py)",
+    parser.add_argument('-p', '--preset', '--presets',
+                        help="(string) Name of preset(s) to run (comma separated, and as configured in presets.py)",
                        default=None,
                        type=str)
    parser.add_argument('-ip', '--ignore_presets',
-                        help="(string) Name of a preset(s) to ignore (comma separated, and as configured in presets.py)",
+                        help="(string) Name of preset(s) to ignore (comma separated, and as configured in presets.py)",
                        default=None,
                        type=str)
    parser.add_argument('-v', '--verbose',
@@ -251,7 +250,7 @@ def main():

    args = parser.parse_args()
    if args.preset is not None:
-        presets_lists = [args.preset]
+        presets_lists = args.preset.split(',')
    else:
        presets_lists = all_presets()

@@ -268,6 +267,7 @@ def main():
        if args.stop_after_first_failure and fail_count > 0:
            break
        if preset_name not in presets_to_ignore:
+            print("Attempting to run Preset: %s" % preset_name)
            if not importable(preset_name):
                screen.error("Failed to load preset <{}>".format(preset_name), crash=False)
                fail_count += 1
@@ -278,7 +278,7 @@ def main():
                continue

            test_count += 1
-            test_passed = test_preset_reward(preset_name, args.no_progress_bar, args.time_limit)
+            test_passed = test_preset_reward(preset_name, args.no_progress_bar, args.time_limit, args.verbose)
            if not test_passed:
                fail_count += 1

--- a/rl_coach/tests/trace_tests.py
+++ b/rl_coach/tests/trace_tests.py
@@ -168,12 +168,12 @@ def wait_and_check(args, processes, force=False):

 def main():
    parser = argparse.ArgumentParser()
-    parser.add_argument('-p', '--preset',
-                        help="(string) Name of a preset to run (as configured in presets.py)",
+    parser.add_argument('-p', '--preset', '--presets',
+                        help="(string) Name of preset(s) to run (comma separated, as configured in presets.py)",
                        default=None,
                        type=str)
    parser.add_argument('-ip', '--ignore_presets',
-                        help="(string) Name of a preset(s) to ignore (comma separated, and as configured in presets.py)",
+                        help="(string) Name of preset(s) to ignore (comma separated, and as configured in presets.py)",
                        default=None,
                        type=str)
    parser.add_argument('-v', '--verbose',
@@ -198,7 +198,7 @@ def main():
        args.max_threads = 1

    if args.preset is not None:
-        presets_lists = [args.preset]
+        presets_lists = args.preset.split(',')
    else:
        presets_lists = [f[:-3] for f in os.listdir(os.path.join('rl_coach', 'presets')) if
                         f[-3:] == '.py' and not f == '__init__.py']