tests: added new tests + utils code improved (#221)

* tests: added new tests + utils code improved
* new tests:
  - test_preset_args_combination
  - test_preset_mxnet_framework
* added more flags to test_preset_args
* added validation for flags in utils
* tests: added new tests + fixed utils
* tests: added new checkpoint test
* tests: added checkpoint test improve utils
* tests: added tests + improve validations
* bump integration CI run timeout.
* tests: improve timerun + add functional test marker
2026-02-14 12:55:51 +01:00 · 2019-03-18 11:21:43 +02:00
parent d6158a5cfc
commit 4a8451ff02
8 changed files with 730 additions and 278 deletions
--- a/rl_coach/tests/utils/args_utils.py
+++ b/rl_coach/tests/utils/args_utils.py
@@ -16,17 +16,27 @@
 """Manage all command arguments."""

 import os
-import re
 import signal
 import time
-
-import psutil as psutil
-
-from rl_coach.logger import screen
-from rl_coach.tests.utils import test_utils
+import pandas as pd
+import numpy as np
+from rl_coach.tests.utils.test_utils import get_csv_path, get_files_from_dir, \
+    find_string_in_logs
 from rl_coach.tests.utils.definitions import Definitions as Def


+def collect_preset_for_mxnet():
+    """
+    Collect presets that are relevant for MXNet args testing only.
+    This is used for testing arguments for specific presets that are defined
+    in the definitions (mxnet_args_test under Presets).
+    :return: preset(s) list
+    """
+    for pn in Def.Presets.mxnet_args_test:
+        assert pn, Def.Consts.ASSERT_MSG.format("Preset name", pn)
+        yield pn
+
+
 def collect_preset_for_args():
    """
    Collect presets that relevant for args testing only.
@@ -39,19 +49,27 @@ def collect_preset_for_args():
        yield pn


+def collect_preset_for_seed():
+    """
+    Collect presets that are relevant for seed argument testing only.
+    This is used for testing arguments for specific presets that are defined
+    in the definitions (seed_args_test under Presets).
+    :return: preset(s) list
+    """
+    for pn in Def.Presets.seed_args_test:
+        assert pn, Def.Consts.ASSERT_MSG.format("Preset name", pn)
+        yield pn
+
+
 def collect_args():
    """
    Collect args from the cmd args list - on each test iteration, it will
    yield one line (one arg).
    :yield: one arg foe each test iteration
    """
-    for k, v in Def.Flags.cmd_args.items():
-        cmd = []
-        cmd.append(k)
-        if v is not None:
-            cmd.append(v)
-        assert cmd, Def.Consts.ASSERT_MSG.format("cmd array", str(cmd))
-        yield cmd
+    for i in Def.Flags.cmd_args:
+        assert i, Def.Consts.ASSERT_MSG.format("flag list", str(i))
+        yield i


 def add_one_flag_value(flag):
@@ -60,99 +78,86 @@ def add_one_flag_value(flag):
    :param flag: dict flag
    :return: flag with format
    """
-    if not flag or len(flag) > 2 or len(flag) == 0:
+    if not flag or len(flag) == 0:
        return []

    if len(flag) == 1:
        return flag

-    if Def.Flags.css in flag[1]:
-        flag[1] = 30
+    if Def.Flags.enw in flag[1]:
+        flag[1] = '2'

-    elif Def.Flags.crd in flag[1]:
-        # TODO: check dir of checkpoint
-        flag[1] = os.path.join(Def.Path.experiments)
+    elif Def.Flags.css in flag[1]:
+        flag[1] = '5'

-    elif Def.Flags.et in flag[1]:
-        # TODO: add valid value
-        flag[1] = ""
+    elif Def.Flags.fw_ten in flag[1]:
+        flag[1] = "tensorflow"

-    elif Def.Flags.ept in flag[1]:
-        # TODO: add valid value
-        flag[1] = ""
+    elif Def.Flags.fw_mx in flag[1]:
+        flag[1] = "mxnet"

    elif Def.Flags.cp in flag[1]:
-        # TODO: add valid value
-        flag[1] = ""
-
-    elif Def.Flags.seed in flag[1]:
-        flag[1] = 0
-
-    elif Def.Flags.dccp in flag[1]:
-        # TODO: add valid value
-        flag[1] = ""
+        flag[1] = "heatup_steps=EnvironmentSteps({})".format(Def.Consts.num_hs)

    return flag


-def check_files_in_dir(dir_path):
+def is_reward_reached(csv_path, p_valid_params, start_time, time_limit):
    """
-    Check if folder has files
-    :param dir_path: |string| folder path
-    :return: |Array| return files in folder
+    Check the result of the experiment by collecting all the Evaluation Reward
+    values; their windowed average should reach the min reward threshold.
+    :param csv_path: csv file  (results)
+    :param p_valid_params: experiment test params
+    :param start_time: start time of the test
+    :param time_limit: timeout of the test
+    :return: |Bool| true if reached the reward minimum
    """
-    start_time = time.time()
-    entities = None
-    while time.time() - start_time < Def.TimeOuts.wait_for_files:
-        # wait until logs created
-        if os.path.exists(dir_path):
-            entities = os.listdir(dir_path)
-            if len(entities) > 0:
-                break
-        time.sleep(1)
+    win_size = 10
+    last_num_episodes = 0
+    csv = None
+    reward_reached = False

-    assert len(entities) > 0, \
-        Def.Consts.ASSERT_MSG.format("num files > 0", len(entities))
-    return entities
+    while csv is None or (csv['Episode #'].values[-1]
+           < p_valid_params.max_episodes_to_achieve_reward and
+           time.time() - start_time < time_limit):

+        csv = pd.read_csv(csv_path)

-def find_string_in_logs(log_path, str):
-    """
-    Find string into the log file
-    :param log_path: |string| log path
-    :param str: |string| search text
-    :return: |bool| true if string found in the log file
-    """
-    start_time = time.time()
-    while time.time() - start_time < Def.TimeOuts.wait_for_files:
-        # wait until logs created
-        if os.path.exists(log_path):
+        if 'Evaluation Reward' not in csv.keys():
+            continue
+
+        rewards = csv['Evaluation Reward'].values
+
+        rewards = rewards[~np.isnan(rewards)]
+        if len(rewards) >= 1:
+            averaged_rewards = np.convolve(rewards, np.ones(
+                min(len(rewards), win_size)) / win_size, mode='valid')
+
+        else:
+            # May be in heat-up steps
+            time.sleep(1)
+            continue
+
+        if csv['Episode #'].shape[0] - last_num_episodes <= 0:
+            continue
+
+        last_num_episodes = csv['Episode #'].values[-1]
+
+        # check if reward is enough
+        if np.any(averaged_rewards >= p_valid_params.min_reward_threshold):
+            reward_reached = True
            break
        time.sleep(1)

-    if not os.path.exists(log_path):
-        return False
-
-    if str in open(log_path, 'r').read():
-        return True
-    return False
+    return reward_reached


-def get_csv_path(clres):
-    """
-    Get the csv path with the results - reading csv paths will take some time
-    :param clres: object of files that test is creating
-    :return: |Array| csv path
-    """
-    return test_utils.read_csv_paths(test_path=clres.exp_path,
-                                     filename_pattern=clres.fn_pattern)
-
-
-def validate_args_results(flag, clres=None, process=None, start_time=None,
-                          timeout=None):
+def validate_arg_result(flag, p_valid_params, clres=None, process=None,
+                        start_time=None, timeout=Def.TimeOuts.test_time_limit):
    """
    Validate results of one argument.
    :param flag: flag to check
+    :param p_valid_params: params test per preset
    :param clres: object of files paths (results of test experiment)
    :param process: process object
    :param start_time: start time of the test
@@ -186,38 +191,11 @@ def validate_args_results(flag, clres=None, process=None, start_time=None,
        -asc, --apply_stop_condition: Once selected, coach stopped when 
                                      required success rate reached
        """
-        while time.time() - start_time < timeout:
-
-            if find_string_in_logs(log_path=clres.stdout.name,
-                                   str=Def.Consts.REACHED_REQ_ASC):
-                assert True, Def.Consts.ASSERT_MSG. \
-                    format(Def.Consts.REACHED_REQ_ASC, "Message Not Found")
-                break
-
-    elif flag[0] == "-d" or flag[0] == "--open_dashboard":
-        """
-        -d, --open_dashboard: Once selected, firefox browser will open to show
-                              coach's Dashboard.
-        """
-        proc_id = None
-        start_time = time.time()
-        while time.time() - start_time < Def.TimeOuts.wait_for_files:
-            for proc in psutil.process_iter():
-                if proc.name() == Def.DASHBOARD_PROC:
-                    assert proc.name() == Def.DASHBOARD_PROC, \
-                        Def.Consts.ASSERT_MSG. format(Def.DASHBOARD_PROC,
-                                                      proc.name())
-                    proc_id = proc.pid
-                    break
-            if proc_id:
-                break
-
-        if proc_id:
-            # kill firefox process
-            os.kill(proc_id, signal.SIGKILL)
-        else:
-            assert False, Def.Consts.ASSERT_MSG.format("Found Firefox process",
-                                                       proc_id)
+        assert find_string_in_logs(log_path=clres.stdout.name,
+                                   str=Def.Consts.REACHED_REQ_ASC,
+                                   wait_and_find=True), \
+            Def.Consts.ASSERT_MSG.format(Def.Consts.REACHED_REQ_ASC,
+                                         "Message Not Found")

    elif flag[0] == "--print_networks_summary":
        """
@@ -254,18 +232,19 @@ def validate_args_results(flag, clres=None, process=None, start_time=None,
        assert os.path.isdir(tensorboard_path), \
            Def.Consts.ASSERT_MSG.format("tensorboard path", tensorboard_path)

-        # check if folder contain files
-        check_files_in_dir(dir_path=tensorboard_path)
+        # check if folder contain files and check extensions
+        files = get_files_from_dir(dir_path=tensorboard_path)
+        assert any(".tfevents." in file for file in files)

    elif flag[0] == "-onnx" or flag[0] == "--export_onnx_graph":
        """
        -onnx, --export_onnx_graph: Once selected, warning message should 
                                    appear, it should be with another flag.
        """
-        if find_string_in_logs(log_path=clres.stdout.name,
-                               str=Def.Consts.ONNX_WARNING):
-            assert True, Def.Consts.ASSERT_MSG.format(
-                Def.Consts.ONNX_WARNING, "Not found")
+        assert find_string_in_logs(log_path=clres.stdout.name,
+                                   str=Def.Consts.ONNX_WARNING,
+                                   wait_and_find=True), \
+            Def.Consts.ASSERT_MSG.format(Def.Consts.ONNX_WARNING, "Not found")

    elif flag[0] == "-dg" or flag[0] == "--dump_gifs":
        """
@@ -287,7 +266,7 @@ def validate_args_results(flag, clres=None, process=None, start_time=None,
                break

        # check if folder contain files
-        check_files_in_dir(dir_path=gifs_path)
+        get_files_from_dir(dir_path=gifs_path)
        # TODO: check if play window is opened

    elif flag[0] == "-dm" or flag[0] == "--dump_mp4":
@@ -310,7 +289,7 @@ def validate_args_results(flag, clres=None, process=None, start_time=None,
                break

        # check if folder contain files
-        check_files_in_dir(dir_path=videos_path)
+        get_files_from_dir(dir_path=videos_path)
        # TODO: check if play window is opened

    elif flag[0] == "--nocolor":
@@ -318,37 +297,181 @@ def validate_args_results(flag, clres=None, process=None, start_time=None,
        --nocolor: Once selected, check if color prefix is replacing the actual
                   color; example: '## agent: ...'
        """
-        while time.time() - start_time < timeout:
-
-            if find_string_in_logs(log_path=clres.stdout.name,
-                                   str=Def.Consts.COLOR_PREFIX):
-                assert True, Def.Consts.ASSERT_MSG. \
-                    format(Def.Consts.COLOR_PREFIX, "Color Prefix Not Found")
-                break
+        assert find_string_in_logs(log_path=clres.stdout.name,
+                                   str=Def.Consts.COLOR_PREFIX,
+                                   wait_and_find=True), \
+            Def.Consts.ASSERT_MSG.format(Def.Consts.COLOR_PREFIX,
+                                         "Color Prefix Not Found")

    elif flag[0] == "--evaluate":
        """
        --evaluate: Once selected, Coach start testing, there is not training.
        """
-        tries = 5
-        while time.time() - start_time < timeout and tries > 0:
-
-            if find_string_in_logs(log_path=clres.stdout.name,
-                                   str=Def.Consts.TRAINING):
-                assert False, Def.Consts.ASSERT_MSG.format(
-                    "Training Not Found", Def.Consts.TRAINING)
-            else:
-                time.sleep(1)
-                tries -= 1
-        assert True, Def.Consts.ASSERT_MSG.format("Training Found",
-                                                  Def.Consts.TRAINING)
+        # wait until files created
+        get_csv_path(clres=clres)
+        time.sleep(15)
+        assert not find_string_in_logs(log_path=clres.stdout.name,
+                                       str=Def.Consts.TRAINING), \
+            Def.Consts.ASSERT_MSG.format("Training Not Found",
+                                         Def.Consts.TRAINING)

    elif flag[0] == "--play":
        """
-        --play: Once selected alone, warning message should appear, it should
-                be with another flag.
+        --play: Once selected alone, a warning message should appear, it
+                should be with another flag.
        """
-        if find_string_in_logs(log_path=clres.stdout.name,
-                               str=Def.Consts.PLAY_WARNING):
-            assert True, Def.Consts.ASSERT_MSG.format(
-                Def.Consts.ONNX_WARNING, "Not found")
+        assert find_string_in_logs(log_path=clres.stdout.name,
+                                   str=Def.Consts.PLAY_WARNING,
+                                   wait_and_find=True), \
+            Def.Consts.ASSERT_MSG.format(Def.Consts.PLAY_WARNING, "Not found")
+
+    elif flag[0] == "-et" or flag[0] == "--environment_type":
+        """
+        -et, --environment_type: Once selected check csv results is created.
+        """
+        csv_path = get_csv_path(clres)
+        assert len(csv_path) > 0, \
+            Def.Consts.ASSERT_MSG.format("path not found", csv_path)
+
+    elif flag[0] == "-s" or flag[0] == "--checkpoint_save_secs":
+        """
+        -s, --checkpoint_save_secs: Once selected, check if files added to the
+                                    experiment path.
+        """
+        csv_path = get_csv_path(clres)
+        assert len(csv_path) > 0, \
+            Def.Consts.ASSERT_MSG.format("path not found", csv_path)
+
+        exp_path = os.path.dirname(csv_path[0])
+        checkpoint_path = os.path.join(exp_path, Def.Path.checkpoint)
+
+        # wait until the checkpoint folder is created
+        while time.time() - start_time < timeout:
+            if os.path.isdir(checkpoint_path):
+                assert os.path.isdir(checkpoint_path), \
+                    Def.Consts.ASSERT_MSG.format("checkpoint path",
+                                                 checkpoint_path)
+                break
+
+        # check if folder contain files
+        get_files_from_dir(dir_path=checkpoint_path)
+
+    elif flag[0] == "-ew" or flag[0] == "--evaluation_worker":
+        """
+        -ew, --evaluation_worker: Once selected, check that an evaluation 
+                                  worker is created, e.g. by checking that its
+                                  csv file is created.        
+        """
+        # wait until files created
+        csv_path = get_csv_path(clres=clres)
+        assert len(csv_path) > 0, \
+            Def.Consts.ASSERT_MSG.format("path not found", csv_path)
+
+    elif flag[0] == "-cp" or flag[0] == "--custom_parameter":
+        """
+        -cp, --custom_parameter: Once selected, check that the total steps are
+                                 around the given param with +/- gap.
+                                 also, check the heat-up param      
+        """
+        # wait until files created
+        csv_path = get_csv_path(clres=clres)
+        assert len(csv_path) > 0, \
+            Def.Consts.ASSERT_MSG.format("path not found", csv_path)
+
+        # read csv file
+        csv = pd.read_csv(csv_path[0])
+
+        # check heat-up value
+        results = []
+        while csv["In Heatup"].values[-1] == 1:
+            csv = pd.read_csv(csv_path[0])
+            last_step = csv["Total steps"].values
+            time.sleep(1)
+            results.append(last_step[-1])
+
+        assert results[-1] >= Def.Consts.num_hs, \
+            Def.Consts.ASSERT_MSG.format("bigger than " + Def.Consts.num_hs,
+                                         results[-1])
+
+    elif flag[0] == "-f" or flag[0] == "--framework":
+        """
+        -f, --framework: Once selected, f = tensorflow or mxnet
+        """
+        # wait until files created
+        csv_path = get_csv_path(clres=clres)
+        assert len(csv_path) > 0, \
+            Def.Consts.ASSERT_MSG.format("path not found", csv_path)
+
+        get_reward = is_reward_reached(csv_path=csv_path[0],
+                                       p_valid_params=p_valid_params,
+                                       start_time=start_time,
+                                       time_limit=timeout)
+
+        # check if experiment is working and reached the reward
+        assert get_reward, Def.Consts.ASSERT_MSG.format(
+            "Doesn't reached the reward", get_reward)
+
+        # check if there is no exception
+        assert not find_string_in_logs(log_path=clres.stdout.name,
+                                       str=Def.Consts.LOG_ERROR)
+
+        ret_val = process.poll()
+        assert ret_val is None, Def.Consts.ASSERT_MSG.format("None", ret_val)
+
+    elif flag[0] == "-crd" or flag[0] == "--checkpoint_restore_dir":
+
+        """
+        -crd, --checkpoint_restore_dir: Once selected alone, check that can't
+                                        restore checkpoint dir (negative test).
+        """
+        # wait until files created
+        csv_path = get_csv_path(clres=clres)
+        assert len(csv_path) > 0, \
+            Def.Consts.ASSERT_MSG.format("path not found", csv_path)
+        assert find_string_in_logs(log_path=clres.stdout.name,
+                                   str=Def.Consts.NO_CHECKPOINT), \
+            Def.Consts.ASSERT_MSG.format(Def.Consts.NO_CHECKPOINT, "Not found")
+
+    elif flag[0] == "--seed":
+        """
+        --seed: Once selected, check logs of process list if all are the same
+                results.
+        """
+        lst_csv = []
+        # wait until files created
+        csv_path = get_csv_path(clres=clres, extra_tries=10)
+
+        assert len(csv_path) > 0, \
+            Def.Consts.ASSERT_MSG.format("paths are not found", csv_path)
+
+        assert int(flag[1]) == len(csv_path), Def.Consts.ASSERT_MSG. \
+            format(len(csv_path), int(flag[1]))
+
+        # wait for getting results in csv's
+        for i in range(len(csv_path)):
+
+            lines_in_file = pd.read_csv(csv_path[i])
+            while len(lines_in_file['Episode #'].values) < 100 and \
+                    time.time() - start_time < Def.TimeOuts.test_time_limit:
+                lines_in_file = pd.read_csv(csv_path[i])
+                time.sleep(1)
+
+            lst_csv.append(pd.read_csv(csv_path[i],
+                                       nrows=Def.Consts.N_csv_lines))
+
+        assert len(lst_csv) > 1, Def.Consts.ASSERT_MSG.format("> 1",
+                                                              len(lst_csv))
+
+        df1 = lst_csv[0]
+        for df in lst_csv[1:]:
+            assert list(df1['Training Iter'].values) == list(
+                df['Training Iter'].values)
+
+            assert list(df1['ER #Transitions'].values) == list(
+                df['ER #Transitions'].values)
+
+            assert list(df1['Total steps'].values) == list(
+                df['Total steps'].values)
+
+    elif flag[0] == "-c" or flag[0] == "--use_cpu":
+        pass