From 4a8451ff0210e4f33a85c64817a9d3afb6f19839 Mon Sep 17 00:00:00 2001 From: anabwan <46447582+anabwan@users.noreply.github.com> Date: Mon, 18 Mar 2019 11:21:43 +0200 Subject: [PATCH] tests: added new tests + utils code improved (#221) * tests: added new tests + utils code improved * new tests: - test_preset_args_combination - test_preset_mxnet_framework * added more flags to test_preset_args * added validation for flags in utils * tests: added new tests + fixed utils * tests: added new checkpoint test * tests: added checkpoint test improve utils * tests: added tests + improve validations * bump integration CI run timeout. * tests: improve timerun + add functional test marker --- .circleci/config.yml | 2 +- rl_coach/tests/conftest.py | 67 ++--- rl_coach/tests/test_args.py | 51 ---- rl_coach/tests/test_checkpoint.py | 111 +++++++- rl_coach/tests/test_coach_args.py | 145 ++++++++++ rl_coach/tests/utils/args_utils.py | 411 ++++++++++++++++++---------- rl_coach/tests/utils/definitions.py | 134 ++++++--- rl_coach/tests/utils/test_utils.py | 87 +++++- 8 files changed, 730 insertions(+), 278 deletions(-) delete mode 100644 rl_coach/tests/test_args.py create mode 100644 rl_coach/tests/test_coach_args.py diff --git a/.circleci/config.yml b/.circleci/config.yml index 37c172a..b598cd1 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -157,7 +157,7 @@ jobs: name: run integration tests command: | python3 rl_coach/tests/test_eks.py -c coach-test -bn ${CIRCLE_BUILD_NUM} -tn integration-test -tc 'make integration_tests_without_docker' -i 316971102342.dkr.ecr.us-west-2.amazonaws.com/coach:$(git describe --tags --always --dirty) -cpu 2048 -mem 4096 - no_output_timeout: 20m + no_output_timeout: 30m - run: name: cleanup command: | diff --git a/rl_coach/tests/conftest.py b/rl_coach/tests/conftest.py index 4cfdd1e..2886483 100644 --- a/rl_coach/tests/conftest.py +++ b/rl_coach/tests/conftest.py @@ -15,10 +15,9 @@ # """PyTest configuration.""" -import configparser as cfgparser import os -import platform import shutil +import sys import pytest import rl_coach.tests.utils.args_utils as a_utils import rl_coach.tests.utils.presets_utils as p_utils @@ -26,44 +25,12 @@ from rl_coach.tests.utils.definitions import Definitions as Def from os import path -def pytest_collection_modifyitems(config, items): - """pytest built in method to pre-process cli options""" - global test_config - test_config = cfgparser.ConfigParser() - str_rootdir = str(config.rootdir) - str_inifile = str(config.inifile) - # Get the relative path of the inifile - # By default is an absolute path but relative path when -c option used - config_path = os.path.relpath(str_inifile, str_rootdir) - config_path = os.path.join(str_rootdir, config_path) - assert (os.path.exists(config_path)) - test_config.read(config_path) - - -def pytest_runtest_setup(item): - """Called before test is run.""" - if len(item.own_markers) < 1: - return - if (item.own_markers[0].name == "unstable" and - "unstable" not in item.config.getoption("-m")): - pytest.skip("skipping unstable test") - - if item.own_markers[0].name == "linux_only": - if platform.system() != 'Linux': - pytest.skip("Skipping test that isn't Linux OS.") - - if item.own_markers[0].name == "golden_test": - """ do some custom configuration for golden tests. 
""" - # TODO: add custom functionality - pass - - @pytest.fixture(scope="module", params=list(p_utils.collect_presets())) def preset_name(request): """ Return all preset names """ - return request.param + yield request.param @pytest.fixture(scope="function", params=list(a_utils.collect_args())) @@ -71,7 +38,7 @@ def flag(request): """ Return flags names in function scope """ - return request.param + yield request.param @pytest.fixture(scope="module", params=list(a_utils.collect_preset_for_args())) @@ -80,7 +47,26 @@ def preset_args(request): Return preset names that can be used for args testing only; working in module scope """ - return request.param + yield request.param + + +@pytest.fixture(scope="module", params=list(a_utils.collect_preset_for_seed())) +def preset_args_for_seed(request): + """ + Return preset names that can be used for args testing only and for special + action when using seed argument; working in module scope + """ + yield request.param + + +@pytest.fixture(scope="module", + params=list(a_utils.collect_preset_for_mxnet())) +def preset_for_mxnet_args(request): + """ + Return preset names that can be used for args testing only; this special + fixture will be used for mxnet framework only. working in module scope + """ + yield request.param @pytest.fixture(scope="function") @@ -105,6 +91,7 @@ def clres(request): p_valid_params = p_utils.validation_params(p_name) + sys.path.append('.') test_name = 'ExpName_{}'.format(p_name) test_path = os.path.join(Def.Path.experiments, test_name) if path.exists(test_path): @@ -113,7 +100,7 @@ def clres(request): # get the stdout for logs results log_file_name = 'test_log_{}.txt'.format(p_name) stdout = open(log_file_name, 'w') - fn_pattern = 'worker_0*.csv' if p_valid_params.num_workers > 1 else '*.csv' + fn_pattern = '*.csv' if p_valid_params.num_workers > 1 else 'worker_0*.csv' res = CreateCsvLog(test_path, stdout, fn_pattern) @@ -123,5 +110,5 @@ def clres(request): if path.exists(res.exp_path): shutil.rmtree(res.exp_path) - if os.path.exists(res.exp_path): - os.remove(res.stdout) + if path.exists(res.stdout.name): + os.remove(res.stdout.name) diff --git a/rl_coach/tests/test_args.py b/rl_coach/tests/test_args.py deleted file mode 100644 index deb5779..0000000 --- a/rl_coach/tests/test_args.py +++ /dev/null @@ -1,51 +0,0 @@ -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -import subprocess -import time -import rl_coach.tests.utils.args_utils as a_utils -import rl_coach.tests.utils.presets_utils as p_utils -from rl_coach.tests.utils.definitions import Definitions as Def - - -def test_preset_args(preset_args, flag, clres, start_time=time.time(), - time_limit=Def.TimeOuts.test_time_limit): - """ Test command arguments - the test will check all flags one-by-one.""" - - p_valid_params = p_utils.validation_params(preset_args) - - run_cmd = [ - 'python3', 'rl_coach/coach.py', - '-p', '{}'.format(preset_args), - '-e', '{}'.format("ExpName_" + preset_args), - ] - - if p_valid_params.reward_test_level: - lvl = ['-lvl', '{}'.format(p_valid_params.reward_test_level)] - run_cmd.extend(lvl) - - # add flags to run command - test_flag = a_utils.add_one_flag_value(flag=flag) - run_cmd.extend(test_flag) - print(str(run_cmd)) - - # run command - p = subprocess.Popen(run_cmd, stdout=clres.stdout, stderr=clres.stdout) - - # validate results - a_utils.validate_args_results(test_flag, clres, p, start_time, time_limit) - - # Close process - p.kill() diff --git a/rl_coach/tests/test_checkpoint.py b/rl_coach/tests/test_checkpoint.py index 5fb8991..b1ecc5b 100644 --- a/rl_coach/tests/test_checkpoint.py +++ b/rl_coach/tests/test_checkpoint.py @@ -1,24 +1,125 @@ +# +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+#
 import os
+import shutil
+import subprocess
+import time
 import pytest
+import signal
 import tempfile
-
+import numpy as np
+import pandas as pd
+import rl_coach.tests.utils.args_utils as a_utils
+import rl_coach.tests.utils.test_utils as test_utils
 from rl_coach import checkpoint
+from rl_coach.tests.utils.definitions import Definitions as Def
 
 
 @pytest.mark.unit_test
 def test_get_checkpoint_state():
-    files = ['4.test.ckpt.ext', '2.test.ckpt.ext', '3.test.ckpt.ext', '1.test.ckpt.ext', 'prefix.10.test.ckpt.ext']
+    files = ['4.test.ckpt.ext', '2.test.ckpt.ext', '3.test.ckpt.ext',
+             '1.test.ckpt.ext', 'prefix.10.test.ckpt.ext']
     with tempfile.TemporaryDirectory() as temp_dir:
         [open(os.path.join(temp_dir, fn), 'a').close() for fn in files]
 
-        checkpoint_state = checkpoint.get_checkpoint_state(temp_dir, all_checkpoints=True)
-        assert checkpoint_state.model_checkpoint_path == os.path.join(temp_dir, '4.test.ckpt')
+        checkpoint_state = \
+            checkpoint.get_checkpoint_state(temp_dir,
+                                            all_checkpoints=True)
+        assert checkpoint_state.model_checkpoint_path == os.path.join(
+            temp_dir, '4.test.ckpt')
         assert checkpoint_state.all_model_checkpoint_paths == \
                [os.path.join(temp_dir, f[:-4]) for f in sorted(files[:-1])]
 
-        reader = checkpoint.CheckpointStateReader(temp_dir, checkpoint_state_optional=False)
+        reader = \
+            checkpoint.CheckpointStateReader(temp_dir,
+                                             checkpoint_state_optional=False)
         assert reader.get_latest() is None
         assert len(reader.get_all()) == 0
 
         reader = checkpoint.CheckpointStateReader(temp_dir)
         assert reader.get_latest().num == 4
         assert [ckp.num for ckp in reader.get_all()] == [1, 2, 3, 4]
+
+
+@pytest.mark.functional_test
+def test_restore_checkpoint(preset_args, clres, start_time=time.time()):
+    """ Create checkpoints in a first run and restore them in a second run."""
+
+    def _create_cmd_and_run(flag):
+
+        run_cmd = [
+            'python3', 'rl_coach/coach.py',
+            '-p', '{}'.format(preset_args),
+            '-e', '{}'.format("ExpName_" + preset_args),
+        ]
+        test_flag = a_utils.add_one_flag_value(flag=flag)
+        run_cmd.extend(test_flag)
+
+        p = subprocess.Popen(run_cmd, stdout=clres.stdout, stderr=clres.stdout)
+
+        return p
+
+    create_cp_proc = _create_cmd_and_run(flag=['--checkpoint_save_secs', '5'])
+
+    # wait for checkpoint files
+    csv_list = a_utils.get_csv_path(clres=clres)
+    assert len(csv_list) > 0
+    exp_dir = os.path.dirname(csv_list[0])
+
+    checkpoint_dir = os.path.join(exp_dir, Def.Path.checkpoint)
+
+    checkpoint_test_dir = os.path.join(Def.Path.experiments, Def.Path.test_dir)
+    if os.path.exists(checkpoint_test_dir):
+        shutil.rmtree(checkpoint_test_dir)
+
+    entities = a_utils.get_files_from_dir(checkpoint_dir)
+
+    # poll the checkpoint folder until the "10_Step" checkpoint shows up or
+    # the test time limit is reached
+    while not any("10_Step" in file for file in entities) and time.time() - \
+            start_time < Def.TimeOuts.test_time_limit:
+        entities = a_utils.get_files_from_dir(checkpoint_dir)
+        time.sleep(1)
+
+    assert len(entities) > 0
+    assert "checkpoint" in entities
+    assert any(".ckpt." in file for file in entities)
+
+    # send CTRL+C to close experiment
+    create_cp_proc.send_signal(signal.SIGINT)
+
+    csv = pd.read_csv(csv_list[0])
+    rewards = csv['Evaluation Reward'].values
+    rewards = rewards[~np.isnan(rewards)]
+    min_reward = np.amin(rewards)
+
+    if os.path.isdir(checkpoint_dir):
+        shutil.copytree(exp_dir, checkpoint_test_dir)
+        shutil.rmtree(exp_dir)
+
+    create_cp_proc.kill()
+    checkpoint_test_dir = "{}/{}".format(checkpoint_test_dir,
+                                         Def.Path.checkpoint)
+    # run second time with checkpoint folder (restore)
+    restore_cp_proc = _create_cmd_and_run(flag=['-crd', checkpoint_test_dir,
+                                                '--evaluate'])
+
+    new_csv_list = test_utils.get_csv_path(clres=clres)
+    time.sleep(10)
+
+    # the restored evaluation run should do at least as well as the weakest
+    # evaluation episode of the first run
+    csv = pd.read_csv(new_csv_list[0])
+    res = csv['Episode Length'].values[-1]
+    assert res >= min_reward, \
+        Def.Consts.ASSERT_MSG.format(str(res) + " >= " + str(min_reward),
                                      str(res) + " < " + str(min_reward))
+    restore_cp_proc.kill()
diff --git a/rl_coach/tests/test_coach_args.py b/rl_coach/tests/test_coach_args.py
new file mode 100644
index 0000000..23423fc
--- /dev/null
+++ b/rl_coach/tests/test_coach_args.py
@@ -0,0 +1,145 @@
+#
+# Copyright (c) 2019 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import subprocess
+import time
+import pytest
+import rl_coach.tests.utils.args_utils as a_utils
+import rl_coach.tests.utils.presets_utils as p_utils
+from rl_coach.tests.utils.definitions import Definitions as Def
+
+
+@pytest.mark.functional_test
+def test_preset_args(preset_args, flag, clres, start_time=time.time(),
+                     time_limit=Def.TimeOuts.test_time_limit):
+    """ Test command arguments - the test will check all flags one-by-one."""
+
+    p_valid_params = p_utils.validation_params(preset_args)
+
+    run_cmd = [
+        'python3', 'rl_coach/coach.py',
+        '-p', '{}'.format(preset_args),
+        '-e', '{}'.format("ExpName_" + preset_args),
+    ]
+
+    if p_valid_params.reward_test_level:
+        lvl = ['-lvl', '{}'.format(p_valid_params.reward_test_level)]
+        run_cmd.extend(lvl)
+
+    # add flags to run command
+    test_flag = a_utils.add_one_flag_value(flag=flag)
+    run_cmd.extend(test_flag)
+    print(str(run_cmd))
+
+    proc = subprocess.Popen(run_cmd, stdout=clres.stdout, stderr=clres.stdout)
+
+    try:
+        a_utils.validate_arg_result(flag=test_flag,
+                                    p_valid_params=p_valid_params, clres=clres,
+                                    process=proc, start_time=start_time,
+                                    timeout=time_limit)
+    except AssertionError:
+        # kill the coach process once validation fails, then re-raise the
+        # original assertion
+        proc.kill()
+        raise
+
+    proc.kill()
+
+
+@pytest.mark.functional_test
+def test_preset_mxnet_framework(preset_for_mxnet_args, clres,
+                                start_time=time.time(),
+                                time_limit=Def.TimeOuts.test_time_limit):
+    """ Test command arguments - the test will check the mxnet framework."""
+
+    flag = ['-f', 'mxnet']
+    p_valid_params = p_utils.validation_params(preset_for_mxnet_args)
+
+    run_cmd = [
+        'python3', 'rl_coach/coach.py',
+        '-p', '{}'.format(preset_for_mxnet_args),
+        '-e', '{}'.format("ExpName_" + preset_for_mxnet_args),
+    ]
+
+    # add flags to run command
+    test_flag = a_utils.add_one_flag_value(flag=flag)
+    run_cmd.extend(test_flag)
+
+    print(str(run_cmd))
+
+    proc = subprocess.Popen(run_cmd, stdout=clres.stdout, stderr=clres.stdout)
+
+    try:
+        a_utils.validate_arg_result(flag=test_flag,
+                                    p_valid_params=p_valid_params, clres=clres,
+                                    process=proc, start_time=start_time,
+                                    timeout=time_limit)
+    except AssertionError:
+        # kill the coach process once validation fails, then re-raise the
+        # original assertion
+        proc.kill()
+        raise
+
+    proc.kill()
+
+
+@pytest.mark.functional_test
+def test_preset_seed(preset_args_for_seed, clres, start_time=time.time(),
+                     time_limit=Def.TimeOuts.test_time_limit):
+    """
+    Test command arguments - the test will check the seed argument with all
+    presets
+    """
+
+    def close_processes():
+        """
+        Close all processes that are still active in the process list
+        """
+        for i in range(seed_num):
+            proc[i].kill()
+
+    proc = []
+    seed_num = 2
+    flag = ["--seed", str(seed_num)]
+    p_valid_params = p_utils.validation_params(preset_args_for_seed)
+
+    run_cmd = [
+        'python3', 'rl_coach/coach.py',
+        '-p', '{}'.format(preset_args_for_seed),
+        '-e', '{}'.format("ExpName_" + preset_args_for_seed),
+    ]
+
+    if p_valid_params.trace_test_levels:
+        lvl = ['-lvl', '{}'.format(p_valid_params.trace_test_levels[0])]
+        run_cmd.extend(lvl)
+
+    # add flags to run command
+    test_flag = a_utils.add_one_flag_value(flag=flag)
+    run_cmd.extend(test_flag)
+    print(str(run_cmd))
+
+    for _ in range(seed_num):
+        proc.append(subprocess.Popen(run_cmd, stdout=clres.stdout,
+                                     stderr=clres.stdout))
+
+    try:
+        a_utils.validate_arg_result(flag=test_flag,
+                                    p_valid_params=p_valid_params, clres=clres,
+                                    process=proc, start_time=start_time,
+                                    timeout=time_limit)
+    except AssertionError:
+        # kill all coach processes, then re-raise the original assertion
+        close_processes()
+        raise
+
+    close_processes()
diff --git a/rl_coach/tests/utils/args_utils.py b/rl_coach/tests/utils/args_utils.py
index e990cfd..cf26012 100644
--- a/rl_coach/tests/utils/args_utils.py
+++ b/rl_coach/tests/utils/args_utils.py
@@ -16,17 +16,27 @@
 """Manage all command arguments."""
 
 import os
-import re
 import signal
 import time
-
-import psutil as psutil
-
-from rl_coach.logger import screen
-from rl_coach.tests.utils import test_utils
+import pandas as pd
+import numpy as np
+from rl_coach.tests.utils.test_utils import get_csv_path, get_files_from_dir, \
+    find_string_in_logs
 from rl_coach.tests.utils.definitions import Definitions as Def
 
 
+def collect_preset_for_mxnet():
+    """
+    Collect presets that are relevant for mxnet args testing only.
+    This is used for testing arguments with the specific presets that are
+    defined in the definitions (mxnet_args_test under Presets).
+    :return: preset(s) list
+    """
+    for pn in Def.Presets.mxnet_args_test:
+        assert pn, Def.Consts.ASSERT_MSG.format("Preset name", pn)
+        yield pn
+
+
 def collect_preset_for_args():
     """
     Collect presets that relevant for args testing only.
@@ -39,19 +49,27 @@
         yield pn
 
 
+def collect_preset_for_seed():
+    """
+    Collect presets that are relevant for seed argument testing only.
+    This is used for testing arguments with the specific presets that are
+    defined in the definitions (seed_args_test under Presets).
+    :return: preset(s) list
+    """
+    for pn in Def.Presets.seed_args_test:
+        assert pn, Def.Consts.ASSERT_MSG.format("Preset name", pn)
+        yield pn
+
+
 def collect_args():
     """
     Collect args from the cmd args list - on each test iteration,
     it will yield one line (one arg).
    :yield: one arg for each test iteration
     """
-    for k, v in Def.Flags.cmd_args.items():
-        cmd = []
-        cmd.append(k)
-        if v is not None:
-            cmd.append(v)
-        assert cmd, Def.Consts.ASSERT_MSG.format("cmd array", str(cmd))
-        yield cmd
+    for i in Def.Flags.cmd_args:
+        assert i, Def.Consts.ASSERT_MSG.format("flag list", str(i))
+        yield i
 
 
 def add_one_flag_value(flag):
@@ -60,99 +78,86 @@
     :param flag: flag list
     :return: flag with format
     """
-    if not flag or len(flag) > 2 or len(flag) == 0:
+    # an empty list means there is no flag to format
+    if not flag:
         return []
 
     if len(flag) == 1:
         return flag
 
-    if Def.Flags.css in flag[1]:
-        flag[1] = 30
+    if Def.Flags.enw in flag[1]:
+        flag[1] = '2'
 
-    elif Def.Flags.crd in flag[1]:
-        # TODO: check dir of checkpoint
-        flag[1] = os.path.join(Def.Path.experiments)
+    elif Def.Flags.css in flag[1]:
+        flag[1] = '5'
 
-    elif Def.Flags.et in flag[1]:
-        # TODO: add valid value
-        flag[1] = ""
+    elif Def.Flags.fw_ten in flag[1]:
+        flag[1] = "tensorflow"
 
-    elif Def.Flags.ept in flag[1]:
-        # TODO: add valid value
-        flag[1] = ""
+    elif Def.Flags.fw_mx in flag[1]:
+        flag[1] = "mxnet"
 
     elif Def.Flags.cp in flag[1]:
-        # TODO: add valid value
-        flag[1] = ""
-
-    elif Def.Flags.seed in flag[1]:
-        flag[1] = 0
-
-    elif Def.Flags.dccp in flag[1]:
-        # TODO: add valid value
-        flag[1] = ""
+        flag[1] = "heatup_steps=EnvironmentSteps({})".format(Def.Consts.num_hs)
 
     return flag
 
 
-def check_files_in_dir(dir_path):
-    """
-    Check if folder has files
-    :param dir_path: |string| folder path
-    :return: |Array| return files in folder
-    """
-    start_time = time.time()
-    entities = None
-    while time.time() - start_time < Def.TimeOuts.wait_for_files:
-        # wait until logs created
-        if os.path.exists(dir_path):
-            entities = os.listdir(dir_path)
-            if len(entities) > 0:
-                break
-        time.sleep(1)
-
-    assert len(entities) > 0, \
-        Def.Consts.ASSERT_MSG.format("num files > 0", len(entities))
-    return entities
-
-
-def find_string_in_logs(log_path, str):
-    """
-    Find string into the log file
-    :param log_path: |string| log path
-    :param str: |string| search text
-    :return: |bool| true if string found in the log file
-    """
-    start_time = time.time()
-    while time.time() - start_time < Def.TimeOuts.wait_for_files:
-        # wait until logs created
-        if os.path.exists(log_path):
-            break
-        time.sleep(1)
-
-    if not os.path.exists(log_path):
-        return False
-
-    if str in open(log_path, 'r').read():
-        return True
-    return False
-
-
-def get_csv_path(clres):
-    """
-    Get the csv path with the results - reading csv paths will take some time
-    :param clres: object of files that test is creating
-    :return: |Array| csv path
-    """
-    return test_utils.read_csv_paths(test_path=clres.exp_path,
-                                     filename_pattern=clres.fn_pattern)
+def is_reward_reached(csv_path, p_valid_params, start_time, time_limit):
+    """
+    Check the result of the experiment by collecting all the Evaluation
+    Reward values; the windowed average should exceed the min reward
+    threshold.
+    :param csv_path: csv file (results)
+    :param p_valid_params: experiment test params
+    :param start_time: start time of the test
+    :param time_limit: timeout of the test
+    :return: |Bool| true if the reward minimum was reached
+    """
+    win_size = 10
+    last_num_episodes = 0
+    csv = None
+    reward_reached = False
+
+    while csv is None or (csv['Episode #'].values[-1]
+                          < p_valid_params.max_episodes_to_achieve_reward and
+                          time.time() - start_time < time_limit):
+
+        csv = pd.read_csv(csv_path)
+
+        if 'Evaluation Reward' not in csv.keys():
+            continue
+
+        rewards = csv['Evaluation Reward'].values
+
+        rewards = rewards[~np.isnan(rewards)]
+        if len(rewards) >= 1:
+            # moving average over a window of up to win_size rewards
+            averaged_rewards = np.convolve(rewards, np.ones(
+                min(len(rewards), win_size)) / win_size, mode='valid')
+
+        else:
+            # May be in heat-up steps
+            time.sleep(1)
+            continue
+
+        if csv['Episode #'].shape[0] - last_num_episodes <= 0:
+            continue
+
+        last_num_episodes = csv['Episode #'].values[-1]
+
+        # check if reward is enough
+        if np.any(averaged_rewards >= p_valid_params.min_reward_threshold):
+            reward_reached = True
+            break
+
+    return reward_reached
 
 
-def validate_args_results(flag, clres=None, process=None, start_time=None,
-                          timeout=None):
+def validate_arg_result(flag, p_valid_params, clres=None, process=None,
+                        start_time=None, timeout=Def.TimeOuts.test_time_limit):
     """
     Validate results of one argument.
     :param flag: flag to check
+    :param p_valid_params: params test per preset
     :param clres: object of files paths (results of test experiment)
     :param process: process object
     :param start_time: start time of the test
         -asc, --apply_stop_condition: Once selected, coach stops when the
                                       required success rate is reached
     """
     if flag[0] == "-asc" or flag[0] == "--apply_stop_condition":
-        while time.time() - start_time < timeout:
-
-            if find_string_in_logs(log_path=clres.stdout.name,
-                                   str=Def.Consts.REACHED_REQ_ASC):
-                assert True, Def.Consts.ASSERT_MSG. \
-                    format(Def.Consts.REACHED_REQ_ASC, "Message Not Found")
-                break
-
-    elif flag[0] == "-d" or flag[0] == "--open_dashboard":
-        """
-        -d, --open_dashboard: Once selected, firefox browser will open to show
-        coach's Dashboard.
-        """
-        proc_id = None
-        start_time = time.time()
-        while time.time() - start_time < Def.TimeOuts.wait_for_files:
-            for proc in psutil.process_iter():
-                if proc.name() == Def.DASHBOARD_PROC:
-                    assert proc.name() == Def.DASHBOARD_PROC, \
-                        Def.Consts.ASSERT_MSG. format(Def.DASHBOARD_PROC,
-                                                      proc.name())
-                    proc_id = proc.pid
-                    break
-            if proc_id:
-                break
-
-        if proc_id:
-            # kill firefox process
-            os.kill(proc_id, signal.SIGKILL)
-        else:
-            assert False, Def.Consts.ASSERT_MSG.format("Found Firefox process",
-                                                       proc_id)
+        assert find_string_in_logs(log_path=clres.stdout.name,
+                                   str=Def.Consts.REACHED_REQ_ASC,
+                                   wait_and_find=True), \
+            Def.Consts.ASSERT_MSG.format(Def.Consts.REACHED_REQ_ASC,
+                                         "Message Not Found")
 
     elif flag[0] == "--print_networks_summary":
         """
@@ -254,18 +232,19 @@
         assert os.path.isdir(tensorboard_path), \
             Def.Consts.ASSERT_MSG.format("tensorboard path", tensorboard_path)
 
-        # check if folder contain files
-        check_files_in_dir(dir_path=tensorboard_path)
+        # check that the folder contains files with the tensorboard extension
+        files = get_files_from_dir(dir_path=tensorboard_path)
+        assert any(".tfevents." in file for file in files)
 
     elif flag[0] == "-onnx" or flag[0] == "--export_onnx_graph":
         """
         -onnx, --export_onnx_graph: Once selected, warning message should
         appear, it should be with another flag.
""" - if find_string_in_logs(log_path=clres.stdout.name, - str=Def.Consts.ONNX_WARNING): - assert True, Def.Consts.ASSERT_MSG.format( - Def.Consts.ONNX_WARNING, "Not found") + assert find_string_in_logs(log_path=clres.stdout.name, + str=Def.Consts.ONNX_WARNING, + wait_and_find=True), \ + Def.Consts.ASSERT_MSG.format(Def.Consts.ONNX_WARNING, "Not found") elif flag[0] == "-dg" or flag[0] == "--dump_gifs": """ @@ -287,7 +266,7 @@ def validate_args_results(flag, clres=None, process=None, start_time=None, break # check if folder contain files - check_files_in_dir(dir_path=gifs_path) + get_files_from_dir(dir_path=gifs_path) # TODO: check if play window is opened elif flag[0] == "-dm" or flag[0] == "--dump_mp4": @@ -310,7 +289,7 @@ def validate_args_results(flag, clres=None, process=None, start_time=None, break # check if folder contain files - check_files_in_dir(dir_path=videos_path) + get_files_from_dir(dir_path=videos_path) # TODO: check if play window is opened elif flag[0] == "--nocolor": @@ -318,37 +297,181 @@ def validate_args_results(flag, clres=None, process=None, start_time=None, --nocolor: Once selected, check if color prefix is replacing the actual color; example: '## agent: ...' """ - while time.time() - start_time < timeout: - - if find_string_in_logs(log_path=clres.stdout.name, - str=Def.Consts.COLOR_PREFIX): - assert True, Def.Consts.ASSERT_MSG. \ - format(Def.Consts.COLOR_PREFIX, "Color Prefix Not Found") - break + assert find_string_in_logs(log_path=clres.stdout.name, + str=Def.Consts.COLOR_PREFIX, + wait_and_find=True), \ + Def.Consts.ASSERT_MSG.format(Def.Consts.COLOR_PREFIX, + "Color Prefix Not Found") elif flag[0] == "--evaluate": """ --evaluate: Once selected, Coach start testing, there is not training. """ - tries = 5 - while time.time() - start_time < timeout and tries > 0: - - if find_string_in_logs(log_path=clres.stdout.name, - str=Def.Consts.TRAINING): - assert False, Def.Consts.ASSERT_MSG.format( - "Training Not Found", Def.Consts.TRAINING) - else: - time.sleep(1) - tries -= 1 - assert True, Def.Consts.ASSERT_MSG.format("Training Found", - Def.Consts.TRAINING) + # wait until files created + get_csv_path(clres=clres) + time.sleep(15) + assert not find_string_in_logs(log_path=clres.stdout.name, + str=Def.Consts.TRAINING), \ + Def.Consts.ASSERT_MSG.format("Training Not Found", + Def.Consts.TRAINING) elif flag[0] == "--play": """ - --play: Once selected alone, warning message should appear, it should - be with another flag. + --play: Once selected alone, an warning message should appear, it + should be with another flag. """ - if find_string_in_logs(log_path=clres.stdout.name, - str=Def.Consts.PLAY_WARNING): - assert True, Def.Consts.ASSERT_MSG.format( - Def.Consts.ONNX_WARNING, "Not found") + assert find_string_in_logs(log_path=clres.stdout.name, + str=Def.Consts.PLAY_WARNING, + wait_and_find=True), \ + Def.Consts.ASSERT_MSG.format(Def.Consts.PLAY_WARNING, "Not found") + + elif flag[0] == "-et" or flag[0] == "--environment_type": + """ + -et, --environment_type: Once selected check csv results is created. + """ + csv_path = get_csv_path(clres) + assert len(csv_path) > 0, \ + Def.Consts.ASSERT_MSG.format("path not found", csv_path) + + elif flag[0] == "-s" or flag[0] == "--checkpoint_save_secs": + """ + -s, --checkpoint_save_secs: Once selected, check if files added to the + experiment path. 
+        """
+        csv_path = get_csv_path(clres)
+        assert len(csv_path) > 0, \
+            Def.Consts.ASSERT_MSG.format("path not found", csv_path)
+
+        exp_path = os.path.dirname(csv_path[0])
+        checkpoint_path = os.path.join(exp_path, Def.Path.checkpoint)
+
+        # wait until the checkpoint folder is created
+        while (not os.path.isdir(checkpoint_path)
+               and time.time() - start_time < timeout):
+            time.sleep(1)
+
+        assert os.path.isdir(checkpoint_path), \
+            Def.Consts.ASSERT_MSG.format("checkpoint path", checkpoint_path)
+
+        # check if folder contains files
+        get_files_from_dir(dir_path=checkpoint_path)
+
+    elif flag[0] == "-ew" or flag[0] == "--evaluation_worker":
+        """
+        -ew, --evaluation_worker: Once selected, check that an evaluation
+                                  worker is created, e.g. by checking that
+                                  its csv file is created.
+        """
+        # wait until files created
+        csv_path = get_csv_path(clres=clres)
+        assert len(csv_path) > 0, \
+            Def.Consts.ASSERT_MSG.format("path not found", csv_path)
+
+    elif flag[0] == "-cp" or flag[0] == "--custom_parameter":
+        """
+        -cp, --custom_parameter: Once selected, check that the heat-up phase
+                                 runs for at least the number of steps given
+                                 in the custom parameter.
+        """
+        # wait until files created
+        csv_path = get_csv_path(clres=clres)
+        assert len(csv_path) > 0, \
+            Def.Consts.ASSERT_MSG.format("path not found", csv_path)
+
+        # read csv file
+        csv = pd.read_csv(csv_path[0])
+
+        # poll the total step count while the run is still in heat-up
+        results = []
+        while csv["In Heatup"].values[-1] == 1:
+            csv = pd.read_csv(csv_path[0])
+            last_step = csv["Total steps"].values
+            time.sleep(1)
+            results.append(last_step[-1])
+
+        # record the step count reached once heat-up has finished, so the
+        # check below also works when heat-up ended before the first poll
+        results.append(csv["Total steps"].values[-1])
+
+        assert results[-1] >= Def.Consts.num_hs, \
+            Def.Consts.ASSERT_MSG.format(
+                "bigger than {}".format(Def.Consts.num_hs), results[-1])
+
+    elif flag[0] == "-f" or flag[0] == "--framework":
+        """
+        -f, --framework: Once selected, check that the experiment runs with
+                         the selected framework (tensorflow or mxnet) and
+                         reaches the expected reward.
+        """
+        # wait until files created
+        csv_path = get_csv_path(clres=clres)
+        assert len(csv_path) > 0, \
+            Def.Consts.ASSERT_MSG.format("path not found", csv_path)
+
+        get_reward = is_reward_reached(csv_path=csv_path[0],
+                                       p_valid_params=p_valid_params,
+                                       start_time=start_time,
+                                       time_limit=timeout)
+
+        # check if experiment is working and reached the reward
+        assert get_reward, Def.Consts.ASSERT_MSG.format(
+            "reward reached", get_reward)
+
+        # check if there is no exception
+        assert not find_string_in_logs(log_path=clres.stdout.name,
+                                       str=Def.Consts.LOG_ERROR)
+
+        ret_val = process.poll()
+        assert ret_val is None, Def.Consts.ASSERT_MSG.format("None", ret_val)
+
+    elif flag[0] == "-crd" or flag[0] == "--checkpoint_restore_dir":
+
+        """
+        -crd, --checkpoint_restore_dir: Once selected alone, check that the
+                                        checkpoint dir can't be restored
+                                        (negative test).
+        """
+        # wait until files created
+        csv_path = get_csv_path(clres=clres)
+        assert len(csv_path) > 0, \
+            Def.Consts.ASSERT_MSG.format("path not found", csv_path)
+        assert find_string_in_logs(log_path=clres.stdout.name,
+                                   str=Def.Consts.NO_CHECKPOINT), \
+            Def.Consts.ASSERT_MSG.format(Def.Consts.NO_CHECKPOINT, "Not found")
+
+    elif flag[0] == "--seed":
+        """
+        --seed: Once selected, check that all processes in the process list
+                produce the same results in their logs.
+        """
+        lst_csv = []
+        # wait until files created
+        csv_path = get_csv_path(clres=clres, extra_tries=10)
+
+        assert len(csv_path) > 0, \
+            Def.Consts.ASSERT_MSG.format("paths are not found", csv_path)
+
+        assert int(flag[1]) == len(csv_path), \
+            Def.Consts.ASSERT_MSG.format(int(flag[1]), len(csv_path))
+
+        # wait for results to accumulate in the csv files
+        for i in range(len(csv_path)):
+
+            lines_in_file = pd.read_csv(csv_path[i])
+            while len(lines_in_file['Episode #'].values) < \
+                    Def.Consts.N_csv_lines and \
+                    time.time() - start_time < Def.TimeOuts.test_time_limit:
+                lines_in_file = pd.read_csv(csv_path[i])
+                time.sleep(1)
+
+            lst_csv.append(pd.read_csv(csv_path[i],
+                                       nrows=Def.Consts.N_csv_lines))
+
+        assert len(lst_csv) > 1, Def.Consts.ASSERT_MSG.format("> 1",
+                                                              len(lst_csv))
+
+        # runs with the same seed should produce identical progress columns
+        df1 = lst_csv[0]
+        for df in lst_csv[1:]:
+            assert list(df1['Training Iter'].values) == list(
+                df['Training Iter'].values)
+
+            assert list(df1['ER #Transitions'].values) == list(
+                df['ER #Transitions'].values)
+
+            assert list(df1['Total steps'].values) == list(
+                df['Total steps'].values)
+
+    elif flag[0] == "-c" or flag[0] == "--use_cpu":
+        pass
diff --git a/rl_coach/tests/utils/definitions.py b/rl_coach/tests/utils/definitions.py
index e79dd71..56740e0 100644
--- a/rl_coach/tests/utils/definitions.py
+++ b/rl_coach/tests/utils/definitions.py
@@ -36,56 +36,112 @@ class Definitions:
         cp = "custom_parameter"
         seed = "seed"
         dccp = "distributed_coach_config_path"
+        enw = "num_workers"
+        fw_ten = "framework_tensorflow"
+        fw_mx = "framework_mxnet"
+        et = "rl_coach.environments.gym_environment:Atari"
 
         """
         Arguments that can be tested for python coach command
-        ** None = Flag - no need for string or int
-        ** {} = Add format for this parameter
+        ** one element = flag only, no value needed
+        ** two elements = flag followed by its value
         """
-        cmd_args = {
-            # '-l': None,
-            # '-e': '{}',
-            # '-r': None,
-            # '-n': '{' + enw + '}',
-            # '-c': None,
-            # '-ew': None,
-            '--play': None,
-            '--evaluate': None,
-            # '-v': None,
-            # '-tfv': None,
-            '--nocolor': None,
-            # '-s': '{' + css + '}',
-            # '-crd': '{' + crd + '}',
-            '-dg': None,
-            '-dm': None,
-            # '-et': '{' + et + '}',
-            # '-ept': '{' + ept + '}',
-            # '-lvl': '{level}',
-            # '-cp': '{' + cp + '}',
-            '--print_networks_summary': None,
-            '-tb': None,
-            '-ns': None,
-            '-d': None,
-            # '--seed': '{' + seed + '}',
-            '-onnx': None,
-            '-dc': None,
-            # '-dcp': '{' + dccp + '}',
-            '-asc': None,
-            '--dump_worker_logs': None,
-        }
+
+        cmd_args = [
+            ['-ew'],
+            ['--play'],
+            ['--evaluate'],
+            ['-f', fw_ten],
+            ['--nocolor'],
+            ['-s', css],
+            # ['-crd', crd],  # Tested in checkpoint test
+            ['-dg'],
+            ['-dm'],
+            ['-cp', cp],
+            ['--print_networks_summary'],
+            ['-tb'],
+            ['-ns'],
+            ['-onnx'],
+            ['-asc'],
+            ['--dump_worker_logs'],
+            # ['-et', et],
+            # '-lvl': '{level}',  # TODO: Add test validation on args_utils
+            # '-e': '{}',  # TODO: Add test validation on args_utils
+            # '-l': None,  # TODO: Add test validation on args_utils
+            # '-c': None,  # TODO: Add test validation using nvidia-smi
+            # '-v': None,  # TODO: Add test validation on args_utils
+            # '--seed': '{' + seed + '}',  # DONE - new test added
+            # '-dc': None,  # TODO: Add test validation on args_utils
+            # '-dcp': '{}'  # TODO: Add test validation on args_utils
+            # ['-n', enw],  # Duplicated arg test
+            # ['-d'],  # Arg can't be automated - no GUI in the CI
+            # '-r': None,  # No automation test
+            # '-tfv': None,  # No automation test
+            # '-ept': '{' + ept + '}',  # No automation test - not supported
+        ]
 
     class Presets:
         # Preset list for testing the flags/ arguments of python coach command
         args_test = [
             "CartPole_A3C",
-            # "CartPole_NEC",
+        ]
+
+        # Preset list for mxnet framework testing
+        mxnet_args_test = [
+            "CartPole_DQN"
+        ]
+
+        # Presets for testing the seed argument
+        seed_args_test = [
+            "Atari_A3C",
+            "Atari_A3C_LSTM",
"Atari_Bootstrapped_DQN", + "Atari_C51", + "Atari_DDQN", + "Atari_DQN_with_PER", + "Atari_DQN", + "Atari_DQN_with_PER", + "Atari_Dueling_DDQN", + "Atari_Dueling_DDQN_with_PER_OpenAI", + "Atari_NStepQ", + "Atari_QR_DQN", + "Atari_Rainbow", + "Atari_UCB_with_Q_Ensembles", + "BitFlip_DQN", + "BitFlip_DQN_HER", + "CartPole_A3C", + "CartPole_ClippedPPO", + "CartPole_DFP", + "CartPole_DQN", + "CartPole_Dueling_DDQN", + "CartPole_NStepQ", + "CartPole_PAL", + "CartPole_PG", + "ControlSuite_DDPG", + "ExplorationChain_Bootstrapped_DQN", + "ExplorationChain_Dueling_DDQN", + "ExplorationChain_UCB_Q_ensembles", + "Fetch_DDPG_HER_baselines", + "InvertedPendulum_PG", + "MontezumaRevenge_BC", + "Mujoco_A3C", + "Mujoco_A3C_LSTM", + "Mujoco_ClippedPPO", + "Mujoco_DDPG", + "Mujoco_NAF", + "Mujoco_PPO", + "Pendulum_HAC", + "Starcraft_CollectMinerals_A3C", + "Starcraft_CollectMinerals_Dueling_DDQN", ] class Path: experiments = "./experiments" tensorboard = "tensorboard" + test_dir = "test_dir" gifs = "gifs" videos = "videos" + checkpoint = "checkpoint" class Consts: ASSERT_MSG = "Expected: {}, Actual: {}." @@ -105,7 +161,17 @@ class Definitions: "These flags can not be used together. For human " \ "control, please use the --play flag together with " \ "the environment type flag (-et)" + NO_CHECKPOINT = "No checkpoint to restore in:" + LOG_ERROR = "KeyError:" + + num_hs = 200 # heat-up steps (used for agent custom parameters) + + f_comb = 2 # number of flags in cmd for creating flags combinations + + N_csv_lines = 100 # number of lines to validate on csv file class TimeOuts: test_time_limit = 60 * 60 wait_for_files = 20 + wait_for_csv = 240 + test_run = 60 diff --git a/rl_coach/tests/utils/test_utils.py b/rl_coach/tests/utils/test_utils.py index f9cdbd1..85b72bb 100644 --- a/rl_coach/tests/utils/test_utils.py +++ b/rl_coach/tests/utils/test_utils.py @@ -18,7 +18,9 @@ import glob import sys import time +import os from os import path +from rl_coach.tests.utils.definitions import Definitions as Def def print_progress(averaged_rewards, last_num_episodes, start_time, time_limit, @@ -49,20 +51,99 @@ def print_progress(averaged_rewards, last_num_episodes, start_time, time_limit, sys.stdout.flush() -def read_csv_paths(test_path, filename_pattern, read_csv_tries=120): +def read_csv_paths(test_path, filename_pattern, read_csv_tries=120, + extra_tries=0): """ Return file path once it found :param test_path: test folder path :param filename_pattern: csv file pattern :param read_csv_tries: number of iterations until file found + :param extra_tries: add number of extra tries to check after getting all + the paths. 
     :return: |list| csv file paths
     """
     csv_paths = []
     tries_counter = 0
-    while not csv_paths:
+    while not csv_paths or extra_tries > 0:
         csv_paths = glob.glob(path.join(test_path, '*', filename_pattern))
         if tries_counter > read_csv_tries:
             break
-        tries_counter += 1
         time.sleep(1)
+        tries_counter += 1
+
+        if csv_paths and extra_tries > 0:
+            extra_tries -= 1
+
     return csv_paths
+
+
+def get_files_from_dir(dir_path):
+    """
+    Wait until the folder has files and return them
+    :param dir_path: |string| folder path
+    :return: |list| files in the folder
+    """
+    start_time = time.time()
+    # start from an empty list so the assert below fails cleanly if the
+    # folder is never created
+    entities = []
+    while time.time() - start_time < Def.TimeOuts.wait_for_files:
+        # wait until logs created
+        if os.path.exists(dir_path):
+            entities = os.listdir(dir_path)
+            if len(entities) > 0:
+                break
+        time.sleep(1)
+
+    assert len(entities) > 0, \
+        Def.Consts.ASSERT_MSG.format("num files > 0", len(entities))
+    return entities
+
+
+def find_string_in_logs(log_path, str, timeout=Def.TimeOuts.wait_for_files,
+                        wait_and_find=False):
+    """
+    Find a string in the log file
+    :param log_path: |string| log path
+    :param str: |string| search text
+    :param timeout: |int| timeout for waiting for the file to appear
+    :param wait_and_find: |bool| true to keep polling the file until the test
+                          time limit is reached
+    :return: |bool| true if string found in the log file
+    """
+    start_time = time.time()
+    while time.time() - start_time < timeout:
+        # wait until logs created
+        if os.path.exists(log_path):
+            break
+        time.sleep(1)
+
+    if not os.path.exists(log_path):
+        return False
+
+    with open(log_path, 'r') as fr:
+        if str in fr.read():
+            return True
+
+    while time.time() - start_time < Def.TimeOuts.test_time_limit \
+            and wait_and_find:
+        with open(log_path, 'r') as fr:
+            if str in fr.read():
+                return True
+        # avoid re-reading the file in a tight loop
+        time.sleep(1)
+
+    return False
+
+
+def get_csv_path(clres, tries_for_csv=Def.TimeOuts.wait_for_csv,
+                 extra_tries=0):
+    """
+    Get the csv path with the results - reading csv paths will take some time
+    :param clres: object of files that test is creating
+    :param tries_for_csv: number of tries until getting all csv files
+    :param extra_tries: number of extra tries to check after the paths were
+                        first found.
+    :return: |list| csv paths
+    """
+    return read_csv_paths(test_path=clres.exp_path,
+                          filename_pattern=clres.fn_pattern,
+                          read_csv_tries=tries_for_csv,
+                          extra_tries=extra_tries)
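---
Not part of the patch: a minimal sketch of how the suites added above could be driven locally. It assumes pytest and rl_coach are installed and that it runs from the repository root; the unit_test/functional_test markers and the test-file paths come from the diff above, while the runner script itself is hypothetical.

    # run_new_test_suites.py -- illustrative only, not included in this patch
    import sys
    import pytest

    if __name__ == '__main__':
        # fast, isolated checks (e.g. test_get_checkpoint_state)
        unit_rc = pytest.main(
            ['-m', 'unit_test', '-v', 'rl_coach/tests/test_checkpoint.py'])

        # end-to-end flag tests; each case spawns a real
        # 'python3 rl_coach/coach.py' subprocess, so runs are long
        functional_rc = pytest.main(
            ['-m', 'functional_test', '-v',
             'rl_coach/tests/test_coach_args.py'])

        sys.exit(unit_rc or functional_rc)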