mirror of https://github.com/gryf/coach.git synced 2026-02-15 21:45:46 +01:00

pre-release 0.10.0

Gal Novik
2018-08-13 17:11:34 +03:00
parent d44c329bb8
commit 19ca5c24b1
485 changed files with 33292 additions and 16770 deletions

@@ -0,0 +1,33 @@
import os
import sys
from rl_coach.base_parameters import TaskParameters
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
import tensorflow as tf
from tensorflow import logging
import pytest
logging.set_verbosity(logging.INFO)
@pytest.mark.unit_test
def test_get_QActionStateValue_predictions():
tf.reset_default_graph()
from rl_coach.presets.CartPole_DQN import graph_manager as cartpole_dqn_graph_manager
assert cartpole_dqn_graph_manager
cartpole_dqn_graph_manager.create_graph(
    task_parameters=TaskParameters(framework_type="tensorflow",
                                   experiment_path="./experiments/test"))
cartpole_dqn_graph_manager.improve_steps.num_steps = 1
cartpole_dqn_graph_manager.steps_between_evaluation_periods.num_steps = 5
# graph_manager.improve()
#
# agent = graph_manager.level_managers[0].composite_agents['simple_rl_agent'].agents['simple_rl_agent/agent']
# some_state = agent.memory.sample(1)[0].state
# cartpole_dqn_predictions = agent.get_predictions(states=some_state, prediction_type=QActionStateValue)
# assert cartpole_dqn_predictions.shape == (1, 2)
if __name__ == '__main__':
test_get_QActionStateValue_predictions()
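
If the commented-out prediction check is ever re-enabled, it could look roughly like the sketch below. This is assembled from the comments above and is not a verified API: the 'simple_rl_agent' keys and the import location of QActionStateValue are assumptions.

# hypothetical sketch based on the commented-out lines above
# from rl_coach.core_types import QActionStateValue  # assumed import path
#
# cartpole_dqn_graph_manager.improve()
# agent = cartpole_dqn_graph_manager.level_managers[0] \
#     .composite_agents['simple_rl_agent'].agents['simple_rl_agent/agent']
# some_state = agent.memory.sample(1)[0].state
# predictions = agent.get_predictions(states=some_state, prediction_type=QActionStateValue)
# assert predictions.shape == (1, 2)  # CartPole has two discrete actions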

@@ -0,0 +1,45 @@
import os
import sys
from rl_coach.base_parameters import EmbedderScheme
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
import pytest
import numpy as np
from rl_coach.architectures.tensorflow_components.embedders.vector_embedder import VectorEmbedder
import tensorflow as tf
from tensorflow import logging
logging.set_verbosity(logging.INFO)
@pytest.fixture
def reset():
tf.reset_default_graph()
@pytest.mark.unit_test
def test_embedder(reset):
embedder = VectorEmbedder(np.array([10, 10]), name="test", scheme=EmbedderScheme.Empty)
# make sure the ops were not created yet
assert len(tf.get_default_graph().get_operations()) == 0
# call the embedder
input_ph, output_ph = embedder()
# make sure that now the ops were created
assert len(tf.get_default_graph().get_operations()) > 0
# try feeding a batch of one example # TODO: consider auto converting to batch
input = np.random.rand(1, 10, 10)
sess = tf.Session()
output = sess.run(embedder.output, {embedder.input: input})
assert output.shape == (1, 100) # should have flattened the input
# now make sure the returned placeholders behave the same
output = sess.run(output_ph, {input_ph: input})
assert output.shape == (1, 100) # should have flattened the input
# make sure the naming is correct
assert embedder.get_name() == "test"
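
A note on why no variable initializer is needed above: the Empty scheme adds no trainable layers, so the embedder amounts to a flatten. A minimal raw-TF1 sketch of that assumption:

import numpy as np
import tensorflow as tf

inp = tf.placeholder(tf.float32, shape=(None, 10, 10))
flat = tf.layers.flatten(inp)  # no variables created, just a reshape to (None, 100)
with tf.Session() as sess:
    assert sess.run(flat, {inp: np.random.rand(1, 10, 10)}).shape == (1, 100)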

@@ -0,0 +1,99 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
import pytest
import numpy as np
from rl_coach.architectures.tensorflow_components.embedders.image_embedder import ImageEmbedder, EmbedderScheme
import tensorflow as tf
from tensorflow import logging
logging.set_verbosity(logging.INFO)
@pytest.fixture
def reset():
tf.reset_default_graph()
@pytest.mark.unit_test
def test_embedder(reset):
# creating an embedder with a non-image input
with pytest.raises(ValueError):
embedder = ImageEmbedder(np.array([100]), name="test")
with pytest.raises(ValueError):
embedder = ImageEmbedder(np.array([100, 100]), name="test")
with pytest.raises(ValueError):
embedder = ImageEmbedder(np.array([10, 100, 100, 100]), name="test")
# creating a simple image embedder
embedder = ImageEmbedder(np.array([100, 100, 10]), name="test")
# make sure the ops were not created yet
assert len(tf.get_default_graph().get_operations()) == 0
# call the embedder
input_ph, output_ph = embedder()
# make sure that now the ops were created
assert len(tf.get_default_graph().get_operations()) > 0
# try feeding a batch of one example
input = np.random.rand(1, 100, 100, 10)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
output = sess.run(embedder.output, {embedder.input: input})
assert output.shape == (1, 5184)
# now make sure the returned placeholders behave the same
output = sess.run(output_ph, {input_ph: input})
assert output.shape == (1, 5184)
# make sure the naming is correct
assert embedder.get_name() == "test"
@pytest.mark.unit_test
def test_complex_embedder(reset):
# creating a deep image embedder
embedder = ImageEmbedder(np.array([100, 100, 10]), name="test", scheme=EmbedderScheme.Deep)
# call the embedder
embedder()
# try feeding a batch of one example
input = np.random.rand(1, 100, 100, 10)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
output = sess.run(embedder.output, {embedder.input: input})
assert output.shape == (1, 256)  # flattened output of the deep conv scheme
@pytest.mark.unit_test
def test_activation_function(reset):
# creating a deep image embedder with relu
embedder = ImageEmbedder(np.array([100, 100, 10]), name="relu", scheme=EmbedderScheme.Deep,
activation_function=tf.nn.relu)
# call the embedder
embedder()
# try feeding a batch of one example
input = np.random.rand(1, 100, 100, 10)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
output = sess.run(embedder.output, {embedder.input: input})
assert np.all(output >= 0)  # relu outputs are non-negative
# creating a deep image embedder with tanh
embedder_tanh = ImageEmbedder(np.array([100, 100, 10]), name="tanh", scheme=EmbedderScheme.Deep,
activation_function=tf.nn.tanh)
# call the embedder
embedder_tanh()
# try feeding a batch of one example
input = np.random.rand(1, 100, 100, 10)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
output = sess.run(embedder_tanh.output, {embedder_tanh.input: input})
assert np.all(output >= -1) and np.all(output <= 1)
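
The (1, 5184) expectation above is scheme-dependent. Assuming the default image scheme uses the DQN-style convolution stack (8x8 stride 4, 4x4 stride 2, 3x3 stride 1, 64 final filters) with 'valid' padding, the 5184 figure checks out arithmetically:

def conv_out(size, kernel, stride):
    # one axis of a 'valid' convolution
    return (size - kernel) // stride + 1

side = conv_out(conv_out(conv_out(100, 8, 4), 4, 2), 3, 1)  # 100 -> 24 -> 11 -> 9
assert side * side * 64 == 5184  # flattened conv output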

@@ -0,0 +1,95 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
import pytest
import numpy as np
from rl_coach.architectures.tensorflow_components.embedders.vector_embedder import VectorEmbedder, EmbedderScheme
import tensorflow as tf
from tensorflow import logging
logging.set_verbosity(logging.INFO)
@pytest.fixture
def reset():
tf.reset_default_graph()
@pytest.mark.unit_test
def test_embedder(reset):
# creating a vector embedder with a matrix
with pytest.raises(ValueError):
embedder = VectorEmbedder(np.array([10, 10]), name="test")
# creating a simple vector embedder
embedder = VectorEmbedder(np.array([10]), name="test")
# make sure the ops were not created yet
assert len(tf.get_default_graph().get_operations()) == 0
# call the embedder
input_ph, output_ph = embedder()
# make sure that now the ops were created
assert len(tf.get_default_graph().get_operations()) > 0
# try feeding a batch of one example
input = np.random.rand(1, 10)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
output = sess.run(embedder.output, {embedder.input: input})
assert output.shape == (1, 256)
# now make sure the returned placeholders behave the same
output = sess.run(output_ph, {input_ph: input})
assert output.shape == (1, 256)
# make sure the naming is correct
assert embedder.get_name() == "test"
@pytest.mark.unit_test
def test_complex_embedder(reset):
# creating a deep vector embedder
embedder = VectorEmbedder(np.array([10]), name="test", scheme=EmbedderScheme.Deep)
# call the embedder
embedder()
# try feeding a batch of one example
input = np.random.rand(1, 10)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
output = sess.run(embedder.output, {embedder.input: input})
assert output.shape == (1, 128)  # output size of the deep scheme's final layer
@pytest.mark.unit_test
def test_activation_function(reset):
# creating a deep vector embedder with relu
embedder = VectorEmbedder(np.array([10]), name="relu", scheme=EmbedderScheme.Deep,
activation_function=tf.nn.relu)
# call the embedder
embedder()
# try feeding a batch of one example
input = np.random.rand(1, 10)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
output = sess.run(embedder.output, {embedder.input: input})
assert np.all(output >= 0)  # relu outputs are non-negative
# creating a deep vector embedder with tanh
embedder_tanh = VectorEmbedder(np.array([10]), name="tanh", scheme=EmbedderScheme.Deep,
activation_function=tf.nn.tanh)
# call the embedder
embedder_tanh()
# try feeding a batch of one example
input = np.random.rand(1, 10)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
output = sess.run(embedder_tanh.output, {embedder_tanh.input: input})
assert np.all(output >= -1) and np.all(output <= 1)

@@ -0,0 +1,67 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
import pytest
from rl_coach.environments.gym_environment import GymEnvironment
from rl_coach.base_parameters import VisualizationParameters
import numpy as np
from rl_coach.spaces import DiscreteActionSpace, BoxActionSpace, ImageObservationSpace, VectorObservationSpace
@pytest.fixture()
def atari_env():
# create a breakout gym environment
env = GymEnvironment(level='Breakout-v0',
seed=1,
frame_skip=4,
visualization_parameters=VisualizationParameters())
return env
@pytest.fixture()
def continuous_env():
# create a pendulum gym environment
env = GymEnvironment(level='Pendulum-v0',
seed=1,
frame_skip=1,
visualization_parameters=VisualizationParameters())
return env
@pytest.mark.unit_test
def test_gym_discrete_environment(atari_env):
# observation space
assert type(atari_env.state_space['observation']) == ImageObservationSpace
assert np.all(atari_env.state_space['observation'].shape == [210, 160, 3])
assert np.all(atari_env.last_env_response.next_state['observation'].shape == (210, 160, 3))
# action space
assert type(atari_env.action_space) == DiscreteActionSpace
assert np.all(atari_env.action_space.high == 3)
# make sure that the seed is working properly
assert np.sum(atari_env.last_env_response.next_state['observation']) == 4115856
@pytest.mark.unit_test
def test_gym_continuous_environment(continuous_env):
# observation space
assert type(continuous_env.state_space['observation']) == VectorObservationSpace
assert np.all(continuous_env.state_space['observation'].shape == [3])
assert np.all(continuous_env.last_env_response.next_state['observation'].shape == (3,))
# action space
assert type(continuous_env.action_space) == BoxActionSpace
assert np.all(continuous_env.action_space.shape == np.array([1]))
# make sure that the seed is working properly
assert np.sum(continuous_env.last_env_response.next_state['observation']) == 1.2661630859028832
@pytest.mark.unit_test
def test_step(atari_env):
result = atari_env.step(0)
if __name__ == '__main__':
test_gym_continuous_environment(continuous_env())
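
The exact observation sums asserted above (4115856 and 1.2661...) pin these tests to specific gym/ALE versions. The determinism being exercised reduces to gym's pre-0.26 seeding API, roughly:

import gym

env = gym.make('Pendulum-v0')
env.seed(1)  # old-style gym seeding, as used by this era of the codebase
first_obs = env.reset()
env.seed(1)
assert (env.reset() == first_obs).all()  # same seed -> same initial observation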

@@ -0,0 +1,44 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
import pytest
from rl_coach.spaces import DiscreteActionSpace, BoxActionSpace
from rl_coach.exploration_policies.additive_noise import AdditiveNoise
from rl_coach.schedules import LinearSchedule
import numpy as np
@pytest.mark.unit_test
def test_init():
# discrete control
action_space = DiscreteActionSpace(3)
noise_schedule = LinearSchedule(1.0, 1.0, 1000)
# additive noise doesn't work for discrete controls
with pytest.raises(ValueError):
policy = AdditiveNoise(action_space, noise_schedule, 0)
# additive noise requires a bounded range for the actions
action_space = BoxActionSpace(np.array([10]))
with pytest.raises(ValueError):
policy = AdditiveNoise(action_space, noise_schedule, 0)
@pytest.mark.unit_test
def test_get_action():
# make sure noise is in range
action_space = BoxActionSpace(np.array([10]), -1, 1)
noise_schedule = LinearSchedule(1.0, 1.0, 1000)
policy = AdditiveNoise(action_space, noise_schedule, 0)
# the action range is 2, so with std = 2 there is only a ~0.1% chance that the noise exceeds 3*std = 6
for i in range(1000):
action = policy.get_action(np.zeros([10]))
assert np.all(action < 10)
# make sure there is no clipping of the action since it should be the environment that clips actions
assert np.all(action != 1.0)
assert np.all(action != -1.0)
# make sure that each action element has a different value
assert np.all(action[0] != action[1:])
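
For intuition, additive-noise exploration in its generic form perturbs the action mean with Gaussian noise whose std is tied to the action range; a minimal sketch, not rl_coach's exact scaling:

import numpy as np

def additive_noise(action_mean, noise_ratio, action_range):
    # hypothetical helper: std as a fraction of the action range
    std = noise_ratio * action_range
    return action_mean + np.random.normal(0.0, std, size=np.shape(action_mean))

sample = additive_noise(np.zeros(10), noise_ratio=1.0, action_range=2.0)
assert sample.shape == (10,)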

@@ -0,0 +1,81 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
import pytest
from rl_coach.spaces import DiscreteActionSpace
from rl_coach.exploration_policies.e_greedy import EGreedy
from rl_coach.schedules import LinearSchedule
import numpy as np
from rl_coach.core_types import RunPhase
@pytest.mark.unit_test
def test_get_action():
# discrete control
action_space = DiscreteActionSpace(3)
epsilon_schedule = LinearSchedule(1.0, 1.0, 1000)
policy = EGreedy(action_space, epsilon_schedule, evaluation_epsilon=0)
# verify that test phase gives greedy actions (evaluation_epsilon = 0)
policy.change_phase(RunPhase.TEST)
for i in range(100):
best_action = policy.get_action(np.array([10, 20, 30]))
assert best_action == 2
# verify that train phase gives uniform actions (exploration = 1)
policy.change_phase(RunPhase.TRAIN)
counters = np.array([0, 0, 0])
for i in range(30000):
best_action = policy.get_action(np.array([10, 20, 30]))
counters[best_action] += 1
assert np.all(counters > 9500) # this is noisy so we allow 5% error
# TODO: test continuous actions
@pytest.mark.unit_test
def test_change_phase():
# discrete control
action_space = DiscreteActionSpace(3)
epsilon_schedule = LinearSchedule(1.0, 0.1, 1000)
policy = EGreedy(action_space, epsilon_schedule, evaluation_epsilon=0.01)
# verify the schedule does not advance outside the training phase
assert policy.get_control_param() == 1.0
policy.change_phase(RunPhase.TEST)
best_action = policy.get_action(np.array([10, 20, 30]))
assert policy.epsilon_schedule.current_value == 1.0
policy.change_phase(RunPhase.HEATUP)
best_action = policy.get_action(np.array([10, 20, 30]))
assert policy.epsilon_schedule.current_value == 1.0
policy.change_phase(RunPhase.UNDEFINED)
best_action = policy.get_action(np.array([10, 20, 30]))
assert policy.epsilon_schedule.current_value == 1.0
@pytest.mark.unit_test
def test_get_control_param():
# discrete control
action_space = DiscreteActionSpace(3)
epsilon_schedule = LinearSchedule(1.0, 0.1, 1000)
policy = EGreedy(action_space, epsilon_schedule, evaluation_epsilon=0.01)
# verify schedule applies to TRAIN phase
policy.change_phase(RunPhase.TRAIN)
for i in range(999):
best_action = policy.get_action(np.array([10, 20, 30]))
assert 1.0 > policy.get_control_param() > 0.1
best_action = policy.get_action(np.array([10, 20, 30]))
assert policy.get_control_param() == 0.1
# test phases
policy.change_phase(RunPhase.TEST)
assert policy.get_control_param() == 0.01
policy.change_phase(RunPhase.TRAIN)
assert policy.get_control_param() == 0.1
policy.change_phase(RunPhase.HEATUP)
assert policy.get_control_param() == 0.1
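
The phase behavior verified above matches the textbook epsilon-greedy rule, explore uniformly with probability epsilon and act greedily otherwise; a minimal sketch:

import numpy as np

def e_greedy(q_values, epsilon):
    if np.random.rand() < epsilon:
        return np.random.randint(len(q_values))  # explore: uniform random action
    return int(np.argmax(q_values))              # exploit: greedy action

assert e_greedy(np.array([10, 20, 30]), epsilon=0.0) == 2  # epsilon 0 -> always greedy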

@@ -0,0 +1,34 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
import pytest
from rl_coach.spaces import DiscreteActionSpace, BoxActionSpace
from rl_coach.exploration_policies.greedy import Greedy
import numpy as np
@pytest.mark.unit_test
def test_get_action():
# discrete control
action_space = DiscreteActionSpace(3)
policy = Greedy(action_space)
best_action = policy.get_action(np.array([10, 20, 30]))
assert best_action == 2
# continuous control
action_space = BoxActionSpace(np.array([10]))
policy = Greedy(action_space)
best_action = policy.get_action(np.array([1, 1, 1]))
assert np.all(best_action == np.array([1, 1, 1]))
@pytest.mark.unit_test
def test_get_control_param():
action_space = DiscreteActionSpace(3)
policy = Greedy(action_space)
assert policy.get_control_param() == 0

@@ -0,0 +1,85 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
import pytest
from rl_coach.spaces import DiscreteActionSpace, BoxActionSpace
from rl_coach.exploration_policies.ou_process import OUProcess
from rl_coach.core_types import RunPhase
import numpy as np
@pytest.mark.unit_test
def test_init():
# discrete control
action_space = DiscreteActionSpace(3)
# OU process doesn't work for discrete controls
with pytest.raises(ValueError):
policy = OUProcess(action_space, mu=0, theta=0.1, sigma=0.2, dt=0.01)
@pytest.mark.unit_test
def test_get_action():
action_space = BoxActionSpace(np.array([10]), -1, 1)
policy = OUProcess(action_space, mu=0, theta=0.1, sigma=0.2, dt=0.01)
# make sure no noise is added in the testing phase
policy.change_phase(RunPhase.TEST)
assert np.all(policy.get_action(np.zeros((10,))) == np.zeros((10,)))
rand_action = np.random.rand(10)
assert np.all(policy.get_action(rand_action) == rand_action)
# make sure the noise added in the training phase matches the golden
policy.change_phase(RunPhase.TRAIN)
np.random.seed(0)
targets = [
    [0.03528105, 0.00800314, 0.01957476, 0.04481786, 0.03735116, -0.01954556, 0.01900177, -0.00302714, -0.00206438, 0.00821197],
    [0.03812664, 0.03708061, 0.03477594, 0.04720655, 0.04619107, -0.01285253, 0.04886435, -0.00712728, 0.00419904, -0.00887816],
    [-0.01297129, 0.0501159, 0.05202989, 0.03231604, 0.09153997, -0.04192699, 0.04973065, -0.01086383, 0.03485043, 0.0205179],
    [-0.00985937, 0.05762904, 0.03422214, -0.00733221, 0.08449019, -0.03875808, 0.07428674, 0.01319463, 0.02706904, 0.01445132],
    [-3.08205658e-02, 2.91710492e-02, 6.25166679e-05, 3.16906342e-02, 7.42126579e-02, -4.74808080e-02, 4.91565431e-02, 2.87312413e-02, -5.23598615e-03, 1.01820670e-02],
    [-0.04869908, 0.03687993, -0.01015365, 0.0080463, 0.0735748, -0.03886669, 0.05043773, 0.03475195, -0.01791719, 0.00291706],
    [-0.06209959, 0.02965198, -0.02640642, -0.0264874, 0.07704975, -0.04686344, 0.01778333, 0.04397284, -0.03604524, 0.00395305],
    [-0.04745568, 0.03220199, -0.003592, -0.05115743, 0.08501953, -0.06051278, 0.0003496, 0.03235188, -0.04224025, 0.00507241],
    [-0.07071122, 0.05018632, 0.00572484, -0.08183114, 0.11469956, -0.02253448, 0.02392484, 0.02872103, -0.06361306, 0.02615637],
    [-0.07870404, 0.07458503, 0.00988462, -0.06221653, 0.12171218, -0.00838049, 0.02411092, 0.06440972, -0.0610112, 0.03417],
    [-0.04096233, 0.04755527, -0.01553497, -0.04276638, 0.098128, 0.03050032, 0.01581443, 0.04939621, -0.02249135, 0.06374613],
    [-0.00357018, 0.06562861, -0.03274395, -0.00452232, 0.09266981, 0.04651895, 0.03474365, 0.04624661, -0.01018727, 0.08212651],
]
for i in range(10):
current_noise = policy.get_action(np.zeros((10,)))
assert np.all(np.abs(current_noise - targets[i]) < 1e-7)
# get some statistics. check very roughly that the mean acts according to the definition of the policy
# mean of 0
vals = []
for i in range(50000):
current_noise = policy.get_action(np.zeros((10,)))
vals.append(current_noise)
assert np.all(np.abs(np.mean(vals, axis=0)) < 1)
# mean of 10
policy = OUProcess(action_space, mu=10, theta=0.1, sigma=0.2, dt=0.01)
policy.change_phase(RunPhase.TRAIN)
vals = []
for i in range(50000):
current_noise = policy.get_action(np.zeros((10,)))
vals.append(current_noise)
assert np.all(np.abs(np.mean(vals, axis=0) - 10) < 1)
# plot the noise values - only used for understanding how the noise actually looks
# import matplotlib.pyplot as plt
# vals = np.array(vals)
# for i in range(10):
# plt.plot(list(range(10000)), vals[:, i])
# plt.plot(list(range(10000)), vals[:, i])
# plt.plot(list(range(10000)), vals[:, i])
# plt.show()
if __name__ == "__main__":
test_get_action()
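
The mu/theta/sigma/dt parameters above correspond to the standard Euler discretization of an Ornstein-Uhlenbeck process; a sketch of the usual update rule (rl_coach's internals may differ in detail):

import numpy as np

def ou_step(x, mu=0.0, theta=0.1, sigma=0.2, dt=0.01):
    # mean-reverting drift toward mu plus Brownian noise scaled by sqrt(dt)
    return x + theta * (mu - x) * dt + sigma * np.sqrt(dt) * np.random.normal(size=x.shape)

x = np.zeros(10)
for _ in range(1000):
    x = ou_step(x)  # hovers around mu=0 in the long run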

@@ -0,0 +1,44 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
import pytest
from rl_coach.filters.action.attention_discretization import AttentionDiscretization
from rl_coach.spaces import BoxActionSpace, DiscreteActionSpace, AttentionActionSpace
import numpy as np
@pytest.mark.unit_test
def test_filter():
filter = AttentionDiscretization(2)
# passing an output space that is wrong
with pytest.raises(ValueError):
filter.validate_output_action_space(DiscreteActionSpace(10))
with pytest.raises(ValueError):
filter.validate_output_action_space(BoxActionSpace(10))
# 2 dimensional attention space
output_space = AttentionActionSpace(2, 0, 83)
input_space = filter.get_unfiltered_action_space(output_space)
assert np.all(filter.target_actions == np.array([[[0., 0.], [41.5, 41.5]],
[[0., 41.5], [41.5, 83.]],
[[41.5, 0], [83., 41.5]],
[[41.5, 41.5], [83., 83.]]]))
assert input_space.actions == list(range(4))
action = 2
result = filter.filter(action)
assert np.all(result == np.array([[41.5, 0], [83., 41.5]]))
assert output_space.val_matches_space_definition(result)
# force int bins
filter = AttentionDiscretization(2, force_int_bins=True)
input_space = filter.get_unfiltered_action_space(output_space)
assert np.all(filter.target_actions == np.array([[[0., 0.], [41, 41]],
[[0., 41], [41, 83.]],
[[41, 0], [83., 41]],
[[41, 41], [83., 83.]]]))
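
The expected target_actions above follow from splitting each axis of the [0, 83] range into 2 bins, so the bin edges form an evenly spaced grid; a sketch of that arithmetic (not the filter's actual code):

import numpy as np

edges = np.linspace(0, 83, 2 + 1)  # [0., 41.5, 83.] -- 2 bins per axis
boxes = [(edges[i], edges[i + 1]) for i in range(2)]
assert boxes == [(0.0, 41.5), (41.5, 83.0)]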

@@ -0,0 +1,45 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
import pytest
from rl_coach.filters.action.box_discretization import BoxDiscretization
from rl_coach.spaces import BoxActionSpace, DiscreteActionSpace
@pytest.mark.unit_test
def test_filter():
filter = BoxDiscretization(9)
# passing an output space that is wrong
with pytest.raises(ValueError):
filter.validate_output_action_space(DiscreteActionSpace(10))
# 1 dimensional box
output_space = BoxActionSpace(1, 5, 15)
input_space = filter.get_unfiltered_action_space(output_space)
assert filter.target_actions == [[5.], [6.25], [7.5], [8.75], [10.], [11.25], [12.5], [13.75], [15.]]
assert input_space.actions == list(range(9))
action = 2
result = filter.filter(action)
assert result == [7.5]
assert output_space.val_matches_space_definition(result)
# 2 dimensional box
filter = BoxDiscretization(3)
output_space = BoxActionSpace(2, 5, 15)
input_space = filter.get_unfiltered_action_space(output_space)
assert filter.target_actions == [[5., 5.], [5., 10.], [5., 15.],
[10., 5.], [10., 10.], [10., 15.],
[15., 5.], [15., 10.], [15., 15.]]
assert input_space.actions == list(range(9))
action = 2
result = filter.filter(action)
assert result == [5., 15.]
assert output_space.val_matches_space_definition(result)
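
The expected targets above are just evenly spaced grids over the box: np.linspace for the 1D case, and its Cartesian product for the 2D case; a quick check of that assumption:

import numpy as np
from itertools import product

assert np.linspace(5, 15, 9).tolist() == [5., 6.25, 7.5, 8.75, 10., 11.25, 12.5, 13.75, 15.]
grid = [list(p) for p in product(np.linspace(5, 15, 3), repeat=2)]
assert grid[2] == [5., 15.]  # matches filter.filter(2) above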

@@ -0,0 +1,27 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
import pytest
from rl_coach.filters.action.box_masking import BoxMasking
from rl_coach.spaces import BoxActionSpace, DiscreteActionSpace
import numpy as np
@pytest.mark.unit_test
def test_filter():
filter = BoxMasking(10, 20)
# passing an output space that is wrong
with pytest.raises(ValueError):
filter.validate_output_action_space(DiscreteActionSpace(10))
# 1 dimensional box
output_space = BoxActionSpace(1, 5, 30)
input_space = filter.get_unfiltered_action_space(output_space)
action = np.array([2])
result = filter.filter(action)
assert result == np.array([12])
assert output_space.val_matches_space_definition(result)
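
From the assertion above (a zero-based action of 2 maps to 12), the masking appears to shift actions into the [10, 20] sub-range by adding the mask's lower bound; a sketch of that inference, not the filter's code:

import numpy as np

mask_low = 10
action = np.array([2])
assert (mask_low + action) == np.array([12])  # inferred offset behavior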

@@ -0,0 +1,29 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
import pytest
from rl_coach.filters.action.linear_box_to_box_map import LinearBoxToBoxMap
from rl_coach.spaces import BoxActionSpace, DiscreteActionSpace
import numpy as np
@pytest.mark.unit_test
def test_filter():
filter = LinearBoxToBoxMap(10, 20)
# passing an output space that is wrong
with pytest.raises(ValueError):
filter.validate_output_action_space(DiscreteActionSpace(10))
# 1 dimensional box
output_space = BoxActionSpace(1, 5, 35)
input_space = filter.get_unfiltered_action_space(output_space)
action = np.array([12])
result = filter.filter(action)
assert result == np.array([11])
assert output_space.val_matches_space_definition(result)
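
The assertion is consistent with a linear map from the input box [10, 20] onto the output box [5, 35]; the usual interpolation formula, as a sketch:

import numpy as np

in_low, in_high, out_low, out_high = 10., 20., 5., 35.
a = np.array([12.])
mapped = out_low + (a - in_low) * (out_high - out_low) / (in_high - in_low)
assert mapped == np.array([11.])  # matches the filtered result above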

@@ -0,0 +1,90 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
import pytest
import numpy as np
from rl_coach.filters.observation.observation_crop_filter import ObservationCropFilter
from rl_coach.filters.filter import InputFilter
from rl_coach.spaces import ObservationSpace
from rl_coach.core_types import EnvResponse
@pytest.fixture
def env_response():
observation = np.random.rand(10, 20, 30)
return EnvResponse(next_state={'observation': observation}, reward=0, game_over=False)
@pytest.mark.unit_test
def test_filter(env_response):
crop_low = np.array([0, 5, 10])
crop_high = np.array([5, 10, 20])
crop_filter = InputFilter()
crop_filter.add_observation_filter('observation', 'crop', ObservationCropFilter(crop_low, crop_high))
result = crop_filter.filter(env_response)[0]
unfiltered_observation = env_response.next_state['observation']
filtered_observation = result.next_state['observation']
# validate the shape of the filtered observation
assert filtered_observation.shape == (5, 5, 10)
# validate the content of the filtered observation
assert np.all(filtered_observation == unfiltered_observation[0:5, 5:10, 10:20])
# crop with -1 on some axes
crop_low = np.array([0, 0, 0])
crop_high = np.array([5, -1, -1])
crop_filter = InputFilter()
crop_filter.add_observation_filter('observation', 'crop', ObservationCropFilter(crop_low, crop_high))
result = crop_filter.filter(env_response)[0]
unfiltered_observation = env_response.next_state['observation']
filtered_observation = result.next_state['observation']
# validate the shape of the filtered observation
assert filtered_observation.shape == (5, 20, 30)
# validate the content of the filtered observation
assert np.all(filtered_observation == unfiltered_observation[0:5, :, :])
@pytest.mark.unit_test
def test_get_filtered_observation_space():
crop_low = np.array([0, 5, 10])
crop_high = np.array([5, 10, 20])
crop_filter = InputFilter()
crop_filter.add_observation_filter('observation', 'crop', ObservationCropFilter(crop_low, crop_high))
observation_space = ObservationSpace(np.array([5, 10, 20]))
filtered_observation_space = crop_filter.get_filtered_observation_space('observation', observation_space)
# make sure the new observation space shape is calculated correctly
assert np.all(filtered_observation_space.shape == np.array([5, 5, 10]))
# make sure the original observation space is unchanged
assert np.all(observation_space.shape == np.array([5, 10, 20]))
# crop_high is bigger than the observation space
high_error_observation_space = ObservationSpace(np.array([3, 8, 14]))
with pytest.raises(ValueError):
crop_filter.get_filtered_observation_space('observation', high_error_observation_space)
# crop_low is bigger than the observation space
low_error_observation_space = ObservationSpace(np.array([3, 3, 10]))
with pytest.raises(ValueError):
crop_filter.get_filtered_observation_space('observation', low_error_observation_space)
# crop with -1 on some axes
crop_low = np.array([0, 0, 0])
crop_high = np.array([5, -1, -1])
crop_filter = InputFilter()
crop_filter.add_observation_filter('observation', 'crop', ObservationCropFilter(crop_low, crop_high))
observation_space = ObservationSpace(np.array([5, 10, 20]))
filtered_observation_space = crop_filter.get_filtered_observation_space('observation', observation_space)
# make sure the new observation space shape is calculated correctly
assert np.all(filtered_observation_space.shape == np.array([5, 10, 20]))

@@ -0,0 +1,84 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
import pytest
import numpy as np
from rl_coach.filters.observation.observation_reduction_by_sub_parts_name_filter import ObservationReductionBySubPartsNameFilter
from rl_coach.spaces import VectorObservationSpace
from rl_coach.core_types import EnvResponse
from rl_coach.filters.filter import InputFilter
@pytest.mark.unit_test
def test_filter():
# Keep
observation_space = VectorObservationSpace(3, measurements_names=['a', 'b', 'c'])
env_response = EnvResponse(next_state={'observation': np.ones([3])}, reward=0, game_over=False)
reduction_filter = InputFilter()
reduction_filter.add_observation_filter('observation', 'reduce',
ObservationReductionBySubPartsNameFilter(
["a"],
ObservationReductionBySubPartsNameFilter.ReductionMethod.Keep
))
reduction_filter.get_filtered_observation_space('observation', observation_space)
result = reduction_filter.filter(env_response)[0]
unfiltered_observation = env_response.next_state['observation']
filtered_observation = result.next_state['observation']
# make sure the original observation is unchanged
assert unfiltered_observation.shape == (3,)
# validate the shape of the filtered observation
assert filtered_observation.shape == (1,)
# Discard
reduction_filter = InputFilter()
reduction_filter.add_observation_filter('observation', 'reduce',
ObservationReductionBySubPartsNameFilter(
["a"],
ObservationReductionBySubPartsNameFilter.ReductionMethod.Discard
))
reduction_filter.get_filtered_observation_space('observation', observation_space)
result = reduction_filter.filter(env_response)[0]
unfiltered_observation = env_response.next_state['observation']
filtered_observation = result.next_state['observation']
# make sure the original observation is unchanged
assert unfiltered_observation.shape == (3,)
# validate the shape of the filtered observation
assert filtered_observation.shape == (2,)
@pytest.mark.unit_test
def test_get_filtered_observation_space():
# Keep
observation_space = VectorObservationSpace(3, measurements_names=['a', 'b', 'c'])
env_response = EnvResponse(next_state={'observation': np.ones([3])}, reward=0, game_over=False)
reduction_filter = InputFilter()
reduction_filter.add_observation_filter('observation', 'reduce',
ObservationReductionBySubPartsNameFilter(
["a"],
ObservationReductionBySubPartsNameFilter.ReductionMethod.Keep
))
filtered_observation_space = reduction_filter.get_filtered_observation_space('observation', observation_space)
assert np.all(filtered_observation_space.shape == np.array([1]))
assert filtered_observation_space.measurements_names == ['a']
# Discard
observation_space = VectorObservationSpace(3, measurements_names=['a', 'b', 'c'])
env_response = EnvResponse(next_state={'observation': np.ones([3])}, reward=0, game_over=False)
reduction_filter = InputFilter()
reduction_filter.add_observation_filter('observation', 'reduce',
ObservationReductionBySubPartsNameFilter(
["a"],
ObservationReductionBySubPartsNameFilter.ReductionMethod.Discard
))
filtered_observation_space = reduction_filter.get_filtered_observation_space('observation', observation_space)
assert np.all(filtered_observation_space.shape == np.array([2]))
assert filtered_observation_space.measurements_names == ['b', 'c']

@@ -0,0 +1,66 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
import pytest
import numpy as np
from rl_coach.filters.observation.observation_rescale_size_by_factor_filter import ObservationRescaleSizeByFactorFilter, RescaleInterpolationType
from rl_coach.spaces import ObservationSpace
from rl_coach.core_types import EnvResponse
from rl_coach.filters.filter import InputFilter
@pytest.mark.filterwarnings('ignore:Conversion of')
@pytest.mark.unit_test
def test_filter():
# make an RGB observation smaller
env_response = EnvResponse(next_state={'observation': np.ones([20, 30, 3])}, reward=0, game_over=False)
rescale_filter = InputFilter()
rescale_filter.add_observation_filter('observation', 'rescale',
ObservationRescaleSizeByFactorFilter(0.5, RescaleInterpolationType.BILINEAR))
result = rescale_filter.filter(env_response)[0]
unfiltered_observation = env_response.next_state['observation']
filtered_observation = result.next_state['observation']
# make sure the original observation is unchanged
assert unfiltered_observation.shape == (20, 30, 3)
# validate the shape of the filtered observation
assert filtered_observation.shape == (10, 15, 3)
# make a grayscale observation bigger
env_response = EnvResponse(next_state={'observation': np.ones([20, 30])}, reward=0, game_over=False)
rescale_filter = InputFilter()
rescale_filter.add_observation_filter('observation', 'rescale',
ObservationRescaleSizeByFactorFilter(2, RescaleInterpolationType.BILINEAR))
result = rescale_filter.filter(env_response)[0]
filtered_observation = result.next_state['observation']
# validate the shape of the filtered observation
assert filtered_observation.shape == (40, 60)
assert np.all(filtered_observation == np.ones([40, 60]))
@pytest.mark.unit_test
def test_get_filtered_observation_space():
# error on wrong number of channels
rescale_filter = InputFilter()
rescale_filter.add_observation_filter('observation', 'rescale',
ObservationRescaleSizeByFactorFilter(0.5, RescaleInterpolationType.BILINEAR))
observation_space = ObservationSpace(np.array([10, 20, 5]))
with pytest.raises(ValueError):
filtered_observation_space = rescale_filter.get_filtered_observation_space('observation', observation_space)
# error on wrong number of dimensions
observation_space = ObservationSpace(np.array([10, 20, 10, 3]))
with pytest.raises(ValueError):
filtered_observation_space = rescale_filter.get_filtered_observation_space('observation', observation_space)
# make sure the new observation space shape is calculated correctly
observation_space = ObservationSpace(np.array([10, 20, 3]))
filtered_observation_space = rescale_filter.get_filtered_observation_space('observation', observation_space)
assert np.all(filtered_observation_space.shape == np.array([5, 10, 3]))
# make sure the original observation space is unchanged
assert np.all(observation_space.shape == np.array([10, 20, 3]))

@@ -0,0 +1,106 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
import pytest
import numpy as np
from rl_coach.filters.observation.observation_rescale_to_size_filter import ObservationRescaleToSizeFilter, RescaleInterpolationType
from rl_coach.spaces import ObservationSpace, ImageObservationSpace, PlanarMapsObservationSpace
from rl_coach.core_types import EnvResponse
from rl_coach.filters.filter import InputFilter
@pytest.mark.filterwarnings('ignore:Conversion of')
@pytest.mark.unit_test
def test_filter():
# make an RGB observation smaller
transition = EnvResponse(next_state={'observation': np.ones([20, 30, 3])}, reward=0, game_over=False)
rescale_filter = InputFilter()
rescale_filter.add_observation_filter('observation', 'rescale',
ObservationRescaleToSizeFilter(ImageObservationSpace(np.array([10, 20, 3]),
high=255),
RescaleInterpolationType.BILINEAR))
result = rescale_filter.filter(transition)[0]
unfiltered_observation = transition.next_state['observation']
filtered_observation = result.next_state['observation']
# make sure the original observation is unchanged
assert unfiltered_observation.shape == (20, 30, 3)
# validate the shape of the filtered observation
assert filtered_observation.shape == (10, 20, 3)
assert np.all(filtered_observation == np.ones([10, 20, 3]))
# make a grayscale observation bigger
transition = EnvResponse(next_state={'observation': np.ones([20, 30])}, reward=0, game_over=False)
rescale_filter = InputFilter()
rescale_filter.add_observation_filter('observation', 'rescale',
ObservationRescaleToSizeFilter(ImageObservationSpace(np.array([40, 60]),
high=255),
RescaleInterpolationType.BILINEAR))
result = rescale_filter.filter(transition)[0]
filtered_observation = result.next_state['observation']
# validate the shape of the filtered observation
assert filtered_observation.shape == (40, 60)
assert np.all(filtered_observation == np.ones([40, 60]))
# rescale channels -> error
# with pytest.raises(ValueError):
# InputFilter(
# observation_filters=OrderedDict([('rescale',
# ObservationRescaleToSizeFilter(ImageObservationSpace(np.array([10, 20, 1]),
# high=255),
# RescaleInterpolationType.BILINEAR))]))
# TODO: validate input to filter
# different number of axes -> error
# env_response = EnvResponse(state={'observation': np.ones([20, 30, 3])}, reward=0, game_over=False)
# rescale_filter = ObservationRescaleToSizeFilter(ObservationSpace(np.array([10, 20])),
# RescaleInterpolationType.BILINEAR)
# with pytest.raises(ValueError):
# result = rescale_filter.filter(transition)
# channels first -> error
with pytest.raises(ValueError):
ObservationRescaleToSizeFilter(ImageObservationSpace(np.array([3, 10, 20]), high=255),
RescaleInterpolationType.BILINEAR)
@pytest.mark.unit_test
def test_get_filtered_observation_space():
# error on wrong number of channels
with pytest.raises(ValueError):
observation_filters = InputFilter()
observation_filters.add_observation_filter('observation', 'rescale',
ObservationRescaleToSizeFilter(ImageObservationSpace(np.array([5, 10, 5]),
high=255),
RescaleInterpolationType.BILINEAR))
# mismatch and wrong number of channels
rescale_filter = InputFilter()
rescale_filter.add_observation_filter('observation', 'rescale',
ObservationRescaleToSizeFilter(ImageObservationSpace(np.array([5, 10, 3]),
high=255),
RescaleInterpolationType.BILINEAR))
observation_space = PlanarMapsObservationSpace(np.array([10, 20, 5]), low=0, high=255)
with pytest.raises(ValueError):
rescale_filter.get_filtered_observation_space('observation', observation_space)
# error on wrong number of dimensions
observation_space = ObservationSpace(np.array([10, 20, 10, 3]), high=255)
with pytest.raises(ValueError):
rescale_filter.get_filtered_observation_space('observation', observation_space)
# make sure the new observation space shape is calculated correctly
observation_space = ImageObservationSpace(np.array([10, 20, 3]), high=255)
filtered_observation_space = rescale_filter.get_filtered_observation_space('observation', observation_space)
assert np.all(filtered_observation_space.shape == np.array([5, 10, 3]))
# make sure the original observation space is unchanged
assert np.all(observation_space.shape == np.array([10, 20, 3]))
# TODO: test that the type of the observation space stays the same

@@ -0,0 +1,47 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
import pytest
import numpy as np
from rl_coach.filters.observation.observation_rgb_to_y_filter import ObservationRGBToYFilter
from rl_coach.spaces import ObservationSpace
from rl_coach.core_types import EnvResponse
from rl_coach.filters.filter import InputFilter
@pytest.fixture
def rgb_to_y_filter():
rgb_to_y_filter = InputFilter()
rgb_to_y_filter.add_observation_filter('observation', 'rgb_to_y', ObservationRGBToYFilter())
return rgb_to_y_filter
@pytest.mark.unit_test
def test_filter(rgb_to_y_filter):
# convert RGB observation to grayscale
observation = np.random.rand(20, 30, 3)*255.0
transition = EnvResponse(next_state={'observation': observation}, reward=0, game_over=False)
result = rgb_to_y_filter.filter(transition)[0]
unfiltered_observation = transition.next_state['observation']
filtered_observation = result.next_state['observation']
# make sure the original observation is unchanged
assert unfiltered_observation.shape == (20, 30, 3)
# make sure the filtering is done correctly
assert filtered_observation.shape == (20, 30)
@pytest.mark.unit_test
def test_get_filtered_observation_space(rgb_to_y_filter):
# error on observation space which are not RGB
observation_space = ObservationSpace(np.array([1, 2, 4]), 0, 100)
with pytest.raises(ValueError):
rgb_to_y_filter.get_filtered_observation_space('observation', observation_space)
observation_space = ObservationSpace(np.array([1, 2, 3]), 0, 100)
result = rgb_to_y_filter.get_filtered_observation_space('observation', observation_space)
assert np.all(result.shape == np.array([1, 2]))
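
The 'Y' here is the luma channel of YUV; the conventional BT.601 weighting (which the filter presumably applies, or something close to it) collapses the channel axis:

import numpy as np

rgb = np.random.rand(20, 30, 3) * 255.0
y = 0.299 * rgb[..., 0] + 0.587 * rgb[..., 1] + 0.114 * rgb[..., 2]
assert y.shape == (20, 30)  # channel axis gone, as the shape assertions expect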

@@ -0,0 +1,72 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
import pytest
import numpy as np
from rl_coach.filters.observation.observation_squeeze_filter import ObservationSqueezeFilter
from rl_coach.spaces import ObservationSpace
from rl_coach.core_types import EnvResponse
from rl_coach.filters.filter import InputFilter
@pytest.mark.unit_test
def test_filter():
# squeeze out singleton axes from the observation
squeeze_filter = InputFilter()
squeeze_filter.add_observation_filter('observation', 'squeeze', ObservationSqueezeFilter())
squeeze_filter_with_axis = InputFilter()
squeeze_filter_with_axis.add_observation_filter('observation', 'squeeze', ObservationSqueezeFilter(2))
observation = np.random.rand(20, 30, 1, 3)
env_response = EnvResponse(next_state={'observation': observation}, reward=0, game_over=False)
result = squeeze_filter.filter(env_response)[0]
result_with_axis = squeeze_filter_with_axis.filter(env_response)[0]
unfiltered_observation_shape = env_response.next_state['observation'].shape
filtered_observation_shape = result.next_state['observation'].shape
filtered_observation_with_axis_shape = result_with_axis.next_state['observation'].shape
# make sure the original observation is unchanged
assert unfiltered_observation_shape == observation.shape
# make sure the filtering is done correctly
assert filtered_observation_shape == (20, 30, 3)
assert filtered_observation_with_axis_shape == (20, 30, 3)
observation = np.random.rand(1, 30, 1, 3)
env_response = EnvResponse(next_state={'observation': observation}, reward=0, game_over=False)
result = squeeze_filter.filter(env_response)[0]
assert result.next_state['observation'].shape == (30, 3)
@pytest.mark.unit_test
def test_get_filtered_observation_space():
# error on observation space with shape not matching the filter squeeze axis configuration
squeeze_filter = InputFilter()
squeeze_filter.add_observation_filter('observation', 'squeeze', ObservationSqueezeFilter(axis=3))
observation_space = ObservationSpace(np.array([20, 1, 30, 3]), 0, 100)
small_observation_space = ObservationSpace(np.array([20, 1, 30]), 0, 100)
with pytest.raises(ValueError):
    squeeze_filter.get_filtered_observation_space('observation', observation_space)
with pytest.raises(ValueError):
    squeeze_filter.get_filtered_observation_space('observation', small_observation_space)
# verify output observation space is correct
observation_space = ObservationSpace(np.array([1, 2, 3, 1]), 0, 200)
result = squeeze_filter.get_filtered_observation_space('observation', observation_space)
assert np.all(result.shape == np.array([1, 2, 3]))
squeeze_filter = InputFilter()
squeeze_filter.add_observation_filter('observation', 'squeeze', ObservationSqueezeFilter())
result = squeeze_filter.get_filtered_observation_space('observation', observation_space)
assert np.all(result.shape == np.array([2, 3]))
if __name__ == '__main__':
test_filter()
test_get_filtered_observation_space()

@@ -0,0 +1,78 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
import pytest
import numpy as np
from rl_coach.filters.observation.observation_stacking_filter import ObservationStackingFilter
from rl_coach.spaces import ObservationSpace
from rl_coach.core_types import EnvResponse
from rl_coach.filters.filter import InputFilter
@pytest.fixture
def env_response():
observation = np.random.rand(20, 30, 1)
return EnvResponse(next_state={'observation': observation}, reward=0, game_over=False)
@pytest.fixture
def stack_filter():
stack_filter = InputFilter()
stack_filter.add_observation_filter('observation', 'stack', ObservationStackingFilter(4, stacking_axis=-1))
return stack_filter
@pytest.mark.unit_test
def test_filter(stack_filter, env_response):
# stack observation on empty stack
result = stack_filter.filter(env_response)[0]
unfiltered_observation = env_response.next_state['observation']
filtered_observation = result.next_state['observation']
# validate that the shape of the unfiltered observation is unchanged
assert unfiltered_observation.shape == (20, 30, 1)
assert np.array(filtered_observation).shape == (20, 30, 1, 4)
assert np.all(np.array(filtered_observation)[:, :, :, -1] == unfiltered_observation)
# stack observation on non-empty stack
result = stack_filter.filter(env_response)[0]
filtered_observation = result.next_state['observation']
assert np.array(filtered_observation).shape == (20, 30, 1, 4)
@pytest.mark.unit_test
def test_get_filtered_observation_space(stack_filter, env_response):
observation_space = ObservationSpace(np.array([5, 10, 20]))
filtered_observation_space = stack_filter.get_filtered_observation_space('observation', observation_space)
# make sure the new observation space shape is calculated correctly
assert np.all(filtered_observation_space.shape == np.array([5, 10, 20, 4]))
# make sure the original observation space is unchanged
assert np.all(observation_space.shape == np.array([5, 10, 20]))
# call after stack is already created with non-matching shape -> error
result = stack_filter.filter(env_response)[0]
with pytest.raises(ValueError):
filtered_observation_space = stack_filter.get_filtered_observation_space('observation', observation_space)
@pytest.mark.unit_test
def test_reset(stack_filter, env_response):
# stack observation on empty stack
result = stack_filter.filter(env_response)[0]
unfiltered_observation = env_response.next_state['observation']
filtered_observation = result.next_state['observation']
assert np.all(np.array(filtered_observation)[:, :, :, -1] == unfiltered_observation)
# reset and make sure the outputs are correct
stack_filter.reset()
unfiltered_observation = np.random.rand(20, 30, 1)
new_env_response = EnvResponse(next_state={'observation': unfiltered_observation}, reward=0, game_over=False)
result = stack_filter.filter(new_env_response)[0]
filtered_observation = result.next_state['observation']
assert np.all(np.array(filtered_observation)[:, :, :, 0] == unfiltered_observation)
assert np.all(np.array(filtered_observation)[:, :, :, -1] == unfiltered_observation)

@@ -0,0 +1,50 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
import pytest
import numpy as np
from rl_coach.filters.observation.observation_to_uint8_filter import ObservationToUInt8Filter
from rl_coach.spaces import ObservationSpace
from rl_coach.core_types import EnvResponse
from rl_coach.filters.filter import InputFilter
@pytest.mark.unit_test
def test_filter():
# convert a float observation to uint8
uint8_filter = InputFilter()
uint8_filter.add_observation_filter('observation', 'to_uint8', ObservationToUInt8Filter(input_low=0, input_high=255))
observation = np.random.rand(20, 30, 3)*255.0
env_response = EnvResponse(next_state={'observation': observation}, reward=0, game_over=False)
result = uint8_filter.filter(env_response)[0]
unfiltered_observation = env_response.next_state['observation']
filtered_observation = result.next_state['observation']
# make sure the original observation is unchanged
assert unfiltered_observation.dtype == 'float64'
# make sure the filtering is done correctly
assert filtered_observation.dtype == 'uint8'
assert np.all(filtered_observation == observation.astype('uint8'))
@pytest.mark.unit_test
def test_get_filtered_observation_space():
# error on observation space with values not matching the filter configuration
uint8_filter = InputFilter()
uint8_filter.add_observation_filter('observation', 'to_uint8', ObservationToUInt8Filter(input_low=0, input_high=200))
observation_space = ObservationSpace(np.array([1, 2, 3]), 0, 100)
with pytest.raises(ValueError):
uint8_filter.get_filtered_observation_space('observation', observation_space)
# verify output observation space is correct
observation_space = ObservationSpace(np.array([1, 2, 3]), 0, 200)
result = uint8_filter.get_filtered_observation_space('observation', observation_space)
assert np.all(result.high == 255)
assert np.all(result.low == 0)
assert np.all(result.shape == observation_space.shape)
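
Conceptually the filter maps the declared [low, high] range onto [0, 255] and casts to uint8; with input_low=0 and input_high=255 that reduces to a plain cast, which is why test_filter compares against observation.astype('uint8'). A sketch of the general case, under that assumption:

import numpy as np

def to_uint8(obs, low, high):
    # rescale [low, high] linearly onto [0, 255], then cast
    return ((obs - low) / (high - low) * 255.0).astype('uint8')

obs = np.random.rand(20, 30, 3) * 200.0
out = to_uint8(obs, 0, 200)
assert out.dtype == np.uint8 and out.shape == obs.shape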

@@ -0,0 +1,74 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
import pytest
import numpy as np
from rl_coach.filters.reward.reward_clipping_filter import RewardClippingFilter
from rl_coach.spaces import RewardSpace
from rl_coach.core_types import EnvResponse
from collections import OrderedDict
from rl_coach.filters.filter import InputFilter
@pytest.fixture
def clip_filter():
return InputFilter(reward_filters=OrderedDict([('clip', RewardClippingFilter(2, 10))]))
@pytest.mark.unit_test
def test_filter(clip_filter):
transition = EnvResponse(next_state={'observation': np.zeros(10)}, reward=100, game_over=False)
result = clip_filter.filter(transition)[0]
unfiltered_reward = transition.reward
filtered_reward = result.reward
# validate that the reward was clipped correctly
assert filtered_reward == 10
# make sure the original reward is unchanged
assert unfiltered_reward == 100
# reward in bounds
transition = EnvResponse(next_state={'observation': np.zeros(10)}, reward=5, game_over=False)
result = clip_filter.filter(transition)[0]
assert result.reward == 5
# reward below bounds
transition = EnvResponse(next_state={'observation': np.zeros(10)}, reward=-5, game_over=False)
result = clip_filter.filter(transition)[0]
assert result.reward == 2
@pytest.mark.unit_test
def test_get_filtered_reward_space(clip_filter):
# reward is clipped
reward_space = RewardSpace(1, -100, 100)
filtered_reward_space = clip_filter.get_filtered_reward_space(reward_space)
# make sure the new reward space shape is calculated correctly
assert filtered_reward_space.shape == 1
assert filtered_reward_space.low == 2
assert filtered_reward_space.high == 10
# reward is unclipped
reward_space = RewardSpace(1, 5, 7)
filtered_reward_space = clip_filter.get_filtered_reward_space(reward_space)
# make sure the new reward space shape is calculated correctly
assert filtered_reward_space.shape == 1
assert filtered_reward_space.low == 5
assert filtered_reward_space.high == 7
# infinite reward is clipped
reward_space = RewardSpace(1, -np.inf, np.inf)
filtered_reward_space = clip_filter.get_filtered_reward_space(reward_space)
# make sure the new reward space shape is calculated correctly
assert filtered_reward_space.shape == 1
assert filtered_reward_space.low == 2
assert filtered_reward_space.high == 10
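
The three cases above are exactly np.clip semantics over the (2, 10) bounds:

import numpy as np

assert np.clip(100, 2, 10) == 10  # above the bounds -> high
assert np.clip(5, 2, 10) == 5     # inside the bounds -> unchanged
assert np.clip(-5, 2, 10) == 2    # below the bounds -> low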

@@ -0,0 +1,56 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
import pytest
import numpy as np
from rl_coach.filters.reward.reward_rescale_filter import RewardRescaleFilter
from rl_coach.spaces import RewardSpace
from rl_coach.core_types import EnvResponse
from rl_coach.filters.filter import InputFilter
from collections import OrderedDict
@pytest.mark.unit_test
def test_filter():
rescale_filter = InputFilter(reward_filters=OrderedDict([('rescale', RewardRescaleFilter(1/10.))]))
env_response = EnvResponse(next_state={'observation': np.zeros(10)}, reward=100, game_over=False)
result = rescale_filter.filter(env_response)[0]
unfiltered_reward = env_response.reward
filtered_reward = result.reward
# validate that the reward was rescaled correctly
assert filtered_reward == 10
# make sure the original reward is unchanged
assert unfiltered_reward == 100
# negative reward
env_response = EnvResponse(next_state={'observation': np.zeros(10)}, reward=-50, game_over=False)
result = rescale_filter.filter(env_response)[0]
assert result.reward == -5
@pytest.mark.unit_test
def test_get_filtered_reward_space():
rescale_filter = InputFilter(reward_filters=OrderedDict([('rescale', RewardRescaleFilter(1/10.))]))
# reward is rescaled
reward_space = RewardSpace(1, -100, 100)
filtered_reward_space = rescale_filter.get_filtered_reward_space(reward_space)
# make sure the new reward space shape is calculated correctly
assert filtered_reward_space.shape == 1
assert filtered_reward_space.low == -10
assert filtered_reward_space.high == 10
# unbounded rewards
reward_space = RewardSpace(1, -np.inf, np.inf)
filtered_reward_space = rescale_filter.get_filtered_reward_space(reward_space)
# make sure the new reward space shape is calculated correctly
assert filtered_reward_space.shape == 1
assert filtered_reward_space.low == -np.inf
assert filtered_reward_space.high == np.inf

@@ -0,0 +1,70 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
import pytest
from rl_coach.filters.observation.observation_rescale_to_size_filter import ObservationRescaleToSizeFilter, RescaleInterpolationType
from rl_coach.filters.observation.observation_crop_filter import ObservationCropFilter
from rl_coach.filters.reward.reward_clipping_filter import RewardClippingFilter
from rl_coach.filters.observation.observation_stacking_filter import ObservationStackingFilter
from rl_coach.filters.filter import InputFilter
from rl_coach.spaces import ImageObservationSpace
import numpy as np
from rl_coach.core_types import EnvResponse
from collections import OrderedDict
@pytest.mark.filterwarnings('ignore:Conversion of')
@pytest.mark.unit_test
def test_filter_stacking():
# test that filter stacking works fine by taking as input a transition with:
# - an observation of shape 210x160,
# - a reward of 100
# filtering it by:
# - rescaling the observation to 110x84
# - cropping the observation to 84x84
# - clipping the reward to 1
# - stacking 4 observations to get 84x84x4
env_response = EnvResponse({'observation': np.ones([210, 160])}, reward=100, game_over=False)
filter1 = ObservationRescaleToSizeFilter(
output_observation_space=ImageObservationSpace(np.array([110, 84]), high=255),
rescaling_interpolation_type=RescaleInterpolationType.BILINEAR
)
filter2 = ObservationCropFilter(
crop_low=np.array([16, 0]),
crop_high=np.array([100, 84])
)
filter3 = RewardClippingFilter(
clipping_low=-1,
clipping_high=1
)
output_filter = ObservationStackingFilter(
stack_size=4,
stacking_axis=-1
)
input_filter = InputFilter(
observation_filters={
"observation": OrderedDict([
("filter1", filter1),
("filter2", filter2),
("output_filter", output_filter)
])},
reward_filters=OrderedDict([
("filter3", filter3)
])
)
result = input_filter.filter(env_response)[0]
observation = np.array(result.next_state['observation'])
assert observation.shape == (84, 84, 4)
assert np.all(observation == np.ones([84, 84, 4]))
assert result.reward == 1

@@ -0,0 +1,355 @@
#
# Copyright (c) 2017 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import argparse
import glob
import os
import shutil
import signal
import subprocess
import sys
from importlib import import_module
from os import path
sys.path.append('.')
import numpy as np
import pandas as pd
import time
from rl_coach.logger import screen
def read_csv_paths(test_path, filename_pattern, read_csv_tries=50):
csv_paths = []
tries_counter = 0
while not csv_paths:
csv_paths = glob.glob(path.join(test_path, '*', filename_pattern))
if tries_counter > read_csv_tries:
break
tries_counter += 1
time.sleep(1)
return csv_paths
def clean_df(df):
if 'Wall-Clock Time' in df.keys():
df.drop(['Wall-Clock Time'], 1, inplace=True)
return df
def print_progress(averaged_rewards, last_num_episodes, preset_validation_params, start_time, args):
percentage = int((100 * last_num_episodes) / preset_validation_params.max_episodes_to_achieve_reward)
sys.stdout.write("\rReward: ({}/{})".format(round(averaged_rewards[-1], 1),
preset_validation_params.min_reward_threshold))
sys.stdout.write(' Time (sec): ({}/{})'.format(round(time.time() - start_time, 2), args.time_limit))
sys.stdout.write(' Episode: ({}/{})'.format(last_num_episodes,
preset_validation_params.max_episodes_to_achieve_reward))
sys.stdout.write(
' {}%|{}{}| '.format(percentage, '#' * int(percentage / 10), ' ' * (10 - int(percentage / 10))))
sys.stdout.flush()
def perform_reward_based_tests(args, preset_validation_params, preset_name):
win_size = 10
test_name = '__test_reward'
test_path = os.path.join('./experiments', test_name)
if path.exists(test_path):
shutil.rmtree(test_path)
# run the experiment in a separate thread
screen.log_title("Running test {}".format(preset_name))
log_file_name = 'test_log_{preset_name}.txt'.format(preset_name=preset_name)
cmd = (
'python3 rl_coach/coach.py '
'-p {preset_name} '
'-e {test_name} '
'-n {num_workers} '
'--seed 0 '
'-c '
'{level} '
'&> {log_file_name} '
).format(
preset_name=preset_name,
test_name=test_name,
num_workers=preset_validation_params.num_workers,
log_file_name=log_file_name,
level='-lvl ' + preset_validation_params.reward_test_level if preset_validation_params.reward_test_level else ''
)
p = subprocess.Popen(cmd, shell=True, executable="/bin/bash", preexec_fn=os.setsid)
start_time = time.time()
reward_str = 'Evaluation Reward'
if preset_validation_params.num_workers > 1:
filename_pattern = 'worker_0*.csv'
else:
filename_pattern = '*.csv'
test_passed = False
# get the csv with the results
csv_paths = read_csv_paths(test_path, filename_pattern)
if csv_paths:
csv_path = csv_paths[0]
# verify results
csv = None
time.sleep(1)
averaged_rewards = [0]
last_num_episodes = 0
if not args.no_progress_bar:
print_progress(averaged_rewards, last_num_episodes, preset_validation_params, start_time, args)
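        # keep polling the csv until the reward threshold is reached, the episode budget is
        # exhausted, or the time limit expires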
        while csv is None or (csv['Episode #'].values[-1] < preset_validation_params.max_episodes_to_achieve_reward
                              and time.time() - start_time < args.time_limit):
try:
csv = pd.read_csv(csv_path)
            except Exception:
# sometimes the csv is being written at the same time we are
# trying to read it. no problem -> try again
continue
if reward_str not in csv.keys():
continue
rewards = csv[reward_str].values
rewards = rewards[~np.isnan(rewards)]
if len(rewards) >= 1:
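                # running-mean smoothing; with fewer than win_size rewards the kernel still
                # divides by win_size, so early averages are downweighted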
averaged_rewards = np.convolve(rewards, np.ones(min(len(rewards), win_size)) / win_size, mode='valid')
else:
time.sleep(1)
continue
if not args.no_progress_bar:
print_progress(averaged_rewards, last_num_episodes, preset_validation_params, start_time, args)
if csv['Episode #'].shape[0] - last_num_episodes <= 0:
continue
last_num_episodes = csv['Episode #'].values[-1]
# check if reward is enough
if np.any(averaged_rewards >= preset_validation_params.min_reward_threshold):
test_passed = True
break
time.sleep(1)
# kill test and print result
os.killpg(os.getpgid(p.pid), signal.SIGTERM)
screen.log('')
if test_passed:
screen.success("Passed successfully")
else:
if time.time() - start_time > args.time_limit:
screen.error("Failed due to exceeding time limit", crash=False)
if args.verbose:
screen.error("command exitcode: {}".format(p.returncode), crash=False)
screen.error(open(log_file_name).read(), crash=False)
elif csv_paths:
screen.error("Failed due to insufficient reward", crash=False)
if args.verbose:
screen.error("command exitcode: {}".format(p.returncode), crash=False)
screen.error(open(log_file_name).read(), crash=False)
screen.error("preset_validation_params.max_episodes_to_achieve_reward: {}".format(
preset_validation_params.max_episodes_to_achieve_reward), crash=False)
screen.error("preset_validation_params.min_reward_threshold: {}".format(
preset_validation_params.min_reward_threshold), crash=False)
screen.error("averaged_rewards: {}".format(averaged_rewards), crash=False)
screen.error("episode number: {}".format(csv['Episode #'].values[-1]), crash=False)
else:
screen.error("csv file never found", crash=False)
if args.verbose:
screen.error("command exitcode: {}".format(p.returncode), crash=False)
screen.error(open(log_file_name).read(), crash=False)
shutil.rmtree(test_path)
os.remove(log_file_name)
return test_passed
def perform_trace_based_tests(args, preset_name, num_env_steps, level=None):
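    """Run a short fixed-seed experiment and diff its csv results against a stored trace,
    creating the trace first if one does not exist yet.
    """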
test_name = '__test_trace'
test_path = os.path.join('./experiments', test_name)
if path.exists(test_path):
shutil.rmtree(test_path)
    # run the experiment in a separate process
screen.log_title("Running test {}{}".format(preset_name, ' - ' + level if level else ''))
log_file_name = 'test_log_{preset_name}.txt'.format(preset_name=preset_name)
cmd = (
'python3 rl_coach/coach.py '
'-p {preset_name} '
'-e {test_name} '
'--seed 42 '
'-c '
'--no_summary '
'-cp {custom_param} '
'{level} '
'&> {log_file_name} '
).format(
preset_name=preset_name,
test_name=test_name,
log_file_name=log_file_name,
level='-lvl ' + level if level else '',
custom_param='\"improve_steps=EnvironmentSteps({n});'
'steps_between_evaluation_periods=EnvironmentSteps({n});'
'evaluation_steps=EnvironmentSteps(1);'
'heatup_steps=EnvironmentSteps(1024)\"'.format(n=num_env_steps)
)
p = subprocess.Popen(cmd, shell=True, executable="/bin/bash", preexec_fn=os.setsid)
p.wait()
filename_pattern = '*.csv'
# get the csv with the results
csv_paths = read_csv_paths(test_path, filename_pattern)
test_passed = False
if not csv_paths:
screen.error("csv file never found", crash=False)
if args.verbose:
screen.error("command exitcode: {}".format(p.returncode), crash=False)
screen.error(open(log_file_name).read(), crash=False)
else:
        trace_path = os.path.join('./rl_coach', 'traces', (preset_name + '_' + level) if level else preset_name, '')
if not os.path.exists(trace_path):
screen.log('No trace found, creating new trace in: {}'.format(trace_path))
os.makedirs(os.path.dirname(trace_path))
df = pd.read_csv(csv_paths[0])
df = clean_df(df)
df.to_csv(os.path.join(trace_path, 'trace.csv'), index=False)
screen.success("Successfully created new trace.")
test_passed = True
else:
test_df = pd.read_csv(csv_paths[0])
test_df = clean_df(test_df)
new_trace_csv_path = os.path.join(trace_path, 'trace_new.csv')
test_df.to_csv(new_trace_csv_path, index=False)
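            # round-trip the results through csv so dtypes and float formatting match the stored trace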
test_df = pd.read_csv(new_trace_csv_path)
trace_csv_path = glob.glob(path.join(trace_path, 'trace.csv'))
trace_csv_path = trace_csv_path[0]
trace_df = pd.read_csv(trace_csv_path)
test_passed = test_df.equals(trace_df)
if test_passed:
screen.success("Passed successfully.")
os.remove(new_trace_csv_path)
else:
screen.error("Trace test failed.", crash=False)
if args.overwrite:
os.remove(trace_csv_path)
os.rename(new_trace_csv_path, trace_csv_path)
screen.error("Overwriting old trace.", crash=False)
else:
screen.error("bcompare {} {}".format(trace_csv_path, new_trace_csv_path), crash=False)
shutil.rmtree(test_path)
os.remove(log_file_name)
return test_passed
def main():
parser = argparse.ArgumentParser()
parser.add_argument('-t', '--trace',
help="(flag) perform trace based testing",
action='store_true')
parser.add_argument('-p', '--preset',
help="(string) Name of a preset to run (as configured in presets.py)",
default=None,
type=str)
parser.add_argument('-ip', '--ignore_presets',
help="(string) Name of a preset(s) to ignore (comma separated, and as configured in presets.py)",
default=None,
type=str)
parser.add_argument('-v', '--verbose',
help="(flag) display verbose logs in the event of an error",
action='store_true')
parser.add_argument('--stop_after_first_failure',
help="(flag) stop executing tests after the first error",
action='store_true')
parser.add_argument('-tl', '--time_limit',
help="time limit for each test in minutes",
default=40, # setting time limit to be so high due to DDPG being very slow - its tests are long
type=int)
parser.add_argument('-np', '--no_progress_bar',
help="(flag) Don't print the progress bar (makes jenkins logs more readable)",
action='store_true')
parser.add_argument('-ow', '--overwrite',
help="(flag) overwrite old trace with new ones in trace testing mode",
action='store_true')
args = parser.parse_args()
if args.preset is not None:
presets_lists = [args.preset]
else:
# presets_lists = list_all_classes_in_module(presets)
        presets_lists = [f[:-3] for f in os.listdir(os.path.join('rl_coach', 'presets'))
                         if f.endswith('.py') and f != '__init__.py']
fail_count = 0
test_count = 0
args.time_limit = 60 * args.time_limit
if args.ignore_presets is not None:
presets_to_ignore = args.ignore_presets.split(',')
else:
presets_to_ignore = []
for idx, preset_name in enumerate(sorted(presets_lists)):
if args.stop_after_first_failure and fail_count > 0:
break
if preset_name not in presets_to_ignore:
try:
preset = import_module('rl_coach.presets.{}'.format(preset_name))
            except Exception:
if args.verbose:
screen.error("Failed to load preset <{}>".format(preset_name), crash=False)
continue
preset_validation_params = preset.graph_manager.preset_validation_params
if not args.trace and not preset_validation_params.test:
continue
            if args.trace:
                num_env_steps = preset_validation_params.trace_max_env_steps
                if preset_validation_params.trace_test_levels:
                    # count each level as its own test so failures in earlier levels are not masked
                    for level in preset_validation_params.trace_test_levels:
                        test_count += 1
                        test_passed = perform_trace_based_tests(args, preset_name, num_env_steps, level)
                        if not test_passed:
                            fail_count += 1
                else:
                    test_count += 1
                    test_passed = perform_trace_based_tests(args, preset_name, num_env_steps)
                    if not test_passed:
                        fail_count += 1
            else:
                test_count += 1
                test_passed = perform_reward_based_tests(args, preset_validation_params, preset_name)
                if not test_passed:
                    fail_count += 1
screen.separator()
if fail_count == 0:
screen.success(" Summary: " + str(test_count) + "/" + str(test_count) + " tests passed successfully")
else:
screen.error(" Summary: " + str(test_count - fail_count) + "/" + str(test_count) + " tests passed successfully")
if __name__ == '__main__':
main()

View File

@@ -0,0 +1,52 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
import tensorflow as tf
from rl_coach.base_parameters import TaskParameters, DistributedTaskParameters
from rl_coach.utils import get_open_port
from multiprocessing import Process
from tensorflow import logging
import pytest
logging.set_verbosity(logging.INFO)
@pytest.mark.unit_test
def test_basic_rl_graph_manager_with_pong_a3c():
tf.reset_default_graph()
from rl_coach.presets.Atari_A3C import graph_manager
assert graph_manager
graph_manager.env_params.level = "PongDeterministic-v4"
graph_manager.create_graph(task_parameters=TaskParameters(framework_type="tensorflow",
experiment_path="./experiments/test"))
# graph_manager.improve()
@pytest.mark.unit_test
def test_basic_rl_graph_manager_with_pong_nec():
tf.reset_default_graph()
from rl_coach.presets.Atari_NEC import graph_manager
assert graph_manager
graph_manager.env_params.level = "PongDeterministic-v4"
graph_manager.create_graph(task_parameters=TaskParameters(framework_type="tensorflow",
experiment_path="./experiments/test"))
# graph_manager.improve()
@pytest.mark.unit_test
def test_basic_rl_graph_manager_with_cartpole_dqn():
tf.reset_default_graph()
from rl_coach.presets.CartPole_DQN import graph_manager
assert graph_manager
graph_manager.create_graph(task_parameters=TaskParameters(framework_type="tensorflow",
experiment_path="./experiments/test"))
# graph_manager.improve()
if __name__ == '__main__':
pass
# test_basic_rl_graph_manager_with_pong_a3c()
# test_basic_rl_graph_manager_with_ant_a3c()
# test_basic_rl_graph_manager_with_pong_nec()
# test_basic_rl_graph_manager_with_cartpole_dqn()
#test_basic_rl_graph_manager_multithreaded_with_pong_a3c()
#test_basic_rl_graph_manager_with_doom_basic_dqn()

View File

View File

@@ -0,0 +1,91 @@
# nasty hack to deal with issue #46
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
import pytest
import numpy as np
import time
from rl_coach.memories.non_episodic.differentiable_neural_dictionary import QDND
import tensorflow as tf
NUM_ACTIONS = 3
NUM_DND_ENTRIES_TO_ADD = 10000
EMBEDDING_SIZE = 512
NUM_SAMPLED_EMBEDDINGS = 500
NUM_NEIGHBORS = 10
DND_SIZE = 500000
@pytest.fixture()
def dnd():
return QDND(
DND_SIZE,
EMBEDDING_SIZE,
NUM_ACTIONS,
0.1,
key_error_threshold=0,
learning_rate=0.0001,
num_neighbors=NUM_NEIGHBORS
)
@pytest.mark.unit_test
def test_random_sample_from_dnd(dnd: QDND):
    # fill the DND with random embeddings, actions and values
    embeddings = [np.random.rand(EMBEDDING_SIZE) for _ in range(NUM_DND_ENTRIES_TO_ADD)]
    actions = [np.random.randint(NUM_ACTIONS) for _ in range(NUM_DND_ENTRIES_TO_ADD)]
    values = [np.random.rand() for _ in range(NUM_DND_ENTRIES_TO_ADD)]
dnd.add(embeddings, actions, values)
dnd_embeddings, dnd_values, dnd_indices = dnd.query(embeddings[0:10], 0, NUM_NEIGHBORS)
# calculate_normalization_factor
sampled_embeddings = dnd.sample_embeddings(NUM_SAMPLED_EMBEDDINGS)
coefficient = 1/(NUM_SAMPLED_EMBEDDINGS * (NUM_SAMPLED_EMBEDDINGS - 1.0))
    tf_current_embedding = tf.placeholder(tf.float32, shape=(EMBEDDING_SIZE,), name='current_embedding')
tf_other_embeddings = tf.placeholder(tf.float32, shape=(NUM_SAMPLED_EMBEDDINGS - 1, EMBEDDING_SIZE), name='other_embeddings')
sub = tf_current_embedding - tf_other_embeddings
square = tf.square(sub)
result = tf.reduce_sum(square)
###########################
# more efficient method
###########################
sampled_embeddings_expanded = tf.placeholder(
tf.float32, shape=(1, NUM_SAMPLED_EMBEDDINGS, EMBEDDING_SIZE), name='sampled_embeddings_expanded')
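    # tiling the (1, N, E) batch N times and subtracting its transpose yields every pairwise
    # difference in a single op; the i == j terms are zero, so the sum of squares matches the
    # per-embedding loop above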
sampled_embeddings_tiled = tf.tile(sampled_embeddings_expanded, (sampled_embeddings_expanded.shape[1], 1, 1))
sampled_embeddings_transposed = tf.transpose(sampled_embeddings_tiled, (1, 0, 2))
sub2 = sampled_embeddings_tiled - sampled_embeddings_transposed
square2 = tf.square(sub2)
result2 = tf.reduce_sum(square2)
config = tf.ConfigProto()
config.allow_soft_placement = True # allow placing ops on cpu if they are not fit for gpu
config.gpu_options.allow_growth = True # allow the gpu memory allocated for the worker to grow if needed
sess = tf.Session(config=config)
sum1 = 0
start = time.time()
for i in range(NUM_SAMPLED_EMBEDDINGS):
curr_sampled_embedding = sampled_embeddings[i]
other_embeddings = np.delete(sampled_embeddings, i, 0)
sum1 += sess.run(result, feed_dict={tf_current_embedding: curr_sampled_embedding, tf_other_embeddings: other_embeddings})
print("1st method: {} sec".format(time.time()-start))
start = time.time()
sum2 = sess.run(result2, feed_dict={sampled_embeddings_expanded: np.expand_dims(sampled_embeddings,0)})
print("2nd method: {} sec".format(time.time()-start))
    # the two methods should agree up to float32 accumulation error
    print("sum1 = {}, sum2 = {}".format(sum1, sum2))
    assert np.isclose(sum1, sum2, rtol=1e-2)
norm_factor = -0.5/(coefficient * sum2)
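    # coefficient * sum2 is the mean pairwise squared distance; norm_factor is computed here for
    # reference but not asserted on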
if __name__ == '__main__':
test_random_sample_from_dnd(dnd())

View File

@@ -0,0 +1,97 @@
# nasty hack to deal with issue #46
import os
import sys
from rl_coach.memories.episodic.episodic_hindsight_experience_replay import EpisodicHindsightExperienceReplayParameters
from rl_coach.spaces import GoalsSpace, ReachingGoal
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
# print(sys.path)
import pytest
import numpy as np
from rl_coach.core_types import Transition, Episode
from rl_coach.memories.memory import MemoryGranularity
from rl_coach.memories.episodic.episodic_hindsight_experience_replay import EpisodicHindsightExperienceReplay, \
HindsightGoalSelectionMethod
#TODO: change from defining a new class to creating an instance from the parameters
class Parameters(EpisodicHindsightExperienceReplayParameters):
def __init__(self):
super().__init__()
self.max_size = (MemoryGranularity.Transitions, 100)
self.hindsight_transitions_per_regular_transition = 4
self.hindsight_goal_selection_method = HindsightGoalSelectionMethod.Future
self.goals_space = GoalsSpace(goal_name='observation',
reward_type=ReachingGoal(distance_from_goal_threshold=0.1),
distance_metric=GoalsSpace.DistanceMetric.Euclidean)
@pytest.fixture
def episode():
episode = []
for i in range(10):
episode.append(Transition(
state={'observation': np.array([i]), 'desired_goal': np.array([i]), 'achieved_goal': np.array([i])},
action=i,
))
return episode
@pytest.fixture
def her():
params = Parameters().__dict__
import inspect
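    # keep only the keys that match the replay buffer constructor's signature so the
    # parameters object can be unpacked directly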
args = set(inspect.getfullargspec(EpisodicHindsightExperienceReplay.__init__).args).intersection(params)
params = {k: params[k] for k in args}
return EpisodicHindsightExperienceReplay(**params)
@pytest.mark.unit_test
def test_sample_goal(her, episode):
assert her._sample_goal(episode, 8) == 9
@pytest.mark.unit_test
def test_sample_goal_range(her, episode):
unseen_goals = set(range(1, 9))
for _ in range(500):
        unseen_goals.discard(int(her._sample_goal(episode, 0)))
if not unseen_goals:
return
assert unseen_goals == set()
@pytest.mark.unit_test
def test_update_episode(her):
episode = Episode()
for i in range(10):
episode.insert(Transition(
state={'observation': np.array([i]), 'desired_goal': np.array([i+1]), 'achieved_goal': np.array([i+1])},
action=i,
game_over=i == 9,
reward=0 if i == 9 else -1,
))
her.store_episode(episode)
# print('her._num_transitions', her._num_transitions)
# 10 original transitions, and 9 transitions * 4 hindsight episodes
assert her.num_transitions() == 10 + (4 * 9)
# make sure that the goal state was never sampled from the past
for transition in her.transitions:
assert transition.state['desired_goal'] > transition.state['observation']
assert transition.next_state['desired_goal'] >= transition.next_state['observation']
if transition.reward == 0:
assert transition.game_over
else:
assert not transition.game_over
if __name__ == '__main__':
    test_update_episode(her())

View File

@@ -0,0 +1,93 @@
# nasty hack to deal with issue #46
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
import pytest
from rl_coach.memories.non_episodic.prioritized_experience_replay import SegmentTree
@pytest.mark.unit_test
def test_sum_tree():
# test power of 2 sum tree
sum_tree = SegmentTree(size=4, operation=SegmentTree.Operation.SUM)
sum_tree.add(10, "10")
assert sum_tree.total_value() == 10
sum_tree.add(20, "20")
assert sum_tree.total_value() == 30
sum_tree.add(5, "5")
assert sum_tree.total_value() == 35
sum_tree.add(7.5, "7.5")
assert sum_tree.total_value() == 42.5
sum_tree.add(2.5, "2.5")
assert sum_tree.total_value() == 35
sum_tree.add(5, "5")
assert sum_tree.total_value() == 20
assert sum_tree.get(2) == (0, 2.5, '2.5')
assert sum_tree.get(3) == (1, 5.0, '5')
assert sum_tree.get(10) == (2, 5.0, '5')
assert sum_tree.get(13) == (3, 7.5, '7.5')
sum_tree.update(2, 10)
    assert str(sum_tree) == "[25.]\n[ 7.5 17.5]\n[ 2.5 5. 10. 7.5]\n"
# test non power of 2 sum tree
with pytest.raises(ValueError):
sum_tree = SegmentTree(size=5, operation=SegmentTree.Operation.SUM)
@pytest.mark.unit_test
def test_min_tree():
min_tree = SegmentTree(size=4, operation=SegmentTree.Operation.MIN)
min_tree.add(10, "10")
assert min_tree.total_value() == 10
min_tree.add(20, "20")
assert min_tree.total_value() == 10
min_tree.add(5, "5")
assert min_tree.total_value() == 5
min_tree.add(7.5, "7.5")
assert min_tree.total_value() == 5
min_tree.add(2, "2")
assert min_tree.total_value() == 2
min_tree.add(3, "3")
min_tree.add(3, "3")
min_tree.add(3, "3")
min_tree.add(5, "5")
assert min_tree.total_value() == 3
@pytest.mark.unit_test
def test_max_tree():
max_tree = SegmentTree(size=4, operation=SegmentTree.Operation.MAX)
max_tree.add(10, "10")
assert max_tree.total_value() == 10
max_tree.add(20, "20")
assert max_tree.total_value() == 20
max_tree.add(5, "5")
assert max_tree.total_value() == 20
max_tree.add(7.5, "7.5")
assert max_tree.total_value() == 20
max_tree.add(2, "2")
assert max_tree.total_value() == 20
max_tree.add(3, "3")
max_tree.add(3, "3")
max_tree.add(3, "3")
max_tree.add(5, "5")
assert max_tree.total_value() == 5
# update
max_tree.update(1, 10)
assert max_tree.total_value() == 10
    assert str(max_tree) == "[10.]\n[10. 3.]\n[ 5. 10. 3. 3.]\n"
max_tree.update(1, 2)
assert max_tree.total_value() == 5
    assert str(max_tree) == "[5.]\n[5. 3.]\n[5. 2. 3. 3.]\n"
if __name__ == "__main__":
test_sum_tree()
test_min_tree()
test_max_tree()

View File

@@ -0,0 +1,81 @@
# nasty hack to deal with issue #46
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
import pytest
import numpy as np
from rl_coach.core_types import Transition
from rl_coach.memories.episodic.single_episode_buffer import SingleEpisodeBuffer
@pytest.fixture()
def buffer():
return SingleEpisodeBuffer()
@pytest.mark.unit_test
def test_store_and_get(buffer: SingleEpisodeBuffer):
# store single non terminal transition
transition = Transition(state={"observation": np.array([1, 2, 3])}, action=1, reward=1, game_over=False)
buffer.store(transition)
assert buffer.length() == 1
assert buffer.num_complete_episodes() == 0
assert buffer.num_transitions_in_complete_episodes() == 0
assert buffer.num_transitions() == 1
# get the single stored transition
episode = buffer.get(0)
assert episode.length() == 1
assert episode.get_first_transition() is transition # check addresses are the same
assert episode.get_last_transition() is transition # check addresses are the same
# store single terminal transition
transition = Transition(state={"observation": np.array([1, 2, 3])}, action=1, reward=1, game_over=True)
buffer.store(transition)
assert buffer.length() == 1
assert buffer.num_complete_episodes() == 1
assert buffer.num_transitions_in_complete_episodes() == 2
# check that the episode is valid
episode = buffer.get(0)
assert episode.length() == 2
assert episode.get_transition(0).total_return == 1 + 0.99
assert episode.get_transition(1).total_return == 1
assert buffer.mean_reward() == 1
# only one episode in the replay buffer
episode = buffer.get(1)
assert episode is None
# adding transitions after the first episode was closed
transition = Transition(state={"observation": np.array([1, 2, 3])}, action=1, reward=0, game_over=False)
buffer.store(transition)
assert buffer.length() == 1
assert buffer.num_complete_episodes() == 0
assert buffer.num_transitions_in_complete_episodes() == 0
# still only one episode
assert buffer.get(1) is None
assert buffer.mean_reward() == 0
@pytest.mark.unit_test
def test_clean(buffer: SingleEpisodeBuffer):
# add several transitions and then clean the buffer
transition = Transition(state={"observation": np.array([1, 2, 3])}, action=1, reward=1, game_over=False)
for i in range(10):
buffer.store(transition)
assert buffer.num_transitions() == 10
buffer.clean()
assert buffer.num_transitions() == 0
# add more transitions after the clean and make sure they were really cleaned
transition = Transition(state={"observation": np.array([1, 2, 3])}, action=1, reward=1, game_over=True)
buffer.store(transition)
assert buffer.num_transitions() == 1
assert buffer.num_transitions_in_complete_episodes() == 1
assert buffer.num_complete_episodes() == 1
for i in range(10):
assert buffer.sample(1)[0] is transition

View File

View File

@@ -0,0 +1,56 @@
# nasty hack to deal with issue #46
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
import pytest
import os
import time
import shutil
from subprocess import Popen, DEVNULL
from rl_coach.logger import screen
@pytest.mark.integration_test
def test_all_presets_are_running():
# os.chdir("../../")
test_failed = False
all_presets = sorted([f.split('.')[0] for f in os.listdir('rl_coach/presets') if f.endswith('.py') and f != '__init__.py'])
for preset in all_presets:
print("Testing preset {}".format(preset))
# TODO: this is a temporary workaround for presets which define more than a single available level.
# we should probably do this in a more robust way
level = ""
if "Atari" in preset:
level = "breakout"
elif "Mujoco" in preset:
level = "inverted_pendulum"
elif "ControlSuite" in preset:
level = "pendulum:swingup"
params = ["python3", "rl_coach/coach.py", "-p", preset, "-ns", "-e", ".test"]
if level != "":
params += ["-lvl", level]
p = Popen(params, stdout=DEVNULL)
# wait 10 seconds overhead of initialization etc.
time.sleep(10)
return_value = p.poll()
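        # poll() returns None while the process is still alive, i.e. the preset survived the
        # 10 second grace period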
if return_value is None:
screen.success("{} passed successfully".format(preset))
else:
test_failed = True
screen.error("{} failed".format(preset), crash=False)
p.kill()
if os.path.exists("experiments/.test"):
shutil.rmtree("experiments/.test")
assert not test_failed
if __name__ == "__main__":
test_all_presets_are_running()

View File

@@ -0,0 +1,5 @@
# content of pytest.ini
[pytest]
markers =
unit_test: short test that checks that a module is acting correctly
integration_test: long test that checks that the complete framework is running correctly
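# example: select one class of tests by marker, e.g. `pytest -m unit_test`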

View File

@@ -0,0 +1,106 @@
import os
import sys
from rl_coach.core_types import EnvironmentSteps
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
import pytest
from rl_coach.schedules import LinearSchedule, ConstantSchedule, ExponentialSchedule, PieceWiseSchedule
import numpy as np
@pytest.mark.unit_test
def test_constant_schedule():
schedule = ConstantSchedule(0.3)
# make sure the values in the constant schedule don't change over time
for i in range(1000):
assert schedule.initial_value == 0.3
assert schedule.current_value == 0.3
schedule.step()
@pytest.mark.unit_test
def test_linear_schedule():
# increasing schedule
schedule = LinearSchedule(1, 3, 10)
# the schedule is defined in number of steps to get from 1 to 3 so there are 10 steps
    # the linspace is defined in number of bins between 1 and 3 so there are 11 bins
target_values = np.linspace(1, 3, 11)
for i in range(10):
# we round to 4 because there is a very small floating point division difference (1e-10)
assert round(schedule.current_value, 4) == round(target_values[i], 4)
schedule.step()
# make sure the value does not change after 10 steps
for i in range(10):
assert schedule.current_value == 3
# decreasing schedule
schedule = LinearSchedule(3, 1, 10)
target_values = np.linspace(3, 1, 11)
for i in range(10):
# we round to 4 because there is a very small floating point division difference (1e-10)
assert round(schedule.current_value, 4) == round(target_values[i], 4)
schedule.step()
# make sure the value does not change after 10 steps
for i in range(10):
assert schedule.current_value == 1
# constant schedule
schedule = LinearSchedule(3, 3, 10)
for i in range(10):
# we round to 4 because there is a very small floating point division difference (1e-10)
assert round(schedule.current_value, 4) == 3
schedule.step()
@pytest.mark.unit_test
def test_exponential_schedule():
# decreasing schedule
schedule = ExponentialSchedule(10, 3, 0.99)
current_power = 1
for i in range(100):
assert round(schedule.current_value,6) == round(10*current_power,6)
current_power *= 0.99
schedule.step()
for i in range(100):
schedule.step()
assert schedule.current_value == 3
@pytest.mark.unit_test
def test_piece_wise_schedule():
# decreasing schedule
schedule = PieceWiseSchedule(
[(LinearSchedule(1, 3, 10), EnvironmentSteps(5)),
(ConstantSchedule(4), EnvironmentSteps(10)),
(ExponentialSchedule(3, 1, 0.99), EnvironmentSteps(10))
]
)
target_values = np.append(np.linspace(1, 2, 6), np.ones(11)*4)
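    # the linear piece ramps 1 -> 2 over its first 5 steps, after which the constant piece holds 4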
for i in range(16):
assert round(schedule.current_value, 4) == round(target_values[i], 4)
schedule.step()
current_power = 1
for i in range(10):
assert round(schedule.current_value, 4) == round(3*current_power, 4)
current_power *= 0.99
schedule.step()
if __name__ == "__main__":
test_constant_schedule()
test_linear_schedule()
test_exponential_schedule()
test_piece_wise_schedule()

View File

@@ -0,0 +1,198 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
import pytest
from rl_coach.spaces import DiscreteActionSpace, BoxActionSpace, MultiSelectActionSpace, ObservationSpace, \
    AgentSelection, VectorObservationSpace, AttentionActionSpace
import numpy as np
@pytest.mark.unit_test
def test_discrete():
action_space = DiscreteActionSpace(3, ["zero", "one", "two"])
assert action_space.shape == 1
for i in range(100):
assert 3 > action_space.sample() >= 0
action_info = action_space.sample_with_info()
assert action_info.action_probability == 1. / 3
assert action_space.high == 2
assert action_space.low == 0
# list descriptions
assert action_space.get_description(1) == "one"
# dict descriptions
action_space = DiscreteActionSpace(3, {1: "one", 2: "two", 0: "zero"})
assert action_space.get_description(0) == "zero"
# no descriptions
action_space = DiscreteActionSpace(3)
assert action_space.get_description(0) == "0"
# descriptions for invalid action
with pytest.raises(ValueError):
assert action_space.get_description(3) == "0"
@pytest.mark.unit_test
def test_box():
# simple action space
action_space = BoxActionSpace(4, -5, 5, ["a", "b", "c", "d"])
for i in range(100):
sample = action_space.sample()
assert np.all(-5 <= sample) and np.all(sample <= 5)
assert sample.shape == (4,)
assert sample.dtype == float
# test clipping
clipped_action = action_space.clip_action_to_space(np.array([-10, 10, 2, 5]))
assert np.all(clipped_action == np.array([-5, 5, 2, 5]))
# more complex high and low definition
action_space = BoxActionSpace(4, np.array([-5, -1, -0.5, 0]), np.array([1, 2, 4, 5]), ["a", "b", "c", "d"])
for i in range(100):
sample = action_space.sample()
assert np.all(np.array([-5, -1, -0.5, 0]) <= sample) and np.all(sample <= np.array([1, 2, 4, 5]))
assert sample.shape == (4,)
assert sample.dtype == float
# test clipping
clipped_action = action_space.clip_action_to_space(np.array([-10, 10, 2, 5]))
assert np.all(clipped_action == np.array([-5, 2, 2, 5]))
# mixed high and low definition
action_space = BoxActionSpace(4, np.array([-5, -1, -0.5, 0]), 5, ["a", "b", "c", "d"])
for i in range(100):
sample = action_space.sample()
assert np.all(np.array([-5, -1, -0.5, 0]) <= sample) and np.all(sample <= 5)
assert sample.shape == (4,)
assert sample.dtype == float
# test clipping
clipped_action = action_space.clip_action_to_space(np.array([-10, 10, 2, 5]))
assert np.all(clipped_action == np.array([-5, 5, 2, 5]))
# invalid bounds
with pytest.raises(ValueError):
action_space = BoxActionSpace(4, np.array([-5, -1, -0.5, 0]), -1, ["a", "b", "c", "d"])
# TODO: test descriptions
@pytest.mark.unit_test
def test_multiselect():
action_space = MultiSelectActionSpace(4, 2, ["a", "b", "c", "d"])
for i in range(100):
action = action_space.sample()
assert action.shape == (4,)
assert np.sum(action) <= 2
# check that descriptions of multiple actions are working
description = action_space.get_description(np.array([1, 0, 1, 0]))
assert description == "a + c"
description = action_space.get_description(np.array([0, 0, 0, 0]))
assert description == "no-op"
@pytest.mark.unit_test
def test_attention():
low = np.array([-1, -2, -3, -4])
high = np.array([1, 2, 3, 4])
action_space = AttentionActionSpace(4, low=low, high=high)
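    # an attention action appears to be a pair of points (low corner, high corner) defining a sub-box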
for i in range(100):
action = action_space.sample()
assert len(action) == 2
assert action[0].shape == (4,)
assert action[1].shape == (4,)
assert np.all(action[0] <= action[1])
assert np.all(action[0] >= low)
assert np.all(action[1] < high)
@pytest.mark.unit_test
def test_goal():
# TODO: test goal action space
pass
@pytest.mark.unit_test
def test_agent_selection():
action_space = AgentSelection(10)
assert action_space.shape == 1
assert action_space.high == 9
assert action_space.low == 0
with pytest.raises(ValueError):
assert action_space.get_description(10)
assert action_space.get_description(0) == "0"
@pytest.mark.unit_test
def test_observation_space():
observation_space = ObservationSpace(np.array([1, 10]), -10, 10)
# testing that val_matches_space_definition works
assert observation_space.val_matches_space_definition(np.ones([1, 10]))
assert not observation_space.val_matches_space_definition(np.ones([2, 10]))
assert not observation_space.val_matches_space_definition(np.ones([1, 10]) * 100)
assert not observation_space.val_matches_space_definition(np.ones([1, 1, 10]))
# is_point_in_space_shape
assert observation_space.is_point_in_space_shape(np.array([0, 9]))
assert observation_space.is_point_in_space_shape(np.array([0, 0]))
assert not observation_space.is_point_in_space_shape(np.array([1, 8]))
assert not observation_space.is_point_in_space_shape(np.array([0, 10]))
assert not observation_space.is_point_in_space_shape(np.array([-1, 6]))
@pytest.mark.unit_test
def test_image_observation_space():
# TODO: test image observation space
pass
@pytest.mark.unit_test
def test_measurements_observation_space():
# empty measurements space
measurements_space = VectorObservationSpace(0)
# vector space
measurements_space = VectorObservationSpace(3, measurements_names=['a', 'b', 'c'])
@pytest.mark.unit_test
def test_reward_space():
# TODO: test reward space
pass
# def test_discrete_to_linspace_action_space_map():
# box = BoxActionSpace(2, np.array([0, 0]), np.array([10, 10]))
# linspace = BoxDiscretization(box, [5, 3])
# assert np.all(linspace.actions == np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]))
# assert np.all(linspace.target_actions ==
# np.array([[0.0, 0.0], [0.0, 5.0], [0.0, 10.0],
# [2.5, 0.0], [2.5, 5.0], [2.5, 10.0],
# [5.0, 0.0], [5.0, 5.0], [5.0, 10.0],
# [7.5, 0.0], [7.5, 5.0], [7.5, 10.0],
# [10.0, 0.0], [10.0, 5.0], [10.0, 10.0]]))
#
#
# def test_discrete_to_attention_action_space_map():
# attention = AttentionActionSpace(2, np.array([0, 0]), np.array([10, 10]))
# linspace = AttentionDiscretization(attention, 2)
# assert np.all(linspace.actions == np.array([0, 1, 2, 3]))
# assert np.all(linspace.target_actions ==
# np.array(
# [[[0., 0.], [5., 5.]],
# [[0., 5.], [5., 10.]],
# [[5., 0.], [10., 5.]],
# [[5., 5.], [10., 10.]]])
# )
if __name__ == "__main__":
    test_observation_space()
    # the discretization tests above are commented out, so don't call them here
    # test_discrete_to_linspace_action_space_map()
    # test_discrete_to_attention_action_space_map()