mirror of https://github.com/gryf/coach.git synced 2026-02-15 21:45:46 +01:00

pre-release 0.10.0

Gal Novik
2018-08-13 17:11:34 +03:00
parent d44c329bb8
commit 19ca5c24b1
485 changed files with 33292 additions and 16770 deletions

@@ -0,0 +1,33 @@
import os
import sys
from rl_coach.base_parameters import TaskParameters
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
import tensorflow as tf
from tensorflow import logging
import pytest
logging.set_verbosity(logging.INFO)
@pytest.mark.unit_test
def test_get_QActionStateValue_predictions():
tf.reset_default_graph()
from rl_coach.presets.CartPole_DQN import graph_manager as cartpole_dqn_graph_manager
assert cartpole_dqn_graph_manager
cartpole_dqn_graph_manager.create_graph(
    task_parameters=TaskParameters(framework_type="tensorflow",
                                   experiment_path="./experiments/test"))
cartpole_dqn_graph_manager.improve_steps.num_steps = 1
cartpole_dqn_graph_manager.steps_between_evaluation_periods.num_steps = 5
# graph_manager.improve()
#
# agent = graph_manager.level_managers[0].composite_agents['simple_rl_agent'].agents['simple_rl_agent/agent']
# some_state = agent.memory.sample(1)[0].state
# cartpole_dqn_predictions = agent.get_predictions(states=some_state, prediction_type=QActionStateValue)
# assert cartpole_dqn_predictions.shape == (1, 2)
if __name__ == '__main__':
test_get_QActionStateValue_predictions()
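
If the commented-out prediction check is ever re-enabled, it could look roughly like the sketch below. This is assembled from the comments above and is not a verified API: the 'simple_rl_agent' keys and the import location of QActionStateValue are assumptions.

# hypothetical sketch based on the commented-out lines above
# from rl_coach.core_types import QActionStateValue  # assumed import path
#
# cartpole_dqn_graph_manager.improve()
# agent = cartpole_dqn_graph_manager.level_managers[0] \
#     .composite_agents['simple_rl_agent'].agents['simple_rl_agent/agent']
# some_state = agent.memory.sample(1)[0].state
# predictions = agent.get_predictions(states=some_state, prediction_type=QActionStateValue)
# assert predictions.shape == (1, 2)  # CartPole has two discrete actions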

@@ -0,0 +1,45 @@
import os
import sys
from rl_coach.base_parameters import EmbedderScheme
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
import pytest
import numpy as np
from rl_coach.architectures.tensorflow_components.embedders.vector_embedder import VectorEmbedder
import tensorflow as tf
from tensorflow import logging
logging.set_verbosity(logging.INFO)
@pytest.fixture
def reset():
tf.reset_default_graph()
@pytest.mark.unit_test
def test_embedder(reset):
embedder = VectorEmbedder(np.array([10, 10]), name="test", scheme=EmbedderScheme.Empty)
# make sure the ops were not created yet
assert len(tf.get_default_graph().get_operations()) == 0
# call the embedder
input_ph, output_ph = embedder()
# make sure that now the ops were created
assert len(tf.get_default_graph().get_operations()) > 0
# try feeding a batch of one example # TODO: consider auto converting to batch
input = np.random.rand(1, 10, 10)
sess = tf.Session()
output = sess.run(embedder.output, {embedder.input: input})
assert output.shape == (1, 100) # should have flattened the input
# now make sure the returned placeholders behave the same
output = sess.run(output_ph, {input_ph: input})
assert output.shape == (1, 100) # should have flattened the input
# make sure the naming is correct
assert embedder.get_name() == "test"
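
A note on why no variable initializer is needed above: the Empty scheme adds no trainable layers, so the embedder amounts to a flatten. A minimal raw-TF1 sketch of that assumption:

import numpy as np
import tensorflow as tf

inp = tf.placeholder(tf.float32, shape=(None, 10, 10))
flat = tf.layers.flatten(inp)  # no variables created, just a reshape to (None, 100)
with tf.Session() as sess:
    assert sess.run(flat, {inp: np.random.rand(1, 10, 10)}).shape == (1, 100)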

@@ -0,0 +1,99 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
import pytest
import numpy as np
from rl_coach.architectures.tensorflow_components.embedders.image_embedder import ImageEmbedder, EmbedderScheme
import tensorflow as tf
from tensorflow import logging
logging.set_verbosity(logging.INFO)
@pytest.fixture
def reset():
tf.reset_default_graph()
@pytest.mark.unit_test
def test_embedder(reset):
# creating an embedder with a non-image input
with pytest.raises(ValueError):
embedder = ImageEmbedder(np.array([100]), name="test")
with pytest.raises(ValueError):
embedder = ImageEmbedder(np.array([100, 100]), name="test")
with pytest.raises(ValueError):
embedder = ImageEmbedder(np.array([10, 100, 100, 100]), name="test")
# creating a simple image embedder
embedder = ImageEmbedder(np.array([100, 100, 10]), name="test")
# make sure the ops were not created yet
assert len(tf.get_default_graph().get_operations()) == 0
# call the embedder
input_ph, output_ph = embedder()
# make sure that now the ops were created
assert len(tf.get_default_graph().get_operations()) > 0
# try feeding a batch of one example
input = np.random.rand(1, 100, 100, 10)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
output = sess.run(embedder.output, {embedder.input: input})
assert output.shape == (1, 5184)
# now make sure the returned placeholders behave the same
output = sess.run(output_ph, {input_ph: input})
assert output.shape == (1, 5184)
# make sure the naming is correct
assert embedder.get_name() == "test"
@pytest.mark.unit_test
def test_complex_embedder(reset):
# creating a deep image embedder
embedder = ImageEmbedder(np.array([100, 100, 10]), name="test", scheme=EmbedderScheme.Deep)
# call the embedder
embedder()
# try feeding a batch of one example
input = np.random.rand(1, 100, 100, 10)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
output = sess.run(embedder.output, {embedder.input: input})
assert output.shape == (1, 256)  # flattened output of the deep conv scheme
@pytest.mark.unit_test
def test_activation_function(reset):
# creating a deep image embedder with relu
embedder = ImageEmbedder(np.array([100, 100, 10]), name="relu", scheme=EmbedderScheme.Deep,
activation_function=tf.nn.relu)
# call the embedder
embedder()
# try feeding a batch of one example
input = np.random.rand(1, 100, 100, 10)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
output = sess.run(embedder.output, {embedder.input: input})
assert np.all(output >= 0)  # relu outputs are non-negative
# creating a deep image embedder with tanh
embedder_tanh = ImageEmbedder(np.array([100, 100, 10]), name="tanh", scheme=EmbedderScheme.Deep,
activation_function=tf.nn.tanh)
# call the embedder
embedder_tanh()
# try feeding a batch of one example
input = np.random.rand(1, 100, 100, 10)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
output = sess.run(embedder_tanh.output, {embedder_tanh.input: input})
assert np.all(output >= -1) and np.all(output <= 1)
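
The (1, 5184) expectation above is scheme-dependent. Assuming the default image scheme uses the DQN-style convolution stack (8x8 stride 4, 4x4 stride 2, 3x3 stride 1, 64 final filters) with 'valid' padding, the 5184 figure checks out arithmetically:

def conv_out(size, kernel, stride):
    # one axis of a 'valid' convolution
    return (size - kernel) // stride + 1

side = conv_out(conv_out(conv_out(100, 8, 4), 4, 2), 3, 1)  # 100 -> 24 -> 11 -> 9
assert side * side * 64 == 5184  # flattened conv output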

@@ -0,0 +1,95 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
import pytest
import numpy as np
from rl_coach.architectures.tensorflow_components.embedders.vector_embedder import VectorEmbedder, EmbedderScheme
import tensorflow as tf
from tensorflow import logging
logging.set_verbosity(logging.INFO)
@pytest.fixture
def reset():
tf.reset_default_graph()
@pytest.mark.unit_test
def test_embedder(reset):
# creating a vector embedder with a matrix
with pytest.raises(ValueError):
embedder = VectorEmbedder(np.array([10, 10]), name="test")
# creating a simple vector embedder
embedder = VectorEmbedder(np.array([10]), name="test")
# make sure the ops were not created yet
assert len(tf.get_default_graph().get_operations()) == 0
# call the embedder
input_ph, output_ph = embedder()
# make sure that now the ops were created
assert len(tf.get_default_graph().get_operations()) > 0
# try feeding a batch of one example
input = np.random.rand(1, 10)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
output = sess.run(embedder.output, {embedder.input: input})
assert output.shape == (1, 256)
# now make sure the returned placeholders behave the same
output = sess.run(output_ph, {input_ph: input})
assert output.shape == (1, 256)
# make sure the naming is correct
assert embedder.get_name() == "test"
@pytest.mark.unit_test
def test_complex_embedder(reset):
# creating a deep vector embedder
embedder = VectorEmbedder(np.array([10]), name="test", scheme=EmbedderScheme.Deep)
# call the embedder
embedder()
# try feeding a batch of one example
input = np.random.rand(1, 10)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
output = sess.run(embedder.output, {embedder.input: input})
assert output.shape == (1, 128)  # output size of the deep scheme's final layer
@pytest.mark.unit_test
def test_activation_function(reset):
# creating a deep vector embedder with relu
embedder = VectorEmbedder(np.array([10]), name="relu", scheme=EmbedderScheme.Deep,
activation_function=tf.nn.relu)
# call the embedder
embedder()
# try feeding a batch of one example
input = np.random.rand(1, 10)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
output = sess.run(embedder.output, {embedder.input: input})
assert np.all(output >= 0)  # relu outputs are non-negative
# creating a deep vector embedder with tanh
embedder_tanh = VectorEmbedder(np.array([10]), name="tanh", scheme=EmbedderScheme.Deep,
activation_function=tf.nn.tanh)
# call the embedder
embedder_tanh()
# try feeding a batch of one example
input = np.random.rand(1, 10)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
output = sess.run(embedder_tanh.output, {embedder_tanh.input: input})
assert np.all(output >= -1) and np.all(output <= 1)

@@ -0,0 +1,67 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
import pytest
from rl_coach.environments.gym_environment import GymEnvironment
from rl_coach.base_parameters import VisualizationParameters
import numpy as np
from rl_coach.spaces import DiscreteActionSpace, BoxActionSpace, ImageObservationSpace, VectorObservationSpace
@pytest.fixture()
def atari_env():
# create a breakout gym environment
env = GymEnvironment(level='Breakout-v0',
seed=1,
frame_skip=4,
visualization_parameters=VisualizationParameters())
return env
@pytest.fixture()
def continuous_env():
# create a pendulum gym environment
env = GymEnvironment(level='Pendulum-v0',
seed=1,
frame_skip=1,
visualization_parameters=VisualizationParameters())
return env
@pytest.mark.unit_test
def test_gym_discrete_environment(atari_env):
# observation space
assert type(atari_env.state_space['observation']) == ImageObservationSpace
assert np.all(atari_env.state_space['observation'].shape == [210, 160, 3])
assert np.all(atari_env.last_env_response.next_state['observation'].shape == (210, 160, 3))
# action space
assert type(atari_env.action_space) == DiscreteActionSpace
assert np.all(atari_env.action_space.high == 3)
# make sure that the seed is working properly
assert np.sum(atari_env.last_env_response.next_state['observation']) == 4115856
@pytest.mark.unit_test
def test_gym_continuous_environment(continuous_env):
# observation space
assert type(continuous_env.state_space['observation']) == VectorObservationSpace
assert np.all(continuous_env.state_space['observation'].shape == [3])
assert np.all(continuous_env.last_env_response.next_state['observation'].shape == (3,))
# action space
assert type(continuous_env.action_space) == BoxActionSpace
assert np.all(continuous_env.action_space.shape == np.array([1]))
# make sure that the seed is working properly
assert np.sum(continuous_env.last_env_response.next_state['observation']) == 1.2661630859028832
@pytest.mark.unit_test
def test_step(atari_env):
result = atari_env.step(0)
if __name__ == '__main__':
test_gym_continuous_environment(continuous_env())
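
The exact observation sums asserted above (4115856 and 1.2661...) pin these tests to specific gym/ALE versions. The determinism being exercised reduces to gym's pre-0.26 seeding API, roughly:

import gym

env = gym.make('Pendulum-v0')
env.seed(1)  # old-style gym seeding, as used by this era of the codebase
first_obs = env.reset()
env.seed(1)
assert (env.reset() == first_obs).all()  # same seed -> same initial observation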

@@ -0,0 +1,44 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
import pytest
from rl_coach.spaces import DiscreteActionSpace, BoxActionSpace
from rl_coach.exploration_policies.additive_noise import AdditiveNoise
from rl_coach.schedules import LinearSchedule
import numpy as np
@pytest.mark.unit_test
def test_init():
# discrete control
action_space = DiscreteActionSpace(3)
noise_schedule = LinearSchedule(1.0, 1.0, 1000)
# additive noise doesn't work for discrete controls
with pytest.raises(ValueError):
policy = AdditiveNoise(action_space, noise_schedule, 0)
# additive noise requires a bounded range for the actions
action_space = BoxActionSpace(np.array([10]))
with pytest.raises(ValueError):
policy = AdditiveNoise(action_space, noise_schedule, 0)
@pytest.mark.unit_test
def test_get_action():
# make sure noise is in range
action_space = BoxActionSpace(np.array([10]), -1, 1)
noise_schedule = LinearSchedule(1.0, 1.0, 1000)
policy = AdditiveNoise(action_space, noise_schedule, 0)
# the action range is 2, so with std = 2 there is only a ~0.1% chance that the noise exceeds 3*std = 6
for i in range(1000):
action = policy.get_action(np.zeros([10]))
assert np.all(action < 10)
# make sure there is no clipping of the action since it should be the environment that clips actions
assert np.all(action != 1.0)
assert np.all(action != -1.0)
# make sure that each action element has a different value
assert np.all(action[0] != action[1:])
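
For intuition, additive-noise exploration in its generic form perturbs the action mean with Gaussian noise whose std is tied to the action range; a minimal sketch, not rl_coach's exact scaling:

import numpy as np

def additive_noise(action_mean, noise_ratio, action_range):
    # hypothetical helper: std as a fraction of the action range
    std = noise_ratio * action_range
    return action_mean + np.random.normal(0.0, std, size=np.shape(action_mean))

sample = additive_noise(np.zeros(10), noise_ratio=1.0, action_range=2.0)
assert sample.shape == (10,)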

@@ -0,0 +1,81 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
import pytest
from rl_coach.spaces import DiscreteActionSpace
from rl_coach.exploration_policies.e_greedy import EGreedy
from rl_coach.schedules import LinearSchedule
import numpy as np
from rl_coach.core_types import RunPhase
@pytest.mark.unit_test
def test_get_action():
# discrete control
action_space = DiscreteActionSpace(3)
epsilon_schedule = LinearSchedule(1.0, 1.0, 1000)
policy = EGreedy(action_space, epsilon_schedule, evaluation_epsilon=0)
# verify that test phase gives greedy actions (evaluation_epsilon = 0)
policy.change_phase(RunPhase.TEST)
for i in range(100):
best_action = policy.get_action(np.array([10, 20, 30]))
assert best_action == 2
# verify that train phase gives uniform actions (exploration = 1)
policy.change_phase(RunPhase.TRAIN)
counters = np.array([0, 0, 0])
for i in range(30000):
best_action = policy.get_action(np.array([10, 20, 30]))
counters[best_action] += 1
assert np.all(counters > 9500) # this is noisy so we allow 5% error
# TODO: test continuous actions
@pytest.mark.unit_test
def test_change_phase():
# discrete control
action_space = DiscreteActionSpace(3)
epsilon_schedule = LinearSchedule(1.0, 0.1, 1000)
policy = EGreedy(action_space, epsilon_schedule, evaluation_epsilon=0.01)
# verify the schedule does not advance outside the training phase
assert policy.get_control_param() == 1.0
policy.change_phase(RunPhase.TEST)
best_action = policy.get_action(np.array([10, 20, 30]))
assert policy.epsilon_schedule.current_value == 1.0
policy.change_phase(RunPhase.HEATUP)
best_action = policy.get_action(np.array([10, 20, 30]))
assert policy.epsilon_schedule.current_value == 1.0
policy.change_phase(RunPhase.UNDEFINED)
best_action = policy.get_action(np.array([10, 20, 30]))
assert policy.epsilon_schedule.current_value == 1.0
@pytest.mark.unit_test
def test_get_control_param():
# discrete control
action_space = DiscreteActionSpace(3)
epsilon_schedule = LinearSchedule(1.0, 0.1, 1000)
policy = EGreedy(action_space, epsilon_schedule, evaluation_epsilon=0.01)
# verify schedule applies to TRAIN phase
policy.change_phase(RunPhase.TRAIN)
for i in range(999):
best_action = policy.get_action(np.array([10, 20, 30]))
assert 1.0 > policy.get_control_param() > 0.1
best_action = policy.get_action(np.array([10, 20, 30]))
assert policy.get_control_param() == 0.1
# test phases
policy.change_phase(RunPhase.TEST)
assert policy.get_control_param() == 0.01
policy.change_phase(RunPhase.TRAIN)
assert policy.get_control_param() == 0.1
policy.change_phase(RunPhase.HEATUP)
assert policy.get_control_param() == 0.1
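
The phase behavior verified above matches the textbook epsilon-greedy rule, explore uniformly with probability epsilon and act greedily otherwise; a minimal sketch:

import numpy as np

def e_greedy(q_values, epsilon):
    if np.random.rand() < epsilon:
        return np.random.randint(len(q_values))  # explore: uniform random action
    return int(np.argmax(q_values))              # exploit: greedy action

assert e_greedy(np.array([10, 20, 30]), epsilon=0.0) == 2  # epsilon 0 -> always greedy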

@@ -0,0 +1,34 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
import pytest
from rl_coach.spaces import DiscreteActionSpace, BoxActionSpace
from rl_coach.exploration_policies.greedy import Greedy
import numpy as np
@pytest.mark.unit_test
def test_get_action():
# discrete control
action_space = DiscreteActionSpace(3)
policy = Greedy(action_space)
best_action = policy.get_action(np.array([10, 20, 30]))
assert best_action == 2
# continuous control
action_space = BoxActionSpace(np.array([10]))
policy = Greedy(action_space)
best_action = policy.get_action(np.array([1, 1, 1]))
assert np.all(best_action == np.array([1, 1, 1]))
@pytest.mark.unit_test
def test_get_control_param():
action_space = DiscreteActionSpace(3)
policy = Greedy(action_space)
assert policy.get_control_param() == 0

@@ -0,0 +1,85 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
import pytest
from rl_coach.spaces import DiscreteActionSpace, BoxActionSpace
from rl_coach.exploration_policies.ou_process import OUProcess
from rl_coach.core_types import RunPhase
import numpy as np
@pytest.mark.unit_test
def test_init():
# discrete control
action_space = DiscreteActionSpace(3)
# OU process doesn't work for discrete controls
with pytest.raises(ValueError):
policy = OUProcess(action_space, mu=0, theta=0.1, sigma=0.2, dt=0.01)
@pytest.mark.unit_test
def test_get_action():
action_space = BoxActionSpace(np.array([10]), -1, 1)
policy = OUProcess(action_space, mu=0, theta=0.1, sigma=0.2, dt=0.01)
# make sure no noise is added in the testing phase
policy.change_phase(RunPhase.TEST)
assert np.all(policy.get_action(np.zeros((10,))) == np.zeros((10,)))
rand_action = np.random.rand(10)
assert np.all(policy.get_action(rand_action) == rand_action)
# make sure the noise added in the training phase matches the golden
policy.change_phase(RunPhase.TRAIN)
np.random.seed(0)
targets = [
    [0.03528105, 0.00800314, 0.01957476, 0.04481786, 0.03735116, -0.01954556, 0.01900177, -0.00302714, -0.00206438, 0.00821197],
    [0.03812664, 0.03708061, 0.03477594, 0.04720655, 0.04619107, -0.01285253, 0.04886435, -0.00712728, 0.00419904, -0.00887816],
    [-0.01297129, 0.0501159, 0.05202989, 0.03231604, 0.09153997, -0.04192699, 0.04973065, -0.01086383, 0.03485043, 0.0205179],
    [-0.00985937, 0.05762904, 0.03422214, -0.00733221, 0.08449019, -0.03875808, 0.07428674, 0.01319463, 0.02706904, 0.01445132],
    [-3.08205658e-02, 2.91710492e-02, 6.25166679e-05, 3.16906342e-02, 7.42126579e-02, -4.74808080e-02, 4.91565431e-02, 2.87312413e-02, -5.23598615e-03, 1.01820670e-02],
    [-0.04869908, 0.03687993, -0.01015365, 0.0080463, 0.0735748, -0.03886669, 0.05043773, 0.03475195, -0.01791719, 0.00291706],
    [-0.06209959, 0.02965198, -0.02640642, -0.0264874, 0.07704975, -0.04686344, 0.01778333, 0.04397284, -0.03604524, 0.00395305],
    [-0.04745568, 0.03220199, -0.003592, -0.05115743, 0.08501953, -0.06051278, 0.0003496, 0.03235188, -0.04224025, 0.00507241],
    [-0.07071122, 0.05018632, 0.00572484, -0.08183114, 0.11469956, -0.02253448, 0.02392484, 0.02872103, -0.06361306, 0.02615637],
    [-0.07870404, 0.07458503, 0.00988462, -0.06221653, 0.12171218, -0.00838049, 0.02411092, 0.06440972, -0.0610112, 0.03417],
    [-0.04096233, 0.04755527, -0.01553497, -0.04276638, 0.098128, 0.03050032, 0.01581443, 0.04939621, -0.02249135, 0.06374613],
    [-0.00357018, 0.06562861, -0.03274395, -0.00452232, 0.09266981, 0.04651895, 0.03474365, 0.04624661, -0.01018727, 0.08212651],
]
for i in range(10):
current_noise = policy.get_action(np.zeros((10,)))
assert np.all(np.abs(current_noise - targets[i]) < 1e-7)
# get some statistics. check very roughly that the mean acts according to the definition of the policy
# mean of 0
vals = []
for i in range(50000):
current_noise = policy.get_action(np.zeros((10,)))
vals.append(current_noise)
assert np.all(np.abs(np.mean(vals, axis=0)) < 1)
# mean of 10
policy = OUProcess(action_space, mu=10, theta=0.1, sigma=0.2, dt=0.01)
policy.change_phase(RunPhase.TRAIN)
vals = []
for i in range(50000):
current_noise = policy.get_action(np.zeros((10,)))
vals.append(current_noise)
assert np.all(np.abs(np.mean(vals, axis=0) - 10) < 1)
# plot the noise values - only used for understanding how the noise actually looks
# import matplotlib.pyplot as plt
# vals = np.array(vals)
# for i in range(10):
# plt.plot(list(range(10000)), vals[:, i])
# plt.plot(list(range(10000)), vals[:, i])
# plt.plot(list(range(10000)), vals[:, i])
# plt.show()
if __name__ == "__main__":
test_get_action()
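
The mu/theta/sigma/dt parameters above correspond to the standard Euler discretization of an Ornstein-Uhlenbeck process; a sketch of the usual update rule (rl_coach's internals may differ in detail):

import numpy as np

def ou_step(x, mu=0.0, theta=0.1, sigma=0.2, dt=0.01):
    # mean-reverting drift toward mu plus Brownian noise scaled by sqrt(dt)
    return x + theta * (mu - x) * dt + sigma * np.sqrt(dt) * np.random.normal(size=x.shape)

x = np.zeros(10)
for _ in range(1000):
    x = ou_step(x)  # hovers around mu=0 in the long run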

@@ -0,0 +1,44 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
import pytest
from rl_coach.filters.action.attention_discretization import AttentionDiscretization
from rl_coach.spaces import BoxActionSpace, DiscreteActionSpace, AttentionActionSpace
import numpy as np
@pytest.mark.unit_test
def test_filter():
filter = AttentionDiscretization(2)
# passing an output space that is wrong
with pytest.raises(ValueError):
filter.validate_output_action_space(DiscreteActionSpace(10))
with pytest.raises(ValueError):
filter.validate_output_action_space(BoxActionSpace(10))
# 2 dimensional attention space
output_space = AttentionActionSpace(2, 0, 83)
input_space = filter.get_unfiltered_action_space(output_space)
assert np.all(filter.target_actions == np.array([[[0., 0.], [41.5, 41.5]],
[[0., 41.5], [41.5, 83.]],
[[41.5, 0], [83., 41.5]],
[[41.5, 41.5], [83., 83.]]]))
assert input_space.actions == list(range(4))
action = 2
result = filter.filter(action)
assert np.all(result == np.array([[41.5, 0], [83., 41.5]]))
assert output_space.val_matches_space_definition(result)
# force int bins
filter = AttentionDiscretization(2, force_int_bins=True)
input_space = filter.get_unfiltered_action_space(output_space)
assert np.all(filter.target_actions == np.array([[[0., 0.], [41, 41]],
[[0., 41], [41, 83.]],
[[41, 0], [83., 41]],
[[41, 41], [83., 83.]]]))
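
The expected target_actions above follow from splitting each axis of the [0, 83] range into 2 bins, so the bin edges form an evenly spaced grid; a sketch of that arithmetic (not the filter's actual code):

import numpy as np

edges = np.linspace(0, 83, 2 + 1)  # [0., 41.5, 83.] -- 2 bins per axis
boxes = [(edges[i], edges[i + 1]) for i in range(2)]
assert boxes == [(0.0, 41.5), (41.5, 83.0)]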

@@ -0,0 +1,45 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
import pytest
from rl_coach.filters.action.box_discretization import BoxDiscretization
from rl_coach.spaces import BoxActionSpace, DiscreteActionSpace
@pytest.mark.unit_test
def test_filter():
filter = BoxDiscretization(9)
# passing an output space that is wrong
with pytest.raises(ValueError):
filter.validate_output_action_space(DiscreteActionSpace(10))
# 1 dimensional box
output_space = BoxActionSpace(1, 5, 15)
input_space = filter.get_unfiltered_action_space(output_space)
assert filter.target_actions == [[5.], [6.25], [7.5], [8.75], [10.], [11.25], [12.5], [13.75], [15.]]
assert input_space.actions == list(range(9))
action = 2
result = filter.filter(action)
assert result == [7.5]
assert output_space.val_matches_space_definition(result)
# 2 dimensional box
filter = BoxDiscretization(3)
output_space = BoxActionSpace(2, 5, 15)
input_space = filter.get_unfiltered_action_space(output_space)
assert filter.target_actions == [[5., 5.], [5., 10.], [5., 15.],
[10., 5.], [10., 10.], [10., 15.],
[15., 5.], [15., 10.], [15., 15.]]
assert input_space.actions == list(range(9))
action = 2
result = filter.filter(action)
assert result == [5., 15.]
assert output_space.val_matches_space_definition(result)
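
The expected targets above are just evenly spaced grids over the box: np.linspace for the 1D case, and its Cartesian product for the 2D case; a quick check of that assumption:

import numpy as np
from itertools import product

assert np.linspace(5, 15, 9).tolist() == [5., 6.25, 7.5, 8.75, 10., 11.25, 12.5, 13.75, 15.]
grid = [list(p) for p in product(np.linspace(5, 15, 3), repeat=2)]
assert grid[2] == [5., 15.]  # matches filter.filter(2) above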

@@ -0,0 +1,27 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
import pytest
from rl_coach.filters.action.box_masking import BoxMasking
from rl_coach.spaces import BoxActionSpace, DiscreteActionSpace
import numpy as np
@pytest.mark.unit_test
def test_filter():
filter = BoxMasking(10, 20)
# passing an output space that is wrong
with pytest.raises(ValueError):
filter.validate_output_action_space(DiscreteActionSpace(10))
# 1 dimensional box
output_space = BoxActionSpace(1, 5, 30)
input_space = filter.get_unfiltered_action_space(output_space)
action = np.array([2])
result = filter.filter(action)
assert result == np.array([12])
assert output_space.val_matches_space_definition(result)
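
From the assertion above (a zero-based action of 2 maps to 12), the masking appears to shift actions into the [10, 20] sub-range by adding the mask's lower bound; a sketch of that inference, not the filter's code:

import numpy as np

mask_low = 10
action = np.array([2])
assert (mask_low + action) == np.array([12])  # inferred offset behavior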

@@ -0,0 +1,29 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
import pytest
from rl_coach.filters.action.linear_box_to_box_map import LinearBoxToBoxMap
from rl_coach.spaces import BoxActionSpace, DiscreteActionSpace
import numpy as np
@pytest.mark.unit_test
def test_filter():
filter = LinearBoxToBoxMap(10, 20)
# passing an output space that is wrong
with pytest.raises(ValueError):
filter.validate_output_action_space(DiscreteActionSpace(10))
# 1 dimensional box
output_space = BoxActionSpace(1, 5, 35)
input_space = filter.get_unfiltered_action_space(output_space)
action = np.array([12])
result = filter.filter(action)
assert result == np.array([11])
assert output_space.val_matches_space_definition(result)
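
The assertion is consistent with a linear map from the input box [10, 20] onto the output box [5, 35]; the usual interpolation formula, as a sketch:

import numpy as np

in_low, in_high, out_low, out_high = 10., 20., 5., 35.
a = np.array([12.])
mapped = out_low + (a - in_low) * (out_high - out_low) / (in_high - in_low)
assert mapped == np.array([11.])  # matches the filtered result above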

@@ -0,0 +1,90 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
import pytest
import numpy as np
from rl_coach.filters.observation.observation_crop_filter import ObservationCropFilter
from rl_coach.filters.filter import InputFilter
from rl_coach.spaces import ObservationSpace
from rl_coach.core_types import EnvResponse
@pytest.fixture
def env_response():
observation = np.random.rand(10, 20, 30)
return EnvResponse(next_state={'observation': observation}, reward=0, game_over=False)
@pytest.mark.unit_test
def test_filter(env_response):
crop_low = np.array([0, 5, 10])
crop_high = np.array([5, 10, 20])
crop_filter = InputFilter()
crop_filter.add_observation_filter('observation', 'crop', ObservationCropFilter(crop_low, crop_high))
result = crop_filter.filter(env_response)[0]
unfiltered_observation = env_response.next_state['observation']
filtered_observation = result.next_state['observation']
# validate the shape of the filtered observation
assert filtered_observation.shape == (5, 5, 10)
# validate the content of the filtered observation
assert np.all(filtered_observation == unfiltered_observation[0:5, 5:10, 10:20])
# crop with -1 on some axes
crop_low = np.array([0, 0, 0])
crop_high = np.array([5, -1, -1])
crop_filter = InputFilter()
crop_filter.add_observation_filter('observation', 'crop', ObservationCropFilter(crop_low, crop_high))
result = crop_filter.filter(env_response)[0]
unfiltered_observation = env_response.next_state['observation']
filtered_observation = result.next_state['observation']
# validate the shape of the filtered observation
assert filtered_observation.shape == (5, 20, 30)
# validate the content of the filtered observation
assert np.all(filtered_observation == unfiltered_observation[0:5, :, :])
@pytest.mark.unit_test
def test_get_filtered_observation_space():
crop_low = np.array([0, 5, 10])
crop_high = np.array([5, 10, 20])
crop_filter = InputFilter()
crop_filter.add_observation_filter('observation', 'crop', ObservationCropFilter(crop_low, crop_high))
observation_space = ObservationSpace(np.array([5, 10, 20]))
filtered_observation_space = crop_filter.get_filtered_observation_space('observation', observation_space)
# make sure the new observation space shape is calculated correctly
assert np.all(filtered_observation_space.shape == np.array([5, 5, 10]))
# make sure the original observation space is unchanged
assert np.all(observation_space.shape == np.array([5, 10, 20]))
# crop_high is bigger than the observation space
high_error_observation_space = ObservationSpace(np.array([3, 8, 14]))
with pytest.raises(ValueError):
crop_filter.get_filtered_observation_space('observation', high_error_observation_space)
# crop_low is bigger than the observation space
low_error_observation_space = ObservationSpace(np.array([3, 3, 10]))
with pytest.raises(ValueError):
crop_filter.get_filtered_observation_space('observation', low_error_observation_space)
# crop with -1 on some axes
crop_low = np.array([0, 0, 0])
crop_high = np.array([5, -1, -1])
crop_filter = InputFilter()
crop_filter.add_observation_filter('observation', 'crop', ObservationCropFilter(crop_low, crop_high))
observation_space = ObservationSpace(np.array([5, 10, 20]))
filtered_observation_space = crop_filter.get_filtered_observation_space('observation', observation_space)
# make sure the new observation space shape is calculated correctly
assert np.all(filtered_observation_space.shape == np.array([5, 10, 20]))

@@ -0,0 +1,84 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
import pytest
import numpy as np
from rl_coach.filters.observation.observation_reduction_by_sub_parts_name_filter import ObservationReductionBySubPartsNameFilter
from rl_coach.spaces import VectorObservationSpace
from rl_coach.core_types import EnvResponse
from rl_coach.filters.filter import InputFilter
@pytest.mark.unit_test
def test_filter():
# Keep
observation_space = VectorObservationSpace(3, measurements_names=['a', 'b', 'c'])
env_response = EnvResponse(next_state={'observation': np.ones([3])}, reward=0, game_over=False)
reduction_filter = InputFilter()
reduction_filter.add_observation_filter('observation', 'reduce',
ObservationReductionBySubPartsNameFilter(
["a"],
ObservationReductionBySubPartsNameFilter.ReductionMethod.Keep
))
reduction_filter.get_filtered_observation_space('observation', observation_space)
result = reduction_filter.filter(env_response)[0]
unfiltered_observation = env_response.next_state['observation']
filtered_observation = result.next_state['observation']
# make sure the original observation is unchanged
assert unfiltered_observation.shape == (3,)
# validate the shape of the filtered observation
assert filtered_observation.shape == (1,)
# Discard
reduction_filter = InputFilter()
reduction_filter.add_observation_filter('observation', 'reduce',
ObservationReductionBySubPartsNameFilter(
["a"],
ObservationReductionBySubPartsNameFilter.ReductionMethod.Discard
))
reduction_filter.get_filtered_observation_space('observation', observation_space)
result = reduction_filter.filter(env_response)[0]
unfiltered_observation = env_response.next_state['observation']
filtered_observation = result.next_state['observation']
# make sure the original observation is unchanged
assert unfiltered_observation.shape == (3,)
# validate the shape of the filtered observation
assert filtered_observation.shape == (2,)
@pytest.mark.unit_test
def test_get_filtered_observation_space():
# Keep
observation_space = VectorObservationSpace(3, measurements_names=['a', 'b', 'c'])
env_response = EnvResponse(next_state={'observation': np.ones([3])}, reward=0, game_over=False)
reduction_filter = InputFilter()
reduction_filter.add_observation_filter('observation', 'reduce',
ObservationReductionBySubPartsNameFilter(
["a"],
ObservationReductionBySubPartsNameFilter.ReductionMethod.Keep
))
filtered_observation_space = reduction_filter.get_filtered_observation_space('observation', observation_space)
assert np.all(filtered_observation_space.shape == np.array([1]))
assert filtered_observation_space.measurements_names == ['a']
# Discard
observation_space = VectorObservationSpace(3, measurements_names=['a', 'b', 'c'])
env_response = EnvResponse(next_state={'observation': np.ones([3])}, reward=0, game_over=False)
reduction_filter = InputFilter()
reduction_filter.add_observation_filter('observation', 'reduce',
ObservationReductionBySubPartsNameFilter(
["a"],
ObservationReductionBySubPartsNameFilter.ReductionMethod.Discard
))
filtered_observation_space = reduction_filter.get_filtered_observation_space('observation', observation_space)
assert np.all(filtered_observation_space.shape == np.array([2]))
assert filtered_observation_space.measurements_names == ['b', 'c']

@@ -0,0 +1,66 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
import pytest
import numpy as np
from rl_coach.filters.observation.observation_rescale_size_by_factor_filter import ObservationRescaleSizeByFactorFilter, RescaleInterpolationType
from rl_coach.spaces import ObservationSpace
from rl_coach.core_types import EnvResponse
from rl_coach.filters.filter import InputFilter
@pytest.mark.filterwarnings('ignore:Conversion of')
@pytest.mark.unit_test
def test_filter():
# make an RGB observation smaller
env_response = EnvResponse(next_state={'observation': np.ones([20, 30, 3])}, reward=0, game_over=False)
rescale_filter = InputFilter()
rescale_filter.add_observation_filter('observation', 'rescale',
ObservationRescaleSizeByFactorFilter(0.5, RescaleInterpolationType.BILINEAR))
result = rescale_filter.filter(env_response)[0]
unfiltered_observation = env_response.next_state['observation']
filtered_observation = result.next_state['observation']
# make sure the original observation is unchanged
assert unfiltered_observation.shape == (20, 30, 3)
# validate the shape of the filtered observation
assert filtered_observation.shape == (10, 15, 3)
# make a grayscale observation bigger
env_response = EnvResponse(next_state={'observation': np.ones([20, 30])}, reward=0, game_over=False)
rescale_filter = InputFilter()
rescale_filter.add_observation_filter('observation', 'rescale',
ObservationRescaleSizeByFactorFilter(2, RescaleInterpolationType.BILINEAR))
result = rescale_filter.filter(env_response)[0]
filtered_observation = result.next_state['observation']
# validate the shape of the filtered observation
assert filtered_observation.shape == (40, 60)
assert np.all(filtered_observation == np.ones([40, 60]))
@pytest.mark.unit_test
def test_get_filtered_observation_space():
# error on wrong number of channels
rescale_filter = InputFilter()
rescale_filter.add_observation_filter('observation', 'rescale',
ObservationRescaleSizeByFactorFilter(0.5, RescaleInterpolationType.BILINEAR))
observation_space = ObservationSpace(np.array([10, 20, 5]))
with pytest.raises(ValueError):
filtered_observation_space = rescale_filter.get_filtered_observation_space('observation', observation_space)
# error on wrong number of dimensions
observation_space = ObservationSpace(np.array([10, 20, 10, 3]))
with pytest.raises(ValueError):
filtered_observation_space = rescale_filter.get_filtered_observation_space('observation', observation_space)
# make sure the new observation space shape is calculated correctly
observation_space = ObservationSpace(np.array([10, 20, 3]))
filtered_observation_space = rescale_filter.get_filtered_observation_space('observation', observation_space)
assert np.all(filtered_observation_space.shape == np.array([5, 10, 3]))
# make sure the original observation space is unchanged
assert np.all(observation_space.shape == np.array([10, 20, 3]))

@@ -0,0 +1,106 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
import pytest
import numpy as np
from rl_coach.filters.observation.observation_rescale_to_size_filter import ObservationRescaleToSizeFilter, RescaleInterpolationType
from rl_coach.spaces import ObservationSpace, ImageObservationSpace, PlanarMapsObservationSpace
from rl_coach.core_types import EnvResponse
from rl_coach.filters.filter import InputFilter
@pytest.mark.filterwarnings('ignore:Conversion of')
@pytest.mark.unit_test
def test_filter():
# make an RGB observation smaller
transition = EnvResponse(next_state={'observation': np.ones([20, 30, 3])}, reward=0, game_over=False)
rescale_filter = InputFilter()
rescale_filter.add_observation_filter('observation', 'rescale',
ObservationRescaleToSizeFilter(ImageObservationSpace(np.array([10, 20, 3]),
high=255),
RescaleInterpolationType.BILINEAR))
result = rescale_filter.filter(transition)[0]
unfiltered_observation = transition.next_state['observation']
filtered_observation = result.next_state['observation']
# make sure the original observation is unchanged
assert unfiltered_observation.shape == (20, 30, 3)
# validate the shape of the filtered observation
assert filtered_observation.shape == (10, 20, 3)
assert np.all(filtered_observation == np.ones([10, 20, 3]))
# make a grayscale observation bigger
transition = EnvResponse(next_state={'observation': np.ones([20, 30])}, reward=0, game_over=False)
rescale_filter = InputFilter()
rescale_filter.add_observation_filter('observation', 'rescale',
ObservationRescaleToSizeFilter(ImageObservationSpace(np.array([40, 60]),
high=255),
RescaleInterpolationType.BILINEAR))
result = rescale_filter.filter(transition)[0]
filtered_observation = result.next_state['observation']
# validate the shape of the filtered observation
assert filtered_observation.shape == (40, 60)
assert np.all(filtered_observation == np.ones([40, 60]))
# rescale channels -> error
# with pytest.raises(ValueError):
# InputFilter(
# observation_filters=OrderedDict([('rescale',
# ObservationRescaleToSizeFilter(ImageObservationSpace(np.array([10, 20, 1]),
# high=255),
# RescaleInterpolationType.BILINEAR))]))
# TODO: validate input to filter
# different number of axes -> error
# env_response = EnvResponse(state={'observation': np.ones([20, 30, 3])}, reward=0, game_over=False)
# rescale_filter = ObservationRescaleToSizeFilter(ObservationSpace(np.array([10, 20])),
# RescaleInterpolationType.BILINEAR)
# with pytest.raises(ValueError):
# result = rescale_filter.filter(transition)
# channels first -> error
with pytest.raises(ValueError):
ObservationRescaleToSizeFilter(ImageObservationSpace(np.array([3, 10, 20]), high=255),
RescaleInterpolationType.BILINEAR)
@pytest.mark.unit_test
def test_get_filtered_observation_space():
# error on wrong number of channels
with pytest.raises(ValueError):
observation_filters = InputFilter()
observation_filters.add_observation_filter('observation', 'rescale',
ObservationRescaleToSizeFilter(ImageObservationSpace(np.array([5, 10, 5]),
high=255),
RescaleInterpolationType.BILINEAR))
# mismatch and wrong number of channels
rescale_filter = InputFilter()
rescale_filter.add_observation_filter('observation', 'rescale',
ObservationRescaleToSizeFilter(ImageObservationSpace(np.array([5, 10, 3]),
high=255),
RescaleInterpolationType.BILINEAR))
observation_space = PlanarMapsObservationSpace(np.array([10, 20, 5]), low=0, high=255)
with pytest.raises(ValueError):
rescale_filter.get_filtered_observation_space('observation', observation_space)
# error on wrong number of dimensions
observation_space = ObservationSpace(np.array([10, 20, 10, 3]), high=255)
with pytest.raises(ValueError):
rescale_filter.get_filtered_observation_space('observation', observation_space)
# make sure the new observation space shape is calculated correctly
observation_space = ImageObservationSpace(np.array([10, 20, 3]), high=255)
filtered_observation_space = rescale_filter.get_filtered_observation_space('observation', observation_space)
assert np.all(filtered_observation_space.shape == np.array([5, 10, 3]))
# make sure the original observation space is unchanged
assert np.all(observation_space.shape == np.array([10, 20, 3]))
# TODO: test that the type of the observation space stays the same

@@ -0,0 +1,47 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
import pytest
import numpy as np
from rl_coach.filters.observation.observation_rgb_to_y_filter import ObservationRGBToYFilter
from rl_coach.spaces import ObservationSpace
from rl_coach.core_types import EnvResponse
from rl_coach.filters.filter import InputFilter
@pytest.fixture
def rgb_to_y_filter():
rgb_to_y_filter = InputFilter()
rgb_to_y_filter.add_observation_filter('observation', 'rgb_to_y', ObservationRGBToYFilter())
return rgb_to_y_filter
@pytest.mark.unit_test
def test_filter(rgb_to_y_filter):
# convert RGB observation to grayscale
observation = np.random.rand(20, 30, 3)*255.0
transition = EnvResponse(next_state={'observation': observation}, reward=0, game_over=False)
result = rgb_to_y_filter.filter(transition)[0]
unfiltered_observation = transition.next_state['observation']
filtered_observation = result.next_state['observation']
# make sure the original observation is unchanged
assert unfiltered_observation.shape == (20, 30, 3)
# make sure the filtering is done correctly
assert filtered_observation.shape == (20, 30)
@pytest.mark.unit_test
def test_get_filtered_observation_space(rgb_to_y_filter):
# error on observation space which are not RGB
observation_space = ObservationSpace(np.array([1, 2, 4]), 0, 100)
with pytest.raises(ValueError):
rgb_to_y_filter.get_filtered_observation_space('observation', observation_space)
observation_space = ObservationSpace(np.array([1, 2, 3]), 0, 100)
result = rgb_to_y_filter.get_filtered_observation_space('observation', observation_space)
assert np.all(result.shape == np.array([1, 2]))
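
The 'Y' here is the luma channel of YUV; the conventional BT.601 weighting (which the filter presumably applies, or something close to it) collapses the channel axis:

import numpy as np

rgb = np.random.rand(20, 30, 3) * 255.0
y = 0.299 * rgb[..., 0] + 0.587 * rgb[..., 1] + 0.114 * rgb[..., 2]
assert y.shape == (20, 30)  # channel axis gone, as the shape assertions expect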

@@ -0,0 +1,72 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
import pytest
import numpy as np
from rl_coach.filters.observation.observation_squeeze_filter import ObservationSqueezeFilter
from rl_coach.spaces import ObservationSpace
from rl_coach.core_types import EnvResponse
from rl_coach.filters.filter import InputFilter
@pytest.mark.unit_test
def test_filter():
# squeeze out singleton axes from the observation
squeeze_filter = InputFilter()
squeeze_filter.add_observation_filter('observation', 'squeeze', ObservationSqueezeFilter())
squeeze_filter_with_axis = InputFilter()
squeeze_filter_with_axis.add_observation_filter('observation', 'squeeze', ObservationSqueezeFilter(2))
observation = np.random.rand(20, 30, 1, 3)
env_response = EnvResponse(next_state={'observation': observation}, reward=0, game_over=False)
result = squeeze_filter.filter(env_response)[0]
result_with_axis = squeeze_filter_with_axis.filter(env_response)[0]
unfiltered_observation_shape = env_response.next_state['observation'].shape
filtered_observation_shape = result.next_state['observation'].shape
filtered_observation_with_axis_shape = result_with_axis.next_state['observation'].shape
# make sure the original observation is unchanged
assert unfiltered_observation_shape == observation.shape
# make sure the filtering is done correctly
assert filtered_observation_shape == (20, 30, 3)
assert filtered_observation_with_axis_shape == (20, 30, 3)
observation = np.random.rand(1, 30, 1, 3)
env_response = EnvResponse(next_state={'observation': observation}, reward=0, game_over=False)
result = squeeze_filter.filter(env_response)[0]
assert result.next_state['observation'].shape == (30, 3)
@pytest.mark.unit_test
def test_get_filtered_observation_space():
# error on observation space with shape not matching the filter squeeze axis configuration
squeeze_filter = InputFilter()
squeeze_filter.add_observation_filter('observation', 'squeeze', ObservationSqueezeFilter(axis=3))
observation_space = ObservationSpace(np.array([20, 1, 30, 3]), 0, 100)
small_observation_space = ObservationSpace(np.array([20, 1, 30]), 0, 100)
with pytest.raises(ValueError):
    squeeze_filter.get_filtered_observation_space('observation', observation_space)
with pytest.raises(ValueError):
    squeeze_filter.get_filtered_observation_space('observation', small_observation_space)
# verify output observation space is correct
observation_space = ObservationSpace(np.array([1, 2, 3, 1]), 0, 200)
result = squeeze_filter.get_filtered_observation_space('observation', observation_space)
assert np.all(result.shape == np.array([1, 2, 3]))
squeeze_filter = InputFilter()
squeeze_filter.add_observation_filter('observation', 'squeeze', ObservationSqueezeFilter())
result = squeeze_filter.get_filtered_observation_space('observation', observation_space)
assert np.all(result.shape == np.array([2, 3]))
if __name__ == '__main__':
test_filter()
test_get_filtered_observation_space()

@@ -0,0 +1,78 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
import pytest
import numpy as np
from rl_coach.filters.observation.observation_stacking_filter import ObservationStackingFilter
from rl_coach.spaces import ObservationSpace
from rl_coach.core_types import EnvResponse
from rl_coach.filters.filter import InputFilter
@pytest.fixture
def env_response():
observation = np.random.rand(20, 30, 1)
return EnvResponse(next_state={'observation': observation}, reward=0, game_over=False)
@pytest.fixture
def stack_filter():
stack_filter = InputFilter()
stack_filter.add_observation_filter('observation', 'stack', ObservationStackingFilter(4, stacking_axis=-1))
return stack_filter
@pytest.mark.unit_test
def test_filter(stack_filter, env_response):
# stack observation on empty stack
result = stack_filter.filter(env_response)[0]
unfiltered_observation = env_response.next_state['observation']
filtered_observation = result.next_state['observation']
# validate that the shape of the unfiltered observation is unchanged
assert unfiltered_observation.shape == (20, 30, 1)
assert np.array(filtered_observation).shape == (20, 30, 1, 4)
assert np.all(np.array(filtered_observation)[:, :, :, -1] == unfiltered_observation)
# stack observation on non-empty stack
result = stack_filter.filter(env_response)[0]
filtered_observation = result.next_state['observation']
assert np.array(filtered_observation).shape == (20, 30, 1, 4)
@pytest.mark.unit_test
def test_get_filtered_observation_space(stack_filter, env_response):
observation_space = ObservationSpace(np.array([5, 10, 20]))
filtered_observation_space = stack_filter.get_filtered_observation_space('observation', observation_space)
# make sure the new observation space shape is calculated correctly
assert np.all(filtered_observation_space.shape == np.array([5, 10, 20, 4]))
# make sure the original observation space is unchanged
assert np.all(observation_space.shape == np.array([5, 10, 20]))
# call after stack is already created with non-matching shape -> error
result = stack_filter.filter(env_response)[0]
with pytest.raises(ValueError):
filtered_observation_space = stack_filter.get_filtered_observation_space('observation', observation_space)
@pytest.mark.unit_test
def test_reset(stack_filter, env_response):
# stack observation on empty stack
result = stack_filter.filter(env_response)[0]
unfiltered_observation = env_response.next_state['observation']
filtered_observation = result.next_state['observation']
assert np.all(np.array(filtered_observation)[:, :, :, -1] == unfiltered_observation)
# reset and make sure the outputs are correct
stack_filter.reset()
unfiltered_observation = np.random.rand(20, 30, 1)
new_env_response = EnvResponse(next_state={'observation': unfiltered_observation}, reward=0, game_over=False)
result = stack_filter.filter(new_env_response)[0]
filtered_observation = result.next_state['observation']
assert np.all(np.array(filtered_observation)[:, :, :, 0] == unfiltered_observation)
assert np.all(np.array(filtered_observation)[:, :, :, -1] == unfiltered_observation)

@@ -0,0 +1,50 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
import pytest
import numpy as np
from rl_coach.filters.observation.observation_to_uint8_filter import ObservationToUInt8Filter
from rl_coach.spaces import ObservationSpace
from rl_coach.core_types import EnvResponse
from rl_coach.filters.filter import InputFilter
@pytest.mark.unit_test
def test_filter():
# convert a float observation to uint8
uint8_filter = InputFilter()
uint8_filter.add_observation_filter('observation', 'to_uint8', ObservationToUInt8Filter(input_low=0, input_high=255))
observation = np.random.rand(20, 30, 3)*255.0
env_response = EnvResponse(next_state={'observation': observation}, reward=0, game_over=False)
result = uint8_filter.filter(env_response)[0]
unfiltered_observation = env_response.next_state['observation']
filtered_observation = result.next_state['observation']
# make sure the original observation is unchanged
assert unfiltered_observation.dtype == 'float64'
# make sure the filtering is done correctly
assert filtered_observation.dtype == 'uint8'
assert np.all(filtered_observation == observation.astype('uint8'))
@pytest.mark.unit_test
def test_get_filtered_observation_space():
# error on observation space with values not matching the filter configuration
uint8_filter = InputFilter()
uint8_filter.add_observation_filter('observation', 'to_uint8', ObservationToUInt8Filter(input_low=0, input_high=200))
observation_space = ObservationSpace(np.array([1, 2, 3]), 0, 100)
with pytest.raises(ValueError):
uint8_filter.get_filtered_observation_space('observation', observation_space)
# verify output observation space is correct
observation_space = ObservationSpace(np.array([1, 2, 3]), 0, 200)
result = uint8_filter.get_filtered_observation_space('observation', observation_space)
assert np.all(result.high == 255)
assert np.all(result.low == 0)
assert np.all(result.shape == observation_space.shape)
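
Conceptually the filter maps the declared [low, high] range onto [0, 255] and casts to uint8; with input_low=0 and input_high=255 that reduces to a plain cast, which is why test_filter compares against observation.astype('uint8'). A sketch of the general case, under that assumption:

import numpy as np

def to_uint8(obs, low, high):
    # rescale [low, high] linearly onto [0, 255], then cast
    return ((obs - low) / (high - low) * 255.0).astype('uint8')

obs = np.random.rand(20, 30, 3) * 200.0
out = to_uint8(obs, 0, 200)
assert out.dtype == np.uint8 and out.shape == obs.shape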

@@ -0,0 +1,74 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
import pytest
import numpy as np
from rl_coach.filters.reward.reward_clipping_filter import RewardClippingFilter
from rl_coach.spaces import RewardSpace
from rl_coach.core_types import EnvResponse
from collections import OrderedDict
from rl_coach.filters.filter import InputFilter
@pytest.fixture
def clip_filter():
return InputFilter(reward_filters=OrderedDict([('clip', RewardClippingFilter(2, 10))]))
@pytest.mark.unit_test
def test_filter(clip_filter):
transition = EnvResponse(next_state={'observation': np.zeros(10)}, reward=100, game_over=False)
result = clip_filter.filter(transition)[0]
unfiltered_reward = transition.reward
filtered_reward = result.reward
# validate that the reward was clipped correctly
assert filtered_reward == 10
# make sure the original reward is unchanged
assert unfiltered_reward == 100
# reward in bounds
transition = EnvResponse(next_state={'observation': np.zeros(10)}, reward=5, game_over=False)
result = clip_filter.filter(transition)[0]
assert result.reward == 5
# reward below bounds
transition = EnvResponse(next_state={'observation': np.zeros(10)}, reward=-5, game_over=False)
result = clip_filter.filter(transition)[0]
assert result.reward == 2
@pytest.mark.unit_test
def test_get_filtered_reward_space(clip_filter):
# reward is clipped
reward_space = RewardSpace(1, -100, 100)
filtered_reward_space = clip_filter.get_filtered_reward_space(reward_space)
# make sure the new reward space shape is calculated correctly
assert filtered_reward_space.shape == 1
assert filtered_reward_space.low == 2
assert filtered_reward_space.high == 10
# reward is unclipped
reward_space = RewardSpace(1, 5, 7)
filtered_reward_space = clip_filter.get_filtered_reward_space(reward_space)
# make sure the new reward space shape is calculated correctly
assert filtered_reward_space.shape == 1
assert filtered_reward_space.low == 5
assert filtered_reward_space.high == 7
# infinite reward is clipped
reward_space = RewardSpace(1, -np.inf, np.inf)
filtered_reward_space = clip_filter.get_filtered_reward_space(reward_space)
# make sure the new reward space shape is calculated correctly
assert filtered_reward_space.shape == 1
assert filtered_reward_space.low == 2
assert filtered_reward_space.high == 10
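
The three cases above are exactly np.clip semantics over the (2, 10) bounds:

import numpy as np

assert np.clip(100, 2, 10) == 10  # above the bounds -> high
assert np.clip(5, 2, 10) == 5     # inside the bounds -> unchanged
assert np.clip(-5, 2, 10) == 2    # below the bounds -> low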

@@ -0,0 +1,56 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
import pytest
import numpy as np
from rl_coach.filters.reward.reward_rescale_filter import RewardRescaleFilter
from rl_coach.spaces import RewardSpace
from rl_coach.core_types import EnvResponse
from rl_coach.filters.filter import InputFilter
from collections import OrderedDict
@pytest.mark.unit_test
def test_filter():
rescale_filter = InputFilter(reward_filters=OrderedDict([('rescale', RewardRescaleFilter(1/10.))]))
env_response = EnvResponse(next_state={'observation': np.zeros(10)}, reward=100, game_over=False)
result = rescale_filter.filter(env_response)[0]
unfiltered_reward = env_response.reward
filtered_reward = result.reward
# validate that the reward was rescaled correctly
assert filtered_reward == 10
# make sure the original reward is unchanged
assert unfiltered_reward == 100
# negative reward
env_response = EnvResponse(next_state={'observation': np.zeros(10)}, reward=-50, game_over=False)
result = rescale_filter.filter(env_response)[0]
assert result.reward == -5
@pytest.mark.unit_test
def test_get_filtered_reward_space():
rescale_filter = InputFilter(reward_filters=OrderedDict([('rescale', RewardRescaleFilter(1/10.))]))
# reward is rescaled
reward_space = RewardSpace(1, -100, 100)
filtered_reward_space = rescale_filter.get_filtered_reward_space(reward_space)
# make sure the new reward space shape is calculated correctly
assert filtered_reward_space.shape == 1
assert filtered_reward_space.low == -10
assert filtered_reward_space.high == 10
# unbounded rewards
reward_space = RewardSpace(1, -np.inf, np.inf)
filtered_reward_space = rescale_filter.get_filtered_reward_space(reward_space)
# make sure the new reward space shape is calculated correctly
assert filtered_reward_space.shape == 1
assert filtered_reward_space.low == -np.inf
assert filtered_reward_space.high == np.inf

@@ -0,0 +1,70 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
import pytest
from rl_coach.filters.observation.observation_rescale_to_size_filter import ObservationRescaleToSizeFilter, RescaleInterpolationType
from rl_coach.filters.observation.observation_crop_filter import ObservationCropFilter
from rl_coach.filters.reward.reward_clipping_filter import RewardClippingFilter
from rl_coach.filters.observation.observation_stacking_filter import ObservationStackingFilter
from rl_coach.filters.filter import InputFilter
from rl_coach.spaces import ImageObservationSpace
import numpy as np
from rl_coach.core_types import EnvResponse
from collections import OrderedDict
@pytest.mark.filterwarnings('ignore:Conversion of')
@pytest.mark.unit_test
def test_filter_stacking():
# test that filter stacking works fine by taking as input a transition with:
# - an observation of shape 210x160,
# - a reward of 100
# filtering it by:
# - rescaling the observation to 110x84
# - cropping the observation to 84x84
# - clipping the reward to 1
# - stacking 4 observations to get 84x84x4
env_response = EnvResponse({'observation': np.ones([210, 160])}, reward=100, game_over=False)
filter1 = ObservationRescaleToSizeFilter(
output_observation_space=ImageObservationSpace(np.array([110, 84]), high=255),
rescaling_interpolation_type=RescaleInterpolationType.BILINEAR
)
filter2 = ObservationCropFilter(
crop_low=np.array([16, 0]),
crop_high=np.array([100, 84])
)
filter3 = RewardClippingFilter(
clipping_low=-1,
clipping_high=1
)
output_filter = ObservationStackingFilter(
stack_size=4,
stacking_axis=-1
)
input_filter = InputFilter(
observation_filters={
"observation": OrderedDict([
("filter1", filter1),
("filter2", filter2),
("output_filter", output_filter)
])},
reward_filters=OrderedDict([
("filter3", filter3)
])
)
result = input_filter.filter(env_response)[0]
observation = np.array(result.next_state['observation'])
assert observation.shape == (84, 84, 4)
assert np.all(observation == np.ones([84, 84, 4]))
assert result.reward == 1

@@ -0,0 +1,355 @@
#
# Copyright (c) 2017 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import argparse
import glob
import os
import shutil
import signal
import subprocess
import sys
from importlib import import_module
from os import path
sys.path.append('.')
import numpy as np
import pandas as pd
import time
from rl_coach.logger import screen
def read_csv_paths(test_path, filename_pattern, read_csv_tries=50):
csv_paths = []
tries_counter = 0
while not csv_paths:
csv_paths = glob.glob(path.join(test_path, '*', filename_pattern))
if tries_counter > read_csv_tries:
break
tries_counter += 1
time.sleep(1)
return csv_paths
def clean_df(df):
if 'Wall-Clock Time' in df.keys():
df.drop(['Wall-Clock Time'], 1, inplace=True)
return df
def print_progress(averaged_rewards, last_num_episodes, preset_validation_params, start_time, args):
percentage = int((100 * last_num_episodes) / preset_validation_params.max_episodes_to_achieve_reward)
sys.stdout.write("\rReward: ({}/{})".format(round(averaged_rewards[-1], 1),
preset_validation_params.min_reward_threshold))
sys.stdout.write(' Time (sec): ({}/{})'.format(round(time.time() - start_time, 2), args.time_limit))
sys.stdout.write(' Episode: ({}/{})'.format(last_num_episodes,
preset_validation_params.max_episodes_to_achieve_reward))
sys.stdout.write(
' {}%|{}{}| '.format(percentage, '#' * int(percentage / 10), ' ' * (10 - int(percentage / 10))))
sys.stdout.flush()
def perform_reward_based_tests(args, preset_validation_params, preset_name):
win_size = 10
test_name = '__test_reward'
test_path = os.path.join('./experiments', test_name)
if path.exists(test_path):
shutil.rmtree(test_path)
# run the experiment in a separate thread
screen.log_title("Running test {}".format(preset_name))
log_file_name = 'test_log_{preset_name}.txt'.format(preset_name=preset_name)
cmd = (
'python3 rl_coach/coach.py '
'-p {preset_name} '
'-e {test_name} '
'-n {num_workers} '
'--seed 0 '
'-c '
'{level} '
'&> {log_file_name} '
).format(
preset_name=preset_name,
test_name=test_name,
num_workers=preset_validation_params.num_workers,
log_file_name=log_file_name,
level='-lvl ' + preset_validation_params.reward_test_level if preset_validation_params.reward_test_level else ''
)
p = subprocess.Popen(cmd, shell=True, executable="/bin/bash", preexec_fn=os.setsid)
start_time = time.time()
reward_str = 'Evaluation Reward'
if preset_validation_params.num_workers > 1:
filename_pattern = 'worker_0*.csv'
else:
filename_pattern = '*.csv'
test_passed = False
# get the csv with the results
csv_paths = read_csv_paths(test_path, filename_pattern)
if csv_paths:
csv_path = csv_paths[0]
# verify results
csv = None
time.sleep(1)
averaged_rewards = [0]
last_num_episodes = 0
if not args.no_progress_bar:
print_progress(averaged_rewards, last_num_episodes, preset_validation_params, start_time, args)
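        # keep polling the csv until the reward threshold is reached, the episode budget is
        # exhausted, or the time limit expires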
        while csv is None or (csv['Episode #'].values[-1] < preset_validation_params.max_episodes_to_achieve_reward
                              and time.time() - start_time < args.time_limit):
try:
csv = pd.read_csv(csv_path)
            except Exception:
# sometimes the csv is being written at the same time we are
# trying to read it. no problem -> try again
continue
if reward_str not in csv.keys():
continue
rewards = csv[reward_str].values
rewards = rewards[~np.isnan(rewards)]
if len(rewards) >= 1:
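                # running-mean smoothing; with fewer than win_size rewards the kernel still
                # divides by win_size, so early averages are downweighted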
averaged_rewards = np.convolve(rewards, np.ones(min(len(rewards), win_size)) / win_size, mode='valid')
else:
time.sleep(1)
continue
if not args.no_progress_bar:
print_progress(averaged_rewards, last_num_episodes, preset_validation_params, start_time, args)
if csv['Episode #'].shape[0] - last_num_episodes <= 0:
continue
last_num_episodes = csv['Episode #'].values[-1]
# check if reward is enough
if np.any(averaged_rewards >= preset_validation_params.min_reward_threshold):
test_passed = True
break
time.sleep(1)
# kill test and print result
os.killpg(os.getpgid(p.pid), signal.SIGTERM)
screen.log('')
if test_passed:
screen.success("Passed successfully")
else:
if time.time() - start_time > args.time_limit:
screen.error("Failed due to exceeding time limit", crash=False)
if args.verbose:
screen.error("command exitcode: {}".format(p.returncode), crash=False)
screen.error(open(log_file_name).read(), crash=False)
elif csv_paths:
screen.error("Failed due to insufficient reward", crash=False)
if args.verbose:
screen.error("command exitcode: {}".format(p.returncode), crash=False)
screen.error(open(log_file_name).read(), crash=False)
screen.error("preset_validation_params.max_episodes_to_achieve_reward: {}".format(
preset_validation_params.max_episodes_to_achieve_reward), crash=False)
screen.error("preset_validation_params.min_reward_threshold: {}".format(
preset_validation_params.min_reward_threshold), crash=False)
screen.error("averaged_rewards: {}".format(averaged_rewards), crash=False)
screen.error("episode number: {}".format(csv['Episode #'].values[-1]), crash=False)
else:
screen.error("csv file never found", crash=False)
if args.verbose:
screen.error("command exitcode: {}".format(p.returncode), crash=False)
screen.error(open(log_file_name).read(), crash=False)
shutil.rmtree(test_path)
os.remove(log_file_name)
return test_passed
def perform_trace_based_tests(args, preset_name, num_env_steps, level=None):
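    """Run a short fixed-seed experiment and diff its csv results against a stored trace,
    creating the trace first if one does not exist yet.
    """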
test_name = '__test_trace'
test_path = os.path.join('./experiments', test_name)
if path.exists(test_path):
shutil.rmtree(test_path)
    # run the experiment in a separate process
screen.log_title("Running test {}{}".format(preset_name, ' - ' + level if level else ''))
log_file_name = 'test_log_{preset_name}.txt'.format(preset_name=preset_name)
cmd = (
'python3 rl_coach/coach.py '
'-p {preset_name} '
'-e {test_name} '
'--seed 42 '
'-c '
'--no_summary '
'-cp {custom_param} '
'{level} '
'&> {log_file_name} '
).format(
preset_name=preset_name,
test_name=test_name,
log_file_name=log_file_name,
level='-lvl ' + level if level else '',
custom_param='\"improve_steps=EnvironmentSteps({n});'
'steps_between_evaluation_periods=EnvironmentSteps({n});'
'evaluation_steps=EnvironmentSteps(1);'
'heatup_steps=EnvironmentSteps(1024)\"'.format(n=num_env_steps)
)
p = subprocess.Popen(cmd, shell=True, executable="/bin/bash", preexec_fn=os.setsid)
p.wait()
filename_pattern = '*.csv'
# get the csv with the results
csv_paths = read_csv_paths(test_path, filename_pattern)
test_passed = False
if not csv_paths:
screen.error("csv file never found", crash=False)
if args.verbose:
screen.error("command exitcode: {}".format(p.returncode), crash=False)
screen.error(open(log_file_name).read(), crash=False)
else:
        trace_path = os.path.join('./rl_coach', 'traces', (preset_name + '_' + level) if level else preset_name, '')
if not os.path.exists(trace_path):
screen.log('No trace found, creating new trace in: {}'.format(trace_path))
os.makedirs(os.path.dirname(trace_path))
df = pd.read_csv(csv_paths[0])
df = clean_df(df)
df.to_csv(os.path.join(trace_path, 'trace.csv'), index=False)
screen.success("Successfully created new trace.")
test_passed = True
else:
test_df = pd.read_csv(csv_paths[0])
test_df = clean_df(test_df)
new_trace_csv_path = os.path.join(trace_path, 'trace_new.csv')
test_df.to_csv(new_trace_csv_path, index=False)
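            # round-trip the results through csv so dtypes and float formatting match the stored trace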
test_df = pd.read_csv(new_trace_csv_path)
trace_csv_path = glob.glob(path.join(trace_path, 'trace.csv'))
trace_csv_path = trace_csv_path[0]
trace_df = pd.read_csv(trace_csv_path)
test_passed = test_df.equals(trace_df)
if test_passed:
screen.success("Passed successfully.")
os.remove(new_trace_csv_path)
else:
screen.error("Trace test failed.", crash=False)
if args.overwrite:
os.remove(trace_csv_path)
os.rename(new_trace_csv_path, trace_csv_path)
screen.error("Overwriting old trace.", crash=False)
else:
screen.error("bcompare {} {}".format(trace_csv_path, new_trace_csv_path), crash=False)
shutil.rmtree(test_path)
os.remove(log_file_name)
return test_passed
def main():
parser = argparse.ArgumentParser()
parser.add_argument('-t', '--trace',
help="(flag) perform trace based testing",
action='store_true')
parser.add_argument('-p', '--preset',
help="(string) Name of a preset to run (as configured in presets.py)",
default=None,
type=str)
parser.add_argument('-ip', '--ignore_presets',
help="(string) Name of a preset(s) to ignore (comma separated, and as configured in presets.py)",
default=None,
type=str)
parser.add_argument('-v', '--verbose',
help="(flag) display verbose logs in the event of an error",
action='store_true')
parser.add_argument('--stop_after_first_failure',
help="(flag) stop executing tests after the first error",
action='store_true')
parser.add_argument('-tl', '--time_limit',
help="time limit for each test in minutes",
default=40, # setting time limit to be so high due to DDPG being very slow - its tests are long
type=int)
parser.add_argument('-np', '--no_progress_bar',
help="(flag) Don't print the progress bar (makes jenkins logs more readable)",
action='store_true')
parser.add_argument('-ow', '--overwrite',
help="(flag) overwrite old trace with new ones in trace testing mode",
action='store_true')
args = parser.parse_args()
if args.preset is not None:
presets_lists = [args.preset]
else:
# presets_lists = list_all_classes_in_module(presets)
        presets_lists = [f[:-3] for f in os.listdir(os.path.join('rl_coach', 'presets'))
                         if f.endswith('.py') and f != '__init__.py']
fail_count = 0
test_count = 0
args.time_limit = 60 * args.time_limit
if args.ignore_presets is not None:
presets_to_ignore = args.ignore_presets.split(',')
else:
presets_to_ignore = []
for idx, preset_name in enumerate(sorted(presets_lists)):
if args.stop_after_first_failure and fail_count > 0:
break
if preset_name not in presets_to_ignore:
try:
preset = import_module('rl_coach.presets.{}'.format(preset_name))
            except Exception:
if args.verbose:
screen.error("Failed to load preset <{}>".format(preset_name), crash=False)
continue
preset_validation_params = preset.graph_manager.preset_validation_params
if not args.trace and not preset_validation_params.test:
continue
            if args.trace:
                num_env_steps = preset_validation_params.trace_max_env_steps
                if preset_validation_params.trace_test_levels:
                    # count each level as its own test so failures in earlier levels are not masked
                    for level in preset_validation_params.trace_test_levels:
                        test_count += 1
                        test_passed = perform_trace_based_tests(args, preset_name, num_env_steps, level)
                        if not test_passed:
                            fail_count += 1
                else:
                    test_count += 1
                    test_passed = perform_trace_based_tests(args, preset_name, num_env_steps)
                    if not test_passed:
                        fail_count += 1
            else:
                test_count += 1
                test_passed = perform_reward_based_tests(args, preset_validation_params, preset_name)
                if not test_passed:
                    fail_count += 1
screen.separator()
if fail_count == 0:
screen.success(" Summary: " + str(test_count) + "/" + str(test_count) + " tests passed successfully")
else:
screen.error(" Summary: " + str(test_count - fail_count) + "/" + str(test_count) + " tests passed successfully")
if __name__ == '__main__':
main()

View File

@@ -0,0 +1,52 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
import tensorflow as tf
from rl_coach.base_parameters import TaskParameters, DistributedTaskParameters
from rl_coach.utils import get_open_port
from multiprocessing import Process
from tensorflow import logging
import pytest
logging.set_verbosity(logging.INFO)
@pytest.mark.unit_test
def test_basic_rl_graph_manager_with_pong_a3c():
tf.reset_default_graph()
from rl_coach.presets.Atari_A3C import graph_manager
assert graph_manager
graph_manager.env_params.level = "PongDeterministic-v4"
graph_manager.create_graph(task_parameters=TaskParameters(framework_type="tensorflow",
experiment_path="./experiments/test"))
# graph_manager.improve()
@pytest.mark.unit_test
def test_basic_rl_graph_manager_with_pong_nec():
tf.reset_default_graph()
from rl_coach.presets.Atari_NEC import graph_manager
assert graph_manager
graph_manager.env_params.level = "PongDeterministic-v4"
graph_manager.create_graph(task_parameters=TaskParameters(framework_type="tensorflow",
experiment_path="./experiments/test"))
# graph_manager.improve()
@pytest.mark.unit_test
def test_basic_rl_graph_manager_with_cartpole_dqn():
tf.reset_default_graph()
from rl_coach.presets.CartPole_DQN import graph_manager
assert graph_manager
graph_manager.create_graph(task_parameters=TaskParameters(framework_type="tensorflow",
experiment_path="./experiments/test"))
# graph_manager.improve()
if __name__ == '__main__':
pass
# test_basic_rl_graph_manager_with_pong_a3c()
# test_basic_rl_graph_manager_with_ant_a3c()
# test_basic_rl_graph_manager_with_pong_nec()
# test_basic_rl_graph_manager_with_cartpole_dqn()
#test_basic_rl_graph_manager_multithreaded_with_pong_a3c()
#test_basic_rl_graph_manager_with_doom_basic_dqn()

View File

View File

@@ -0,0 +1,91 @@
# nasty hack to deal with issue #46
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
import pytest
import numpy as np
import time
from rl_coach.memories.non_episodic.differentiable_neural_dictionary import QDND
import tensorflow as tf
NUM_ACTIONS = 3
NUM_DND_ENTRIES_TO_ADD = 10000
EMBEDDING_SIZE = 512
NUM_SAMPLED_EMBEDDINGS = 500
NUM_NEIGHBORS = 10
DND_SIZE = 500000
@pytest.fixture()
def dnd():
return QDND(
DND_SIZE,
EMBEDDING_SIZE,
NUM_ACTIONS,
0.1,
key_error_threshold=0,
learning_rate=0.0001,
num_neighbors=NUM_NEIGHBORS
)
@pytest.mark.unit_test
def test_random_sample_from_dnd(dnd: QDND):
    # fill the DND with random embeddings, actions and values
    embeddings = [np.random.rand(EMBEDDING_SIZE) for _ in range(NUM_DND_ENTRIES_TO_ADD)]
    actions = [np.random.randint(NUM_ACTIONS) for _ in range(NUM_DND_ENTRIES_TO_ADD)]
    values = [np.random.rand() for _ in range(NUM_DND_ENTRIES_TO_ADD)]
dnd.add(embeddings, actions, values)
dnd_embeddings, dnd_values, dnd_indices = dnd.query(embeddings[0:10], 0, NUM_NEIGHBORS)
# calculate_normalization_factor
sampled_embeddings = dnd.sample_embeddings(NUM_SAMPLED_EMBEDDINGS)
coefficient = 1/(NUM_SAMPLED_EMBEDDINGS * (NUM_SAMPLED_EMBEDDINGS - 1.0))
    tf_current_embedding = tf.placeholder(tf.float32, shape=(EMBEDDING_SIZE,), name='current_embedding')
tf_other_embeddings = tf.placeholder(tf.float32, shape=(NUM_SAMPLED_EMBEDDINGS - 1, EMBEDDING_SIZE), name='other_embeddings')
sub = tf_current_embedding - tf_other_embeddings
square = tf.square(sub)
result = tf.reduce_sum(square)
###########################
# more efficient method
###########################
sampled_embeddings_expanded = tf.placeholder(
tf.float32, shape=(1, NUM_SAMPLED_EMBEDDINGS, EMBEDDING_SIZE), name='sampled_embeddings_expanded')
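    # tiling the (1, N, E) batch N times and subtracting its transpose yields every pairwise
    # difference in a single op; the i == j terms are zero, so the sum of squares matches the
    # per-embedding loop above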
sampled_embeddings_tiled = tf.tile(sampled_embeddings_expanded, (sampled_embeddings_expanded.shape[1], 1, 1))
sampled_embeddings_transposed = tf.transpose(sampled_embeddings_tiled, (1, 0, 2))
sub2 = sampled_embeddings_tiled - sampled_embeddings_transposed
square2 = tf.square(sub2)
result2 = tf.reduce_sum(square2)
config = tf.ConfigProto()
config.allow_soft_placement = True # allow placing ops on cpu if they are not fit for gpu
config.gpu_options.allow_growth = True # allow the gpu memory allocated for the worker to grow if needed
sess = tf.Session(config=config)
sum1 = 0
start = time.time()
for i in range(NUM_SAMPLED_EMBEDDINGS):
curr_sampled_embedding = sampled_embeddings[i]
other_embeddings = np.delete(sampled_embeddings, i, 0)
sum1 += sess.run(result, feed_dict={tf_current_embedding: curr_sampled_embedding, tf_other_embeddings: other_embeddings})
print("1st method: {} sec".format(time.time()-start))
start = time.time()
sum2 = sess.run(result2, feed_dict={sampled_embeddings_expanded: np.expand_dims(sampled_embeddings,0)})
print("2nd method: {} sec".format(time.time()-start))
    # the two methods should agree up to float32 accumulation error
    print("sum1 = {}, sum2 = {}".format(sum1, sum2))
    assert np.isclose(sum1, sum2, rtol=1e-2)
norm_factor = -0.5/(coefficient * sum2)
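    # coefficient * sum2 is the mean pairwise squared distance; norm_factor is computed here for
    # reference but not asserted on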
if __name__ == '__main__':
test_random_sample_from_dnd(dnd())

View File

@@ -0,0 +1,97 @@
# nasty hack to deal with issue #46
import os
import sys
from rl_coach.memories.episodic.episodic_hindsight_experience_replay import EpisodicHindsightExperienceReplayParameters
from rl_coach.spaces import GoalsSpace, ReachingGoal
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
# print(sys.path)
import pytest
import numpy as np
from rl_coach.core_types import Transition, Episode
from rl_coach.memories.memory import MemoryGranularity
from rl_coach.memories.episodic.episodic_hindsight_experience_replay import EpisodicHindsightExperienceReplay, \
HindsightGoalSelectionMethod
#TODO: change from defining a new class to creating an instance from the parameters
class Parameters(EpisodicHindsightExperienceReplayParameters):
def __init__(self):
super().__init__()
self.max_size = (MemoryGranularity.Transitions, 100)
self.hindsight_transitions_per_regular_transition = 4
self.hindsight_goal_selection_method = HindsightGoalSelectionMethod.Future
self.goals_space = GoalsSpace(goal_name='observation',
reward_type=ReachingGoal(distance_from_goal_threshold=0.1),
distance_metric=GoalsSpace.DistanceMetric.Euclidean)
@pytest.fixture
def episode():
episode = []
for i in range(10):
episode.append(Transition(
state={'observation': np.array([i]), 'desired_goal': np.array([i]), 'achieved_goal': np.array([i])},
action=i,
))
return episode
@pytest.fixture
def her():
params = Parameters().__dict__
import inspect
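    # keep only the keys that match the replay buffer constructor's signature so the
    # parameters object can be unpacked directly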
args = set(inspect.getfullargspec(EpisodicHindsightExperienceReplay.__init__).args).intersection(params)
params = {k: params[k] for k in args}
return EpisodicHindsightExperienceReplay(**params)
@pytest.mark.unit_test
def test_sample_goal(her, episode):
assert her._sample_goal(episode, 8) == 9
@pytest.mark.unit_test
def test_sample_goal_range(her, episode):
unseen_goals = set(range(1, 9))
for _ in range(500):
        unseen_goals.discard(int(her._sample_goal(episode, 0)))
if not unseen_goals:
return
assert unseen_goals == set()
@pytest.mark.unit_test
def test_update_episode(her):
episode = Episode()
for i in range(10):
episode.insert(Transition(
state={'observation': np.array([i]), 'desired_goal': np.array([i+1]), 'achieved_goal': np.array([i+1])},
action=i,
game_over=i == 9,
reward=0 if i == 9 else -1,
))
her.store_episode(episode)
# print('her._num_transitions', her._num_transitions)
# 10 original transitions, and 9 transitions * 4 hindsight episodes
assert her.num_transitions() == 10 + (4 * 9)
# make sure that the goal state was never sampled from the past
for transition in her.transitions:
assert transition.state['desired_goal'] > transition.state['observation']
assert transition.next_state['desired_goal'] >= transition.next_state['observation']
if transition.reward == 0:
assert transition.game_over
else:
assert not transition.game_over
if __name__ == '__main__':
    test_update_episode(her())

View File

@@ -0,0 +1,93 @@
# nasty hack to deal with issue #46
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
import pytest
from rl_coach.memories.non_episodic.prioritized_experience_replay import SegmentTree
@pytest.mark.unit_test
def test_sum_tree():
# test power of 2 sum tree
sum_tree = SegmentTree(size=4, operation=SegmentTree.Operation.SUM)
sum_tree.add(10, "10")
assert sum_tree.total_value() == 10
sum_tree.add(20, "20")
assert sum_tree.total_value() == 30
sum_tree.add(5, "5")
assert sum_tree.total_value() == 35
sum_tree.add(7.5, "7.5")
assert sum_tree.total_value() == 42.5
sum_tree.add(2.5, "2.5")
assert sum_tree.total_value() == 35
sum_tree.add(5, "5")
assert sum_tree.total_value() == 20
assert sum_tree.get(2) == (0, 2.5, '2.5')
assert sum_tree.get(3) == (1, 5.0, '5')
assert sum_tree.get(10) == (2, 5.0, '5')
assert sum_tree.get(13) == (3, 7.5, '7.5')
sum_tree.update(2, 10)
    assert str(sum_tree) == "[25.]\n[ 7.5 17.5]\n[ 2.5 5. 10. 7.5]\n"
# test non power of 2 sum tree
with pytest.raises(ValueError):
sum_tree = SegmentTree(size=5, operation=SegmentTree.Operation.SUM)
@pytest.mark.unit_test
def test_min_tree():
min_tree = SegmentTree(size=4, operation=SegmentTree.Operation.MIN)
min_tree.add(10, "10")
assert min_tree.total_value() == 10
min_tree.add(20, "20")
assert min_tree.total_value() == 10
min_tree.add(5, "5")
assert min_tree.total_value() == 5
min_tree.add(7.5, "7.5")
assert min_tree.total_value() == 5
min_tree.add(2, "2")
assert min_tree.total_value() == 2
min_tree.add(3, "3")
min_tree.add(3, "3")
min_tree.add(3, "3")
min_tree.add(5, "5")
assert min_tree.total_value() == 3
@pytest.mark.unit_test
def test_max_tree():
max_tree = SegmentTree(size=4, operation=SegmentTree.Operation.MAX)
max_tree.add(10, "10")
assert max_tree.total_value() == 10
max_tree.add(20, "20")
assert max_tree.total_value() == 20
max_tree.add(5, "5")
assert max_tree.total_value() == 20
max_tree.add(7.5, "7.5")
assert max_tree.total_value() == 20
max_tree.add(2, "2")
assert max_tree.total_value() == 20
max_tree.add(3, "3")
max_tree.add(3, "3")
max_tree.add(3, "3")
max_tree.add(5, "5")
assert max_tree.total_value() == 5
# update
max_tree.update(1, 10)
assert max_tree.total_value() == 10
    assert str(max_tree) == "[10.]\n[10. 3.]\n[ 5. 10. 3. 3.]\n"
max_tree.update(1, 2)
assert max_tree.total_value() == 5
    assert str(max_tree) == "[5.]\n[5. 3.]\n[5. 2. 3. 3.]\n"
if __name__ == "__main__":
test_sum_tree()
test_min_tree()
test_max_tree()

View File

@@ -0,0 +1,81 @@
# nasty hack to deal with issue #46
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
import pytest
import numpy as np
from rl_coach.core_types import Transition
from rl_coach.memories.episodic.single_episode_buffer import SingleEpisodeBuffer
@pytest.fixture()
def buffer():
return SingleEpisodeBuffer()
@pytest.mark.unit_test
def test_store_and_get(buffer: SingleEpisodeBuffer):
# store single non terminal transition
transition = Transition(state={"observation": np.array([1, 2, 3])}, action=1, reward=1, game_over=False)
buffer.store(transition)
assert buffer.length() == 1
assert buffer.num_complete_episodes() == 0
assert buffer.num_transitions_in_complete_episodes() == 0
assert buffer.num_transitions() == 1
# get the single stored transition
episode = buffer.get(0)
assert episode.length() == 1
assert episode.get_first_transition() is transition # check addresses are the same
assert episode.get_last_transition() is transition # check addresses are the same
# store single terminal transition
transition = Transition(state={"observation": np.array([1, 2, 3])}, action=1, reward=1, game_over=True)
buffer.store(transition)
assert buffer.length() == 1
assert buffer.num_complete_episodes() == 1
assert buffer.num_transitions_in_complete_episodes() == 2
# check that the episode is valid
episode = buffer.get(0)
assert episode.length() == 2
assert episode.get_transition(0).total_return == 1 + 0.99
assert episode.get_transition(1).total_return == 1
assert buffer.mean_reward() == 1
# only one episode in the replay buffer
episode = buffer.get(1)
assert episode is None
# adding transitions after the first episode was closed
transition = Transition(state={"observation": np.array([1, 2, 3])}, action=1, reward=0, game_over=False)
buffer.store(transition)
assert buffer.length() == 1
assert buffer.num_complete_episodes() == 0
assert buffer.num_transitions_in_complete_episodes() == 0
# still only one episode
assert buffer.get(1) is None
assert buffer.mean_reward() == 0
@pytest.mark.unit_test
def test_clean(buffer: SingleEpisodeBuffer):
# add several transitions and then clean the buffer
transition = Transition(state={"observation": np.array([1, 2, 3])}, action=1, reward=1, game_over=False)
for i in range(10):
buffer.store(transition)
assert buffer.num_transitions() == 10
buffer.clean()
assert buffer.num_transitions() == 0
# add more transitions after the clean and make sure they were really cleaned
transition = Transition(state={"observation": np.array([1, 2, 3])}, action=1, reward=1, game_over=True)
buffer.store(transition)
assert buffer.num_transitions() == 1
assert buffer.num_transitions_in_complete_episodes() == 1
assert buffer.num_complete_episodes() == 1
for i in range(10):
assert buffer.sample(1)[0] is transition

View File

View File

@@ -0,0 +1,56 @@
# nasty hack to deal with issue #46
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
import pytest
import os
import time
import shutil
from subprocess import Popen, DEVNULL
from rl_coach.logger import screen
@pytest.mark.integration_test
def test_all_presets_are_running():
# os.chdir("../../")
test_failed = False
all_presets = sorted([f.split('.')[0] for f in os.listdir('rl_coach/presets') if f.endswith('.py') and f != '__init__.py'])
for preset in all_presets:
print("Testing preset {}".format(preset))
# TODO: this is a temporary workaround for presets which define more than a single available level.
# we should probably do this in a more robust way
level = ""
if "Atari" in preset:
level = "breakout"
elif "Mujoco" in preset:
level = "inverted_pendulum"
elif "ControlSuite" in preset:
level = "pendulum:swingup"
params = ["python3", "rl_coach/coach.py", "-p", preset, "-ns", "-e", ".test"]
if level != "":
params += ["-lvl", level]
p = Popen(params, stdout=DEVNULL)
# wait 10 seconds overhead of initialization etc.
time.sleep(10)
return_value = p.poll()
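        # poll() returns None while the process is still alive, i.e. the preset survived the
        # 10 second grace period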
if return_value is None:
screen.success("{} passed successfully".format(preset))
else:
test_failed = True
screen.error("{} failed".format(preset), crash=False)
p.kill()
if os.path.exists("experiments/.test"):
shutil.rmtree("experiments/.test")
assert not test_failed
if __name__ == "__main__":
test_all_presets_are_running()

View File

@@ -0,0 +1,5 @@
# content of pytest.ini
[pytest]
markers =
unit_test: short test that checks that a module is acting correctly
integration_test: long test that checks that the complete framework is running correctly
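# example: select one class of tests by marker, e.g. `pytest -m unit_test`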

View File

@@ -0,0 +1,106 @@
import os
import sys
from rl_coach.core_types import EnvironmentSteps
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
import pytest
from rl_coach.schedules import LinearSchedule, ConstantSchedule, ExponentialSchedule, PieceWiseSchedule
import numpy as np
@pytest.mark.unit_test
def test_constant_schedule():
schedule = ConstantSchedule(0.3)
# make sure the values in the constant schedule don't change over time
for i in range(1000):
assert schedule.initial_value == 0.3
assert schedule.current_value == 0.3
schedule.step()
@pytest.mark.unit_test
def test_linear_schedule():
# increasing schedule
schedule = LinearSchedule(1, 3, 10)
# the schedule is defined in number of steps to get from 1 to 3 so there are 10 steps
    # the linspace is defined in number of bins between 1 and 3 so there are 11 bins
target_values = np.linspace(1, 3, 11)
for i in range(10):
# we round to 4 because there is a very small floating point division difference (1e-10)
assert round(schedule.current_value, 4) == round(target_values[i], 4)
schedule.step()
# make sure the value does not change after 10 steps
for i in range(10):
assert schedule.current_value == 3
# decreasing schedule
schedule = LinearSchedule(3, 1, 10)
target_values = np.linspace(3, 1, 11)
for i in range(10):
# we round to 4 because there is a very small floating point division difference (1e-10)
assert round(schedule.current_value, 4) == round(target_values[i], 4)
schedule.step()
# make sure the value does not change after 10 steps
for i in range(10):
assert schedule.current_value == 1
# constant schedule
schedule = LinearSchedule(3, 3, 10)
for i in range(10):
# we round to 4 because there is a very small floating point division difference (1e-10)
assert round(schedule.current_value, 4) == 3
schedule.step()
@pytest.mark.unit_test
def test_exponential_schedule():
# decreasing schedule
schedule = ExponentialSchedule(10, 3, 0.99)
current_power = 1
for i in range(100):
assert round(schedule.current_value,6) == round(10*current_power,6)
current_power *= 0.99
schedule.step()
for i in range(100):
schedule.step()
assert schedule.current_value == 3
@pytest.mark.unit_test
def test_piece_wise_schedule():
# decreasing schedule
schedule = PieceWiseSchedule(
[(LinearSchedule(1, 3, 10), EnvironmentSteps(5)),
(ConstantSchedule(4), EnvironmentSteps(10)),
(ExponentialSchedule(3, 1, 0.99), EnvironmentSteps(10))
]
)
target_values = np.append(np.linspace(1, 2, 6), np.ones(11)*4)
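    # the linear piece ramps 1 -> 2 over its first 5 steps, after which the constant piece holds 4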
for i in range(16):
assert round(schedule.current_value, 4) == round(target_values[i], 4)
schedule.step()
current_power = 1
for i in range(10):
assert round(schedule.current_value, 4) == round(3*current_power, 4)
current_power *= 0.99
schedule.step()
if __name__ == "__main__":
test_constant_schedule()
test_linear_schedule()
test_exponential_schedule()
test_piece_wise_schedule()

View File

@@ -0,0 +1,198 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
import pytest
from rl_coach.spaces import DiscreteActionSpace, BoxActionSpace, MultiSelectActionSpace, ObservationSpace, \
    AgentSelection, VectorObservationSpace, AttentionActionSpace
import numpy as np
@pytest.mark.unit_test
def test_discrete():
action_space = DiscreteActionSpace(3, ["zero", "one", "two"])
assert action_space.shape == 1
for i in range(100):
assert 3 > action_space.sample() >= 0
action_info = action_space.sample_with_info()
assert action_info.action_probability == 1. / 3
assert action_space.high == 2
assert action_space.low == 0
# list descriptions
assert action_space.get_description(1) == "one"
# dict descriptions
action_space = DiscreteActionSpace(3, {1: "one", 2: "two", 0: "zero"})
assert action_space.get_description(0) == "zero"
# no descriptions
action_space = DiscreteActionSpace(3)
assert action_space.get_description(0) == "0"
# descriptions for invalid action
with pytest.raises(ValueError):
assert action_space.get_description(3) == "0"
@pytest.mark.unit_test
def test_box():
# simple action space
action_space = BoxActionSpace(4, -5, 5, ["a", "b", "c", "d"])
for i in range(100):
sample = action_space.sample()
assert np.all(-5 <= sample) and np.all(sample <= 5)
assert sample.shape == (4,)
assert sample.dtype == float
# test clipping
clipped_action = action_space.clip_action_to_space(np.array([-10, 10, 2, 5]))
assert np.all(clipped_action == np.array([-5, 5, 2, 5]))
# more complex high and low definition
action_space = BoxActionSpace(4, np.array([-5, -1, -0.5, 0]), np.array([1, 2, 4, 5]), ["a", "b", "c", "d"])
for i in range(100):
sample = action_space.sample()
assert np.all(np.array([-5, -1, -0.5, 0]) <= sample) and np.all(sample <= np.array([1, 2, 4, 5]))
assert sample.shape == (4,)
assert sample.dtype == float
# test clipping
clipped_action = action_space.clip_action_to_space(np.array([-10, 10, 2, 5]))
assert np.all(clipped_action == np.array([-5, 2, 2, 5]))
# mixed high and low definition
action_space = BoxActionSpace(4, np.array([-5, -1, -0.5, 0]), 5, ["a", "b", "c", "d"])
for i in range(100):
sample = action_space.sample()
assert np.all(np.array([-5, -1, -0.5, 0]) <= sample) and np.all(sample <= 5)
assert sample.shape == (4,)
assert sample.dtype == float
# test clipping
clipped_action = action_space.clip_action_to_space(np.array([-10, 10, 2, 5]))
assert np.all(clipped_action == np.array([-5, 5, 2, 5]))
# invalid bounds
with pytest.raises(ValueError):
action_space = BoxActionSpace(4, np.array([-5, -1, -0.5, 0]), -1, ["a", "b", "c", "d"])
# TODO: test descriptions
@pytest.mark.unit_test
def test_multiselect():
action_space = MultiSelectActionSpace(4, 2, ["a", "b", "c", "d"])
for i in range(100):
action = action_space.sample()
assert action.shape == (4,)
assert np.sum(action) <= 2
# check that descriptions of multiple actions are working
description = action_space.get_description(np.array([1, 0, 1, 0]))
assert description == "a + c"
description = action_space.get_description(np.array([0, 0, 0, 0]))
assert description == "no-op"
@pytest.mark.unit_test
def test_attention():
low = np.array([-1, -2, -3, -4])
high = np.array([1, 2, 3, 4])
action_space = AttentionActionSpace(4, low=low, high=high)
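    # an attention action appears to be a pair of points (low corner, high corner) defining a sub-box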
for i in range(100):
action = action_space.sample()
assert len(action) == 2
assert action[0].shape == (4,)
assert action[1].shape == (4,)
assert np.all(action[0] <= action[1])
assert np.all(action[0] >= low)
assert np.all(action[1] < high)
@pytest.mark.unit_test
def test_goal():
# TODO: test goal action space
pass
@pytest.mark.unit_test
def test_agent_selection():
action_space = AgentSelection(10)
assert action_space.shape == 1
assert action_space.high == 9
assert action_space.low == 0
with pytest.raises(ValueError):
assert action_space.get_description(10)
assert action_space.get_description(0) == "0"
@pytest.mark.unit_test
def test_observation_space():
observation_space = ObservationSpace(np.array([1, 10]), -10, 10)
# testing that val_matches_space_definition works
assert observation_space.val_matches_space_definition(np.ones([1, 10]))
assert not observation_space.val_matches_space_definition(np.ones([2, 10]))
assert not observation_space.val_matches_space_definition(np.ones([1, 10]) * 100)
assert not observation_space.val_matches_space_definition(np.ones([1, 1, 10]))
# is_point_in_space_shape
assert observation_space.is_point_in_space_shape(np.array([0, 9]))
assert observation_space.is_point_in_space_shape(np.array([0, 0]))
assert not observation_space.is_point_in_space_shape(np.array([1, 8]))
assert not observation_space.is_point_in_space_shape(np.array([0, 10]))
assert not observation_space.is_point_in_space_shape(np.array([-1, 6]))
@pytest.mark.unit_test
def test_image_observation_space():
# TODO: test image observation space
pass
@pytest.mark.unit_test
def test_measurements_observation_space():
# empty measurements space
measurements_space = VectorObservationSpace(0)
# vector space
measurements_space = VectorObservationSpace(3, measurements_names=['a', 'b', 'c'])
@pytest.mark.unit_test
def test_reward_space():
# TODO: test reward space
pass
# def test_discrete_to_linspace_action_space_map():
# box = BoxActionSpace(2, np.array([0, 0]), np.array([10, 10]))
# linspace = BoxDiscretization(box, [5, 3])
# assert np.all(linspace.actions == np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]))
# assert np.all(linspace.target_actions ==
# np.array([[0.0, 0.0], [0.0, 5.0], [0.0, 10.0],
# [2.5, 0.0], [2.5, 5.0], [2.5, 10.0],
# [5.0, 0.0], [5.0, 5.0], [5.0, 10.0],
# [7.5, 0.0], [7.5, 5.0], [7.5, 10.0],
# [10.0, 0.0], [10.0, 5.0], [10.0, 10.0]]))
#
#
# def test_discrete_to_attention_action_space_map():
# attention = AttentionActionSpace(2, np.array([0, 0]), np.array([10, 10]))
# linspace = AttentionDiscretization(attention, 2)
# assert np.all(linspace.actions == np.array([0, 1, 2, 3]))
# assert np.all(linspace.target_actions ==
# np.array(
# [[[0., 0.], [5., 5.]],
# [[0., 5.], [5., 10.]],
# [[5., 0.], [10., 5.]],
# [[5., 5.], [10., 10.]]])
# )
if __name__ == "__main__":
    test_observation_space()
    # the discretization tests above are commented out, so don't call them here
    # test_discrete_to_linspace_action_space_map()
    # test_discrete_to_attention_action_space_map()