mirror of https://github.com/gryf/coach.git (synced 2026-02-15 21:45:46 +01:00)
pre-release 0.10.0
0  rl_coach/tests/__init__.py  Normal file
0  rl_coach/tests/agents/__init__.py  Normal file
33  rl_coach/tests/agents/test_agent_external_communication.py  Normal file
@@ -0,0 +1,33 @@
import os
import sys

from rl_coach.base_parameters import TaskParameters

sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
import tensorflow as tf
from tensorflow import logging
import pytest
logging.set_verbosity(logging.INFO)


@pytest.mark.unit_test
def test_get_QActionStateValue_predictions():
    tf.reset_default_graph()
    from rl_coach.presets.CartPole_DQN import graph_manager as cartpole_dqn_graph_manager
    assert cartpole_dqn_graph_manager
    cartpole_dqn_graph_manager.create_graph(task_parameters=
        TaskParameters(framework_type="tensorflow",
                       experiment_path="./experiments/test"))
    cartpole_dqn_graph_manager.improve_steps.num_steps = 1
    cartpole_dqn_graph_manager.steps_between_evaluation_periods.num_steps = 5

    # graph_manager.improve()
    #
    # agent = graph_manager.level_managers[0].composite_agents['simple_rl_agent'].agents['simple_rl_agent/agent']
    # some_state = agent.memory.sample(1)[0].state
    # cartpole_dqn_predictions = agent.get_predictions(states=some_state, prediction_type=QActionStateValue)
    # assert cartpole_dqn_predictions.shape == (1, 2)


if __name__ == '__main__':
    test_get_QActionStateValue_predictions()
0  rl_coach/tests/architectures/__init__.py  Normal file
@@ -0,0 +1,45 @@
import os
import sys

from rl_coach.base_parameters import EmbedderScheme

sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))

import pytest
import numpy as np
from rl_coach.architectures.tensorflow_components.embedders.vector_embedder import VectorEmbedder
import tensorflow as tf
from tensorflow import logging

logging.set_verbosity(logging.INFO)

@pytest.fixture
def reset():
    tf.reset_default_graph()


@pytest.mark.unit_test
def test_embedder(reset):
    embedder = VectorEmbedder(np.array([10, 10]), name="test", scheme=EmbedderScheme.Empty)

    # make sure the ops were not created yet
    assert len(tf.get_default_graph().get_operations()) == 0

    # call the embedder
    input_ph, output_ph = embedder()

    # make sure that now the ops were created
    assert len(tf.get_default_graph().get_operations()) > 0

    # try feeding a batch of one example  # TODO: consider auto converting to batch
    input = np.random.rand(1, 10, 10)
    sess = tf.Session()
    output = sess.run(embedder.output, {embedder.input: input})
    assert output.shape == (1, 100)  # should have flattened the input

    # now make sure the returned placeholders behave the same
    output = sess.run(output_ph, {input_ph: input})
    assert output.shape == (1, 100)  # should have flattened the input

    # make sure the naming is correct
    assert embedder.get_name() == "test"
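The (1, 100) assertions above amount to flattening the 10x10 input, which seems to be all an Empty-scheme embedder adds. A standalone NumPy sketch of the same shape arithmetic (illustrative only, not Coach code):

    import numpy as np

    batch = np.random.rand(1, 10, 10)
    # an Empty-scheme embedder appears to only flatten its input
    flattened = batch.reshape(batch.shape[0], -1)
    assert flattened.shape == (1, 100)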
@@ -0,0 +1,99 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))

import pytest
import numpy as np
from rl_coach.architectures.tensorflow_components.embedders.image_embedder import ImageEmbedder, EmbedderScheme
import tensorflow as tf
from tensorflow import logging

logging.set_verbosity(logging.INFO)

@pytest.fixture
def reset():
    tf.reset_default_graph()


@pytest.mark.unit_test
def test_embedder(reset):
    # creating an embedder with a non-image input
    with pytest.raises(ValueError):
        embedder = ImageEmbedder(np.array([100]), name="test")
    with pytest.raises(ValueError):
        embedder = ImageEmbedder(np.array([100, 100]), name="test")
    with pytest.raises(ValueError):
        embedder = ImageEmbedder(np.array([10, 100, 100, 100]), name="test")

    # creating a simple image embedder
    embedder = ImageEmbedder(np.array([100, 100, 10]), name="test")

    # make sure the ops were not created yet
    assert len(tf.get_default_graph().get_operations()) == 0

    # call the embedder
    input_ph, output_ph = embedder()

    # make sure that now the ops were created
    assert len(tf.get_default_graph().get_operations()) > 0

    # try feeding a batch of one example
    input = np.random.rand(1, 100, 100, 10)
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    output = sess.run(embedder.output, {embedder.input: input})
    assert output.shape == (1, 5184)

    # now make sure the returned placeholders behave the same
    output = sess.run(output_ph, {input_ph: input})
    assert output.shape == (1, 5184)

    # make sure the naming is correct
    assert embedder.get_name() == "test"


@pytest.mark.unit_test
def test_complex_embedder(reset):
    # creating a deep image embedder
    embedder = ImageEmbedder(np.array([100, 100, 10]), name="test", scheme=EmbedderScheme.Deep)

    # call the embedder
    embedder()

    # try feeding a batch of one example
    input = np.random.rand(1, 100, 100, 10)
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    output = sess.run(embedder.output, {embedder.input: input})
    assert output.shape == (1, 256)


@pytest.mark.unit_test
def test_activation_function(reset):
    # creating a deep image embedder with relu
    embedder = ImageEmbedder(np.array([100, 100, 10]), name="relu", scheme=EmbedderScheme.Deep,
                             activation_function=tf.nn.relu)

    # call the embedder
    embedder()

    # try feeding a batch of one example
    input = np.random.rand(1, 100, 100, 10)
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    output = sess.run(embedder.output, {embedder.input: input})
    assert np.all(output >= 0)  # relu outputs should be non-negative

    # creating a deep image embedder with tanh
    embedder_tanh = ImageEmbedder(np.array([100, 100, 10]), name="tanh", scheme=EmbedderScheme.Deep,
                                  activation_function=tf.nn.tanh)

    # call the embedder
    embedder_tanh()

    # try feeding a batch of one example
    input = np.random.rand(1, 100, 100, 10)
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    output = sess.run(embedder_tanh.output, {embedder_tanh.input: input})
    assert np.all(output >= -1) and np.all(output <= 1)  # tanh outputs lie in [-1, 1]
@@ -0,0 +1,95 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))

import pytest
import numpy as np
from rl_coach.architectures.tensorflow_components.embedders.vector_embedder import VectorEmbedder, EmbedderScheme
import tensorflow as tf
from tensorflow import logging

logging.set_verbosity(logging.INFO)

@pytest.fixture
def reset():
    tf.reset_default_graph()


@pytest.mark.unit_test
def test_embedder(reset):
    # creating a vector embedder with a matrix
    with pytest.raises(ValueError):
        embedder = VectorEmbedder(np.array([10, 10]), name="test")

    # creating a simple vector embedder
    embedder = VectorEmbedder(np.array([10]), name="test")

    # make sure the ops were not created yet
    assert len(tf.get_default_graph().get_operations()) == 0

    # call the embedder
    input_ph, output_ph = embedder()

    # make sure that now the ops were created
    assert len(tf.get_default_graph().get_operations()) > 0

    # try feeding a batch of one example
    input = np.random.rand(1, 10)
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    output = sess.run(embedder.output, {embedder.input: input})
    assert output.shape == (1, 256)

    # now make sure the returned placeholders behave the same
    output = sess.run(output_ph, {input_ph: input})
    assert output.shape == (1, 256)

    # make sure the naming is correct
    assert embedder.get_name() == "test"


@pytest.mark.unit_test
def test_complex_embedder(reset):
    # creating a deep vector embedder
    embedder = VectorEmbedder(np.array([10]), name="test", scheme=EmbedderScheme.Deep)

    # call the embedder
    embedder()

    # try feeding a batch of one example
    input = np.random.rand(1, 10)
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    output = sess.run(embedder.output, {embedder.input: input})
    assert output.shape == (1, 128)


@pytest.mark.unit_test
def test_activation_function(reset):
    # creating a deep vector embedder with relu
    embedder = VectorEmbedder(np.array([10]), name="relu", scheme=EmbedderScheme.Deep,
                              activation_function=tf.nn.relu)

    # call the embedder
    embedder()

    # try feeding a batch of one example
    input = np.random.rand(1, 10)
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    output = sess.run(embedder.output, {embedder.input: input})
    assert np.all(output >= 0)  # relu outputs should be non-negative

    # creating a deep vector embedder with tanh
    embedder_tanh = VectorEmbedder(np.array([10]), name="tanh", scheme=EmbedderScheme.Deep,
                                   activation_function=tf.nn.tanh)

    # call the embedder
    embedder_tanh()

    # try feeding a batch of one example
    input = np.random.rand(1, 10)
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    output = sess.run(embedder_tanh.output, {embedder_tanh.input: input})
    assert np.all(output >= -1) and np.all(output <= 1)  # tanh outputs lie in [-1, 1]
0  rl_coach/tests/environments/__init__.py  Normal file
67  rl_coach/tests/environments/test_gym_environment.py  Normal file
@@ -0,0 +1,67 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))

import pytest
from rl_coach.environments.gym_environment import GymEnvironment
from rl_coach.base_parameters import VisualizationParameters
import numpy as np
from rl_coach.spaces import DiscreteActionSpace, BoxActionSpace, ImageObservationSpace, VectorObservationSpace


@pytest.fixture()
def atari_env():
    # create a breakout gym environment
    env = GymEnvironment(level='Breakout-v0',
                         seed=1,
                         frame_skip=4,
                         visualization_parameters=VisualizationParameters())
    return env


@pytest.fixture()
def continuous_env():
    # create a pendulum gym environment
    env = GymEnvironment(level='Pendulum-v0',
                         seed=1,
                         frame_skip=1,
                         visualization_parameters=VisualizationParameters())
    return env


@pytest.mark.unit_test
def test_gym_discrete_environment(atari_env):
    # observation space
    assert type(atari_env.state_space['observation']) == ImageObservationSpace
    assert np.all(atari_env.state_space['observation'].shape == [210, 160, 3])
    assert np.all(atari_env.last_env_response.next_state['observation'].shape == (210, 160, 3))

    # action space
    assert type(atari_env.action_space) == DiscreteActionSpace
    assert np.all(atari_env.action_space.high == 3)

    # make sure that the seed is working properly
    assert np.sum(atari_env.last_env_response.next_state['observation']) == 4115856


@pytest.mark.unit_test
def test_gym_continuous_environment(continuous_env):
    # observation space
    assert type(continuous_env.state_space['observation']) == VectorObservationSpace
    assert np.all(continuous_env.state_space['observation'].shape == [3])
    assert np.all(continuous_env.last_env_response.next_state['observation'].shape == (3,))

    # action space
    assert type(continuous_env.action_space) == BoxActionSpace
    assert np.all(continuous_env.action_space.shape == np.array([1]))

    # make sure that the seed is working properly
    assert np.sum(continuous_env.last_env_response.next_state['observation']) == 1.2661630859028832


@pytest.mark.unit_test
def test_step(atari_env):
    result = atari_env.step(0)


if __name__ == '__main__':
    test_gym_continuous_environment(continuous_env())
0  rl_coach/tests/exploration_policies/__init__.py  Normal file
44  rl_coach/tests/exploration_policies/test_additive_noise.py  Normal file
@@ -0,0 +1,44 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))

import pytest

from rl_coach.spaces import DiscreteActionSpace, BoxActionSpace
from rl_coach.exploration_policies.additive_noise import AdditiveNoise
from rl_coach.schedules import LinearSchedule
import numpy as np


@pytest.mark.unit_test
def test_init():
    # discrete control
    action_space = DiscreteActionSpace(3)
    noise_schedule = LinearSchedule(1.0, 1.0, 1000)

    # additive noise doesn't work for discrete controls
    with pytest.raises(ValueError):
        policy = AdditiveNoise(action_space, noise_schedule, 0)

    # additive noise requires a bounded range for the actions
    action_space = BoxActionSpace(np.array([10]))
    with pytest.raises(ValueError):
        policy = AdditiveNoise(action_space, noise_schedule, 0)


@pytest.mark.unit_test
def test_get_action():
    # make sure noise is in range
    action_space = BoxActionSpace(np.array([10]), -1, 1)
    noise_schedule = LinearSchedule(1.0, 1.0, 1000)
    policy = AdditiveNoise(action_space, noise_schedule, 0)

    # the action range is 2, so there is a ~0.1% chance that the noise will be larger than 3*std = 3*2 = 6
    for i in range(1000):
        action = policy.get_action(np.zeros([10]))
        assert np.all(action < 10)
        # make sure there is no clipping of the action since it should be the environment that clips actions
        assert np.all(action != 1.0)
        assert np.all(action != -1.0)
        # make sure that each action element has a different value
        assert np.all(action[0] != action[1:])
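Reading the comment in the loop above, the noise standard deviation appears to be the schedule value scaled by the action range. A hypothetical sketch of that interpretation (not Coach's implementation, and the scaling is an assumption):

    import numpy as np

    # assumed: std = schedule_value * action_range, here 1.0 * (1 - (-1)) = 2
    action_range = 1.0 - (-1.0)
    std = 1.0 * action_range
    noisy_action = np.zeros(10) + np.random.randn(10) * std
    # ~99.7% of Gaussian samples fall within 3*std = 6, matching the test's comment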
81  rl_coach/tests/exploration_policies/test_e_greedy.py  Normal file
@@ -0,0 +1,81 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))

import pytest

from rl_coach.spaces import DiscreteActionSpace
from rl_coach.exploration_policies.e_greedy import EGreedy
from rl_coach.schedules import LinearSchedule
import numpy as np
from rl_coach.core_types import RunPhase


@pytest.mark.unit_test
def test_get_action():
    # discrete control
    action_space = DiscreteActionSpace(3)
    epsilon_schedule = LinearSchedule(1.0, 1.0, 1000)
    policy = EGreedy(action_space, epsilon_schedule, evaluation_epsilon=0)

    # verify that the test phase gives greedy actions (evaluation_epsilon = 0)
    policy.change_phase(RunPhase.TEST)
    for i in range(100):
        best_action = policy.get_action(np.array([10, 20, 30]))
        assert best_action == 2

    # verify that the train phase gives uniform actions (exploration = 1)
    policy.change_phase(RunPhase.TRAIN)
    counters = np.array([0, 0, 0])
    for i in range(30000):
        best_action = policy.get_action(np.array([10, 20, 30]))
        counters[best_action] += 1
    assert np.all(counters > 9500)  # this is noisy so we allow 5% error

    # TODO: test continuous actions


@pytest.mark.unit_test
def test_change_phase():
    # discrete control
    action_space = DiscreteActionSpace(3)
    epsilon_schedule = LinearSchedule(1.0, 0.1, 1000)
    policy = EGreedy(action_space, epsilon_schedule, evaluation_epsilon=0.01)

    # verify that the schedule does not advance outside the training phase
    assert policy.get_control_param() == 1.0
    policy.change_phase(RunPhase.TEST)
    best_action = policy.get_action(np.array([10, 20, 30]))
    assert policy.epsilon_schedule.current_value == 1.0
    policy.change_phase(RunPhase.HEATUP)
    best_action = policy.get_action(np.array([10, 20, 30]))
    assert policy.epsilon_schedule.current_value == 1.0
    policy.change_phase(RunPhase.UNDEFINED)
    best_action = policy.get_action(np.array([10, 20, 30]))
    assert policy.epsilon_schedule.current_value == 1.0


@pytest.mark.unit_test
def test_get_control_param():
    # discrete control
    action_space = DiscreteActionSpace(3)
    epsilon_schedule = LinearSchedule(1.0, 0.1, 1000)
    policy = EGreedy(action_space, epsilon_schedule, evaluation_epsilon=0.01)

    # verify that the schedule applies during the TRAIN phase
    policy.change_phase(RunPhase.TRAIN)
    for i in range(999):
        best_action = policy.get_action(np.array([10, 20, 30]))
        assert 1.0 > policy.get_control_param() > 0.1
    best_action = policy.get_action(np.array([10, 20, 30]))
    assert policy.get_control_param() == 0.1

    # test phases
    policy.change_phase(RunPhase.TEST)
    assert policy.get_control_param() == 0.01

    policy.change_phase(RunPhase.TRAIN)
    assert policy.get_control_param() == 0.1

    policy.change_phase(RunPhase.HEATUP)
    assert policy.get_control_param() == 0.1
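The TEST-phase and TRAIN-phase expectations above follow from the usual epsilon-greedy rule: explore uniformly with probability epsilon, otherwise take the argmax action. A minimal standalone sketch of that rule (illustrative, not the EGreedy class):

    import numpy as np

    def e_greedy(action_values, epsilon):
        # explore with probability epsilon, otherwise act greedily
        if np.random.rand() < epsilon:
            return np.random.randint(len(action_values))
        return int(np.argmax(action_values))

    # epsilon = 0 always returns the greedy action, as the TEST-phase loop asserts
    assert e_greedy(np.array([10, 20, 30]), epsilon=0) == 2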
34  rl_coach/tests/exploration_policies/test_greedy.py  Normal file
@@ -0,0 +1,34 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))

import pytest

from rl_coach.spaces import DiscreteActionSpace, BoxActionSpace
from rl_coach.exploration_policies.greedy import Greedy
import numpy as np


@pytest.mark.unit_test
def test_get_action():
    # discrete control
    action_space = DiscreteActionSpace(3)
    policy = Greedy(action_space)

    best_action = policy.get_action(np.array([10, 20, 30]))
    assert best_action == 2

    # continuous control
    action_space = BoxActionSpace(np.array([10]))
    policy = Greedy(action_space)

    best_action = policy.get_action(np.array([1, 1, 1]))
    assert np.all(best_action == np.array([1, 1, 1]))


@pytest.mark.unit_test
def test_get_control_param():
    action_space = DiscreteActionSpace(3)
    policy = Greedy(action_space)
    assert policy.get_control_param() == 0
85  rl_coach/tests/exploration_policies/test_ou_process.py  Normal file
@@ -0,0 +1,85 @@
import os
import sys

sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))

import pytest

from rl_coach.spaces import DiscreteActionSpace, BoxActionSpace
from rl_coach.exploration_policies.ou_process import OUProcess
from rl_coach.core_types import RunPhase
import numpy as np


@pytest.mark.unit_test
def test_init():
    # discrete control
    action_space = DiscreteActionSpace(3)

    # OU process doesn't work for discrete controls
    with pytest.raises(ValueError):
        policy = OUProcess(action_space, mu=0, theta=0.1, sigma=0.2, dt=0.01)


@pytest.mark.unit_test
def test_get_action():
    action_space = BoxActionSpace(np.array([10]), -1, 1)
    policy = OUProcess(action_space, mu=0, theta=0.1, sigma=0.2, dt=0.01)

    # make sure no noise is added in the testing phase
    policy.change_phase(RunPhase.TEST)
    assert np.all(policy.get_action(np.zeros((10,))) == np.zeros((10,)))
    rand_action = np.random.rand(10)
    assert np.all(policy.get_action(rand_action) == rand_action)

    # make sure the noise added in the training phase matches the golden values
    policy.change_phase(RunPhase.TRAIN)
    np.random.seed(0)
    targets = [
        [0.03528105, 0.00800314, 0.01957476, 0.04481786, 0.03735116, -0.01954556, 0.01900177, -0.00302714, -0.00206438, 0.00821197],
        [0.03812664, 0.03708061, 0.03477594, 0.04720655, 0.04619107, -0.01285253, 0.04886435, -0.00712728, 0.00419904, -0.00887816],
        [-0.01297129, 0.0501159, 0.05202989, 0.03231604, 0.09153997, -0.04192699, 0.04973065, -0.01086383, 0.03485043, 0.0205179],
        [-0.00985937, 0.05762904, 0.03422214, -0.00733221, 0.08449019, -0.03875808, 0.07428674, 0.01319463, 0.02706904, 0.01445132],
        [-3.08205658e-02, 2.91710492e-02, 6.25166679e-05, 3.16906342e-02, 7.42126579e-02, -4.74808080e-02, 4.91565431e-02, 2.87312413e-02, -5.23598615e-03, 1.01820670e-02],
        [-0.04869908, 0.03687993, -0.01015365, 0.0080463, 0.0735748, -0.03886669, 0.05043773, 0.03475195, -0.01791719, 0.00291706],
        [-0.06209959, 0.02965198, -0.02640642, -0.0264874, 0.07704975, -0.04686344, 0.01778333, 0.04397284, -0.03604524, 0.00395305],
        [-0.04745568, 0.03220199, -0.003592, -0.05115743, 0.08501953, -0.06051278, 0.0003496, 0.03235188, -0.04224025, 0.00507241],
        [-0.07071122, 0.05018632, 0.00572484, -0.08183114, 0.11469956, -0.02253448, 0.02392484, 0.02872103, -0.06361306, 0.02615637],
        [-0.07870404, 0.07458503, 0.00988462, -0.06221653, 0.12171218, -0.00838049, 0.02411092, 0.06440972, -0.0610112, 0.03417],
        [-0.04096233, 0.04755527, -0.01553497, -0.04276638, 0.098128, 0.03050032, 0.01581443, 0.04939621, -0.02249135, 0.06374613],
        [-0.00357018, 0.06562861, -0.03274395, -0.00452232, 0.09266981, 0.04651895, 0.03474365, 0.04624661, -0.01018727, 0.08212651],
    ]
    for i in range(10):
        current_noise = policy.get_action(np.zeros((10,)))
        assert np.all(np.abs(current_noise - targets[i]) < 1e-7)

    # get some statistics. check very roughly that the mean acts according to the definition of the policy

    # mean of 0
    vals = []
    for i in range(50000):
        current_noise = policy.get_action(np.zeros((10,)))
        vals.append(current_noise)
    assert np.all(np.abs(np.mean(vals, axis=0)) < 1)

    # mean of 10
    policy = OUProcess(action_space, mu=10, theta=0.1, sigma=0.2, dt=0.01)
    policy.change_phase(RunPhase.TRAIN)
    vals = []
    for i in range(50000):
        current_noise = policy.get_action(np.zeros((10,)))
        vals.append(current_noise)
    assert np.all(np.abs(np.mean(vals, axis=0) - 10) < 1)

    # plot the noise values - only used for understanding how the noise actually looks
    # import matplotlib.pyplot as plt
    # vals = np.array(vals)
    # for i in range(10):
    #     plt.plot(list(range(10000)), vals[:, i])
    # plt.show()


if __name__ == "__main__":
    test_get_action()
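The golden values above are consistent with the standard Euler discretization of an Ornstein-Uhlenbeck process. A minimal sketch that reproduces the first entry of the first target row under that assumption (the exact update OUProcess applies may differ in detail):

    import numpy as np

    theta, sigma, dt, mu = 0.1, 0.2, 0.01, 0.0
    np.random.seed(0)
    x = np.zeros(10)
    # Euler-discretized OU update: x += theta * (mu - x) * dt + sigma * sqrt(dt) * N(0, 1)
    x = x + theta * (mu - x) * dt + sigma * np.sqrt(dt) * np.random.randn(10)
    assert abs(x[0] - 0.03528105) < 1e-7

With mu=10 the drift term theta * (mu - x) * dt pulls the state toward 10, which is why the second statistics check expects a long-run mean near 10.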
0  rl_coach/tests/filters/__init__.py  Normal file
0  rl_coach/tests/filters/action/__init__.py  Normal file
@@ -0,0 +1,44 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))

import pytest
from rl_coach.filters.action.attention_discretization import AttentionDiscretization
from rl_coach.spaces import BoxActionSpace, DiscreteActionSpace, AttentionActionSpace
import numpy as np


@pytest.mark.unit_test
def test_filter():
    filter = AttentionDiscretization(2)

    # passing an output space that is wrong
    with pytest.raises(ValueError):
        filter.validate_output_action_space(DiscreteActionSpace(10))
    with pytest.raises(ValueError):
        filter.validate_output_action_space(BoxActionSpace(10))

    # 2 dimensional attention space
    output_space = AttentionActionSpace(2, 0, 83)
    input_space = filter.get_unfiltered_action_space(output_space)

    assert np.all(filter.target_actions == np.array([[[0., 0.], [41.5, 41.5]],
                                                     [[0., 41.5], [41.5, 83.]],
                                                     [[41.5, 0], [83., 41.5]],
                                                     [[41.5, 41.5], [83., 83.]]]))
    assert input_space.actions == list(range(4))

    action = 2

    result = filter.filter(action)
    assert np.all(result == np.array([[41.5, 0], [83., 41.5]]))
    assert output_space.val_matches_space_definition(result)

    # force int bins
    filter = AttentionDiscretization(2, force_int_bins=True)
    input_space = filter.get_unfiltered_action_space(output_space)

    assert np.all(filter.target_actions == np.array([[[0., 0.], [41, 41]],
                                                     [[0., 41], [41, 83.]],
                                                     [[41, 0], [83., 41]],
                                                     [[41, 41], [83., 83.]]]))
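The four target boxes above are just the 2x2 grid of sub-boxes over [0, 83] per axis: two bins per dimension give edges [0, 41.5, 83], and each action selects one cell as a (low corner, high corner) pair. A sketch of the same construction (illustrative, not the filter's code):

    import numpy as np

    # 2 bins per axis over [0, 83] gives edges [0, 41.5, 83]
    edges = np.linspace(0, 83, 2 + 1)
    boxes = [np.array([[edges[i], edges[j]], [edges[i + 1], edges[j + 1]]])
             for i in range(2) for j in range(2)]
    assert np.allclose(boxes[2], [[41.5, 0.], [83., 41.5]])  # the box selected by action = 2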
45  rl_coach/tests/filters/action/test_box_discretization.py  Normal file
@@ -0,0 +1,45 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))

import pytest
from rl_coach.filters.action.box_discretization import BoxDiscretization
from rl_coach.spaces import BoxActionSpace, DiscreteActionSpace


@pytest.mark.unit_test
def test_filter():
    filter = BoxDiscretization(9)

    # passing an output space that is wrong
    with pytest.raises(ValueError):
        filter.validate_output_action_space(DiscreteActionSpace(10))

    # 1 dimensional box
    output_space = BoxActionSpace(1, 5, 15)
    input_space = filter.get_unfiltered_action_space(output_space)

    assert filter.target_actions == [[5.], [6.25], [7.5], [8.75], [10.], [11.25], [12.5], [13.75], [15.]]
    assert input_space.actions == list(range(9))

    action = 2

    result = filter.filter(action)
    assert result == [7.5]
    assert output_space.val_matches_space_definition(result)

    # 2 dimensional box
    filter = BoxDiscretization(3)
    output_space = BoxActionSpace(2, 5, 15)
    input_space = filter.get_unfiltered_action_space(output_space)

    assert filter.target_actions == [[5., 5.], [5., 10.], [5., 15.],
                                     [10., 5.], [10., 10.], [10., 15.],
                                     [15., 5.], [15., 10.], [15., 15.]]
    assert input_space.actions == list(range(9))

    action = 2

    result = filter.filter(action)
    assert result == [5., 15.]
    assert output_space.val_matches_space_definition(result)
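The expected target actions above are evenly spaced grid points over the box, and in the multi-dimensional case their Cartesian product. A quick sketch of the same arithmetic (illustrative, not the filter's internals):

    import numpy as np
    from itertools import product

    # 9 bins over [5, 15] are evenly spaced grid points
    assert np.allclose(np.linspace(5, 15, 9),
                       [5., 6.25, 7.5, 8.75, 10., 11.25, 12.5, 13.75, 15.])

    # for the 2-dimensional case, 3 bins per axis give the Cartesian product of grid points
    grid = list(product(np.linspace(5, 15, 3), repeat=2))
    assert grid[2] == (5.0, 15.0)  # matches filter.filter(2) == [5., 15.] above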
27  rl_coach/tests/filters/action/test_box_masking.py  Normal file
@@ -0,0 +1,27 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))

import pytest
from rl_coach.filters.action.box_masking import BoxMasking
from rl_coach.spaces import BoxActionSpace, DiscreteActionSpace
import numpy as np


@pytest.mark.unit_test
def test_filter():
    filter = BoxMasking(10, 20)

    # passing an output space that is wrong
    with pytest.raises(ValueError):
        filter.validate_output_action_space(DiscreteActionSpace(10))

    # 1 dimensional box
    output_space = BoxActionSpace(1, 5, 30)
    input_space = filter.get_unfiltered_action_space(output_space)

    action = np.array([2])
    result = filter.filter(action)
    assert result == np.array([12])
    assert output_space.val_matches_space_definition(result)
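From the assertion above, BoxMasking(10, 20) appears to expose a zero-based input space and shift incoming actions by the masked lower bound, so 2 maps to 12. A hypothetical sketch of that reading (an assumption, not the filter's code):

    import numpy as np

    # assumed behavior: result = masked_low + action
    masked_low = 10
    action = np.array([2])
    assert np.all(masked_low + action == np.array([12]))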
29  rl_coach/tests/filters/action/test_linear_box_to_box_map.py  Normal file
@@ -0,0 +1,29 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))

import pytest
from rl_coach.filters.action.linear_box_to_box_map import LinearBoxToBoxMap
from rl_coach.spaces import BoxActionSpace, DiscreteActionSpace
import numpy as np


@pytest.mark.unit_test
def test_filter():
    filter = LinearBoxToBoxMap(10, 20)

    # passing an output space that is wrong
    with pytest.raises(ValueError):
        filter.validate_output_action_space(DiscreteActionSpace(10))

    # 1 dimensional box
    output_space = BoxActionSpace(1, 5, 35)
    input_space = filter.get_unfiltered_action_space(output_space)

    action = np.array([2])

    action = np.array([12])
    result = filter.filter(action)
    assert result == np.array([11])
    assert output_space.val_matches_space_definition(result)
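The expected value 11 follows from straight linear interpolation between the two boxes. A sketch of the arithmetic, assuming input range [10, 20] and output range [5, 35]:

    # linear map: out = out_low + (a - in_low) * (out_high - out_low) / (in_high - in_low)
    in_low, in_high = 10, 20
    out_low, out_high = 5, 35
    action = 12
    mapped = out_low + (action - in_low) * (out_high - out_low) / (in_high - in_low)
    assert mapped == 11  # 5 + 2 * 30 / 10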
0  rl_coach/tests/filters/observation/__init__.py  Normal file
@@ -0,0 +1,90 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))

import pytest
import numpy as np

from rl_coach.filters.observation.observation_crop_filter import ObservationCropFilter
from rl_coach.filters.filter import InputFilter
from rl_coach.spaces import ObservationSpace
from rl_coach.core_types import EnvResponse


@pytest.fixture
def env_response():
    observation = np.random.rand(10, 20, 30)
    return EnvResponse(next_state={'observation': observation}, reward=0, game_over=False)


@pytest.mark.unit_test
def test_filter(env_response):
    crop_low = np.array([0, 5, 10])
    crop_high = np.array([5, 10, 20])
    crop_filter = InputFilter()
    crop_filter.add_observation_filter('observation', 'crop', ObservationCropFilter(crop_low, crop_high))

    result = crop_filter.filter(env_response)[0]
    unfiltered_observation = env_response.next_state['observation']
    filtered_observation = result.next_state['observation']

    # validate the shape of the filtered observation
    assert filtered_observation.shape == (5, 5, 10)

    # validate the content of the filtered observation
    assert np.all(filtered_observation == unfiltered_observation[0:5, 5:10, 10:20])

    # crop with -1 on some axes
    crop_low = np.array([0, 0, 0])
    crop_high = np.array([5, -1, -1])
    crop_filter = InputFilter()
    crop_filter.add_observation_filter('observation', 'crop', ObservationCropFilter(crop_low, crop_high))

    result = crop_filter.filter(env_response)[0]
    unfiltered_observation = env_response.next_state['observation']
    filtered_observation = result.next_state['observation']

    # validate the shape of the filtered observation
    assert filtered_observation.shape == (5, 20, 30)

    # validate the content of the filtered observation
    assert np.all(filtered_observation == unfiltered_observation[0:5, :, :])


@pytest.mark.unit_test
def test_get_filtered_observation_space():
    crop_low = np.array([0, 5, 10])
    crop_high = np.array([5, 10, 20])
    crop_filter = InputFilter()
    crop_filter.add_observation_filter('observation', 'crop', ObservationCropFilter(crop_low, crop_high))

    observation_space = ObservationSpace(np.array([5, 10, 20]))
    filtered_observation_space = crop_filter.get_filtered_observation_space('observation', observation_space)

    # make sure the new observation space shape is calculated correctly
    assert np.all(filtered_observation_space.shape == np.array([5, 5, 10]))

    # make sure the original observation space is unchanged
    assert np.all(observation_space.shape == np.array([5, 10, 20]))

    # crop_high is bigger than the observation space
    high_error_observation_space = ObservationSpace(np.array([3, 8, 14]))
    with pytest.raises(ValueError):
        crop_filter.get_filtered_observation_space('observation', high_error_observation_space)

    # crop_low is bigger than the observation space
    low_error_observation_space = ObservationSpace(np.array([3, 3, 10]))
    with pytest.raises(ValueError):
        crop_filter.get_filtered_observation_space('observation', low_error_observation_space)

    # crop with -1 on some axes
    crop_low = np.array([0, 0, 0])
    crop_high = np.array([5, -1, -1])
    crop_filter = InputFilter()
    crop_filter.add_observation_filter('observation', 'crop', ObservationCropFilter(crop_low, crop_high))

    observation_space = ObservationSpace(np.array([5, 10, 20]))
    filtered_observation_space = crop_filter.get_filtered_observation_space('observation', observation_space)

    # make sure the new observation space shape is calculated correctly
    assert np.all(filtered_observation_space.shape == np.array([5, 10, 20]))
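As the assertions above suggest, -1 in crop_high extends the crop to the end of that axis, like an open-ended slice. A plain NumPy sketch of that apparent semantics:

    import numpy as np

    observation = np.random.rand(10, 20, 30)
    # crop_low = [0, 0, 0], crop_high = [5, -1, -1] behaves like the slice below
    cropped = observation[0:5, :, :]
    assert cropped.shape == (5, 20, 30)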
@@ -0,0 +1,84 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))

import pytest
import numpy as np

from rl_coach.filters.observation.observation_reduction_by_sub_parts_name_filter import ObservationReductionBySubPartsNameFilter
from rl_coach.spaces import VectorObservationSpace
from rl_coach.core_types import EnvResponse
from rl_coach.filters.filter import InputFilter


@pytest.mark.unit_test
def test_filter():
    # Keep
    observation_space = VectorObservationSpace(3, measurements_names=['a', 'b', 'c'])
    env_response = EnvResponse(next_state={'observation': np.ones([3])}, reward=0, game_over=False)
    reduction_filter = InputFilter()
    reduction_filter.add_observation_filter('observation', 'reduce',
                                            ObservationReductionBySubPartsNameFilter(
                                                ["a"],
                                                ObservationReductionBySubPartsNameFilter.ReductionMethod.Keep
                                            ))

    reduction_filter.get_filtered_observation_space('observation', observation_space)
    result = reduction_filter.filter(env_response)[0]
    unfiltered_observation = env_response.next_state['observation']
    filtered_observation = result.next_state['observation']

    # make sure the original observation is unchanged
    assert unfiltered_observation.shape == (3,)

    # validate the shape of the filtered observation
    assert filtered_observation.shape == (1,)

    # Discard
    reduction_filter = InputFilter()
    reduction_filter.add_observation_filter('observation', 'reduce',
                                            ObservationReductionBySubPartsNameFilter(
                                                ["a"],
                                                ObservationReductionBySubPartsNameFilter.ReductionMethod.Discard
                                            ))
    reduction_filter.get_filtered_observation_space('observation', observation_space)
    result = reduction_filter.filter(env_response)[0]
    unfiltered_observation = env_response.next_state['observation']
    filtered_observation = result.next_state['observation']

    # make sure the original observation is unchanged
    assert unfiltered_observation.shape == (3,)

    # validate the shape of the filtered observation
    assert filtered_observation.shape == (2,)


@pytest.mark.unit_test
def test_get_filtered_observation_space():
    # Keep
    observation_space = VectorObservationSpace(3, measurements_names=['a', 'b', 'c'])
    env_response = EnvResponse(next_state={'observation': np.ones([3])}, reward=0, game_over=False)
    reduction_filter = InputFilter()
    reduction_filter.add_observation_filter('observation', 'reduce',
                                            ObservationReductionBySubPartsNameFilter(
                                                ["a"],
                                                ObservationReductionBySubPartsNameFilter.ReductionMethod.Keep
                                            ))

    filtered_observation_space = reduction_filter.get_filtered_observation_space('observation', observation_space)
    assert np.all(filtered_observation_space.shape == np.array([1]))
    assert filtered_observation_space.measurements_names == ['a']

    # Discard
    observation_space = VectorObservationSpace(3, measurements_names=['a', 'b', 'c'])
    env_response = EnvResponse(next_state={'observation': np.ones([3])}, reward=0, game_over=False)
    reduction_filter = InputFilter()
    reduction_filter.add_observation_filter('observation', 'reduce',
                                            ObservationReductionBySubPartsNameFilter(
                                                ["a"],
                                                ObservationReductionBySubPartsNameFilter.ReductionMethod.Discard
                                            ))

    filtered_observation_space = reduction_filter.get_filtered_observation_space('observation', observation_space)
    assert np.all(filtered_observation_space.shape == np.array([2]))
    assert filtered_observation_space.measurements_names == ['b', 'c']
@@ -0,0 +1,66 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))

import pytest
import numpy as np

from rl_coach.filters.observation.observation_rescale_size_by_factor_filter import ObservationRescaleSizeByFactorFilter, RescaleInterpolationType
from rl_coach.spaces import ObservationSpace
from rl_coach.core_types import EnvResponse
from rl_coach.filters.filter import InputFilter

@pytest.mark.filterwarnings('ignore:Conversion of')
@pytest.mark.unit_test
def test_filter():
    # make an RGB observation smaller
    env_response = EnvResponse(next_state={'observation': np.ones([20, 30, 3])}, reward=0, game_over=False)
    rescale_filter = InputFilter()
    rescale_filter.add_observation_filter('observation', 'rescale',
                                          ObservationRescaleSizeByFactorFilter(0.5, RescaleInterpolationType.BILINEAR))

    result = rescale_filter.filter(env_response)[0]
    unfiltered_observation = env_response.next_state['observation']
    filtered_observation = result.next_state['observation']

    # make sure the original observation is unchanged
    assert unfiltered_observation.shape == (20, 30, 3)

    # validate the shape of the filtered observation
    assert filtered_observation.shape == (10, 15, 3)

    # make a grayscale observation bigger
    env_response = EnvResponse(next_state={'observation': np.ones([20, 30])}, reward=0, game_over=False)
    rescale_filter = InputFilter()
    rescale_filter.add_observation_filter('observation', 'rescale',
                                          ObservationRescaleSizeByFactorFilter(2, RescaleInterpolationType.BILINEAR))
    result = rescale_filter.filter(env_response)[0]
    filtered_observation = result.next_state['observation']

    # validate the shape of the filtered observation
    assert filtered_observation.shape == (40, 60)
    assert np.all(filtered_observation == np.ones([40, 60]))


@pytest.mark.unit_test
def test_get_filtered_observation_space():
    # error on wrong number of channels
    rescale_filter = InputFilter()
    rescale_filter.add_observation_filter('observation', 'rescale',
                                          ObservationRescaleSizeByFactorFilter(0.5, RescaleInterpolationType.BILINEAR))
    observation_space = ObservationSpace(np.array([10, 20, 5]))
    with pytest.raises(ValueError):
        filtered_observation_space = rescale_filter.get_filtered_observation_space('observation', observation_space)

    # error on wrong number of dimensions
    observation_space = ObservationSpace(np.array([10, 20, 10, 3]))
    with pytest.raises(ValueError):
        filtered_observation_space = rescale_filter.get_filtered_observation_space('observation', observation_space)

    # make sure the new observation space shape is calculated correctly
    observation_space = ObservationSpace(np.array([10, 20, 3]))
    filtered_observation_space = rescale_filter.get_filtered_observation_space('observation', observation_space)
    assert np.all(filtered_observation_space.shape == np.array([5, 10, 3]))

    # make sure the original observation space is unchanged
    assert np.all(observation_space.shape == np.array([10, 20, 3]))
@@ -0,0 +1,106 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))

import pytest
import numpy as np

from rl_coach.filters.observation.observation_rescale_to_size_filter import ObservationRescaleToSizeFilter, RescaleInterpolationType
from rl_coach.spaces import ObservationSpace, ImageObservationSpace, PlanarMapsObservationSpace
from rl_coach.core_types import EnvResponse
from rl_coach.filters.filter import InputFilter


@pytest.mark.filterwarnings('ignore:Conversion of')
@pytest.mark.unit_test
def test_filter():
    # make an RGB observation smaller
    transition = EnvResponse(next_state={'observation': np.ones([20, 30, 3])}, reward=0, game_over=False)
    rescale_filter = InputFilter()
    rescale_filter.add_observation_filter('observation', 'rescale',
                                          ObservationRescaleToSizeFilter(ImageObservationSpace(np.array([10, 20, 3]),
                                                                                               high=255),
                                                                         RescaleInterpolationType.BILINEAR))

    result = rescale_filter.filter(transition)[0]
    unfiltered_observation = transition.next_state['observation']
    filtered_observation = result.next_state['observation']

    # make sure the original observation is unchanged
    assert unfiltered_observation.shape == (20, 30, 3)

    # validate the shape of the filtered observation
    assert filtered_observation.shape == (10, 20, 3)
    assert np.all(filtered_observation == np.ones([10, 20, 3]))

    # make a grayscale observation bigger
    transition = EnvResponse(next_state={'observation': np.ones([20, 30])}, reward=0, game_over=False)
    rescale_filter = InputFilter()
    rescale_filter.add_observation_filter('observation', 'rescale',
                                          ObservationRescaleToSizeFilter(ImageObservationSpace(np.array([40, 60]),
                                                                                               high=255),
                                                                         RescaleInterpolationType.BILINEAR))
    result = rescale_filter.filter(transition)[0]
    filtered_observation = result.next_state['observation']

    # validate the shape of the filtered observation
    assert filtered_observation.shape == (40, 60)
    assert np.all(filtered_observation == np.ones([40, 60]))

    # rescale channels -> error
    # with pytest.raises(ValueError):
    #     InputFilter(
    #         observation_filters=OrderedDict([('rescale',
    #                                           ObservationRescaleToSizeFilter(ImageObservationSpace(np.array([10, 20, 1]),
    #                                                                                                high=255),
    #                                                                          RescaleInterpolationType.BILINEAR))]))

    # TODO: validate input to filter
    # different number of axes -> error
    # env_response = EnvResponse(state={'observation': np.ones([20, 30, 3])}, reward=0, game_over=False)
    # rescale_filter = ObservationRescaleToSizeFilter(ObservationSpace(np.array([10, 20])),
    #                                                 RescaleInterpolationType.BILINEAR)
    # with pytest.raises(ValueError):
    #     result = rescale_filter.filter(transition)

    # channels first -> error
    with pytest.raises(ValueError):
        ObservationRescaleToSizeFilter(ImageObservationSpace(np.array([3, 10, 20]), high=255),
                                       RescaleInterpolationType.BILINEAR)


@pytest.mark.unit_test
def test_get_filtered_observation_space():
    # error on wrong number of channels
    with pytest.raises(ValueError):
        observation_filters = InputFilter()
        observation_filters.add_observation_filter('observation', 'rescale',
                                                   ObservationRescaleToSizeFilter(ImageObservationSpace(np.array([5, 10, 5]),
                                                                                                        high=255),
                                                                                  RescaleInterpolationType.BILINEAR))

    # mismatch and wrong number of channels
    rescale_filter = InputFilter()
    rescale_filter.add_observation_filter('observation', 'rescale',
                                          ObservationRescaleToSizeFilter(ImageObservationSpace(np.array([5, 10, 3]),
                                                                                               high=255),
                                                                         RescaleInterpolationType.BILINEAR))

    observation_space = PlanarMapsObservationSpace(np.array([10, 20, 5]), low=0, high=255)
    with pytest.raises(ValueError):
        rescale_filter.get_filtered_observation_space('observation', observation_space)

    # error on wrong number of dimensions
    observation_space = ObservationSpace(np.array([10, 20, 10, 3]), high=255)
    with pytest.raises(ValueError):
        rescale_filter.get_filtered_observation_space('observation', observation_space)

    # make sure the new observation space shape is calculated correctly
    observation_space = ImageObservationSpace(np.array([10, 20, 3]), high=255)
    filtered_observation_space = rescale_filter.get_filtered_observation_space('observation', observation_space)
    assert np.all(filtered_observation_space.shape == np.array([5, 10, 3]))

    # make sure the original observation space is unchanged
    assert np.all(observation_space.shape == np.array([10, 20, 3]))

    # TODO: test that the type of the observation space stays the same
@@ -0,0 +1,47 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))

import pytest
import numpy as np

from rl_coach.filters.observation.observation_rgb_to_y_filter import ObservationRGBToYFilter
from rl_coach.spaces import ObservationSpace
from rl_coach.core_types import EnvResponse

from rl_coach.filters.filter import InputFilter

@pytest.fixture
def rgb_to_y_filter():
    rgb_to_y_filter = InputFilter()
    rgb_to_y_filter.add_observation_filter('observation', 'rgb_to_y', ObservationRGBToYFilter())
    return rgb_to_y_filter


@pytest.mark.unit_test
def test_filter(rgb_to_y_filter):
    # convert an RGB observation to grayscale
    observation = np.random.rand(20, 30, 3)*255.0
    transition = EnvResponse(next_state={'observation': observation}, reward=0, game_over=False)

    result = rgb_to_y_filter.filter(transition)[0]
    unfiltered_observation = transition.next_state['observation']
    filtered_observation = result.next_state['observation']

    # make sure the original observation is unchanged
    assert unfiltered_observation.shape == (20, 30, 3)

    # make sure the filtering is done correctly
    assert filtered_observation.shape == (20, 30)


@pytest.mark.unit_test
def test_get_filtered_observation_space(rgb_to_y_filter):
    # error on observation spaces which are not RGB
    observation_space = ObservationSpace(np.array([1, 2, 4]), 0, 100)
    with pytest.raises(ValueError):
        rgb_to_y_filter.get_filtered_observation_space('observation', observation_space)

    observation_space = ObservationSpace(np.array([1, 2, 3]), 0, 100)
    result = rgb_to_y_filter.get_filtered_observation_space('observation', observation_space)
    assert np.all(result.shape == np.array([1, 2]))
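An RGB-to-Y conversion is conventionally a weighted sum over the channel axis, which is why the filter drops the last dimension. A sketch assuming the standard ITU-R BT.601 luma weights (the filter's exact coefficients may differ):

    import numpy as np

    rgb = np.random.rand(20, 30, 3) * 255.0
    # weighted channel sum: Y = 0.299*R + 0.587*G + 0.114*B
    y = rgb @ np.array([0.299, 0.587, 0.114])
    assert y.shape == (20, 30)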
@@ -0,0 +1,72 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))

import pytest
import numpy as np

from rl_coach.filters.observation.observation_squeeze_filter import ObservationSqueezeFilter
from rl_coach.spaces import ObservationSpace
from rl_coach.core_types import EnvResponse
from rl_coach.filters.filter import InputFilter


@pytest.mark.unit_test
def test_filter():
    # squeeze out singleton axes from the observation
    squeeze_filter = InputFilter()
    squeeze_filter.add_observation_filter('observation', 'squeeze', ObservationSqueezeFilter())
    squeeze_filter_with_axis = InputFilter()
    squeeze_filter_with_axis.add_observation_filter('observation', 'squeeze', ObservationSqueezeFilter(2))

    observation = np.random.rand(20, 30, 1, 3)
    env_response = EnvResponse(next_state={'observation': observation}, reward=0, game_over=False)

    result = squeeze_filter.filter(env_response)[0]
    result_with_axis = squeeze_filter_with_axis.filter(env_response)[0]
    unfiltered_observation_shape = env_response.next_state['observation'].shape
    filtered_observation_shape = result.next_state['observation'].shape
    filtered_observation_with_axis_shape = result_with_axis.next_state['observation'].shape

    # make sure the original observation is unchanged
    assert unfiltered_observation_shape == observation.shape

    # make sure the filtering is done correctly
    assert filtered_observation_shape == (20, 30, 3)
    assert filtered_observation_with_axis_shape == (20, 30, 3)

    observation = np.random.rand(1, 30, 1, 3)
    env_response = EnvResponse(next_state={'observation': observation}, reward=0, game_over=False)

    result = squeeze_filter.filter(env_response)[0]
    assert result.next_state['observation'].shape == (30, 3)


@pytest.mark.unit_test
def test_get_filtered_observation_space():
    # error on an observation space whose shape does not match the filter's squeeze axis configuration
    squeeze_filter = InputFilter()
    squeeze_filter.add_observation_filter('observation', 'squeeze', ObservationSqueezeFilter(axis=3))

    observation_space = ObservationSpace(np.array([20, 1, 30, 3]), 0, 100)
    small_observation_space = ObservationSpace(np.array([20, 1, 30]), 0, 100)
    with pytest.raises(ValueError):
        squeeze_filter.get_filtered_observation_space('observation', observation_space)
        squeeze_filter.get_filtered_observation_space('observation', small_observation_space)

    # verify output observation space is correct
    observation_space = ObservationSpace(np.array([1, 2, 3, 1]), 0, 200)
    result = squeeze_filter.get_filtered_observation_space('observation', observation_space)
    assert np.all(result.shape == np.array([1, 2, 3]))

    squeeze_filter = InputFilter()
    squeeze_filter.add_observation_filter('observation', 'squeeze', ObservationSqueezeFilter())

    result = squeeze_filter.get_filtered_observation_space('observation', observation_space)
    assert np.all(result.shape == np.array([2, 3]))


if __name__ == '__main__':
    test_filter()
    test_get_filtered_observation_space()
@@ -0,0 +1,78 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))

import pytest
import numpy as np

from rl_coach.filters.observation.observation_stacking_filter import ObservationStackingFilter
from rl_coach.spaces import ObservationSpace
from rl_coach.core_types import EnvResponse
from rl_coach.filters.filter import InputFilter


@pytest.fixture
def env_response():
    observation = np.random.rand(20, 30, 1)
    return EnvResponse(next_state={'observation': observation}, reward=0, game_over=False)


@pytest.fixture
def stack_filter():
    stack_filter = InputFilter()
    stack_filter.add_observation_filter('observation', 'stack', ObservationStackingFilter(4, stacking_axis=-1))
    return stack_filter


@pytest.mark.unit_test
def test_filter(stack_filter, env_response):
    # stack observation on empty stack
    result = stack_filter.filter(env_response)[0]
    unfiltered_observation = env_response.next_state['observation']
    filtered_observation = result.next_state['observation']

    # validate that the shape of the unfiltered observation is unchanged
    assert unfiltered_observation.shape == (20, 30, 1)
    assert np.array(filtered_observation).shape == (20, 30, 1, 4)
    assert np.all(np.array(filtered_observation)[:, :, :, -1] == unfiltered_observation)

    # stack observation on non-empty stack
    result = stack_filter.filter(env_response)[0]
    filtered_observation = result.next_state['observation']
    assert np.array(filtered_observation).shape == (20, 30, 1, 4)


@pytest.mark.unit_test
def test_get_filtered_observation_space(stack_filter, env_response):
    observation_space = ObservationSpace(np.array([5, 10, 20]))
    filtered_observation_space = stack_filter.get_filtered_observation_space('observation', observation_space)

    # make sure the new observation space shape is calculated correctly
    assert np.all(filtered_observation_space.shape == np.array([5, 10, 20, 4]))

    # make sure the original observation space is unchanged
    assert np.all(observation_space.shape == np.array([5, 10, 20]))

    # call after stack is already created with non-matching shape -> error
    result = stack_filter.filter(env_response)[0]
    with pytest.raises(ValueError):
        filtered_observation_space = stack_filter.get_filtered_observation_space('observation', observation_space)


@pytest.mark.unit_test
def test_reset(stack_filter, env_response):
    # stack observation on empty stack
    result = stack_filter.filter(env_response)[0]
    unfiltered_observation = env_response.next_state['observation']
    filtered_observation = result.next_state['observation']

    assert np.all(np.array(filtered_observation)[:, :, :, -1] == unfiltered_observation)

    # reset and make sure the outputs are correct
    stack_filter.reset()
    unfiltered_observation = np.random.rand(20, 30, 1)
    new_env_response = EnvResponse(next_state={'observation': unfiltered_observation}, reward=0, game_over=False)
    result = stack_filter.filter(new_env_response)[0]
    filtered_observation = result.next_state['observation']
    assert np.all(np.array(filtered_observation)[:, :, :, 0] == unfiltered_observation)
    assert np.all(np.array(filtered_observation)[:, :, :, -1] == unfiltered_observation)
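The reset test above checks both the first and last stack slots against the new observation, which implies the stack is pre-filled with copies of the first observation after a reset. A hypothetical sketch of that behavior using a fixed-size deque (an assumption about the mechanism, not the filter's code):

    import numpy as np
    from collections import deque

    stack = deque(maxlen=4)
    first = np.random.rand(20, 30, 1)
    # after a reset, every slot holds a copy of the first observation seen
    while len(stack) < 4:
        stack.append(first)
    stacked = np.stack(stack, axis=-1)
    assert stacked.shape == (20, 30, 1, 4)
    assert np.all(stacked[:, :, :, 0] == stacked[:, :, :, -1])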
@@ -0,0 +1,50 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))

import pytest
import numpy as np

from rl_coach.filters.observation.observation_to_uint8_filter import ObservationToUInt8Filter
from rl_coach.spaces import ObservationSpace
from rl_coach.core_types import EnvResponse
from rl_coach.filters.filter import InputFilter


@pytest.mark.unit_test
def test_filter():
    # convert a random RGB observation to uint8
    uint8_filter = InputFilter()
    uint8_filter.add_observation_filter('observation', 'to_uint8', ObservationToUInt8Filter(input_low=0, input_high=255))

    observation = np.random.rand(20, 30, 3)*255.0
    env_response = EnvResponse(next_state={'observation': observation}, reward=0, game_over=False)

    result = uint8_filter.filter(env_response)[0]
    unfiltered_observation = env_response.next_state['observation']
    filtered_observation = result.next_state['observation']

    # make sure the original observation is unchanged
    assert unfiltered_observation.dtype == 'float64'

    # make sure the filtering is done correctly
    assert filtered_observation.dtype == 'uint8'
    assert np.all(filtered_observation == observation.astype('uint8'))


@pytest.mark.unit_test
def test_get_filtered_observation_space():
    # error on an observation space whose values don't match the filter configuration
    uint8_filter = InputFilter()
    uint8_filter.add_observation_filter('observation', 'to_uint8', ObservationToUInt8Filter(input_low=0, input_high=200))

    observation_space = ObservationSpace(np.array([1, 2, 3]), 0, 100)
    with pytest.raises(ValueError):
        uint8_filter.get_filtered_observation_space('observation', observation_space)

    # verify that the output observation space is correct
    observation_space = ObservationSpace(np.array([1, 2, 3]), 0, 200)
    result = uint8_filter.get_filtered_observation_space('observation', observation_space)
    assert np.all(result.high == 255)
    assert np.all(result.low == 0)
    assert np.all(result.shape == observation_space.shape)
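Editor's note: a minimal sketch of the rescale-and-cast these assertions imply (the formula is an assumption; to_uint8 below is a hypothetical stand-in, not the filter's real code). With input_low=0 and input_high=255 the rescale is the identity in principle, so the result reduces to a plain uint8 cast, which is exactly what test_filter asserts.

import numpy as np

def to_uint8(obs, input_low, input_high):
    # map [input_low, input_high] onto [0, 255] and cast (assumed formula)
    return ((obs - input_low) / (input_high - input_low) * 255).astype('uint8')

obs = np.random.rand(20, 30, 3) * 255.0
assert to_uint8(obs, 0, 255).dtype == np.uint8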
0
rl_coach/tests/filters/reward/__init__.py
Normal file
74
rl_coach/tests/filters/reward/test_reward_clipping_filter.py
Normal file
@@ -0,0 +1,74 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))

import pytest
import numpy as np

from rl_coach.filters.reward.reward_clipping_filter import RewardClippingFilter
from rl_coach.spaces import RewardSpace
from rl_coach.core_types import EnvResponse

from collections import OrderedDict
from rl_coach.filters.filter import InputFilter


@pytest.fixture
def clip_filter():
    return InputFilter(reward_filters=OrderedDict([('clip', RewardClippingFilter(2, 10))]))


@pytest.mark.unit_test
def test_filter(clip_filter):
    transition = EnvResponse(next_state={'observation': np.zeros(10)}, reward=100, game_over=False)
    result = clip_filter.filter(transition)[0]
    unfiltered_reward = transition.reward
    filtered_reward = result.reward

    # validate that the reward was clipped correctly
    assert filtered_reward == 10

    # make sure the original reward is unchanged
    assert unfiltered_reward == 100

    # reward in bounds
    transition = EnvResponse(next_state={'observation': np.zeros(10)}, reward=5, game_over=False)
    result = clip_filter.filter(transition)[0]
    assert result.reward == 5

    # reward below bounds
    transition = EnvResponse(next_state={'observation': np.zeros(10)}, reward=-5, game_over=False)
    result = clip_filter.filter(transition)[0]
    assert result.reward == 2


@pytest.mark.unit_test
def test_get_filtered_reward_space(clip_filter):
    # reward is clipped
    reward_space = RewardSpace(1, -100, 100)
    filtered_reward_space = clip_filter.get_filtered_reward_space(reward_space)

    # make sure the new reward space bounds are calculated correctly
    assert filtered_reward_space.shape == 1
    assert filtered_reward_space.low == 2
    assert filtered_reward_space.high == 10

    # reward is unclipped
    reward_space = RewardSpace(1, 5, 7)
    filtered_reward_space = clip_filter.get_filtered_reward_space(reward_space)

    # make sure the reward space bounds are unchanged
    assert filtered_reward_space.shape == 1
    assert filtered_reward_space.low == 5
    assert filtered_reward_space.high == 7

    # infinite reward is clipped
    reward_space = RewardSpace(1, -np.inf, np.inf)
    filtered_reward_space = clip_filter.get_filtered_reward_space(reward_space)

    # make sure the infinite bounds were clipped correctly
    assert filtered_reward_space.shape == 1
    assert filtered_reward_space.low == 2
    assert filtered_reward_space.high == 10
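Editor's note: the expected values in test_filter reduce to elementwise clipping against the (2, 10) bounds, which a one-line numpy check confirms:

import numpy as np
for raw, clipped in [(100, 10), (5, 5), (-5, 2)]:
    assert np.clip(raw, 2, 10) == clipped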
56
rl_coach/tests/filters/reward/test_reward_rescale_filter.py
Normal file
@@ -0,0 +1,56 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))

import pytest
import numpy as np

from rl_coach.filters.reward.reward_rescale_filter import RewardRescaleFilter
from rl_coach.spaces import RewardSpace
from rl_coach.core_types import EnvResponse
from rl_coach.filters.filter import InputFilter
from collections import OrderedDict


@pytest.mark.unit_test
def test_filter():
    rescale_filter = InputFilter(reward_filters=OrderedDict([('rescale', RewardRescaleFilter(1/10.))]))
    env_response = EnvResponse(next_state={'observation': np.zeros(10)}, reward=100, game_over=False)
    result = rescale_filter.filter(env_response)[0]
    unfiltered_reward = env_response.reward
    filtered_reward = result.reward

    # validate that the reward was rescaled correctly
    assert filtered_reward == 10

    # make sure the original reward is unchanged
    assert unfiltered_reward == 100

    # negative reward
    env_response = EnvResponse(next_state={'observation': np.zeros(10)}, reward=-50, game_over=False)
    result = rescale_filter.filter(env_response)[0]
    assert result.reward == -5


@pytest.mark.unit_test
def test_get_filtered_reward_space():
    rescale_filter = InputFilter(reward_filters=OrderedDict([('rescale', RewardRescaleFilter(1/10.))]))

    # reward is rescaled
    reward_space = RewardSpace(1, -100, 100)
    filtered_reward_space = rescale_filter.get_filtered_reward_space(reward_space)

    # make sure the new reward space bounds are calculated correctly
    assert filtered_reward_space.shape == 1
    assert filtered_reward_space.low == -10
    assert filtered_reward_space.high == 10

    # unbounded rewards stay unbounded
    reward_space = RewardSpace(1, -np.inf, np.inf)
    filtered_reward_space = rescale_filter.get_filtered_reward_space(reward_space)

    assert filtered_reward_space.shape == 1
    assert filtered_reward_space.low == -np.inf
    assert filtered_reward_space.high == np.inf
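Editor's note: whether RewardRescaleFilter multiplies or divides by its argument is internal to coach, but the asserted numbers are consistent with multiplying the raw reward by the factor 1/10:

assert 100 * (1/10.) == 10
assert -50 * (1/10.) == -5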
70
rl_coach/tests/filters/test_filters_stacking.py
Normal file
@@ -0,0 +1,70 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))

import pytest

from rl_coach.filters.observation.observation_rescale_to_size_filter import ObservationRescaleToSizeFilter, RescaleInterpolationType
from rl_coach.filters.observation.observation_crop_filter import ObservationCropFilter
from rl_coach.filters.reward.reward_clipping_filter import RewardClippingFilter
from rl_coach.filters.observation.observation_stacking_filter import ObservationStackingFilter
from rl_coach.filters.filter import InputFilter
from rl_coach.spaces import ImageObservationSpace
import numpy as np
from rl_coach.core_types import EnvResponse
from collections import OrderedDict


@pytest.mark.filterwarnings('ignore:Conversion of')
@pytest.mark.unit_test
def test_filter_stacking():
    # test that filter stacking works fine by taking as input a transition with:
    # - an observation of shape 210x160
    # - a reward of 100
    # and filtering it by:
    # - rescaling the observation to 110x84
    # - cropping the observation to 84x84
    # - clipping the reward to 1
    # - stacking 4 observations to get 84x84x4

    env_response = EnvResponse({'observation': np.ones([210, 160])}, reward=100, game_over=False)

    filter1 = ObservationRescaleToSizeFilter(
        output_observation_space=ImageObservationSpace(np.array([110, 84]), high=255),
        rescaling_interpolation_type=RescaleInterpolationType.BILINEAR
    )

    filter2 = ObservationCropFilter(
        crop_low=np.array([16, 0]),
        crop_high=np.array([100, 84])
    )

    filter3 = RewardClippingFilter(
        clipping_low=-1,
        clipping_high=1
    )

    output_filter = ObservationStackingFilter(
        stack_size=4,
        stacking_axis=-1
    )

    input_filter = InputFilter(
        observation_filters={
            "observation": OrderedDict([
                ("filter1", filter1),
                ("filter2", filter2),
                ("output_filter", output_filter)
            ])},
        reward_filters=OrderedDict([
            ("filter3", filter3)
        ])
    )

    result = input_filter.filter(env_response)[0]
    observation = np.array(result.next_state['observation'])
    assert observation.shape == (84, 84, 4)
    assert np.all(observation == np.ones([84, 84, 4]))
    assert result.reward == 1
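Editor's note: the shape arithmetic of the pipeline above, sketched with plain numpy (the bilinear rescale is replaced by a stub array since only shapes matter here):

import numpy as np
rescaled = np.ones([110, 84])      # stand-in for bilinearly rescaling the 210x160 frame
cropped = rescaled[16:100, 0:84]   # crop rows [16, 100) and cols [0, 84) -> (84, 84)
stacked = np.stack([cropped] * 4, axis=-1)
assert stacked.shape == (84, 84, 4)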
355
rl_coach/tests/golden_tests.py
Normal file
@@ -0,0 +1,355 @@
#
# Copyright (c) 2017 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import argparse
import glob
import os
import shutil
import signal
import subprocess
import sys
from importlib import import_module
from os import path
sys.path.append('.')
import numpy as np
import pandas as pd
import time

from rl_coach.logger import screen


def read_csv_paths(test_path, filename_pattern, read_csv_tries=50):
    csv_paths = []
    tries_counter = 0
    while not csv_paths:
        csv_paths = glob.glob(path.join(test_path, '*', filename_pattern))
        if tries_counter > read_csv_tries:
            break
        tries_counter += 1
        time.sleep(1)
    return csv_paths


def clean_df(df):
    if 'Wall-Clock Time' in df.keys():
        df.drop(['Wall-Clock Time'], 1, inplace=True)
    return df


def print_progress(averaged_rewards, last_num_episodes, preset_validation_params, start_time, args):
    percentage = int((100 * last_num_episodes) / preset_validation_params.max_episodes_to_achieve_reward)
    sys.stdout.write("\rReward: ({}/{})".format(round(averaged_rewards[-1], 1),
                                                preset_validation_params.min_reward_threshold))
    sys.stdout.write(' Time (sec): ({}/{})'.format(round(time.time() - start_time, 2), args.time_limit))
    sys.stdout.write(' Episode: ({}/{})'.format(last_num_episodes,
                                                preset_validation_params.max_episodes_to_achieve_reward))
    sys.stdout.write(
        ' {}%|{}{}|  '.format(percentage, '#' * int(percentage / 10), ' ' * (10 - int(percentage / 10))))
    sys.stdout.flush()


def perform_reward_based_tests(args, preset_validation_params, preset_name):
    win_size = 10

    test_name = '__test_reward'
    test_path = os.path.join('./experiments', test_name)
    if path.exists(test_path):
        shutil.rmtree(test_path)

    # run the experiment in a separate thread
    screen.log_title("Running test {}".format(preset_name))
    log_file_name = 'test_log_{preset_name}.txt'.format(preset_name=preset_name)
    cmd = (
        'python3 rl_coach/coach.py '
        '-p {preset_name} '
        '-e {test_name} '
        '-n {num_workers} '
        '--seed 0 '
        '-c '
        '{level} '
        '&> {log_file_name} '
    ).format(
        preset_name=preset_name,
        test_name=test_name,
        num_workers=preset_validation_params.num_workers,
        log_file_name=log_file_name,
        level='-lvl ' + preset_validation_params.reward_test_level if preset_validation_params.reward_test_level else ''
    )

    p = subprocess.Popen(cmd, shell=True, executable="/bin/bash", preexec_fn=os.setsid)

    start_time = time.time()

    reward_str = 'Evaluation Reward'
    if preset_validation_params.num_workers > 1:
        filename_pattern = 'worker_0*.csv'
    else:
        filename_pattern = '*.csv'

    test_passed = False

    # get the csv with the results
    csv_paths = read_csv_paths(test_path, filename_pattern)

    if csv_paths:
        csv_path = csv_paths[0]

        # verify results
        csv = None
        time.sleep(1)
        averaged_rewards = [0]

        last_num_episodes = 0

        if not args.no_progress_bar:
            print_progress(averaged_rewards, last_num_episodes, preset_validation_params, start_time, args)

        while csv is None or (csv['Episode #'].values[-1] < preset_validation_params.max_episodes_to_achieve_reward
                              and time.time() - start_time < args.time_limit):
            try:
                csv = pd.read_csv(csv_path)
            except Exception:
                # sometimes the csv is being written at the same time we are
                # trying to read it. no problem -> try again
                continue

            if reward_str not in csv.keys():
                continue

            rewards = csv[reward_str].values
            rewards = rewards[~np.isnan(rewards)]

            if len(rewards) >= 1:
                averaged_rewards = np.convolve(rewards, np.ones(min(len(rewards), win_size)) / win_size, mode='valid')
            else:
                time.sleep(1)
                continue

            if not args.no_progress_bar:
                print_progress(averaged_rewards, last_num_episodes, preset_validation_params, start_time, args)

            if csv['Episode #'].shape[0] - last_num_episodes <= 0:
                continue

            last_num_episodes = csv['Episode #'].values[-1]

            # check if the averaged reward is high enough
            if np.any(averaged_rewards >= preset_validation_params.min_reward_threshold):
                test_passed = True
                break
            time.sleep(1)

    # kill the test and print the result
    os.killpg(os.getpgid(p.pid), signal.SIGTERM)
    screen.log('')
    if test_passed:
        screen.success("Passed successfully")
    else:
        if time.time() - start_time > args.time_limit:
            screen.error("Failed due to exceeding time limit", crash=False)
            if args.verbose:
                screen.error("command exitcode: {}".format(p.returncode), crash=False)
                screen.error(open(log_file_name).read(), crash=False)
        elif csv_paths:
            screen.error("Failed due to insufficient reward", crash=False)
            if args.verbose:
                screen.error("command exitcode: {}".format(p.returncode), crash=False)
                screen.error(open(log_file_name).read(), crash=False)
            screen.error("preset_validation_params.max_episodes_to_achieve_reward: {}".format(
                preset_validation_params.max_episodes_to_achieve_reward), crash=False)
            screen.error("preset_validation_params.min_reward_threshold: {}".format(
                preset_validation_params.min_reward_threshold), crash=False)
            screen.error("averaged_rewards: {}".format(averaged_rewards), crash=False)
            screen.error("episode number: {}".format(csv['Episode #'].values[-1]), crash=False)
        else:
            screen.error("csv file never found", crash=False)
            if args.verbose:
                screen.error("command exitcode: {}".format(p.returncode), crash=False)
                screen.error(open(log_file_name).read(), crash=False)

    shutil.rmtree(test_path)
    os.remove(log_file_name)
    return test_passed


def perform_trace_based_tests(args, preset_name, num_env_steps, level=None):
    test_name = '__test_trace'
    test_path = os.path.join('./experiments', test_name)
    if path.exists(test_path):
        shutil.rmtree(test_path)

    # run the experiment in a separate thread
    screen.log_title("Running test {}{}".format(preset_name, ' - ' + level if level else ''))
    log_file_name = 'test_log_{preset_name}.txt'.format(preset_name=preset_name)

    cmd = (
        'python3 rl_coach/coach.py '
        '-p {preset_name} '
        '-e {test_name} '
        '--seed 42 '
        '-c '
        '--no_summary '
        '-cp {custom_param} '
        '{level} '
        '&> {log_file_name} '
    ).format(
        preset_name=preset_name,
        test_name=test_name,
        log_file_name=log_file_name,
        level='-lvl ' + level if level else '',
        custom_param='\"improve_steps=EnvironmentSteps({n});'
                     'steps_between_evaluation_periods=EnvironmentSteps({n});'
                     'evaluation_steps=EnvironmentSteps(1);'
                     'heatup_steps=EnvironmentSteps(1024)\"'.format(n=num_env_steps)
    )

    p = subprocess.Popen(cmd, shell=True, executable="/bin/bash", preexec_fn=os.setsid)
    p.wait()

    filename_pattern = '*.csv'

    # get the csv with the results
    csv_paths = read_csv_paths(test_path, filename_pattern)

    test_passed = False
    if not csv_paths:
        screen.error("csv file never found", crash=False)
        if args.verbose:
            screen.error("command exitcode: {}".format(p.returncode), crash=False)
            screen.error(open(log_file_name).read(), crash=False)
    else:
        trace_path = os.path.join('./rl_coach', 'traces', preset_name + '_' + level if level else preset_name, '')
        if not os.path.exists(trace_path):
            screen.log('No trace found, creating new trace in: {}'.format(trace_path))
            os.makedirs(os.path.dirname(trace_path))
            df = pd.read_csv(csv_paths[0])
            df = clean_df(df)
            df.to_csv(os.path.join(trace_path, 'trace.csv'), index=False)
            screen.success("Successfully created new trace.")
            test_passed = True
        else:
            test_df = pd.read_csv(csv_paths[0])
            test_df = clean_df(test_df)
            new_trace_csv_path = os.path.join(trace_path, 'trace_new.csv')
            test_df.to_csv(new_trace_csv_path, index=False)
            test_df = pd.read_csv(new_trace_csv_path)
            trace_csv_path = glob.glob(path.join(trace_path, 'trace.csv'))
            trace_csv_path = trace_csv_path[0]
            trace_df = pd.read_csv(trace_csv_path)
            test_passed = test_df.equals(trace_df)
            if test_passed:
                screen.success("Passed successfully.")
                os.remove(new_trace_csv_path)
            else:
                screen.error("Trace test failed.", crash=False)
                if args.overwrite:
                    os.remove(trace_csv_path)
                    os.rename(new_trace_csv_path, trace_csv_path)
                    screen.error("Overwriting old trace.", crash=False)
                else:
                    screen.error("bcompare {} {}".format(trace_csv_path, new_trace_csv_path), crash=False)

    shutil.rmtree(test_path)
    os.remove(log_file_name)
    return test_passed


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-t', '--trace',
                        help="(flag) perform trace based testing",
                        action='store_true')
    parser.add_argument('-p', '--preset',
                        help="(string) Name of a preset to run (as configured in presets.py)",
                        default=None,
                        type=str)
    parser.add_argument('-ip', '--ignore_presets',
                        help="(string) Name of preset(s) to ignore (comma separated, as configured in presets.py)",
                        default=None,
                        type=str)
    parser.add_argument('-v', '--verbose',
                        help="(flag) display verbose logs in the event of an error",
                        action='store_true')
    parser.add_argument('--stop_after_first_failure',
                        help="(flag) stop executing tests after the first error",
                        action='store_true')
    parser.add_argument('-tl', '--time_limit',
                        help="time limit for each test in minutes",
                        default=40,  # the time limit is high since DDPG is very slow - its tests are long
                        type=int)
    parser.add_argument('-np', '--no_progress_bar',
                        help="(flag) Don't print the progress bar (makes jenkins logs more readable)",
                        action='store_true')
    parser.add_argument('-ow', '--overwrite',
                        help="(flag) overwrite old traces with new ones in trace testing mode",
                        action='store_true')

    args = parser.parse_args()
    if args.preset is not None:
        presets_lists = [args.preset]
    else:
        # presets_lists = list_all_classes_in_module(presets)
        presets_lists = [f[:-3] for f in os.listdir(os.path.join('rl_coach', 'presets')) if
                         f[-3:] == '.py' and not f == '__init__.py']

    fail_count = 0
    test_count = 0

    args.time_limit = 60 * args.time_limit

    if args.ignore_presets is not None:
        presets_to_ignore = args.ignore_presets.split(',')
    else:
        presets_to_ignore = []
    for idx, preset_name in enumerate(sorted(presets_lists)):
        if args.stop_after_first_failure and fail_count > 0:
            break
        if preset_name not in presets_to_ignore:
            try:
                preset = import_module('rl_coach.presets.{}'.format(preset_name))
            except Exception:
                if args.verbose:
                    screen.error("Failed to load preset <{}>".format(preset_name), crash=False)
                continue

            preset_validation_params = preset.graph_manager.preset_validation_params
            if not args.trace and not preset_validation_params.test:
                continue

            if args.trace:
                num_env_steps = preset_validation_params.trace_max_env_steps
                if preset_validation_params.trace_test_levels:
                    for level in preset_validation_params.trace_test_levels:
                        test_count += 1
                        test_passed = perform_trace_based_tests(args, preset_name, num_env_steps, level)
                else:
                    test_count += 1
                    test_passed = perform_trace_based_tests(args, preset_name, num_env_steps)
            else:
                test_passed = perform_reward_based_tests(args, preset_validation_params, preset_name)
            if not test_passed:
                fail_count += 1

    screen.separator()
    if fail_count == 0:
        screen.success(" Summary: " + str(test_count) + "/" + str(test_count) + " tests passed successfully")
    else:
        screen.error(" Summary: " + str(test_count - fail_count) + "/" + str(test_count) + " tests passed successfully")


if __name__ == '__main__':
    main()
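Editor's usage note (a sketch, assuming the script is run from the repository root, as its relative './experiments' and 'rl_coach/presets' paths imply): a reward-based check of a single preset would look like

    python3 rl_coach/tests/golden_tests.py -p CartPole_DQN -v

and the trace-based suite, overwriting stale traces, like

    python3 rl_coach/tests/golden_tests.py -t -ow

Both flag sets come directly from the argparse definitions in main() above.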
0
rl_coach/tests/graph_managers/__init__.py
Normal file
52
rl_coach/tests/graph_managers/test_basic_rl_graph_manager.py
Normal file
@@ -0,0 +1,52 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
import tensorflow as tf
from rl_coach.base_parameters import TaskParameters, DistributedTaskParameters
from rl_coach.utils import get_open_port
from multiprocessing import Process
from tensorflow import logging
import pytest
logging.set_verbosity(logging.INFO)


@pytest.mark.unit_test
def test_basic_rl_graph_manager_with_pong_a3c():
    tf.reset_default_graph()
    from rl_coach.presets.Atari_A3C import graph_manager
    assert graph_manager
    graph_manager.env_params.level = "PongDeterministic-v4"
    graph_manager.create_graph(task_parameters=TaskParameters(framework_type="tensorflow",
                                                              experiment_path="./experiments/test"))
    # graph_manager.improve()


@pytest.mark.unit_test
def test_basic_rl_graph_manager_with_pong_nec():
    tf.reset_default_graph()
    from rl_coach.presets.Atari_NEC import graph_manager
    assert graph_manager
    graph_manager.env_params.level = "PongDeterministic-v4"
    graph_manager.create_graph(task_parameters=TaskParameters(framework_type="tensorflow",
                                                              experiment_path="./experiments/test"))
    # graph_manager.improve()


@pytest.mark.unit_test
def test_basic_rl_graph_manager_with_cartpole_dqn():
    tf.reset_default_graph()
    from rl_coach.presets.CartPole_DQN import graph_manager
    assert graph_manager
    graph_manager.create_graph(task_parameters=TaskParameters(framework_type="tensorflow",
                                                              experiment_path="./experiments/test"))
    # graph_manager.improve()


if __name__ == '__main__':
    pass
    # test_basic_rl_graph_manager_with_pong_a3c()
    # test_basic_rl_graph_manager_with_ant_a3c()
    # test_basic_rl_graph_manager_with_pong_nec()
    # test_basic_rl_graph_manager_with_cartpole_dqn()
    # test_basic_rl_graph_manager_multithreaded_with_pong_a3c()
    # test_basic_rl_graph_manager_with_doom_basic_dqn()
0
rl_coach/tests/memories/__init__.py
Normal file
@@ -0,0 +1,91 @@
# nasty hack to deal with issue #46
import os
import sys

sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))

import pytest
import numpy as np

import time
from rl_coach.memories.non_episodic.differentiable_neural_dictionary import QDND
import tensorflow as tf

NUM_ACTIONS = 3
NUM_DND_ENTRIES_TO_ADD = 10000
EMBEDDING_SIZE = 512
NUM_SAMPLED_EMBEDDINGS = 500
NUM_NEIGHBORS = 10
DND_SIZE = 500000


@pytest.fixture()
def dnd():
    return QDND(
        DND_SIZE,
        EMBEDDING_SIZE,
        NUM_ACTIONS,
        0.1,
        key_error_threshold=0,
        learning_rate=0.0001,
        num_neighbors=NUM_NEIGHBORS
    )


@pytest.mark.unit_test
def test_random_sample_from_dnd(dnd: QDND):
    # store a batch of random entries and query the nearest neighbors
    embeddings = [np.random.rand(EMBEDDING_SIZE) for j in range(NUM_DND_ENTRIES_TO_ADD)]
    actions = [np.random.randint(NUM_ACTIONS) for j in range(NUM_DND_ENTRIES_TO_ADD)]
    values = [np.random.rand() for j in range(NUM_DND_ENTRIES_TO_ADD)]
    dnd.add(embeddings, actions, values)
    dnd_embeddings, dnd_values, dnd_indices = dnd.query(embeddings[0:10], 0, NUM_NEIGHBORS)

    # calculate the normalization factor
    sampled_embeddings = dnd.sample_embeddings(NUM_SAMPLED_EMBEDDINGS)
    coefficient = 1/(NUM_SAMPLED_EMBEDDINGS * (NUM_SAMPLED_EMBEDDINGS - 1.0))
    tf_current_embedding = tf.placeholder(tf.float32, shape=(EMBEDDING_SIZE), name='current_embedding')
    tf_other_embeddings = tf.placeholder(tf.float32, shape=(NUM_SAMPLED_EMBEDDINGS - 1, EMBEDDING_SIZE), name='other_embeddings')

    # 1st method: loop over each embedding and sum its squared distances to all the others
    sub = tf_current_embedding - tf_other_embeddings
    square = tf.square(sub)
    result = tf.reduce_sum(square)

    ###########################
    # more efficient method
    ###########################
    sampled_embeddings_expanded = tf.placeholder(
        tf.float32, shape=(1, NUM_SAMPLED_EMBEDDINGS, EMBEDDING_SIZE), name='sampled_embeddings_expanded')
    sampled_embeddings_tiled = tf.tile(sampled_embeddings_expanded, (sampled_embeddings_expanded.shape[1], 1, 1))
    sampled_embeddings_transposed = tf.transpose(sampled_embeddings_tiled, (1, 0, 2))
    sub2 = sampled_embeddings_tiled - sampled_embeddings_transposed
    square2 = tf.square(sub2)
    result2 = tf.reduce_sum(square2)

    config = tf.ConfigProto()
    config.allow_soft_placement = True  # allow placing ops on cpu if they are not fit for gpu
    config.gpu_options.allow_growth = True  # allow the gpu memory allocated for the worker to grow if needed

    sess = tf.Session(config=config)

    sum1 = 0
    start = time.time()
    for i in range(NUM_SAMPLED_EMBEDDINGS):
        curr_sampled_embedding = sampled_embeddings[i]
        other_embeddings = np.delete(sampled_embeddings, i, 0)
        sum1 += sess.run(result, feed_dict={tf_current_embedding: curr_sampled_embedding, tf_other_embeddings: other_embeddings})
    print("1st method: {} sec".format(time.time()-start))

    start = time.time()
    sum2 = sess.run(result2, feed_dict={sampled_embeddings_expanded: np.expand_dims(sampled_embeddings, 0)})
    print("2nd method: {} sec".format(time.time()-start))

    # validate that both methods give (numerically) the same result
    print("sum1 = {}, sum2 = {}".format(sum1, sum2))
    assert np.isclose(sum1, sum2, rtol=1e-3)

    norm_factor = -0.5/(coefficient * sum2)


if __name__ == '__main__':
    test_random_sample_from_dnd(dnd())
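Editor's note: both methods in the test above compute the same quantity, the sum of squared Euclidean distances over all ordered pairs of sampled embeddings (the diagonal i == j pairs contribute zero in the vectorized form). A compact numpy check of that equivalence:

import numpy as np
e = np.random.rand(6, 4)
loop = sum(np.sum((e[i] - np.delete(e, i, 0))**2) for i in range(len(e)))
vec = np.sum((e[:, None, :] - e[None, :, :])**2)
assert np.isclose(loop, vec)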
97
rl_coach/tests/memories/test_hindsight_experience_replay.py
Normal file
@@ -0,0 +1,97 @@
# nasty hack to deal with issue #46
import os
import sys

from rl_coach.memories.episodic.episodic_hindsight_experience_replay import EpisodicHindsightExperienceReplayParameters
from rl_coach.spaces import GoalsSpace, ReachingGoal

sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))

import pytest
import numpy as np

from rl_coach.core_types import Transition, Episode
from rl_coach.memories.memory import MemoryGranularity
from rl_coach.memories.episodic.episodic_hindsight_experience_replay import EpisodicHindsightExperienceReplay, \
    HindsightGoalSelectionMethod


# TODO: change from defining a new class to creating an instance from the parameters
class Parameters(EpisodicHindsightExperienceReplayParameters):
    def __init__(self):
        super().__init__()
        self.max_size = (MemoryGranularity.Transitions, 100)
        self.hindsight_transitions_per_regular_transition = 4
        self.hindsight_goal_selection_method = HindsightGoalSelectionMethod.Future
        self.goals_space = GoalsSpace(goal_name='observation',
                                      reward_type=ReachingGoal(distance_from_goal_threshold=0.1),
                                      distance_metric=GoalsSpace.DistanceMetric.Euclidean)


@pytest.fixture
def episode():
    episode = []
    for i in range(10):
        episode.append(Transition(
            state={'observation': np.array([i]), 'desired_goal': np.array([i]), 'achieved_goal': np.array([i])},
            action=i,
        ))
    return episode


@pytest.fixture
def her():
    params = Parameters().__dict__

    # keep only the parameters that the replay buffer constructor accepts
    import inspect
    args = set(inspect.getfullargspec(EpisodicHindsightExperienceReplay.__init__).args).intersection(params)
    params = {k: params[k] for k in args}

    return EpisodicHindsightExperienceReplay(**params)


@pytest.mark.unit_test
def test_sample_goal(her, episode):
    assert her._sample_goal(episode, 8) == 9


@pytest.mark.unit_test
def test_sample_goal_range(her, episode):
    # sampling from index 0 should eventually cover all the future goals 1..8
    unseen_goals = set(range(1, 9))
    for _ in range(500):
        unseen_goals -= set([int(her._sample_goal(episode, 0))])
        if not unseen_goals:
            return

    assert unseen_goals == set()


@pytest.mark.unit_test
def test_update_episode(her):
    episode = Episode()
    for i in range(10):
        episode.insert(Transition(
            state={'observation': np.array([i]), 'desired_goal': np.array([i+1]), 'achieved_goal': np.array([i+1])},
            action=i,
            game_over=i == 9,
            reward=0 if i == 9 else -1,
        ))

    her.store_episode(episode)

    # 10 original transitions, and 9 transitions * 4 hindsight episodes
    assert her.num_transitions() == 10 + (4 * 9)

    # make sure that the goal state was never sampled from the past
    for transition in her.transitions:
        assert transition.state['desired_goal'] > transition.state['observation']
        assert transition.next_state['desired_goal'] >= transition.next_state['observation']

        if transition.reward == 0:
            assert transition.game_over
        else:
            assert not transition.game_over


if __name__ == '__main__':
    test_update_episode(her())
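Editor's note: a sketch of the 'future' goal-selection strategy the fixtures configure. For a transition at index t, the hindsight goal is taken from a transition strictly later in the episode, which is why test_sample_goal(episode, 8) can only return 9. The helper below is hypothetical, not coach's _sample_goal:

import numpy as np

def sample_future_goal(achieved_goals, t):
    # pick an achieved goal uniformly from the indices after t
    idx = np.random.randint(t + 1, len(achieved_goals))
    return achieved_goals[idx]

goals = list(range(10))
assert sample_future_goal(goals, 8) == 9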
@@ -0,0 +1,93 @@
# nasty hack to deal with issue #46
import os
import sys

sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))

import pytest

from rl_coach.memories.non_episodic.prioritized_experience_replay import SegmentTree


@pytest.mark.unit_test
def test_sum_tree():
    # test power of 2 sum tree
    sum_tree = SegmentTree(size=4, operation=SegmentTree.Operation.SUM)
    sum_tree.add(10, "10")
    assert sum_tree.total_value() == 10
    sum_tree.add(20, "20")
    assert sum_tree.total_value() == 30
    sum_tree.add(5, "5")
    assert sum_tree.total_value() == 35
    sum_tree.add(7.5, "7.5")
    assert sum_tree.total_value() == 42.5
    # the tree holds 4 leaves, so further adds wrap around and overwrite the oldest entries
    sum_tree.add(2.5, "2.5")
    assert sum_tree.total_value() == 35
    sum_tree.add(5, "5")
    assert sum_tree.total_value() == 20

    assert sum_tree.get(2) == (0, 2.5, '2.5')
    assert sum_tree.get(3) == (1, 5.0, '5')
    assert sum_tree.get(10) == (2, 5.0, '5')
    assert sum_tree.get(13) == (3, 7.5, '7.5')

    sum_tree.update(2, 10)
    assert sum_tree.__str__() == "[25.]\n[ 7.5 17.5]\n[ 2.5 5. 10. 7.5]\n"

    # a non power of 2 sum tree is not allowed
    with pytest.raises(ValueError):
        sum_tree = SegmentTree(size=5, operation=SegmentTree.Operation.SUM)


@pytest.mark.unit_test
def test_min_tree():
    # for a MIN tree, total_value() returns the minimum over the stored leaves
    min_tree = SegmentTree(size=4, operation=SegmentTree.Operation.MIN)
    min_tree.add(10, "10")
    assert min_tree.total_value() == 10
    min_tree.add(20, "20")
    assert min_tree.total_value() == 10
    min_tree.add(5, "5")
    assert min_tree.total_value() == 5
    min_tree.add(7.5, "7.5")
    assert min_tree.total_value() == 5
    min_tree.add(2, "2")
    assert min_tree.total_value() == 2
    min_tree.add(3, "3")
    min_tree.add(3, "3")
    min_tree.add(3, "3")
    min_tree.add(5, "5")
    assert min_tree.total_value() == 3


@pytest.mark.unit_test
def test_max_tree():
    # for a MAX tree, total_value() returns the maximum over the stored leaves
    max_tree = SegmentTree(size=4, operation=SegmentTree.Operation.MAX)
    max_tree.add(10, "10")
    assert max_tree.total_value() == 10
    max_tree.add(20, "20")
    assert max_tree.total_value() == 20
    max_tree.add(5, "5")
    assert max_tree.total_value() == 20
    max_tree.add(7.5, "7.5")
    assert max_tree.total_value() == 20
    max_tree.add(2, "2")
    assert max_tree.total_value() == 20
    max_tree.add(3, "3")
    max_tree.add(3, "3")
    max_tree.add(3, "3")
    max_tree.add(5, "5")
    assert max_tree.total_value() == 5

    # update
    max_tree.update(1, 10)
    assert max_tree.total_value() == 10
    assert max_tree.__str__() == "[10.]\n[10. 3.]\n[ 5. 10. 3. 3.]\n"
    max_tree.update(1, 2)
    assert max_tree.total_value() == 5
    assert max_tree.__str__() == "[5.]\n[5. 3.]\n[5. 2. 3. 3.]\n"


if __name__ == "__main__":
    test_sum_tree()
    test_min_tree()
    test_max_tree()
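Editor's note: the expected totals in test_sum_tree can be reproduced with a plain ring buffer of priorities. This sketches only the wrap-around bookkeeping, not the O(log n) tree structure itself:

leaves = [0.0] * 4
writes = [10, 20, 5, 7.5, 2.5, 5]
totals = []
for i, p in enumerate(writes):
    leaves[i % 4] = p   # the 5th and 6th adds overwrite the oldest leaves
    totals.append(sum(leaves))
assert totals == [10, 30, 35, 42.5, 35, 20]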
81
rl_coach/tests/memories/test_single_episode_buffer.py
Normal file
@@ -0,0 +1,81 @@
# nasty hack to deal with issue #46
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))

import pytest
import numpy as np

from rl_coach.core_types import Transition
from rl_coach.memories.episodic.single_episode_buffer import SingleEpisodeBuffer


@pytest.fixture()
def buffer():
    return SingleEpisodeBuffer()


@pytest.mark.unit_test
def test_store_and_get(buffer: SingleEpisodeBuffer):
    # store single non terminal transition
    transition = Transition(state={"observation": np.array([1, 2, 3])}, action=1, reward=1, game_over=False)
    buffer.store(transition)
    assert buffer.length() == 1
    assert buffer.num_complete_episodes() == 0
    assert buffer.num_transitions_in_complete_episodes() == 0
    assert buffer.num_transitions() == 1

    # get the single stored transition
    episode = buffer.get(0)
    assert episode.length() == 1
    assert episode.get_first_transition() is transition  # check addresses are the same
    assert episode.get_last_transition() is transition  # check addresses are the same

    # store single terminal transition
    transition = Transition(state={"observation": np.array([1, 2, 3])}, action=1, reward=1, game_over=True)
    buffer.store(transition)
    assert buffer.length() == 1
    assert buffer.num_complete_episodes() == 1
    assert buffer.num_transitions_in_complete_episodes() == 2

    # check that the episode is valid (the total return assumes the default 0.99 discount)
    episode = buffer.get(0)
    assert episode.length() == 2
    assert episode.get_transition(0).total_return == 1 + 0.99
    assert episode.get_transition(1).total_return == 1
    assert buffer.mean_reward() == 1

    # only one episode in the replay buffer
    episode = buffer.get(1)
    assert episode is None

    # adding transitions after the first episode was closed
    transition = Transition(state={"observation": np.array([1, 2, 3])}, action=1, reward=0, game_over=False)
    buffer.store(transition)
    assert buffer.length() == 1
    assert buffer.num_complete_episodes() == 0
    assert buffer.num_transitions_in_complete_episodes() == 0

    # still only one episode
    assert buffer.get(1) is None
    assert buffer.mean_reward() == 0


@pytest.mark.unit_test
def test_clean(buffer: SingleEpisodeBuffer):
    # add several transitions and then clean the buffer
    transition = Transition(state={"observation": np.array([1, 2, 3])}, action=1, reward=1, game_over=False)
    for i in range(10):
        buffer.store(transition)
    assert buffer.num_transitions() == 10
    buffer.clean()
    assert buffer.num_transitions() == 0

    # add more transitions after the clean and make sure they were really cleaned
    transition = Transition(state={"observation": np.array([1, 2, 3])}, action=1, reward=1, game_over=True)
    buffer.store(transition)
    assert buffer.num_transitions() == 1
    assert buffer.num_transitions_in_complete_episodes() == 1
    assert buffer.num_complete_episodes() == 1
    for i in range(10):
        assert buffer.sample(1)[0] is transition
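Editor's note: the total_return assertions above follow the standard discounted return, sum over k of gamma^k * r_{t+k}, assuming a discount factor of 0.99 (the factor itself is implied by the asserted numbers, not stated in the test):

rewards = [1, 1]
gamma = 0.99
total_return_0 = sum(gamma**k * r for k, r in enumerate(rewards))
assert total_return_0 == 1 + 0.99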
0
rl_coach/tests/presets/__init__.py
Normal file
56
rl_coach/tests/presets/test_presets.py
Normal file
@@ -0,0 +1,56 @@
# nasty hack to deal with issue #46
import os
import sys

sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))

import pytest
import time
import shutil
from subprocess import Popen, DEVNULL
from rl_coach.logger import screen


@pytest.mark.integration_test
def test_all_presets_are_running():
    test_failed = False
    all_presets = sorted([f.split('.')[0] for f in os.listdir('rl_coach/presets') if f.endswith('.py') and f != '__init__.py'])
    for preset in all_presets:
        print("Testing preset {}".format(preset))

        # TODO: this is a temporary workaround for presets which define more than a single available level.
        # we should probably do this in a more robust way
        level = ""
        if "Atari" in preset:
            level = "breakout"
        elif "Mujoco" in preset:
            level = "inverted_pendulum"
        elif "ControlSuite" in preset:
            level = "pendulum:swingup"
        params = ["python3", "rl_coach/coach.py", "-p", preset, "-ns", "-e", ".test"]
        if level != "":
            params += ["-lvl", level]

        p = Popen(params, stdout=DEVNULL)

        # wait 10 seconds to cover the overhead of initialization etc.
        time.sleep(10)
        return_value = p.poll()

        # if the process is still alive after 10 seconds, the preset started up successfully
        if return_value is None:
            screen.success("{} passed successfully".format(preset))
        else:
            test_failed = True
            screen.error("{} failed".format(preset), crash=False)

        p.kill()
        if os.path.exists("experiments/.test"):
            shutil.rmtree("experiments/.test")

    assert not test_failed


if __name__ == "__main__":
    test_all_presets_are_running()
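Editor's note: the pass/fail criterion above leans on Popen.poll(), which returns None while the child process is still running and its exit code once it has terminated. A self-contained sketch of that semantics (the sleeping child command is an arbitrary stand-in):

from subprocess import Popen
import time
p = Popen(["python3", "-c", "import time; time.sleep(30)"])
time.sleep(1)
assert p.poll() is None  # still running, so the "preset" counts as started
p.kill()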
5
rl_coach/tests/pytest.ini
Normal file
@@ -0,0 +1,5 @@
# content of pytest.ini
[pytest]
markers =
    unit_test: short test that checks that a module is acting correctly
    integration_test: long test that checks that the complete framework is running correctly
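Editor's note: the two markers registered above split the suite into fast and slow tests; pytest's standard -m selector then runs one class at a time. A minimal sketch (the path argument is an assumption based on the repository layout):

import pytest
# run only the fast unit tests; pytest.main mirrors the CLI, so this is
# equivalent to `pytest -m unit_test rl_coach/tests`
pytest.main(['-m', 'unit_test', 'rl_coach/tests'])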
106
rl_coach/tests/test_schedules.py
Normal file
@@ -0,0 +1,106 @@
import os
import sys

from rl_coach.core_types import EnvironmentSteps

sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))

import pytest

from rl_coach.schedules import LinearSchedule, ConstantSchedule, ExponentialSchedule, PieceWiseSchedule
import numpy as np


@pytest.mark.unit_test
def test_constant_schedule():
    schedule = ConstantSchedule(0.3)

    # make sure the values in the constant schedule don't change over time
    for i in range(1000):
        assert schedule.initial_value == 0.3
        assert schedule.current_value == 0.3
        schedule.step()


@pytest.mark.unit_test
def test_linear_schedule():
    # increasing schedule
    schedule = LinearSchedule(1, 3, 10)

    # the schedule is defined by the number of steps to get from 1 to 3, so there are 10 steps.
    # the linspace is defined by the number of bins between 1 and 3, so there are 11 bins.
    target_values = np.linspace(1, 3, 11)
    for i in range(10):
        # we round to 4 digits because there is a very small floating point division difference (1e-10)
        assert round(schedule.current_value, 4) == round(target_values[i], 4)
        schedule.step()

    # make sure the value does not change after 10 steps
    for i in range(10):
        assert schedule.current_value == 3

    # decreasing schedule
    schedule = LinearSchedule(3, 1, 10)

    target_values = np.linspace(3, 1, 11)
    for i in range(10):
        assert round(schedule.current_value, 4) == round(target_values[i], 4)
        schedule.step()

    # make sure the value does not change after 10 steps
    for i in range(10):
        assert schedule.current_value == 1

    # constant schedule
    schedule = LinearSchedule(3, 3, 10)

    for i in range(10):
        assert round(schedule.current_value, 4) == 3
        schedule.step()


@pytest.mark.unit_test
def test_exponential_schedule():
    # decreasing schedule
    schedule = ExponentialSchedule(10, 3, 0.99)

    current_power = 1
    for i in range(100):
        assert round(schedule.current_value, 6) == round(10*current_power, 6)
        current_power *= 0.99
        schedule.step()

    # after enough steps the schedule saturates at its final value
    for i in range(100):
        schedule.step()
    assert schedule.current_value == 3


@pytest.mark.unit_test
def test_piece_wise_schedule():
    # a mixed schedule: 5 linear steps, then 10 constant steps, then 10 exponential steps
    schedule = PieceWiseSchedule(
        [(LinearSchedule(1, 3, 10), EnvironmentSteps(5)),
         (ConstantSchedule(4), EnvironmentSteps(10)),
         (ExponentialSchedule(3, 1, 0.99), EnvironmentSteps(10))
         ]
    )

    # the first 5 steps follow the first half of the linear schedule, then 10 steps of the constant 4
    target_values = np.append(np.linspace(1, 2, 6), np.ones(11)*4)
    for i in range(16):
        assert round(schedule.current_value, 4) == round(target_values[i], 4)
        schedule.step()

    # the last 10 steps follow the exponential schedule
    current_power = 1
    for i in range(10):
        assert round(schedule.current_value, 4) == round(3*current_power, 4)
        current_power *= 0.99
        schedule.step()


if __name__ == "__main__":
    test_constant_schedule()
    test_linear_schedule()
    test_exponential_schedule()
    test_piece_wise_schedule()
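Editor's note: the exponential schedule asserted above follows value_n = initial * decay^n, saturating at the final value. A sketch under that assumed formula, matching the test's (10, 3, 0.99) parameters:

initial, final, decay = 10, 3, 0.99
values = [max(initial * decay**n, final) for n in range(300)]
assert round(values[100], 6) == round(10 * 0.99**100, 6)  # still decaying at step 100
assert values[250] == 3                                   # saturated at the final value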
198
rl_coach/tests/test_spaces.py
Normal file
@@ -0,0 +1,198 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))

import pytest
from rl_coach.spaces import DiscreteActionSpace, BoxActionSpace, MultiSelectActionSpace, ObservationSpace, AgentSelection, VectorObservationSpace, AttentionActionSpace
import numpy as np


@pytest.mark.unit_test
def test_discrete():
    action_space = DiscreteActionSpace(3, ["zero", "one", "two"])
    assert action_space.shape == 1
    for i in range(100):
        assert 3 > action_space.sample() >= 0
    action_info = action_space.sample_with_info()
    assert action_info.action_probability == 1. / 3
    assert action_space.high == 2
    assert action_space.low == 0

    # list descriptions
    assert action_space.get_description(1) == "one"

    # dict descriptions
    action_space = DiscreteActionSpace(3, {1: "one", 2: "two", 0: "zero"})
    assert action_space.get_description(0) == "zero"

    # no descriptions
    action_space = DiscreteActionSpace(3)
    assert action_space.get_description(0) == "0"

    # asking for the description of an invalid action raises an error
    with pytest.raises(ValueError):
        assert action_space.get_description(3) == "0"


@pytest.mark.unit_test
def test_box():
    # simple action space
    action_space = BoxActionSpace(4, -5, 5, ["a", "b", "c", "d"])
    for i in range(100):
        sample = action_space.sample()
        assert np.all(-5 <= sample) and np.all(sample <= 5)
        assert sample.shape == (4,)
        assert sample.dtype == float

    # test clipping
    clipped_action = action_space.clip_action_to_space(np.array([-10, 10, 2, 5]))
    assert np.all(clipped_action == np.array([-5, 5, 2, 5]))

    # more complex high and low definition
    action_space = BoxActionSpace(4, np.array([-5, -1, -0.5, 0]), np.array([1, 2, 4, 5]), ["a", "b", "c", "d"])
    for i in range(100):
        sample = action_space.sample()
        assert np.all(np.array([-5, -1, -0.5, 0]) <= sample) and np.all(sample <= np.array([1, 2, 4, 5]))
        assert sample.shape == (4,)
        assert sample.dtype == float

    # test clipping
    clipped_action = action_space.clip_action_to_space(np.array([-10, 10, 2, 5]))
    assert np.all(clipped_action == np.array([-5, 2, 2, 5]))

    # mixed high and low definition
    action_space = BoxActionSpace(4, np.array([-5, -1, -0.5, 0]), 5, ["a", "b", "c", "d"])
    for i in range(100):
        sample = action_space.sample()
        assert np.all(np.array([-5, -1, -0.5, 0]) <= sample) and np.all(sample <= 5)
        assert sample.shape == (4,)
        assert sample.dtype == float

    # test clipping
    clipped_action = action_space.clip_action_to_space(np.array([-10, 10, 2, 5]))
    assert np.all(clipped_action == np.array([-5, 5, 2, 5]))

    # invalid bounds
    with pytest.raises(ValueError):
        action_space = BoxActionSpace(4, np.array([-5, -1, -0.5, 0]), -1, ["a", "b", "c", "d"])

    # TODO: test descriptions


@pytest.mark.unit_test
def test_multiselect():
    action_space = MultiSelectActionSpace(4, 2, ["a", "b", "c", "d"])
    for i in range(100):
        action = action_space.sample()
        assert action.shape == (4,)
        assert np.sum(action) <= 2

    # check that descriptions of multiple actions are working
    description = action_space.get_description(np.array([1, 0, 1, 0]))
    assert description == "a + c"

    description = action_space.get_description(np.array([0, 0, 0, 0]))
    assert description == "no-op"


@pytest.mark.unit_test
def test_attention():
    low = np.array([-1, -2, -3, -4])
    high = np.array([1, 2, 3, 4])
    action_space = AttentionActionSpace(4, low=low, high=high)
    for i in range(100):
        action = action_space.sample()
        assert len(action) == 2
        assert action[0].shape == (4,)
        assert action[1].shape == (4,)
        assert np.all(action[0] <= action[1])
        assert np.all(action[0] >= low)
        assert np.all(action[1] < high)


@pytest.mark.unit_test
def test_goal():
    # TODO: test goal action space
    pass


@pytest.mark.unit_test
def test_agent_selection():
    action_space = AgentSelection(10)

    assert action_space.shape == 1
    assert action_space.high == 9
    assert action_space.low == 0
    with pytest.raises(ValueError):
        assert action_space.get_description(10)
    assert action_space.get_description(0) == "0"


@pytest.mark.unit_test
def test_observation_space():
    observation_space = ObservationSpace(np.array([1, 10]), -10, 10)

    # testing that val_matches_space_definition works
    assert observation_space.val_matches_space_definition(np.ones([1, 10]))
    assert not observation_space.val_matches_space_definition(np.ones([2, 10]))
    assert not observation_space.val_matches_space_definition(np.ones([1, 10]) * 100)
    assert not observation_space.val_matches_space_definition(np.ones([1, 1, 10]))

    # is_point_in_space_shape
    assert observation_space.is_point_in_space_shape(np.array([0, 9]))
    assert observation_space.is_point_in_space_shape(np.array([0, 0]))
    assert not observation_space.is_point_in_space_shape(np.array([1, 8]))
    assert not observation_space.is_point_in_space_shape(np.array([0, 10]))
    assert not observation_space.is_point_in_space_shape(np.array([-1, 6]))


@pytest.mark.unit_test
def test_image_observation_space():
    # TODO: test image observation space
    pass


@pytest.mark.unit_test
def test_measurements_observation_space():
    # empty measurements space
    measurements_space = VectorObservationSpace(0)

    # vector space
    measurements_space = VectorObservationSpace(3, measurements_names=['a', 'b', 'c'])


@pytest.mark.unit_test
def test_reward_space():
    # TODO: test reward space
    pass


# def test_discrete_to_linspace_action_space_map():
#     box = BoxActionSpace(2, np.array([0, 0]), np.array([10, 10]))
#     linspace = BoxDiscretization(box, [5, 3])
#     assert np.all(linspace.actions == np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]))
#     assert np.all(linspace.target_actions ==
#                   np.array([[0.0, 0.0], [0.0, 5.0], [0.0, 10.0],
#                             [2.5, 0.0], [2.5, 5.0], [2.5, 10.0],
#                             [5.0, 0.0], [5.0, 5.0], [5.0, 10.0],
#                             [7.5, 0.0], [7.5, 5.0], [7.5, 10.0],
#                             [10.0, 0.0], [10.0, 5.0], [10.0, 10.0]]))
#
#
# def test_discrete_to_attention_action_space_map():
#     attention = AttentionActionSpace(2, np.array([0, 0]), np.array([10, 10]))
#     linspace = AttentionDiscretization(attention, 2)
#     assert np.all(linspace.actions == np.array([0, 1, 2, 3]))
#     assert np.all(linspace.target_actions ==
#                   np.array(
#                       [[[0., 0.], [5., 5.]],
#                        [[0., 5.], [5., 10.]],
#                        [[5., 0.], [10., 5.]],
#                        [[5., 5.], [10., 10.]]])
#                   )


if __name__ == "__main__":
    test_observation_space()
    # the discretization tests above are commented out, so calling them here would raise a NameError
    # test_discrete_to_linspace_action_space_map()
    # test_discrete_to_attention_action_space_map()
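Editor's note: the clip_action_to_space expectations in test_box reduce to elementwise np.clip against the per-dimension bounds, which a one-line check confirms:

import numpy as np
assert np.all(np.clip(np.array([-10, 10, 2, 5]), np.array([-5, -1, -0.5, 0]), np.array([1, 2, 4, 5]))
              == np.array([-5, 2, 2, 5]))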