import os import sys sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))) import pytest from rl_coach.spaces import DiscreteActionSpace, BoxActionSpace from rl_coach.exploration_policies.additive_noise import AdditiveNoise from rl_coach.schedules import LinearSchedule import numpy as np @pytest.mark.unit_test def test_init(): # discrete control action_space = DiscreteActionSpace(3) noise_schedule = LinearSchedule(1.0, 1.0, 1000) # additive noise requires a bounded range for the actions action_space = BoxActionSpace(np.array([10])) with pytest.raises(ValueError): policy = AdditiveNoise(action_space, noise_schedule, 0) @pytest.mark.unit_test def test_get_action(): # make sure noise is in range action_space = BoxActionSpace(np.array([10]), -1, 1) noise_schedule = LinearSchedule(1.0, 1.0, 1000) policy = AdditiveNoise(action_space, noise_schedule, 0) # the action range is 2, so there is a ~0.1% chance that the noise will be larger than 3*std=3*2=6 for i in range(1000): action = policy.get_action(np.zeros([10])) assert np.all(action < 10) # make sure there is no clipping of the action since it should be the environment that clips actions assert np.all(action != 1.0) assert np.all(action != -1.0) # make sure that each action element has a different value assert np.all(action[0] != action[1:])