mirror of
https://github.com/gryf/coach.git
synced 2026-03-12 12:35:49 +01:00
pre-release 0.10.0
This commit is contained in:
0
rl_coach/tests/memories/__init__.py
Normal file
0
rl_coach/tests/memories/__init__.py
Normal file
@@ -0,0 +1,91 @@
|
||||
# nasty hack to deal with issue #46
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
|
||||
|
||||
import pytest
|
||||
import numpy as np
|
||||
|
||||
import time
|
||||
from rl_coach.memories.non_episodic.differentiable_neural_dictionary import QDND
|
||||
import tensorflow as tf
|
||||
|
||||
# Number of actions: third positional argument to QDND and the exclusive
# upper bound of the randomly drawn action ids in the test below.
NUM_ACTIONS = 3
|
||||
# How many random (embedding, action, value) entries the test adds to the DND.
NUM_DND_ENTRIES_TO_ADD = 10000
|
||||
# Length of every random embedding vector; second positional argument to QDND.
EMBEDDING_SIZE = 512
|
||||
# Number of embeddings requested from dnd.sample_embeddings() in the test.
NUM_SAMPLED_EMBEDDINGS = 500
|
||||
# Passed to QDND as num_neighbors and to dnd.query() as its last argument.
NUM_NEIGHBORS = 10
|
||||
# First positional argument to QDND.
DND_SIZE = 500000
|
||||
|
||||
@pytest.fixture()
def dnd():
    """Build the QDND instance used by the test in this module.

    The positional arguments are forwarded in the same order as before:
    DND_SIZE, EMBEDDING_SIZE, NUM_ACTIONS and the literal 0.1.
    """
    positional_args = (DND_SIZE, EMBEDDING_SIZE, NUM_ACTIONS, 0.1)
    keyword_args = {
        'key_error_threshold': 0,
        'learning_rate': 0.0001,
        'num_neighbors': NUM_NEIGHBORS,
    }
    return QDND(*positional_args, **keyword_args)
|
||||
|
||||
|
||||
@pytest.mark.unit_test
def test_random_sample_from_dnd(dnd: QDND):
    """Compare two ways of summing squared differences between sampled DND embeddings.

    The DND is filled with random entries, NUM_SAMPLED_EMBEDDINGS embeddings are
    sampled from it, and the sum of squared differences over all ordered pairs is
    computed twice: once with one session run per embedding, and once with a
    single tiled/transposed tensor expression. Both sums must agree.

    :param dnd: the QDND instance provided by the ``dnd`` fixture
    """
    # fill the DND with random entries
    embeddings = [np.random.rand(EMBEDDING_SIZE) for _ in range(NUM_DND_ENTRIES_TO_ADD)]
    actions = [np.random.randint(NUM_ACTIONS) for _ in range(NUM_DND_ENTRIES_TO_ADD)]
    values = [np.random.rand() for _ in range(NUM_DND_ENTRIES_TO_ADD)]
    dnd.add(embeddings, actions, values)
    # the query is only exercised here; its results are not needed below
    dnd.query(embeddings[0:10], 0, NUM_NEIGHBORS)

    # calculate_normalization_factor
    sampled_embeddings = dnd.sample_embeddings(NUM_SAMPLED_EMBEDDINGS)
    coefficient = 1/(NUM_SAMPLED_EMBEDDINGS * (NUM_SAMPLED_EMBEDDINGS - 1.0))

    ###########################
    # straightforward method: one embedding against all the others
    ###########################
    tf_current_embedding = tf.placeholder(tf.float32, shape=(EMBEDDING_SIZE,), name='current_embedding')
    tf_other_embeddings = tf.placeholder(
        tf.float32, shape=(NUM_SAMPLED_EMBEDDINGS - 1, EMBEDDING_SIZE), name='other_embeddings')

    sub = tf_current_embedding - tf_other_embeddings
    square = tf.square(sub)
    result = tf.reduce_sum(square)

    ###########################
    # more efficient method
    ###########################
    sampled_embeddings_expanded = tf.placeholder(
        tf.float32, shape=(1, NUM_SAMPLED_EMBEDDINGS, EMBEDDING_SIZE), name='sampled_embeddings_expanded')
    sampled_embeddings_tiled = tf.tile(sampled_embeddings_expanded, (sampled_embeddings_expanded.shape[1], 1, 1))
    sampled_embeddings_transposed = tf.transpose(sampled_embeddings_tiled, (1, 0, 2))
    # entry [a, b] is embedding b minus embedding a; the a == b entries contribute zero to the sum
    sub2 = sampled_embeddings_tiled - sampled_embeddings_transposed
    square2 = tf.square(sub2)
    result2 = tf.reduce_sum(square2)

    config = tf.ConfigProto()
    config.allow_soft_placement = True  # allow placing ops on cpu if they are not fit for gpu
    config.gpu_options.allow_growth = True  # allow the gpu memory allocated for the worker to grow if needed

    # the context manager releases the session's resources even if a run fails
    with tf.Session(config=config) as sess:
        sum1 = 0
        start = time.time()
        for i in range(NUM_SAMPLED_EMBEDDINGS):
            curr_sampled_embedding = sampled_embeddings[i]
            other_embeddings = np.delete(sampled_embeddings, i, 0)
            sum1 += sess.run(result, feed_dict={tf_current_embedding: curr_sampled_embedding,
                                                tf_other_embeddings: other_embeddings})
        print("1st method: {} sec".format(time.time()-start))

        start = time.time()
        sum2 = sess.run(result2, feed_dict={sampled_embeddings_expanded: np.expand_dims(sampled_embeddings, 0)})
        print("2nd method: {} sec".format(time.time()-start))

    # validate that results are equal (loose tolerance: both sums are accumulated in float32)
    print("sum1 = {}, sum2 = {}".format(sum1, sum2))
    assert np.isclose(sum1, sum2, rtol=1e-3)

    # computed only to exercise the normalization formula; the value is not checked
    norm_factor = -0.5/(coefficient * sum2)
|
||||
|
||||
if __name__ == '__main__':
    # A pytest fixture cannot be invoked as a plain function (pytest >= 4 raises
    # an error for dnd()), so let pytest collect this file and inject the fixture.
    sys.exit(pytest.main([__file__]))
|
||||
|
||||
97
rl_coach/tests/memories/test_hindsight_experience_replay.py
Normal file
97
rl_coach/tests/memories/test_hindsight_experience_replay.py
Normal file
@@ -0,0 +1,97 @@
|
||||
# nasty hack to deal with issue #46
# NOTE: the sys.path tweak has to run before the first rl_coach import,
# otherwise it has no effect on the imports it is meant to help.
import os
import sys

sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))

import pytest
import numpy as np

from rl_coach.memories.episodic.episodic_hindsight_experience_replay import EpisodicHindsightExperienceReplay, \
    EpisodicHindsightExperienceReplayParameters, HindsightGoalSelectionMethod
from rl_coach.spaces import GoalsSpace, ReachingGoal
from rl_coach.core_types import Transition, Episode
from rl_coach.memories.memory import MemoryGranularity
|
||||
|
||||
|
||||
#TODO: change from defining a new class to creating an instance from the parameters
|
||||
class Parameters(EpisodicHindsightExperienceReplayParameters):
    """Hindsight experience replay parameters used by the tests in this module."""

    def __init__(self):
        super().__init__()
        # memory capacity, expressed in transitions
        self.max_size = (MemoryGranularity.Transitions, 100)
        self.hindsight_transitions_per_regular_transition = 4
        self.hindsight_goal_selection_method = HindsightGoalSelectionMethod.Future

        # goals are taken from the 'observation' entry of the state
        reaching_reward = ReachingGoal(distance_from_goal_threshold=0.1)
        self.goals_space = GoalsSpace(
            goal_name='observation',
            reward_type=reaching_reward,
            distance_metric=GoalsSpace.DistanceMetric.Euclidean,
        )
|
||||
|
||||
|
||||
@pytest.fixture
def episode():
    """Return a list of ten transitions whose observation and goals all equal the step index."""
    def make_state(step):
        return {
            'observation': np.array([step]),
            'desired_goal': np.array([step]),
            'achieved_goal': np.array([step]),
        }

    return [Transition(state=make_state(step), action=step) for step in range(10)]
|
||||
|
||||
|
||||
@pytest.fixture
def her():
    """Create an EpisodicHindsightExperienceReplay from the test Parameters.

    Only the Parameters attributes whose names match an argument of the
    memory's __init__ are forwarded as keyword arguments.
    """
    import inspect

    available = Parameters().__dict__
    accepted_names = inspect.getfullargspec(EpisodicHindsightExperienceReplay.__init__).args
    init_kwargs = {name: available[name] for name in accepted_names if name in available}

    return EpisodicHindsightExperienceReplay(**init_kwargs)
|
||||
|
||||
|
||||
@pytest.mark.unit_test
def test_sample_goal(her, episode):
    """Sampling a goal for transition index 8 of the ten-step episode must yield 9."""
    sampled_goal = her._sample_goal(episode, 8)
    assert sampled_goal == 9
|
||||
|
||||
|
||||
@pytest.mark.unit_test
def test_sample_goal_range(her, episode):
    """Every goal from 1 to 8 must be sampled for transition 0 within 500 draws."""
    remaining_goals = set(range(1, 9))
    for _ in range(500):
        remaining_goals.discard(int(her._sample_goal(episode, 0)))
        if not remaining_goals:
            # all goals were seen, no need to keep sampling
            break

    assert remaining_goals == set()
|
||||
|
||||
|
||||
@pytest.mark.unit_test
def test_update_episode(her):
    """Store a ten-step episode and check the transitions the memory holds afterwards."""
    last_step = 9
    episode = Episode()
    for step in range(last_step + 1):
        is_last = step == last_step
        episode.insert(Transition(
            state={
                'observation': np.array([step]),
                'desired_goal': np.array([step + 1]),
                'achieved_goal': np.array([step + 1]),
            },
            action=step,
            game_over=is_last,
            reward=0 if is_last else -1,
        ))

    her.store_episode(episode)

    # 10 original transitions, and 9 transitions * 4 hindsight episodes
    assert her.num_transitions() == 10 + (4 * 9)

    # make sure that the goal state was never sampled from the past
    for stored in her.transitions:
        assert stored.state['desired_goal'] > stored.state['observation']
        assert stored.next_state['desired_goal'] >= stored.next_state['observation']

        # a zero reward must coincide with a terminal transition, and vice versa
        if stored.reward != 0:
            assert not stored.game_over
        else:
            assert stored.game_over
|
||||
|
||||
if __name__ == '__main__':
    # Previously this ran unconditionally at import time and called the `her`
    # fixture as a plain function, which pytest >= 4 rejects. Run the module
    # through pytest instead so fixtures are resolved properly.
    sys.exit(pytest.main([__file__]))
|
||||
@@ -0,0 +1,93 @@
|
||||
# nasty hack to deal with issue #46
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
|
||||
|
||||
import pytest
|
||||
|
||||
from rl_coach.memories.non_episodic.prioritized_experience_replay import SegmentTree
|
||||
|
||||
|
||||
@pytest.mark.unit_test
def test_sum_tree():
    """Exercise a SUM SegmentTree of size 4: adds, lookups, an update and size validation."""
    # test power of 2 sum tree
    sum_tree = SegmentTree(size=4, operation=SegmentTree.Operation.SUM)

    # (value, payload, expected total right after the add); the last two adds
    # happen once four entries were already added, and the expected totals
    # drop accordingly
    additions = [
        (10, "10", 10),
        (20, "20", 30),
        (5, "5", 35),
        (7.5, "7.5", 42.5),
        (2.5, "2.5", 35),
        (5, "5", 20),
    ]
    for value, payload, expected_total in additions:
        sum_tree.add(value, payload)
        assert sum_tree.total_value() == expected_total

    # (query, expected result of get(query))
    lookups = [
        (2, (0, 2.5, '2.5')),
        (3, (1, 5.0, '5')),
        (10, (2, 5.0, '5')),
        (13, (3, 7.5, '7.5')),
    ]
    for query, expected in lookups:
        assert sum_tree.get(query) == expected

    sum_tree.update(2, 10)
    # NOTE(review): runs of spaces in this expected string may have been collapsed
    # when the file was extracted - confirm against the tree's real string output
    assert str(sum_tree) == "[25.]\n[ 7.5 17.5]\n[ 2.5 5. 10. 7.5]\n"

    # test non power of 2 sum tree
    with pytest.raises(ValueError):
        SegmentTree(size=5, operation=SegmentTree.Operation.SUM)
|
||||
|
||||
|
||||
@pytest.mark.unit_test
def test_min_tree():
    """Check the total value of a MIN SegmentTree of size 4 as entries are added."""
    min_tree = SegmentTree(size=4, operation=SegmentTree.Operation.MIN)

    # (value, payload, expected total right after the add)
    checked_additions = [
        (10, "10", 10),
        (20, "20", 10),
        (5, "5", 5),
        (7.5, "7.5", 5),
        (2, "2", 2),
    ]
    for value, payload, expected_total in checked_additions:
        min_tree.add(value, payload)
        assert min_tree.total_value() == expected_total

    # four more adds, checked only once at the end
    for value, payload in [(3, "3"), (3, "3"), (3, "3"), (5, "5")]:
        min_tree.add(value, payload)
    assert min_tree.total_value() == 3
|
||||
|
||||
|
||||
@pytest.mark.unit_test
def test_max_tree():
    """Check the total value of a MAX SegmentTree of size 4 through adds and updates."""
    max_tree = SegmentTree(size=4, operation=SegmentTree.Operation.MAX)

    # (value, payload, expected total right after the add)
    checked_additions = [
        (10, "10", 10),
        (20, "20", 20),
        (5, "5", 20),
        (7.5, "7.5", 20),
        (2, "2", 20),
    ]
    for value, payload, expected_total in checked_additions:
        max_tree.add(value, payload)
        assert max_tree.total_value() == expected_total

    # four more adds, checked only once at the end
    for value, payload in [(3, "3"), (3, "3"), (3, "3"), (5, "5")]:
        max_tree.add(value, payload)
    assert max_tree.total_value() == 5

    # update
    # NOTE(review): runs of spaces in the expected strings below may have been
    # collapsed when the file was extracted - confirm against the real output
    updates = [
        (10, 10, "[10.]\n[10. 3.]\n[ 5. 10. 3. 3.]\n"),
        (2, 5, "[5.]\n[5. 3.]\n[5. 2. 3. 3.]\n"),
    ]
    for new_value, expected_total, expected_repr in updates:
        max_tree.update(1, new_value)
        assert max_tree.total_value() == expected_total
        assert str(max_tree) == expected_repr
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # run the segment tree tests directly, in definition order
    for run_test in (test_sum_tree, test_min_tree, test_max_tree):
        run_test()
|
||||
81
rl_coach/tests/memories/test_single_episode_buffer.py
Normal file
81
rl_coach/tests/memories/test_single_episode_buffer.py
Normal file
@@ -0,0 +1,81 @@
|
||||
# nasty hack to deal with issue #46
|
||||
import os
|
||||
import sys
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
|
||||
|
||||
import pytest
|
||||
import numpy as np
|
||||
|
||||
from rl_coach.core_types import Transition
|
||||
from rl_coach.memories.episodic.single_episode_buffer import SingleEpisodeBuffer
|
||||
|
||||
|
||||
@pytest.fixture()
def buffer():
    """Provide a newly constructed SingleEpisodeBuffer."""
    new_buffer = SingleEpisodeBuffer()
    return new_buffer
|
||||
|
||||
|
||||
@pytest.mark.unit_test
def test_store_and_get(buffer: SingleEpisodeBuffer):
    """Walk the buffer through an open episode, its completion, and the start of a new one."""
    def make_transition(reward, game_over):
        # every transition in this test shares the same observation and action
        return Transition(state={"observation": np.array([1, 2, 3])}, action=1, reward=reward, game_over=game_over)

    # store single non terminal transition
    first_transition = make_transition(reward=1, game_over=False)
    buffer.store(first_transition)
    assert buffer.length() == 1
    assert buffer.num_complete_episodes() == 0
    assert buffer.num_transitions_in_complete_episodes() == 0
    assert buffer.num_transitions() == 1

    # get the single stored transition
    open_episode = buffer.get(0)
    assert open_episode.length() == 1
    assert open_episode.get_first_transition() is first_transition  # check addresses are the same
    assert open_episode.get_last_transition() is first_transition  # check addresses are the same

    # store single terminal transition
    terminal_transition = make_transition(reward=1, game_over=True)
    buffer.store(terminal_transition)
    assert buffer.length() == 1
    assert buffer.num_complete_episodes() == 1
    assert buffer.num_transitions_in_complete_episodes() == 2

    # check that the episode is valid
    closed_episode = buffer.get(0)
    assert closed_episode.length() == 2
    assert closed_episode.get_transition(0).total_return == 1 + 0.99
    assert closed_episode.get_transition(1).total_return == 1
    assert buffer.mean_reward() == 1

    # only one episode in the replay buffer
    missing_episode = buffer.get(1)
    assert missing_episode is None

    # adding transitions after the first episode was closed
    next_transition = make_transition(reward=0, game_over=False)
    buffer.store(next_transition)
    assert buffer.length() == 1
    assert buffer.num_complete_episodes() == 0
    assert buffer.num_transitions_in_complete_episodes() == 0

    # still only one episode
    assert buffer.get(1) is None
    assert buffer.mean_reward() == 0
|
||||
|
||||
|
||||
@pytest.mark.unit_test
def test_clean(buffer: SingleEpisodeBuffer):
    """Check that clean() empties the buffer and that it is usable again afterwards."""
    observation_state = {"observation": np.array([1, 2, 3])}

    # add several transitions and then clean the buffer
    repeated_transition = Transition(state=observation_state, action=1, reward=1, game_over=False)
    for _ in range(10):
        buffer.store(repeated_transition)
    assert buffer.num_transitions() == 10
    buffer.clean()
    assert buffer.num_transitions() == 0

    # add more transitions after the clean and make sure they were really cleaned
    terminal_transition = Transition(
        state={"observation": np.array([1, 2, 3])}, action=1, reward=1, game_over=True)
    buffer.store(terminal_transition)
    assert buffer.num_transitions() == 1
    assert buffer.num_transitions_in_complete_episodes() == 1
    assert buffer.num_complete_episodes() == 1
    # the only stored transition must be the one every sample returns
    for _ in range(10):
        assert buffer.sample(1)[0] is terminal_transition
|
||||
Reference in New Issue
Block a user