1
0
mirror of https://github.com/gryf/coach.git synced 2026-03-12 12:35:49 +01:00

pre-release 0.10.0

This commit is contained in:
Gal Novik
2018-08-13 17:11:34 +03:00
parent d44c329bb8
commit 19ca5c24b1
485 changed files with 33292 additions and 16770 deletions

View File

View File

@@ -0,0 +1,91 @@
# nasty hack to deal with issue #46
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
import pytest
import numpy as np
import time
from rl_coach.memories.non_episodic.differentiable_neural_dictionary import QDND
import tensorflow as tf
# Test configuration constants for the DND (differentiable neural dictionary) test.
NUM_ACTIONS = 3
NUM_DND_ENTRIES_TO_ADD = 10000
EMBEDDING_SIZE = 512
NUM_SAMPLED_EMBEDDINGS = 500
NUM_NEIGHBORS = 10
DND_SIZE = 500000
@pytest.fixture()
def dnd():
    """Build a fresh QDND memory configured by the module-level constants."""
    return QDND(DND_SIZE,
                EMBEDDING_SIZE,
                NUM_ACTIONS,
                0.1,
                key_error_threshold=0,
                learning_rate=0.0001,
                num_neighbors=NUM_NEIGHBORS)
@pytest.mark.unit_test
def test_random_sample_from_dnd(dnd: QDND):
    """Populate the DND, then check that two ways of computing the sum of
    pairwise squared distances between sampled embeddings agree.

    Method 1 loops over each sampled embedding and accumulates its squared
    distance to all the others; method 2 computes the same sum with a single
    tile/transpose tensor expression (the i == j diagonal contributes zero,
    so both sums cover exactly the same pairs).
    """
    # fill the DND with random entries
    embeddings = [np.random.rand(EMBEDDING_SIZE) for _ in range(NUM_DND_ENTRIES_TO_ADD)]
    actions = [np.random.randint(NUM_ACTIONS) for _ in range(NUM_DND_ENTRIES_TO_ADD)]
    values = [np.random.rand() for _ in range(NUM_DND_ENTRIES_TO_ADD)]
    dnd.add(embeddings, actions, values)

    # querying stored keys should not raise
    dnd_embeddings, dnd_values, dnd_indices = dnd.query(embeddings[0:10], 0, NUM_NEIGHBORS)

    # calculate_normalization_factor
    sampled_embeddings = dnd.sample_embeddings(NUM_SAMPLED_EMBEDDINGS)
    coefficient = 1 / (NUM_SAMPLED_EMBEDDINGS * (NUM_SAMPLED_EMBEDDINGS - 1.0))

    # method 1: one embedding vs. all the others, accumulated in a python loop
    tf_current_embedding = tf.placeholder(tf.float32, shape=(EMBEDDING_SIZE), name='current_embedding')
    tf_other_embeddings = tf.placeholder(tf.float32, shape=(NUM_SAMPLED_EMBEDDINGS - 1, EMBEDDING_SIZE),
                                         name='other_embeddings')
    sub = tf_current_embedding - tf_other_embeddings
    square = tf.square(sub)
    result = tf.reduce_sum(square)

    ###########################
    # more efficient method: all pairs at once via tile + transpose
    ###########################
    sampled_embeddings_expanded = tf.placeholder(
        tf.float32, shape=(1, NUM_SAMPLED_EMBEDDINGS, EMBEDDING_SIZE), name='sampled_embeddings_expanded')
    sampled_embeddings_tiled = tf.tile(sampled_embeddings_expanded, (sampled_embeddings_expanded.shape[1], 1, 1))
    sampled_embeddings_transposed = tf.transpose(sampled_embeddings_tiled, (1, 0, 2))
    sub2 = sampled_embeddings_tiled - sampled_embeddings_transposed
    square2 = tf.square(sub2)
    result2 = tf.reduce_sum(square2)

    config = tf.ConfigProto()
    config.allow_soft_placement = True  # allow placing ops on cpu if they are not fit for gpu
    config.gpu_options.allow_growth = True  # allow the gpu memory allocated for the worker to grow if needed
    sess = tf.Session(config=config)
    try:
        sum1 = 0
        start = time.time()
        for i in range(NUM_SAMPLED_EMBEDDINGS):
            curr_sampled_embedding = sampled_embeddings[i]
            other_embeddings = np.delete(sampled_embeddings, i, 0)
            sum1 += sess.run(result, feed_dict={tf_current_embedding: curr_sampled_embedding,
                                                tf_other_embeddings: other_embeddings})
        print("1st method: {} sec".format(time.time() - start))

        start = time.time()
        sum2 = sess.run(result2, feed_dict={sampled_embeddings_expanded: np.expand_dims(sampled_embeddings, 0)})
        print("2nd method: {} sec".format(time.time() - start))
    finally:
        # BUG FIX: the session was never closed before, leaking GPU/CPU resources
        sess.close()

    # validate that results are equal
    # BUG FIX: the original only printed the sums without asserting anything,
    # so the test could never fail here. Loose tolerance since float32 sums of
    # ~N^2 * EMBEDDING_SIZE terms accumulate in different orders.
    print("sum1 = {}, sum2 = {}".format(sum1, sum2))
    assert np.allclose(sum1, sum2, rtol=1e-2), "the two methods disagree: {} vs {}".format(sum1, sum2)

    # mirrors the normalization factor computation used by the NEC agent
    norm_factor = -0.5 / (coefficient * sum2)
    assert np.isfinite(norm_factor)
if __name__ == '__main__':
    # allow running this test module directly without pytest
    test_random_sample_from_dnd(dnd())

View File

@@ -0,0 +1,97 @@
# nasty hack to deal with issue #46
import os
import sys
from rl_coach.memories.episodic.episodic_hindsight_experience_replay import EpisodicHindsightExperienceReplayParameters
from rl_coach.spaces import GoalsSpace, ReachingGoal
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
# print(sys.path)
import pytest
import numpy as np
from rl_coach.core_types import Transition, Episode
from rl_coach.memories.memory import MemoryGranularity
from rl_coach.memories.episodic.episodic_hindsight_experience_replay import EpisodicHindsightExperienceReplay, \
HindsightGoalSelectionMethod
#TODO: change from defining a new class to creating an instance from the parameters
class Parameters(EpisodicHindsightExperienceReplayParameters):
    """Hindsight experience replay parameters preconfigured for these unit tests."""

    def __init__(self):
        super().__init__()
        self.max_size = (MemoryGranularity.Transitions, 100)
        self.hindsight_transitions_per_regular_transition = 4
        self.hindsight_goal_selection_method = HindsightGoalSelectionMethod.Future
        self.goals_space = GoalsSpace(
            goal_name='observation',
            reward_type=ReachingGoal(distance_from_goal_threshold=0.1),
            distance_metric=GoalsSpace.DistanceMetric.Euclidean)
@pytest.fixture
def episode():
    """Build a 10-step episode (as a plain list) where observation, desired
    goal and achieved goal all equal the step index."""
    return [
        Transition(
            state={'observation': np.array([step]),
                   'desired_goal': np.array([step]),
                   'achieved_goal': np.array([step])},
            action=step,
        )
        for step in range(10)
    ]
@pytest.fixture
def her():
    """Instantiate EpisodicHindsightExperienceReplay from Parameters, passing
    only the keyword arguments its constructor actually accepts."""
    import inspect

    all_params = Parameters().__dict__
    accepted = set(inspect.getfullargspec(EpisodicHindsightExperienceReplay.__init__).args)
    kwargs = {name: all_params[name] for name in accepted.intersection(all_params)}
    return EpisodicHindsightExperienceReplay(**kwargs)
@pytest.mark.unit_test
def test_sample_goal(her, episode):
    # sampling a future goal for index 8 can only land on the final step (9)
    sampled = her._sample_goal(episode, 8)
    assert sampled == 9
@pytest.mark.unit_test
def test_sample_goal_range(her, episode):
    """Sampling from index 0 should eventually cover every goal in 1..8."""
    remaining = set(range(1, 9))
    for _ in range(500):
        remaining.discard(int(her._sample_goal(episode, 0)))
        if not remaining:
            return
    assert remaining == set()
@pytest.mark.unit_test
def test_update_episode(her):
    """Storing a 10-step episode should add 4 hindsight transitions for each
    regular transition except the last, and hindsight goals must never be
    sampled from the past."""
    episode = Episode()
    for i in range(10):
        episode.insert(Transition(
            state={'observation': np.array([i]), 'desired_goal': np.array([i+1]), 'achieved_goal': np.array([i+1])},
            action=i,
            game_over=i == 9,
            reward=0 if i == 9 else -1,
        ))
    her.store_episode(episode)

    # 10 original transitions, and 9 transitions * 4 hindsight episodes
    assert her.num_transitions() == 10 + (4 * 9)

    # make sure that the goal state was never sampled from the past
    for transition in her.transitions:
        assert transition.state['desired_goal'] > transition.state['observation']
        assert transition.next_state['desired_goal'] >= transition.next_state['observation']
        if transition.reward == 0:
            assert transition.game_over
        else:
            assert not transition.game_over


# BUG FIX: this call previously ran unconditionally at import time, so the
# test executed during pytest collection as well; guard it the same way the
# sibling test modules do.
if __name__ == '__main__':
    test_update_episode(her())

View File

@@ -0,0 +1,93 @@
# nasty hack to deal with issue #46
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
import pytest
from rl_coach.memories.non_episodic.prioritized_experience_replay import SegmentTree
@pytest.mark.unit_test
def test_sum_tree():
    """Exercise a 4-leaf SUM segment tree: totals, wrap-around, lookup, update."""
    sum_tree = SegmentTree(size=4, operation=SegmentTree.Operation.SUM)

    # fill the four leaves and track the running total
    for value, expected_total in [(10, 10), (20, 30), (5, 35), (7.5, 42.5)]:
        sum_tree.add(value, str(value))
        assert sum_tree.total_value() == expected_total

    # adding past capacity overwrites the oldest leaves
    sum_tree.add(2.5, "2.5")
    assert sum_tree.total_value() == 35
    sum_tree.add(5, "5")
    assert sum_tree.total_value() == 20

    # lookup by cumulative value lands on the expected leaves
    assert sum_tree.get(2) == (0, 2.5, '2.5')
    assert sum_tree.get(3) == (1, 5.0, '5')
    assert sum_tree.get(10) == (2, 5.0, '5')
    assert sum_tree.get(13) == (3, 7.5, '7.5')

    # updating a leaf propagates through the internal nodes
    sum_tree.update(2, 10)
    assert str(sum_tree) == "[25.]\n[ 7.5 17.5]\n[ 2.5 5. 10. 7.5]\n"

    # a non power-of-2 size is rejected
    with pytest.raises(ValueError):
        SegmentTree(size=5, operation=SegmentTree.Operation.SUM)
@pytest.mark.unit_test
def test_min_tree():
    """A MIN segment tree's total_value should track the smallest live entry."""
    min_tree = SegmentTree(size=4, operation=SegmentTree.Operation.MIN)

    for value, expected_min in [(10, 10), (20, 10), (5, 5), (7.5, 5), (2, 2)]:
        min_tree.add(value, str(value))
        assert min_tree.total_value() == expected_min

    # overwrite the whole ring; the minimum of the survivors is 3
    for value in (3, 3, 3, 5):
        min_tree.add(value, str(value))
    assert min_tree.total_value() == 3
@pytest.mark.unit_test
def test_max_tree():
    """A MAX segment tree's total_value should track the largest live entry,
    also after in-place updates."""
    max_tree = SegmentTree(size=4, operation=SegmentTree.Operation.MAX)

    for value, expected_max in [(10, 10), (20, 20), (5, 20), (7.5, 20), (2, 20)]:
        max_tree.add(value, str(value))
        assert max_tree.total_value() == expected_max

    # overwrite the whole ring; the maximum of the survivors is 5
    for value in (3, 3, 3, 5):
        max_tree.add(value, str(value))
    assert max_tree.total_value() == 5

    # raising a leaf raises the maximum...
    max_tree.update(1, 10)
    assert max_tree.total_value() == 10
    assert str(max_tree) == "[10.]\n[10. 3.]\n[ 5. 10. 3. 3.]\n"
    # ...and lowering it restores the previous maximum
    max_tree.update(1, 2)
    assert max_tree.total_value() == 5
    assert str(max_tree) == "[5.]\n[5. 3.]\n[5. 2. 3. 3.]\n"
if __name__ == "__main__":
test_sum_tree()
test_min_tree()
test_max_tree()

View File

@@ -0,0 +1,81 @@
# nasty hack to deal with issue #46
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
import pytest
import numpy as np
from rl_coach.core_types import Transition
from rl_coach.memories.episodic.single_episode_buffer import SingleEpisodeBuffer
@pytest.fixture()
def buffer():
    """Provide an empty single-episode replay buffer for each test."""
    return SingleEpisodeBuffer()
@pytest.mark.unit_test
def test_store_and_get(buffer: SingleEpisodeBuffer):
    """Store transitions, close an episode with a terminal transition, and
    verify the buffer's counters and returned episodes at every step."""
    def make_transition(reward, game_over):
        return Transition(state={"observation": np.array([1, 2, 3])},
                          action=1, reward=reward, game_over=game_over)

    # store a single non-terminal transition
    first = make_transition(reward=1, game_over=False)
    buffer.store(first)
    assert buffer.length() == 1
    assert buffer.num_complete_episodes() == 0
    assert buffer.num_transitions_in_complete_episodes() == 0
    assert buffer.num_transitions() == 1

    # the single stored transition is returned by identity (same object)
    episode = buffer.get(0)
    assert episode.length() == 1
    assert episode.get_first_transition() is first
    assert episode.get_last_transition() is first

    # a terminal transition closes the episode
    last = make_transition(reward=1, game_over=True)
    buffer.store(last)
    assert buffer.length() == 1
    assert buffer.num_complete_episodes() == 1
    assert buffer.num_transitions_in_complete_episodes() == 2

    # returns are discounted by 0.99 per step (apparently the buffer default)
    episode = buffer.get(0)
    assert episode.length() == 2
    assert episode.get_transition(0).total_return == 1 + 0.99
    assert episode.get_transition(1).total_return == 1
    assert buffer.mean_reward() == 1

    # only one episode exists in the replay buffer
    assert buffer.get(1) is None

    # storing after the first episode closed starts a fresh, incomplete episode
    buffer.store(make_transition(reward=0, game_over=False))
    assert buffer.length() == 1
    assert buffer.num_complete_episodes() == 0
    assert buffer.num_transitions_in_complete_episodes() == 0

    # still only one episode
    assert buffer.get(1) is None
    assert buffer.mean_reward() == 0
@pytest.mark.unit_test
def test_clean(buffer: SingleEpisodeBuffer):
    """clean() must drop all stored transitions and leave the buffer usable."""
    ongoing = Transition(state={"observation": np.array([1, 2, 3])}, action=1, reward=1, game_over=False)
    for _ in range(10):
        buffer.store(ongoing)
    assert buffer.num_transitions() == 10

    buffer.clean()
    assert buffer.num_transitions() == 0

    # the buffer accepts new transitions after a clean, and they really replace
    # the old contents (sampling only ever returns the new one)
    terminal = Transition(state={"observation": np.array([1, 2, 3])}, action=1, reward=1, game_over=True)
    buffer.store(terminal)
    assert buffer.num_transitions() == 1
    assert buffer.num_transitions_in_complete_episodes() == 1
    assert buffer.num_complete_episodes() == 1
    for _ in range(10):
        assert buffer.sample(1)[0] is terminal