Release 0.9

Main changes are detailed below:

New features -
* CARLA 0.7 simulator integration
* Human control of gameplay
* Recording of human gameplay and storing/loading the replay buffer
* Behavioral cloning agent and presets
* Golden tests for several presets
* Selecting between deep / shallow image embedders
* Rendering through pygame (with some boost in performance)

API changes -
* Improved environment wrapper API
* Added an evaluate flag for convenient evaluation of existing checkpoints (see the usage sketch after this list)
* Improved the frameskip definition in Gym
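
A sketch of how the new evaluation mode might be invoked from the command line; the exact flag spelling (--evaluate) and the preset name are assumptions for illustration, not verified against this commit:

    # Hypothetical invocation: load an existing checkpoint and run the
    # agent in evaluation-only mode (flag and preset names illustrative).
    python coach.py -p CartPole_DQN --evaluate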

Bug fixes -
* Fixed loading of checkpoints for agents with more than one network
* Fixed Python 3 compatibility of the N-Step Q learning agent
Commit 125c7ee38d (parent 11faf19649) by Itai Caspi, committed via GitHub on 2017-12-19 19:27:16 +02:00.
41 changed files with 1713 additions and 260 deletions

@@ -73,6 +73,7 @@ class Episode(object):
         if n_step_return == -1 or n_step_return > self.length():
             n_step_return = self.length()
         rewards = np.array([t.reward for t in self.transitions])
+        rewards = rewards.astype('float')
         total_return = rewards.copy()
         current_discount = discount
         for i in range(1, n_step_return):
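
The added cast guards the n-step return accumulation against integer rewards: with an int dtype, the in-place discounted sum would truncate (or raise a casting error on newer NumPy versions). Below is a minimal standalone sketch of the same computation; the function name and call shape are illustrative, not Coach's API:

import numpy as np

def n_step_returns(rewards, discount, n_step_return):
    # Cast to float so the discounted accumulation below neither
    # truncates nor raises a casting error for integer rewards
    # (the problem the added line above fixes).
    rewards = np.asarray(rewards).astype('float')
    total_return = rewards.copy()
    current_discount = discount
    for i in range(1, n_step_return):
        # Each position t accumulates discount**i * rewards[t + i].
        total_return[:-i] += current_discount * rewards[i:]
        current_discount *= discount
    return total_return

# Example: n_step_returns([1, 1, 1], discount=0.5, n_step_return=2)
# -> array([1.5, 1.5, 1. ])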
@@ -123,12 +124,30 @@ class Episode(object):
 class Transition(object):
-    def __init__(self, state, action, reward, next_state, game_over):
+    def __init__(self, state, action, reward=0, next_state=None, game_over=False):
+        """
+        A transition is a tuple containing the information of a single step of interaction
+        between the agent and the environment. The most basic version should contain the following values:
+        (current state, action, reward, next state, game over)
+        For imitation learning algorithms, if the reward, next state or game over is not known,
+        it is sufficient to store the current state and action taken by the expert.
+        :param state: The current state. Assumed to be a dictionary where the observation
+                      is located at state['observation']
+        :param action: The current action that was taken
+        :param reward: The reward received from the environment
+        :param next_state: The next state of the environment after applying the action.
+                           The next state should be similar to the state in its structure.
+        :param game_over: A boolean which should be True if the episode terminated after
+                          the execution of the action.
+        """
         self.state = copy.deepcopy(state)
         self.state['observation'] = np.array(self.state['observation'], copy=False)
         self.action = action
         self.reward = reward
         self.total_return = None
+        if not next_state:
+            next_state = state
         self.next_state = copy.deepcopy(next_state)
         self.next_state['observation'] = np.array(self.next_state['observation'], copy=False)
         self.game_over = game_over
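
The relaxed defaults are what let the new behavioral cloning agent record expert demonstrations where only the (state, action) pair is observed. A minimal usage sketch of the constructor; the observation shape and action value are invented for illustration:

import numpy as np

# Store an expert demonstration step. reward defaults to 0, next_state
# falls back to a copy of the current state, and game_over defaults to
# False, so only (state, action) needs to be known at recording time.
observation = np.zeros((84, 84, 3))    # e.g. a raw image frame (made up)
expert_step = Transition(state={'observation': observation}, action=1)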