
Release 0.9

Main changes are detailed below:

New features -
* CARLA 0.7 simulator integration
* Human control of the gameplay
* Recording of human gameplay and storing/loading the replay buffer (sketched after this list)
* Behavioral cloning agent and presets
* Golden tests for several presets
* Selecting between deep / shallow image embedders
* Rendering through pygame (with some boost in performance)
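
The demonstration-recording flow can be pictured with a toy sketch. This is illustrative only; the ReplayBuffer class and the pickle-based storage below are assumptions standing in for Coach's actual replay buffer and its store/load mechanism:

    import pickle

    class ReplayBuffer:
        """Toy transition store standing in for Coach's replay buffer."""
        def __init__(self):
            self.transitions = []

        def add(self, observation, action, reward, done):
            self.transitions.append((observation, action, reward, done))

        def save(self, file_path):
            with open(file_path, 'wb') as f:
                pickle.dump(self.transitions, f)

        def load(self, file_path):
            with open(file_path, 'rb') as f:
                self.transitions = pickle.load(f)

    # record transitions while a human plays, then reload them later as
    # training data for the behavioral cloning agent
    buffer = ReplayBuffer()
    buffer.add(observation=[0.0], action=1, reward=0.5, done=False)
    buffer.save('human_demo.p')

    bc_buffer = ReplayBuffer()
    bc_buffer.load('human_demo.p')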

API changes -
* Improved environment wrapper API
* Added an evaluate flag to allow convenient evaluation of existing checkpoints
* Improved frameskip definition in Gym

Bug fixes -
* Fixed loading of checkpoints for agents with more than one network
* Fixed Python 3 compatibility of the N-Step Q Learning agent
Commit 125c7ee38d (parent 11faf19649)
Author: Itai Caspi, 2017-12-19 19:27:16 +02:00, committed by GitHub
41 changed files with 1713 additions and 260 deletions

environments/doom_environment_wrapper.py

@@ -25,6 +25,7 @@ import numpy as np
 from environments.environment_wrapper import EnvironmentWrapper
 from os import path, environ
 from utils import *
+from logger import *

 # enum of the available levels and their path
@@ -39,6 +40,43 @@ class DoomLevel(Enum):
     DEFEND_THE_LINE = "defend_the_line.cfg"
     DEADLY_CORRIDOR = "deadly_corridor.cfg"

+key_map = {
+    'NO-OP': 96,  # `
+    'ATTACK': 13,  # enter
+    'CROUCH': 306,  # ctrl
+    'DROP_SELECTED_ITEM': ord("t"),
+    'DROP_SELECTED_WEAPON': ord("t"),
+    'JUMP': 32,  # spacebar
+    'LAND': ord("l"),
+    'LOOK_DOWN': 274,  # down arrow
+    'LOOK_UP': 273,  # up arrow
+    'MOVE_BACKWARD': ord("s"),
+    'MOVE_DOWN': ord("s"),
+    'MOVE_FORWARD': ord("w"),
+    'MOVE_LEFT': 276,  # left arrow
+    'MOVE_RIGHT': 275,  # right arrow
+    'MOVE_UP': ord("w"),
+    'RELOAD': ord("r"),
+    'SELECT_NEXT_WEAPON': ord("q"),
+    'SELECT_PREV_WEAPON': ord("e"),
+    'SELECT_WEAPON0': ord("0"),
+    'SELECT_WEAPON1': ord("1"),
+    'SELECT_WEAPON2': ord("2"),
+    'SELECT_WEAPON3': ord("3"),
+    'SELECT_WEAPON4': ord("4"),
+    'SELECT_WEAPON5': ord("5"),
+    'SELECT_WEAPON6': ord("6"),
+    'SELECT_WEAPON7': ord("7"),
+    'SELECT_WEAPON8': ord("8"),
+    'SELECT_WEAPON9': ord("9"),
+    'SPEED': 304,  # shift
+    'STRAFE': 9,  # tab
+    'TURN180': ord("u"),
+    'TURN_LEFT': ord("a"),
+    'TURN_RIGHT': ord("d"),
+    'USE': ord("f"),
+}

 class DoomEnvironmentWrapper(EnvironmentWrapper):
     def __init__(self, tuning_parameters):
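
The numeric values in key_map are pygame 1.x (SDL 1.2) keycodes: 13 is K_RETURN, 306 is K_LCTRL, 304 is K_LSHIFT, 273 through 276 are the arrow keys, and printable keys use ord(). A minimal sketch of how a pressed key can be resolved to an action index through the (key,) tuple lookup that __init__ builds below; the event loop and the table contents are illustrative, not Coach's exact code:

    import pygame

    # hypothetical two-entry table; __init__ below fills the real one from key_map
    key_to_action = {
        (13,): 1,        # ATTACK, pygame.K_RETURN
        (ord("w"),): 2,  # MOVE_FORWARD
    }

    def poll_human_action():
        # map the first relevant KEYDOWN event to its action index
        for event in pygame.event.get():
            if event.type == pygame.KEYDOWN and (event.key,) in key_to_action:
                return key_to_action[(event.key,)]
        return 0  # index 0 is the NO-OP action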
@@ -49,26 +87,42 @@ class DoomEnvironmentWrapper(EnvironmentWrapper):
         self.scenarios_dir = path.join(environ.get('VIZDOOM_ROOT'), 'scenarios')

         self.game = vizdoom.DoomGame()
         self.game.load_config(path.join(self.scenarios_dir, self.level))
-        self.game.set_window_visible(self.is_rendered)
+        self.game.set_window_visible(False)
+        self.game.add_game_args("+vid_forcesurface 1")

-        if self.is_rendered:
+        self.wait_for_explicit_human_action = True
+        if self.human_control:
+            self.game.set_screen_resolution(vizdoom.ScreenResolution.RES_640X480)
+            self.renderer.create_screen(640, 480)
+        elif self.is_rendered:
             self.game.set_screen_resolution(vizdoom.ScreenResolution.RES_320X240)
+            self.renderer.create_screen(320, 240)
         else:
             # lower resolution since we actually take only 76x60 and we don't need to render
             self.game.set_screen_resolution(vizdoom.ScreenResolution.RES_160X120)

         self.game.set_render_hud(False)
         self.game.set_render_crosshair(False)
         self.game.set_render_decals(False)
         self.game.set_render_particles(False)
         self.game.init()

         # action space
         self.action_space_abs_range = 0
         self.actions = {}
-        self.action_space_size = self.game.get_available_buttons_size()
-        for action_idx in range(self.action_space_size):
-            self.actions[action_idx] = [0] * self.action_space_size
-            self.actions[action_idx][action_idx] = 1
-        self.actions_description = [str(action) for action in self.game.get_available_buttons()]
+        self.action_space_size = self.game.get_available_buttons_size() + 1
+        self.action_vector_size = self.action_space_size - 1
+        self.actions[0] = [0] * self.action_vector_size
+        for action_idx in range(self.action_vector_size):
+            self.actions[action_idx + 1] = [0] * self.action_vector_size
+            self.actions[action_idx + 1][action_idx] = 1
+        self.actions_description = ['NO-OP']
+        self.actions_description += [str(action).split(".")[1] for action in self.game.get_available_buttons()]
+        for idx, action in enumerate(self.actions_description):
+            if action in key_map.keys():
+                self.key_to_action[(key_map[action],)] = idx

         # measurement
         self.measurements_size = self.game.get_state().game_variables.shape
         self.width = self.game.get_screen_width()
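
The rewritten action space prepends an explicit NO-OP at index 0, so action_space_size grows by one while the one-hot vectors handed to ViZDoom stay get_available_buttons_size() wide. A standalone reproduction of the construction, assuming a hypothetical config with three available buttons:

    # three buttons, e.g. ATTACK, MOVE_LEFT, MOVE_RIGHT
    num_buttons = 3
    actions = {0: [0] * num_buttons}  # index 0 is the explicit NO-OP
    for action_idx in range(num_buttons):
        actions[action_idx + 1] = [0] * num_buttons
        actions[action_idx + 1][action_idx] = 1

    print(actions)
    # {0: [0, 0, 0], 1: [1, 0, 0], 2: [0, 1, 0], 3: [0, 0, 1]}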
@@ -77,27 +131,17 @@ class DoomEnvironmentWrapper(EnvironmentWrapper):
         self.game.set_seed(self.tp.seed)
         self.reset()

-    def _update_observation_and_measurements(self):
+    def _update_state(self):
         # extract all data from the current state
         state = self.game.get_state()
         if state is not None and state.screen_buffer is not None:
-            self.observation = self._preprocess_observation(state.screen_buffer)
+            self.observation = state.screen_buffer
             self.measurements = state.game_variables

         self.reward = self.game.get_last_reward()
         self.done = self.game.is_episode_finished()

-    def step(self, action_idx):
-        self.reward = 0
-        for frame in range(self.tp.env.frame_skip):
-            self.reward += self.game.make_action(self._idx_to_action(action_idx))
-            self._update_observation_and_measurements()
-            if self.done:
-                break
-
-        return {'observation': self.observation,
-                'reward': self.reward,
-                'done': self.done,
-                'action': action_idx,
-                'measurements': self.measurements}
+    def _take_action(self, action_idx):
+        self.game.make_action(self._idx_to_action(action_idx), self.frame_skip)

     def _preprocess_observation(self, observation):
         if observation is None:
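
The old step() implemented frame skip as a Python loop with one engine call per frame, while the new _take_action() relies on ViZDoom's native repetition: make_action(action, tics) repeats the action for the given number of tics and returns the accumulated reward. Note that the per-frame early exit on episode end disappears in the new variant. A rough equivalence sketch, assuming game is a running vizdoom.DoomGame, action is a button vector such as [1, 0, 0], and frame_skip is a small integer:

    # old behavior: skip frames in Python, one engine call per frame
    reward = 0
    for _ in range(frame_skip):
        reward += game.make_action(action)
        if game.is_episode_finished():
            break

    # new behavior: one call, ViZDoom repeats the action for frame_skip tics
    reward = game.make_action(action, frame_skip)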
@@ -108,3 +152,5 @@ class DoomEnvironmentWrapper(EnvironmentWrapper):

     def _restart_environment_episode(self, force_environment_reset=False):
         self.game.new_episode()