coach v0.8.0

2026-03-19 00:13:46 +01:00 · 2017-10-19 13:10:15 +03:00
parent 7f77813a39
commit 1d4c3455e7
123 changed files with 10996 additions and 203 deletions
--- a/environments/init.py
+++ b/environments/init.py
@@ -0,0 +1,33 @@
+#
+# Copyright (c) 2017 Intel Corporation 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from logger import *
+from utils import Enum
+from environments.gym_environment_wrapper import *
+from environments.doom_environment_wrapper import *
+
+# The environment types that create_environment() can build.
+# Each value is the name of an environment wrapper class imported into this module
+# (presumably EnvTypes().verify() returns it as the second item of its result - TODO confirm against utils.Enum).
+class EnvTypes(Enum):
+    Doom = "DoomEnvironmentWrapper"
+    Gym = "GymEnvironmentWrapper"
+
+
+def create_environment(tuning_parameters):
+    """
+    Build the environment wrapper selected by the given preset
+    :param tuning_parameters: the run parameters; tuning_parameters.env.type selects the wrapper type
+    :return: a new environment wrapper instance constructed from tuning_parameters
+    """
+    _, wrapper_class_name = EnvTypes().verify(tuning_parameters.env.type)
+    # NOTE(review): eval() looks the class up by name in this module's namespace. This is only safe as long as
+    # verify() returns nothing but values declared in EnvTypes - confirm before passing untrusted input here.
+    wrapper_class = eval(wrapper_class_name)
+    return wrapper_class(tuning_parameters)
+
+
+
--- a/environments/doom_environment_wrapper.py
+++ b/environments/doom_environment_wrapper.py
@@ -0,0 +1,110 @@
+#
+# Copyright (c) 2017 Intel Corporation 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+try:
+    import vizdoom
+except ImportError:
+    from logger import failed_imports
+    failed_imports.append("ViZDoom")
+
+import numpy as np
+from environments.environment_wrapper import EnvironmentWrapper
+from os import path, environ
+from utils import *
+
+
+# Enum of the available Doom levels.
+# Each value is the file name of a scenario config. DoomEnvironmentWrapper joins the looked up entry with
+# $VIZDOOM_ROOT/scenarios and loads it (presumably DoomLevel().get() returns the value - TODO confirm in utils.Enum).
+class DoomLevel(Enum):
+    BASIC = "basic.cfg"
+    DEFEND = "defend_the_center.cfg"
+    DEATHMATCH = "deathmatch.cfg"
+    MY_WAY_HOME = "my_way_home.cfg"
+    TAKE_COVER = "take_cover.cfg"
+    HEALTH_GATHERING = "health_gathering.cfg"
+    HEALTH_GATHERING_SUPREME = "health_gathering_supreme.cfg"
+    DEFEND_THE_LINE = "defend_the_line.cfg"
+    DEADLY_CORRIDOR = "deadly_corridor.cfg"
+
+
+class DoomEnvironmentWrapper(EnvironmentWrapper):
+    """
+    Environment wrapper which runs a ViZDoom scenario behind the EnvironmentWrapper interface.
+    Every available button is exposed as a single discrete action (a one-hot button vector).
+    """
+
+    def __init__(self, tuning_parameters):
+        """
+        Load the requested level, configure the renderer and start the first episode
+        :param tuning_parameters: the run parameters (env.level, env.frame_skip, visualization.render and seed
+                                  are used here)
+        :type tuning_parameters: Preset
+        :raises RuntimeError: if the VIZDOOM_ROOT environment variable is not set
+        """
+        EnvironmentWrapper.__init__(self, tuning_parameters)
+
+        # fail with a clear message instead of the TypeError that path.join(None, ...) would raise
+        vizdoom_root = environ.get('VIZDOOM_ROOT')
+        if vizdoom_root is None:
+            raise RuntimeError("The VIZDOOM_ROOT environment variable is not set. "
+                               "It should point to the root directory of the ViZDoom installation")
+
+        # load the emulator with the required level
+        self.level = DoomLevel().get(self.tp.env.level)
+        self.scenarios_dir = path.join(vizdoom_root, 'scenarios')
+        self.game = vizdoom.DoomGame()
+        self.game.load_config(path.join(self.scenarios_dir, self.level))
+        self.game.set_window_visible(self.is_rendered)
+        self.game.add_game_args("+vid_forcesurface 1")
+        if self.is_rendered:
+            self.game.set_screen_resolution(vizdoom.ScreenResolution.RES_320X240)
+        else:
+            # lower resolution since we actually take only 76x60 and we don't need to render
+            self.game.set_screen_resolution(vizdoom.ScreenResolution.RES_160X120)
+        self.game.set_render_hud(False)
+        self.game.set_render_crosshair(False)
+        self.game.set_render_decals(False)
+        self.game.set_render_particles(False)
+        self.game.init()
+
+        # one discrete action per available button: action i presses button i only
+        self.action_space_abs_range = 0
+        self.actions = {}
+        self.action_space_size = self.game.get_available_buttons_size()
+        for action_idx in range(self.action_space_size):
+            self.actions[action_idx] = [0] * self.action_space_size
+            self.actions[action_idx][action_idx] = 1
+        self.actions_description = [str(action) for action in self.game.get_available_buttons()]
+        self.measurements_size = self.game.get_state().game_variables.shape
+
+        self.width = self.game.get_screen_width()
+        self.height = self.game.get_screen_height()
+        if self.tp.seed is not None:
+            self.game.set_seed(self.tp.seed)
+        self.reset()
+
+    def _update_observation_and_measurements(self):
+        """
+        Refresh the observation, the measurements and the done flag from the current game state.
+        When no state or no screen buffer is available, the previous observation and measurements are kept.
+        """
+        # extract all data from the current state
+        state = self.game.get_state()
+        if state is not None and state.screen_buffer is not None:
+            self.observation = self._preprocess_observation(state.screen_buffer)
+            self.measurements = state.game_variables
+        self.done = self.game.is_episode_finished()
+
+    def step(self, action_idx):
+        """
+        Repeat the given action for up to tp.env.frame_skip frames and accumulate the reward.
+        The repetition stops early when the episode finishes.
+        :param action_idx: an action index between 0 and self.action_space_size - 1
+        :return: A dictionary containing the observation, reward, done flag, action and measurements
+        """
+        self.reward = 0
+        # the button vector is the same for all the skipped frames, so it is resolved only once
+        action = self._idx_to_action(action_idx)
+        for _ in range(self.tp.env.frame_skip):
+            self.reward += self.game.make_action(action)
+            self._update_observation_and_measurements()
+            if self.done:
+                break
+
+        return {'observation': self.observation,
+                'reward': self.reward,
+                'done': self.done,
+                'action': action_idx,
+                'measurements': self.measurements}
+
+    def _preprocess_observation(self, observation):
+        """
+        Reorder the axes of the screen buffer
+        :param observation: the raw screen buffer, or None
+        :return: the observation with its first axis moved to the last position, or None if no observation was given
+        """
+        if observation is None:
+            return None
+        # move the channel to the last axis
+        observation = np.transpose(observation, (1, 2, 0))
+        return observation
+
+    def _restart_environment_episode(self, force_environment_reset=False):
+        """
+        Start a new episode in the emulator
+        :param force_environment_reset: unused here - a new episode is always started
+        """
+        self.game.new_episode()
--- a/environments/environment_wrapper.py
+++ b/environments/environment_wrapper.py
@@ -0,0 +1,138 @@
+#
+# Copyright (c) 2017 Intel Corporation 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import numpy as np
+from utils import *
+from configurations import Preset
+
+
+class EnvironmentWrapper:
+    """
+    Base class of the environment wrappers. It holds the state shared by all the environments and defines the
+    hooks (step, render, _restart_environment_episode, ...) which the concrete wrappers override.
+    """
+
+    def __init__(self, tuning_parameters):
+        """
+        :param tuning_parameters: the run parameters the wrapper is configured from
+        :type tuning_parameters: Preset
+        """
+        # settings which are read from the tuning parameters
+        self.tp = tuning_parameters
+        self.seed = self.tp.seed
+        self.env_id = self.tp.env.level
+        self.frame_skip = self.tp.env.frame_skip
+        self.is_rendered = self.tp.visualization.render
+        self.video_path = self.tp.visualization.video_path
+        self.record_video_every = self.tp.visualization.record_video_every
+        self.phase = RunPhase.TRAIN
+
+        # the state of the current step
+        self.game = []
+        self.observation = []
+        self.measurements = []
+        self.reward = 0
+        self.done = False
+        self.last_action_idx = 0
+
+        # the description of the action space
+        self.actions = {}
+        self.discrete_controls = True
+        self.action_space_size = 0
+        self.action_space_low = 0
+        self.action_space_high = 0
+        self.action_space_abs_range = 0
+
+        # the description of the observation and the measurements
+        self.is_state_type_image = True
+        self.width = 1
+        self.height = 1
+        self.measurements_size = 0
+
+    def _update_observation_and_measurements(self):
+        """
+        Extract all the available measurements (observation, depthmap, lives, ammo etc.).
+        Does nothing in the base class.
+        """
+
+    def _restart_environment_episode(self, force_environment_reset=False):
+        """
+        Start a new episode in the underlying environment. Does nothing in the base class.
+        :param force_environment_reset: Force the environment to reset even if the episode is not done yet.
+        :return: None in the base class
+        """
+
+    def _idx_to_action(self, action_idx):
+        """
+        Translate an action index into the matching action of the environment.
+        For example, if the available actions are 4,5,6 then this function will map 0->4, 1->5, 2->6
+        :param action_idx: an action index between 0 and self.action_space_size - 1
+        :return: the action corresponding to the requested index
+        """
+        action = self.actions[action_idx]
+        return action
+
+    def _preprocess_observation(self, observation):
+        """
+        Initial observation preprocessing such as cropping, rgb2gray, rescale etc.
+        Does nothing in the base class.
+        :param observation: a raw observation from the environment
+        :return: the preprocessed observation (None in the base class)
+        """
+
+    def step(self, action_idx):
+        """
+        Perform a single step on the environment using the given action. Does nothing in the base class.
+        :param action_idx: the action to perform on the environment
+        :return: A dictionary containing the observation, reward, done flag, action and measurements
+                 (None in the base class)
+        """
+
+    def render(self):
+        """
+        Call the environment function for rendering to the screen. Does nothing in the base class.
+        """
+
+    def reset(self, force_environment_reset=False):
+        """
+        Restart the episode and reset all the per-step variables of the wrapper
+        :param force_environment_reset: forces environment reset even when the game did not end
+        :return: A dictionary containing the observation, reward, done flag, action and measurements
+        """
+        self._restart_environment_episode(force_environment_reset)
+
+        # clear the results of the previous step before reading the new state
+        self.last_action_idx = 0
+        self.reward = 0.0
+        self.done = False
+        self._update_observation_and_measurements()
+
+        result = {'observation': self.observation,
+                  'reward': self.reward,
+                  'done': self.done,
+                  'action': self.last_action_idx,
+                  'measurements': self.measurements}
+        return result
+
+    def get_random_action(self):
+        """
+        Sample an action uniformly from the available actions
+        :return: a random action index for discrete controls, otherwise a uniform sample between
+                 self.action_space_low and self.action_space_high
+        """
+        if not self.discrete_controls:
+            return np.random.uniform(self.action_space_low, self.action_space_high)
+        return np.random.choice(self.action_space_size)
+
+    def change_phase(self, phase):
+        """
+        Set the current phase of the run.
+        This is useful when different behavior is expected when testing and training
+        :param phase: The running phase of the algorithm
+        :type phase: RunPhase
+        """
+        self.phase = phase
+
+    def get_rendered_image(self):
+        """
+        Return a numpy array containing the image that will be rendered to the screen.
+        This can be different from the observation. For example, mujoco's observation is a measurements vector.
+        The base class returns the current observation.
+        :return: numpy array containing the image that will be rendered to the screen
+        """
+        return self.observation
--- a/environments/gym_environment_wrapper.py
+++ b/environments/gym_environment_wrapper.py
@@ -0,0 +1,172 @@
+#
+# Copyright (c) 2017 Intel Corporation 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import sys
+import gym
+import numpy as np
+try:
+    import roboschool
+    from OpenGL import GL
+except ImportError:
+    from logger import failed_imports
+    failed_imports.append("RoboSchool")
+
+try:
+    from gym_extensions.continuous import mujoco
+except:
+    from logger import failed_imports
+    failed_imports.append("GymExtensions")
+
+try:
+    import pybullet_envs
+except ImportError:
+    from logger import failed_imports
+    failed_imports.append("PyBullet")
+
+from gym import wrappers
+from utils import force_list, RunPhase
+from environments.environment_wrapper import EnvironmentWrapper
+
+i = 0
+
+
+class GymEnvironmentWrapper(EnvironmentWrapper):
+    """
+    Environment wrapper which runs an OpenAI Gym environment behind the EnvironmentWrapper interface.
+    Both discrete action spaces and continuous (Box) action spaces are supported.
+    """
+
+    def __init__(self, tuning_parameters):
+        """
+        Create the gym environment whose id is the env level of the preset and query its observation and
+        action spaces. Note that the environment is reset once and stepped once (with action 0) while constructing.
+        :param tuning_parameters: the run parameters the wrapper is configured from
+        :type tuning_parameters: Preset
+        """
+        EnvironmentWrapper.__init__(self, tuning_parameters)
+
+        # env parameters
+        self.env = gym.make(self.env_id)
+        if self.seed is not None:
+            self.env.seed(self.seed)
+
+        self.env_spec = gym.spec(self.env_id)
+        self.none_counter = 0
+        self.discrete_controls = not isinstance(self.env.action_space, gym.spaces.box.Box)
+
+        # pybullet requires rendering before resetting the environment, but other gym environments (Pendulum) will
+        # crash, so this first render is best effort only and its errors are deliberately ignored
+        try:
+            if self.is_rendered:
+                self.render()
+        except Exception:
+            pass
+
+        o = self.reset(True)['observation']
+
+        # render
+        if self.is_rendered:
+            self.render()
+
+        # an observation with more than one dimension is treated as an image
+        self.is_state_type_image = len(o.shape) > 1
+        if self.is_state_type_image:
+            self.width = o.shape[1]
+            self.height = o.shape[0]
+        else:
+            self.width = o.shape[0]
+
+        self.actions_description = {}
+        if self.discrete_controls:
+            self.action_space_size = self.env.action_space.n
+            self.action_space_abs_range = 0
+        else:
+            self.action_space_size = self.env.action_space.shape[0]
+            self.action_space_high = self.env.action_space.high
+            self.action_space_low = self.env.action_space.low
+            self.action_space_abs_range = np.maximum(np.abs(self.action_space_low), np.abs(self.action_space_high))
+        self.actions = {i: i for i in range(self.action_space_size)}
+        self.timestep_limit = self.env.spec.timestep_limit
+        self.current_ale_lives = 0
+        # the number of entries in the info dictionary is only known after taking a step
+        self.measurements_size = len(self.step(0)['info'])
+
+        # env initialization
+        self.observation = o
+        self.reward = 0
+        self.done = False
+        self.last_action = self.actions[0]
+
+    def render(self):
+        """
+        Render the gym environment to the screen
+        """
+        self.env.render()
+
+    def _preprocess_observation(self, observation):
+        """
+        Crop the frames of Breakout and Pong. The observations of all the other environments are returned as is.
+        Both step() and the episode restart go through here, such that all the observations of an environment
+        share the same shape.
+        :param observation: a raw observation from the environment, or None
+        :return: the cropped observation, or None if no observation was given
+        """
+        if observation is None:
+            return None
+        if any(env in self.env_id for env in ["Breakout", "Pong"]):
+            # keep rows 34-194 only (presumably the play area without the score - TODO confirm)
+            return observation[34:195, :, :]
+        return observation
+
+    def step(self, action_idx):
+        """
+        Perform a single step on the environment using the given action
+        :param action_idx: the action index for discrete controls, or the action values for continuous controls.
+                           None repeats the last action
+        :return: A dictionary containing the observation, reward, done flag, action and info
+        """
+        if action_idx is None:
+            action_idx = self.last_action_idx
+
+        self.last_action_idx = action_idx
+
+        if self.discrete_controls:
+            action = self.actions[action_idx]
+        else:
+            # pendulum-v0 for example expects a list.
+            # catching cases where the action for continuous control is a number instead of a list the
+            # size of the action space
+            if isinstance(action_idx, int) and action_idx == 0:
+                # deal with the "reset" action 0
+                action = [0] * self.env.action_space.shape[0]
+            else:
+                action = action_idx
+            action = np.array(force_list(action))
+            # removing redundant dimensions such that the action size will match the expected action size from gym
+            if action.shape != self.env.action_space.shape:
+                action = np.squeeze(action)
+            action = np.clip(action, self.action_space_low, self.action_space_high)
+
+        # only environments which expose an ale attribute report lives
+        has_ale = hasattr(self.env.env, 'ale')
+        if has_ale:
+            prev_ale_lives = self.env.env.ale.lives()
+
+        self.observation, self.reward, self.done, self.info = self.env.step(action)
+
+        if has_ale and self.phase == RunPhase.TRAIN:
+            # signal termination for breakout life loss
+            if prev_ale_lives != self.env.env.ale.lives():
+                self.done = True
+
+        # crop image
+        self.observation = self._preprocess_observation(self.observation)
+
+        if self.is_rendered:
+            self.render()
+
+        return {'observation': self.observation,
+                'reward': self.reward,
+                'done': self.done,
+                'action': self.last_action_idx,
+                'info': self.info}
+
+    def _restart_environment_episode(self, force_environment_reset=False):
+        """
+        Reset the gym environment and store its first observation
+        :param force_environment_reset: reset Breakout even if there are ale lives left
+        :return: the first observation of the episode (or the current observation if the reset was skipped)
+        """
+        # prevent reset of environment if there are ale lives left
+        if "Breakout" in self.env_id and self.env.env.ale.lives() > 0 and not force_environment_reset:
+            return self.observation
+
+        # compare with None such that a seed of 0 is applied as well, same as in __init__
+        if self.seed is not None:
+            self.env.seed(self.seed)
+
+        # crop image - done here for the reset observation only, since step() already returns cropped observations
+        observation = self._preprocess_observation(self.env.reset())
+        while observation is None:
+            observation = self.step(0)['observation']
+
+        self.observation = observation
+
+        return observation
+
+    def get_rendered_image(self):
+        """
+        :return: the image which gym renders for the current state, requested in 'rgb_array' mode
+        """
+        return self.env.render(mode='rgb_array')