mirror of https://github.com/gryf/coach.git
synced 2025-12-17 19:20:19 +01:00
coach v0.8.0
138 environments/environment_wrapper.py Normal file
@@ -0,0 +1,138 @@
#
# Copyright (c) 2017 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import numpy as np
from utils import *
from configurations import Preset


class EnvironmentWrapper:
    def __init__(self, tuning_parameters):
        """
        :param tuning_parameters: a Preset object holding the parameters of the current run
        :type tuning_parameters: Preset
        """
        # env initialization
        self.game = []
        self.actions = {}
        self.observation = []
        self.reward = 0
        self.done = False
        self.last_action_idx = 0
        self.measurements = []
        self.action_space_low = 0
        self.action_space_high = 0
        self.action_space_abs_range = 0
        self.discrete_controls = True
        self.action_space_size = 0
        self.width = 1
        self.height = 1
        self.is_state_type_image = True
        self.measurements_size = 0
        self.phase = RunPhase.TRAIN
        self.tp = tuning_parameters
        self.record_video_every = self.tp.visualization.record_video_every
        self.env_id = self.tp.env.level
        self.video_path = self.tp.visualization.video_path
        self.is_rendered = self.tp.visualization.render
        self.seed = self.tp.seed
        self.frame_skip = self.tp.env.frame_skip
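
    # Note: judging from the attribute accesses above, the Preset is assumed to
    # expose at least tp.seed, tp.env.level, tp.env.frame_skip,
    # tp.visualization.record_video_every, tp.visualization.video_path
    # and tp.visualization.render.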

    def _update_observation_and_measurements(self):
        # extract all the available measurements (observation, depth map, lives, ammo, etc.)
        pass

    def _restart_environment_episode(self, force_environment_reset=False):
        """
        :param force_environment_reset: force the environment to reset even if the episode is not done yet
        :return:
        """
        pass

    def _idx_to_action(self, action_idx):
        """
        Convert an action index to one of the environment's available actions.
        For example, if the available actions are 4, 5 and 6, then this function maps 0->4, 1->5, 2->6.
        :param action_idx: an action index between 0 and self.action_space_size - 1
        :return: the action corresponding to the requested index
        """
        return self.actions[action_idx]

    def _preprocess_observation(self, observation):
        """
        Do initial observation preprocessing, such as cropping, RGB-to-grayscale conversion and rescaling.
        :param observation: a raw observation from the environment
        :return: the preprocessed observation
        """
        pass

    def step(self, action_idx):
        """
        Perform a single step in the environment using the given action.
        :param action_idx: the index of the action to perform on the environment
        :return: a dictionary containing the observation, reward, done flag, action and measurements
        """
        pass

    def render(self):
        """
        Call the environment function for rendering to the screen.
        """
        pass

    def reset(self, force_environment_reset=False):
        """
        Reset the environment and all the variables of the wrapper.
        :param force_environment_reset: forces an environment reset even when the episode has not ended
        :return: a dictionary containing the observation, reward, done flag, action and measurements
        """
        self._restart_environment_episode(force_environment_reset)
        self.done = False
        self.reward = 0.0
        self.last_action_idx = 0
        self._update_observation_and_measurements()
        return {'observation': self.observation,
                'reward': self.reward,
                'done': self.done,
                'action': self.last_action_idx,
                'measurements': self.measurements}

    def get_random_action(self):
        """
        Return an action picked uniformly at random from the available actions.
        :return: a random action index for discrete controls, or a random value drawn
                 from the action space for continuous controls
        """
        if self.discrete_controls:
            return np.random.choice(self.action_space_size)
        else:
            return np.random.uniform(self.action_space_low, self.action_space_high)
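
    # Note: for continuous controls, the sample's shape follows the bounds set by
    # the subclass; np.random.uniform broadcasts array-valued action_space_low/high
    # into an array sample, while the scalar defaults yield a single float.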

    def change_phase(self, phase):
        """
        Change the current phase of the run.
        This is useful when different behavior is expected during training and testing.
        :param phase: the new running phase of the algorithm
        :type phase: RunPhase
        """
        self.phase = phase

    def get_rendered_image(self):
        """
        Return a numpy array containing the image that will be rendered to the screen.
        This can be different from the observation. For example, MuJoCo's observation is a measurements vector.
        :return: a numpy array containing the image that will be rendered to the screen
        """
        return self.observation
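
Usage sketch (not part of the commit): EnvironmentWrapper is an abstract base, so a concrete subclass must fill in the private hooks and step(). The subclass ToyEnvironment, the SimpleNamespace stand-in for the Preset fields read by __init__, and all values below are hypothetical, for illustration only.

import types

import numpy as np

from environments.environment_wrapper import EnvironmentWrapper


class ToyEnvironment(EnvironmentWrapper):
    # A toy subclass (hypothetical) that emits a fixed observation and ends
    # every episode after 10 steps, just enough to exercise the wrapper API.
    def _restart_environment_episode(self, force_environment_reset=False):
        self.steps = 0

    def _update_observation_and_measurements(self):
        self.observation = np.zeros((self.height, self.width))
        self.measurements = []

    def step(self, action_idx):
        self.last_action_idx = action_idx
        self.steps += 1
        self.reward = 1.0
        self.done = self.steps >= 10
        self._update_observation_and_measurements()
        return {'observation': self.observation,
                'reward': self.reward,
                'done': self.done,
                'action': self.last_action_idx,
                'measurements': self.measurements}


# Hypothetical stand-in for the Preset fields that __init__ reads.
tp = types.SimpleNamespace(
    seed=0,
    env=types.SimpleNamespace(level='toy', frame_skip=1),
    visualization=types.SimpleNamespace(record_video_every=0,
                                        video_path='',
                                        render=False))

env = ToyEnvironment(tp)
env.actions = {0: 4, 1: 5, 2: 6}  # index -> environment action, as used by _idx_to_action
env.action_space_size = len(env.actions)

result = env.reset()
total_reward = 0.0
while not result['done']:
    result = env.step(env.get_random_action())
    total_reward += result['reward']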