pre-release 0.10.0

2026-02-17 06:35:47 +01:00 · 2018-08-13 17:11:34 +03:00
parent d44c329bb8
commit 19ca5c24b1
485 changed files with 33292 additions and 16770 deletions
--- a/rl_coach/environments/mujoco/init.py
+++ b/rl_coach/environments/mujoco/init.py
--- a/rl_coach/environments/mujoco/common/init.py
+++ b/rl_coach/environments/mujoco/common/init.py
@@ -0,0 +1,38 @@
+# Copyright 2017 The dm_control Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or  implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""Functions to manage the common assets for domains."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+from dm_control.utils import resources
+
+_SUITE_DIR = os.path.dirname(os.path.dirname(__file__))  # directory one level above this file's directory
+_FILENAMES = [  # asset paths, joined onto _SUITE_DIR below
+    "common/materials.xml",
+    "common/skybox.xml",
+    "common/visual.xml",
+]
+
+ASSETS = {filename: resources.GetResource(os.path.join(_SUITE_DIR, filename))  # read once, at import time
+          for filename in _FILENAMES}  # keyed by the relative path listed in _FILENAMES
+
+
+def read_model(model_filename):
+  """Reads the XML file at model_filename, joined onto _SUITE_DIR, and returns its contents."""
+  return resources.GetResource(os.path.join(_SUITE_DIR, model_filename))
--- a/rl_coach/environments/mujoco/common/materials.xml
+++ b/rl_coach/environments/mujoco/common/materials.xml
@@ -0,0 +1,22 @@
+<!--
+Common textures, colors and materials to be used throughout this suite. Some
+materials such as xxx_highlight are activated on occurrence of certain events,
+for example receiving a positive reward.
+-->
+<mujoco>
+  <asset>
+    <texture name="grid" type="2d" builtin="checker" rgb1=".1 .2 .3" rgb2=".2 .3 .4" width="300" height="300" mark="edge" markrgb=".2 .3 .4"/>
+    <material name="grid" texture="grid" texrepeat="1 1" texuniform="true" reflectance=".2"/>
+    <material name="self" rgba=".7 .5 .3 1"/>
+    <material name="self_default" rgba=".7 .5 .3 1"/>
+    <material name="self_highlight" rgba="0 .5 .3 1"/>
+    <material name="effector" rgba=".7 .4 .2 1"/>
+    <material name="effector_default" rgba=".7 .4 .2 1"/>
+    <material name="effector_highlight" rgba="0 .5 .3 1"/>
+    <material name="decoration" rgba=".3 .5 .7 1"/>
+    <material name="eye" rgba="0 .2 1 1"/>
+    <material name="target" rgba=".6 .3 .3 1"/>
+    <material name="target_default" rgba=".6 .3 .3 1"/>
+    <material name="target_highlight" rgba=".6 .3 .3 .4"/>
+  </asset>
+</mujoco>
--- a/rl_coach/environments/mujoco/common/skybox.xml
+++ b/rl_coach/environments/mujoco/common/skybox.xml
@@ -0,0 +1,6 @@
+<mujoco>
+  <asset>
+      <texture name="skybox" type="skybox" builtin="gradient" rgb1=".4 .6 .8" rgb2="0 0 0"
+               width="800" height="800" mark="random" markrgb="1 1 1"/>
+  </asset>
+</mujoco>
--- a/rl_coach/environments/mujoco/common/visual.xml
+++ b/rl_coach/environments/mujoco/common/visual.xml
@@ -0,0 +1,7 @@
+<mujoco>
+  <visual>
+    <headlight ambient=".4 .4 .4" diffuse=".8 .8 .8" specular="0.1 0.1 0.1"/>
+    <map znear=".01"/>
+    <quality shadowsize="2048"/>
+  </visual>
+</mujoco>
--- a/rl_coach/environments/mujoco/pendulum_with_goals.py
+++ b/rl_coach/environments/mujoco/pendulum_with_goals.py
@@ -0,0 +1,185 @@
+import numpy as np
+import gym
+import os
+from gym import spaces
+from gym.envs.registration import EnvSpec
+
+from mujoco_py import load_model_from_path, MjSim , MjViewer, MjRenderContextOffscreen
+
+
+class PendulumWithGoals(gym.Env):
+    metadata = {
+        'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 30
+    }
+
+    def __init__(self, goal_reaching_thresholds=np.array([0.075, 0.075, 0.75]),
+                 goal_not_reached_penalty=-1, goal_reached_reward=0, terminate_on_goal_reaching=True,
+                 time_limit=1000, frameskip=1, random_goals_instead_of_standing_goal=False,
+                 polar_coordinates: bool=False):
+        super().__init__()
+        dir = os.path.dirname(__file__)
+        model = load_model_from_path(dir + "/pendulum_with_goals.xml")
+
+        self.sim = MjSim(model)
+        self.viewer = None
+        self.rgb_viewer = None
+
+        self.frameskip = frameskip
+        self.goal = None
+        self.goal_reaching_thresholds = goal_reaching_thresholds
+        self.goal_not_reached_penalty = goal_not_reached_penalty
+        self.goal_reached_reward = goal_reached_reward
+        self.terminate_on_goal_reaching = terminate_on_goal_reaching
+        self.time_limit = time_limit
+        self.current_episode_steps_counter = 0
+        self.random_goals_instead_of_standing_goal = random_goals_instead_of_standing_goal
+        self.polar_coordinates = polar_coordinates
+
+        # spaces definition
+        self.action_space = spaces.Box(low=-self.sim.model.actuator_ctrlrange[:, 1],
+                                       high=self.sim.model.actuator_ctrlrange[:, 1],
+                                       dtype=np.float32)
+        if self.polar_coordinates:
+            self.observation_space = spaces.Dict({
+                "observation": spaces.Box(low=np.array([-np.pi, -15]),
+                                          high=np.array([np.pi, 15]),
+                                          dtype=np.float32),
+                "desired_goal": spaces.Box(low=np.array([-np.pi, -15]),
+                                           high=np.array([np.pi, 15]),
+                                           dtype=np.float32),
+                "achieved_goal": spaces.Box(low=np.array([-np.pi, -15]),
+                                            high=np.array([np.pi, 15]),
+                                            dtype=np.float32)
+            })
+        else:
+            self.observation_space = spaces.Dict({
+                "observation": spaces.Box(low=np.array([-1, -1, -15]),
+                                          high=np.array([1, 1, 15]),
+                                          dtype=np.float32),
+                "desired_goal": spaces.Box(low=np.array([-1, -1, -15]),
+                                           high=np.array([1, 1, 15]),
+                                           dtype=np.float32),
+                "achieved_goal": spaces.Box(low=np.array([-1, -1, -15]),
+                                            high=np.array([1, 1, 15]),
+                                            dtype=np.float32)
+            })
+
+        self.spec = EnvSpec('PendulumWithGoals-v0')
+        self.spec.reward_threshold = self.goal_not_reached_penalty * self.time_limit
+
+        self.reset()
+
+    def _goal_reached(self):
+        observation = self._get_obs()
+        if np.any(np.abs(observation['achieved_goal'] - observation['desired_goal']) > self.goal_reaching_thresholds):
+            return False
+        else:
+            return True
+
+    def _terminate(self):
+        if (self._goal_reached() and self.terminate_on_goal_reaching) or \
+                        self.current_episode_steps_counter >= self.time_limit:
+            return True
+        else:
+            return False
+
+    def _reward(self):
+        if self._goal_reached():
+            return self.goal_reached_reward
+        else:
+            return self.goal_not_reached_penalty
+
+    def step(self, action):
+        self.sim.data.ctrl[:] = action
+        for _ in range(self.frameskip):
+            self.sim.step()
+
+        self.current_episode_steps_counter += 1
+
+        state = self._get_obs()
+
+        # visualize the angular velocities
+        state_velocity = np.copy(state['observation'][-1] / 20)
+        goal_velocity = self.goal[-1] / 20
+        self.sim.model.site_size[2] = np.array([0.01, 0.01, state_velocity])
+        self.sim.data.mocap_pos[2] = np.array([0.85, 0, 0.75 + state_velocity])
+        self.sim.model.site_size[3] = np.array([0.01, 0.01, goal_velocity])
+        self.sim.data.mocap_pos[3] = np.array([1.15, 0, 0.75 + goal_velocity])
+
+        return state, self._reward(), self._terminate(), {}
+
+    def _get_obs(self):
+
+        """
+        y
+
+        ^
+        |____
+        |   /
+        |  /
+        |~/
+        |/
+        --------> x
+
+        """
+
+        # observation
+        angle = self.sim.data.qpos
+        angular_velocity = self.sim.data.qvel
+        if self.polar_coordinates:
+            observation = np.concatenate([angle - np.pi, angular_velocity])
+        else:
+            x = np.sin(angle)
+            y = np.cos(angle)  # qpos is the angle relative to a standing pole
+            observation = np.concatenate([x, y, angular_velocity])
+
+        return {
+            "observation": observation,
+            "desired_goal": self.goal,
+            "achieved_goal": observation
+        }
+
+    def reset(self):
+        self.current_episode_steps_counter = 0
+
+        # set initial state
+        angle = np.random.uniform(np.pi / 4, 7 * np.pi / 4)
+        angular_velocity = np.random.uniform(-0.05, 0.05)
+        self.sim.data.qpos[0] = angle
+        self.sim.data.qvel[0] = angular_velocity
+        self.sim.step()
+
+        # goal
+        if self.random_goals_instead_of_standing_goal:
+            angle_target = np.random.uniform(-np.pi / 8, np.pi / 8)
+            angular_velocity_target = np.random.uniform(-0.2, 0.2)
+        else:
+            angle_target = 0
+            angular_velocity_target = 0
+
+        # convert target values to goal
+        x_target = np.sin(angle_target)
+        y_target = np.cos(angle_target)
+        if self.polar_coordinates:
+            self.goal = np.array([angle_target - np.pi, angular_velocity_target])
+        else:
+            self.goal = np.array([x_target, y_target, angular_velocity_target])
+
+        # visualize the goal
+        self.sim.data.mocap_pos[0] = [x_target, 0, y_target]
+
+        return self._get_obs()
+
+    def render(self, mode='human', close=False):
+        if mode == 'human':
+            if self.viewer is None:
+                self.viewer = MjViewer(self.sim)
+            self.viewer.render()
+        elif mode == 'rgb_array':
+            if self.rgb_viewer is None:
+                self.rgb_viewer = MjRenderContextOffscreen(self.sim, 0)
+            self.rgb_viewer.render(500, 500)
+            # window size used for old mujoco-py:
+            data = self.rgb_viewer.read_pixels(500, 500, depth=False)
+            # original image is upside-down, so flip it
+            return data[::-1, :, :]
--- a/rl_coach/environments/mujoco/pendulum_with_goals.xml
+++ b/rl_coach/environments/mujoco/pendulum_with_goals.xml
@@ -0,0 +1,42 @@
+<mujoco model="pendulum_with_goals">
+  <include file="./common/visual.xml"/>
+  <include file="./common/skybox.xml"/>
+  <include file="./common/materials.xml"/>
+
+  <option timestep="0.002">
+    <flag contact="disable" energy="enable"/>
+  </option>
+
+  <worldbody>
+    <light name="light" pos="0 0 2"/>
+    <geom name="floor" size="2 2 .2" type="plane" material="grid"/>
+    <camera name="fixed" pos="0 -1.5 2" xyaxes='1 0 0 0 1 1'/>
+    <camera name="lookat" mode="targetbodycom" target="pole" pos="0 -2 1"/>
+    <body name="pole" pos="0 0 .6">
+      <joint name="hinge" type="hinge" axis="0 1 0" damping="0.1"/>
+      <geom name="base" material="decoration" type="cylinder" fromto="0 -.03 0 0 .03 0" size="0.021" mass="0"/>
+      <geom name="pole" material="self" type="capsule" fromto="0 0 0 0 0 0.5" size="0.02" mass="0"/>
+      <geom name="mass" material="effector" type="sphere" pos="0 0 0.5" size="0.05" mass="1"/>
+    </body>
+
+    <body name="end_goal" pos="0 0 0" mocap="true">
+        <site type="sphere" size="0.05" rgba="1 1 0 1" />
+    </body>
+    <!--<body name="sub_goal" pos="0 0 0" mocap="true">-->
+        <!--<site type="sphere" size="0.05" rgba="1 0 1 1" />-->
+    <!--</body>-->
+    <body name="current_velo" pos="0.0 0 0.0" mocap="true">
+        <site type="box" size="0.01 0.01 0.1" rgba="1 1 1 1" />
+    </body>
+    <body name="subgoal_velo" pos="0.0 0 0.0" mocap="true">
+        <site type="box" size="0.01 0.01 0.1" rgba="1 0 1 1" />
+    </body>
+    <body name="zero_velo" pos="1.0 0 0.75" mocap="true">
+        <site type="box" size="0.3 0.01 0.01" rgba="1 0 0 1" />
+    </body>
+  </worldbody>
+
+  <actuator>
+    <motor name="torque" joint="hinge" gear="1" ctrlrange="-2 2" ctrllimited="true"/>
+  </actuator>
+</mujoco>