updated CARLA to allow using actions of size 3 + automatic downloading of the CARLA imitation dataset

2025-12-17 19:20:19 +01:00 · 2018-09-16 12:07:11 +03:00
parent d3c8a5d7c1
commit cf892463e2
4 changed files with 106 additions and 32 deletions
--- a/rl_coach/environments/carla_environment.py
+++ b/rl_coach/environments/carla_environment.py
@@ -113,6 +113,7 @@ class CarlaEnvironmentParameters(EnvironmentParameters):
        self.verbose = True
        self.episode_max_time = 100000  # miliseconds for each episode
        self.allow_braking = False
+        self.separate_actions_for_throttle_and_brake = False
        self.num_speedup_steps = 30
        self.max_speed = 35.0  # km/h
        self.default_input_filter = CarlaInputFilter
@@ -131,6 +132,7 @@ class CarlaEnvironment(Environment):
                 verbose: bool, experiment_suite: ExperimentSuite, config: str, episode_max_time: int,
                 allow_braking: bool, quality: CarlaEnvironmentParameters.Quality,
                 cameras: List[CameraTypes], weather_id: List[int], experiment_path: str,
+                 separate_actions_for_throttle_and_brake: bool,
                 num_speedup_steps: int, max_speed: float, **kwargs):
        super().__init__(level, seed, frame_skip, human_control, custom_reward_threshold, visualization_parameters)

@@ -150,6 +152,7 @@ class CarlaEnvironment(Environment):
        self.weather_id = weather_id
        self.episode_max_time = episode_max_time
        self.allow_braking = allow_braking
+        self.separate_actions_for_throttle_and_brake = separate_actions_for_throttle_and_brake
        self.camera_width = camera_width
        self.camera_height = camera_height

@@ -208,7 +211,12 @@ class CarlaEnvironment(Environment):
                high=255)

        # action space
-        self.action_space = BoxActionSpace(shape=2, low=np.array([-1, -1]), high=np.array([1, 1]))
+        if self.separate_actions_for_throttle_and_brake:
+            self.action_space = BoxActionSpace(shape=3, low=np.array([-1, 0, 0]), high=np.array([1, 1, 1]),
+                                               descriptions=["steer", "gas", "brake"])
+        else:
+            self.action_space = BoxActionSpace(shape=2, low=np.array([-1, -1]), high=np.array([1, 1]),
+                                               descriptions=["steer", "gas_and_brake"])

        # human control
        if self.human_control:
@@ -216,6 +224,7 @@ class CarlaEnvironment(Environment):
            self.steering_strength = 0.5
            self.gas_strength = 1.0
            self.brake_strength = 0.5
+            # TODO: reverse order of actions
            self.action_space = PartialDiscreteActionSpaceMap(
                target_actions=[[0., 0.],
                                [0., -self.steering_strength],
@@ -381,13 +390,18 @@ class CarlaEnvironment(Environment):
    def _take_action(self, action):
        self.control = VehicleControl()

-        # transform the 2 value action (throttle - brake, steer) into a 3 value action (throttle, brake, steer)
-        self.control.throttle = np.clip(action[0], 0, 1)
-        self.control.steer = np.clip(action[1], -1, 1)
-        self.control.brake = np.abs(np.clip(action[0], -1, 0))
+        if self.separate_actions_for_throttle_and_brake:
+            self.control.steer = np.clip(action[0], -1, 1)
+            self.control.throttle = np.clip(action[1], 0, 1)
+            self.control.brake = np.clip(action[2], 0, 1)
+        else:
+            # transform the 2 value action (steer, throttle - brake) into a 3 value action (steer, throttle, brake)
+            self.control.steer = np.clip(action[0], -1, 1)
+            self.control.throttle = np.clip(action[1], 0, 1)
+            self.control.brake = np.abs(np.clip(action[1], -1, 0))

        # prevent braking
-        if not self.allow_braking or self.control.brake < 0.1:
+        if not self.allow_braking or self.control.brake < 0.1 or self.control.throttle > self.control.brake:
            self.control.brake = 0

        # prevent over speeding
@@ -423,7 +437,7 @@ class CarlaEnvironment(Environment):
            # go over all the possible positions in a cyclic manner
            self.current_start_position_idx = (self.current_start_position_idx + 1) % self.num_positions

-            # choose a random goal destination TODO: follow the CoRL destinations and start positions
+            # choose a random goal destination
            self.current_goal = random.choice(self.positions)

        try:
@@ -434,7 +448,8 @@ class CarlaEnvironment(Environment):

        # start the game with some initial speed
        for i in range(self.num_speedup_steps):
-            self._take_action([1.0, 0])
+            self.control = VehicleControl(throttle=1.0, brake=0, steer=0, hand_brake=False, reverse=False)
+            self.game.send_control(self.control)

    def get_rendered_image(self) -> np.ndarray:
        """
--- a/rl_coach/presets/CARLA_CIL.py
+++ b/rl_coach/presets/CARLA_CIL.py
@@ -1,4 +1,6 @@
 import numpy as np
+import os
+from rl_coach.logger import screen

 # make sure you have $CARLA_ROOT/PythonClient in your PYTHONPATH
 from carla.driving_benchmark.experiment_suites import CoRL2017
@@ -23,6 +25,8 @@ from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
 from rl_coach.graph_managers.graph_manager import ScheduleParameters
 from rl_coach.schedules import ConstantSchedule
 from rl_coach.spaces import ImageObservationSpace
+from rl_coach.utilities.carla_dataset_to_replay_buffer import create_dataset
+

 ####################
 # Graph Scheduling #
@@ -100,14 +104,23 @@ agent_params.exploration.evaluation_noise_percentage = 0
 # no playing during the training phase
 agent_params.algorithm.num_consecutive_playing_steps = EnvironmentSteps(0)

-# the CARLA dataset should be downloaded through the following repository:
-#   https://github.com/carla-simulator/imitation-learning
-# the dataset should then be converted to the Coach format using the script utils/carla_dataset_to_replay_buffer.py
-# the path to the converted dataset should be updated below
+# use the following command line to download and extract the CARLA dataset:
+# python rl_coach/utilities/carla_dataset_to_replay_buffer.py
 agent_params.memory.load_memory_from_file_path = "./datasets/carla_train_set_replay_buffer.p"
 agent_params.memory.state_key_with_the_class_index = 'high_level_command'
 agent_params.memory.num_classes = 4

+# download the dataset if it doesn't exist, saving it to the same path the memory will be loaded from
+if not os.path.exists(agent_params.memory.load_memory_from_file_path):
+    screen.log_title("The CARLA dataset is not present in the following path: {}"
+                     .format(agent_params.memory.load_memory_from_file_path))
+    result = screen.ask_yes_no("Do you want to download it now?")
+    if result:
+        create_dataset(None, agent_params.memory.load_memory_from_file_path)
+    else:
+        screen.error("Please update the path to the CARLA dataset in the CARLA_CIL preset", crash=True)
+
+
 ###############
 # Environment #
 ###############
@@ -116,12 +129,13 @@ env_params.level = 'town1'
 env_params.cameras = ['CameraRGB']
 env_params.camera_height = 600
 env_params.camera_width = 800
-env_params.allow_braking = False
+env_params.separate_actions_for_throttle_and_brake = True
+env_params.allow_braking = True
 env_params.quality = CarlaEnvironmentParameters.Quality.EPIC
 env_params.experiment_suite = CoRL2017('Town01')

 vis_params = VisualizationParameters()
-vis_params.video_dump_methods = [SelectedPhaseOnlyDumpMethod(RunPhase.TEST), MaxDumpMethod()]
+vis_params.video_dump_methods = [SelectedPhaseOnlyDumpMethod(RunPhase.TEST)]
 vis_params.dump_mp4 = True

 graph_manager = BasicRLGraphManager(agent_params=agent_params, env_params=env_params,
--- a/rl_coach/utilities/carla_dataset_to_replay_buffer.py
+++ b/rl_coach/utilities/carla_dataset_to_replay_buffer.py
@@ -15,26 +15,47 @@
 #

 import argparse
-
-import h5py
 import os
 import sys
+
+import h5py
 import numpy as np
-from rl_coach.utils import ProgressBar
+
 from rl_coach.core_types import Transition
 from rl_coach.memories.memory import MemoryGranularity
 from rl_coach.memories.non_episodic.experience_replay import ExperienceReplay
+from rl_coach.utils import ProgressBar, start_shell_command_and_wait
+from rl_coach.logger import screen


-if __name__ == "__main__":
-    argparser = argparse.ArgumentParser(description=__doc__)
-    argparser.add_argument('-d', '--dataset_root', help='The path to the CARLA dataset root folder')
-    argparser.add_argument('-o', '--output_path', help='The path to save the resulting replay buffer',
-                           default='carla_train_set_replay_buffer.p')
-    args = argparser.parse_args()
+def maybe_download(dataset_root):
+    """
+    Download and extract the CARLA imitation learning dataset if it is not already present.
+    :param dataset_root: the folder the dataset should be extracted to. If it is empty or None, the dataset is
+                         always downloaded and extracted into the current working directory
+    """
+    if not dataset_root or not os.path.exists(dataset_root):
+        screen.log_title("Downloading the CARLA dataset. This might take a while.")

-    train_set_root = os.path.join(args.dataset_root, 'SeqTrain')
-    validation_set_root = os.path.join(args.dataset_root, 'SeqVal')
+        google_drive_download_id = "1hloAeyamYn-H6MfV1dRtY1gJPhkR55sY"
+        filename_to_save = "datasets/CARLA_dataset.tar.gz"
+        download_command = 'wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=' \
+                           '$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies ' \
+                           '--no-check-certificate \"https://docs.google.com/uc?export=download&id={}\" -O- | ' \
+                           'sed -rn \'s/.*confirm=([0-9A-Za-z_]+).*/\\1\\n/p\')&id={}" -O {} && rm -rf /tmp/cookies.txt'\
+                           .format(google_drive_download_id, google_drive_download_id, filename_to_save)
+
+        # wget -O does not create missing parent directories, so make sure the target folder exists
+        os.makedirs(os.path.dirname(filename_to_save), exist_ok=True)
+
+        # start downloading and wait for it to finish
+        start_shell_command_and_wait(download_command)
+
+        screen.log_title("Unzipping the dataset")
+        unzip_command = 'tar -xzf {}'.format(filename_to_save)
+        if dataset_root:
+            # tar -C fails if the directory to change into does not exist yet
+            os.makedirs(dataset_root, exist_ok=True)
+            unzip_command += " -C {}".format(dataset_root)
+
+        start_shell_command_and_wait(unzip_command)
+
+
+def create_dataset(dataset_root, output_path):
+    """
+    Convert the CARLA imitation learning dataset into a replay buffer, downloading the dataset first if needed.
+    :param dataset_root: the path to the CARLA dataset root folder, or None to use the current working directory
+    :param output_path: the path to save the resulting replay buffer to
+    """
+    maybe_download(dataset_root)
+
+    # without an explicit root the archive is extracted into the working directory, so look for the
+    # train/validation folders there instead of failing on os.path.join(None, ...)
+    dataset_root = dataset_root or os.curdir
+    train_set_root = os.path.join(dataset_root, 'SeqTrain')
+    validation_set_root = os.path.join(dataset_root, 'SeqVal')

    # training set extraction
    memory = ExperienceReplay(max_size=(MemoryGranularity.Transitions, sys.maxsize))
@@ -47,8 +68,10 @@ if __name__ == "__main__":
        observations = train_set['rgb'][:]  # forward camera
        measurements = np.expand_dims(train_set['targets'][:, 10], -1)  # forward speed
        actions = train_set['targets'][:, :3]  # steer, gas, brake
-        actions[:, 1] -= actions[:, 2]
-        actions = actions[:, :2][:, ::-1]
+        # actions[:, :2] = actions[:, 1:3]
+        # actions[:, 2] = train_set['targets'][:, 0]                           # gas, brake, steer
+        # actions[:, 1] -= actions[:, 2]
+        # actions = actions[:, :2][:, ::-1]

        high_level_commands = train_set['targets'][:, 24].astype('int') - 2  # follow lane, left, right, straight

@@ -67,5 +90,15 @@ if __name__ == "__main__":
            )
            memory.store(transition)
    progress_bar.close()
-    print("Saving pickle file to {}".format(args.output_path))
-    memory.save(args.output_path)
+    print("Saving pickle file to {}".format(output_path))
+    memory.save(output_path)
+
+
+if __name__ == "__main__":
+    argparser = argparse.ArgumentParser(description=__doc__)
+    argparser.add_argument('-d', '--dataset_root', help='The path to the CARLA dataset root folder')
+    argparser.add_argument('-o', '--output_path', help='The path to save the resulting replay buffer',
+                           default='carla_train_set_replay_buffer.p')
+    args = argparser.parse_args()
+
+    create_dataset(args.dataset_root, args.output_path)
--- a/rl_coach/utils.py
+++ b/rl_coach/utils.py
@@ -27,6 +27,7 @@ from multiprocessing import Manager
 from subprocess import Popen
 from typing import List, Tuple

+import atexit
 import numpy as np

 killed_processes = []
@@ -571,3 +572,14 @@ class ProgressBar(object):

    def close(self):
        print("")
+
+
+def start_shell_command_and_wait(command):
+    """
+    Run a shell command in a new session and block until it finishes.
+    :param command: the command line to execute through the shell
+    """
+    process = Popen(command, shell=True, preexec_fn=os.setsid)
+
+    def terminate_process_group():
+        # signal 15 is SIGTERM; it is sent to the whole process group that the shell started
+        os.killpg(os.getpgid(process.pid), 15)
+
+    # keep the handler registered only while waiting, so that an interpreter exit in the middle
+    # of the command does not leave the spawned processes running
+    atexit.register(terminate_process_group)
+    process.wait()
+    atexit.unregister(terminate_process_group)