diff --git a/rl_coach/environments/carla_environment.py b/rl_coach/environments/carla_environment.py
index ed95055..49e79d9 100644
--- a/rl_coach/environments/carla_environment.py
+++ b/rl_coach/environments/carla_environment.py
@@ -113,6 +113,7 @@ class CarlaEnvironmentParameters(EnvironmentParameters):
         self.verbose = True
         self.episode_max_time = 100000 # miliseconds for each episode
         self.allow_braking = False
+        self.separate_actions_for_throttle_and_brake = False
         self.num_speedup_steps = 30
         self.max_speed = 35.0 # km/h
         self.default_input_filter = CarlaInputFilter
@@ -131,6 +132,7 @@ class CarlaEnvironment(Environment):
                  verbose: bool, experiment_suite: ExperimentSuite, config: str, episode_max_time: int,
                  allow_braking: bool, quality: CarlaEnvironmentParameters.Quality,
                  cameras: List[CameraTypes], weather_id: List[int], experiment_path: str,
+                 separate_actions_for_throttle_and_brake: bool,
                  num_speedup_steps: int, max_speed: float, **kwargs):
         super().__init__(level, seed, frame_skip, human_control, custom_reward_threshold, visualization_parameters)
 
@@ -150,6 +152,7 @@ class CarlaEnvironment(Environment):
         self.weather_id = weather_id
         self.episode_max_time = episode_max_time
         self.allow_braking = allow_braking
+        self.separate_actions_for_throttle_and_brake = separate_actions_for_throttle_and_brake
         self.camera_width = camera_width
         self.camera_height = camera_height
 
@@ -208,7 +211,12 @@ class CarlaEnvironment(Environment):
                                                                    high=255)
 
         # action space
-        self.action_space = BoxActionSpace(shape=2, low=np.array([-1, -1]), high=np.array([1, 1]))
+        if self.separate_actions_for_throttle_and_brake:
+            self.action_space = BoxActionSpace(shape=3, low=np.array([-1, 0, 0]), high=np.array([1, 1, 1]),
+                                               descriptions=["steer", "gas", "brake"])
+        else:
+            self.action_space = BoxActionSpace(shape=2, low=np.array([-1, -1]), high=np.array([1, 1]),
+                                               descriptions=["steer", "gas_and_brake"])
 
         # human control
         if self.human_control:
@@ -216,6 +224,7 @@ class CarlaEnvironment(Environment):
             self.steering_strength = 0.5
             self.gas_strength = 1.0
             self.brake_strength = 0.5
+            # TODO: reverse order of actions - the targets below still put steering last, but steer is now action[0]
             self.action_space = PartialDiscreteActionSpaceMap(
                 target_actions=[[0., 0.],
                                 [0., -self.steering_strength],
@@ -381,13 +390,18 @@ class CarlaEnvironment(Environment):
     def _take_action(self, action):
         self.control = VehicleControl()
 
-        # transform the 2 value action (throttle - brake, steer) into a 3 value action (throttle, brake, steer)
-        self.control.throttle = np.clip(action[0], 0, 1)
-        self.control.steer = np.clip(action[1], -1, 1)
-        self.control.brake = np.abs(np.clip(action[0], -1, 0))
+        if self.separate_actions_for_throttle_and_brake:
+            self.control.steer = np.clip(action[0], -1, 1)
+            self.control.throttle = np.clip(action[1], 0, 1)
+            self.control.brake = np.clip(action[2], 0, 1)
+        else:
+            # transform the 2 value action (steer, throttle - brake) into a 3 value action (steer, throttle, brake)
+            self.control.steer = np.clip(action[0], -1, 1)
+            self.control.throttle = np.clip(action[1], 0, 1)
+            self.control.brake = np.abs(np.clip(action[1], -1, 0))
 
         # prevent braking
-        if not self.allow_braking or self.control.brake < 0.1:
+        if not self.allow_braking or self.control.brake < 0.1 or self.control.throttle > self.control.brake:
             self.control.brake = 0
 
         # prevent over speeding
@@ -423,7 +437,7 @@ class CarlaEnvironment(Environment):
             # go over all the possible positions in a cyclic manner
             self.current_start_position_idx = (self.current_start_position_idx + 1) % self.num_positions
 
-            # choose a random goal destination TODO: follow the CoRL destinations and start positions
+            # choose a random goal destination
             self.current_goal = random.choice(self.positions)
 
             try:
@@ -434,7 +448,8 @@ class CarlaEnvironment(Environment):
 
         # start the game with some initial speed
         for i in range(self.num_speedup_steps):
-            self._take_action([1.0, 0])
+            self.control = VehicleControl(throttle=1.0, brake=0, steer=0, hand_brake=False, reverse=False)
+            self.game.send_control(self.control)
 
     def get_rendered_image(self) -> np.ndarray:
         """
diff --git a/rl_coach/presets/CARLA_CIL.py b/rl_coach/presets/CARLA_CIL.py
index e4bf20c..db0bd92 100644
--- a/rl_coach/presets/CARLA_CIL.py
+++ b/rl_coach/presets/CARLA_CIL.py
@@ -1,4 +1,6 @@
 import numpy as np
+import os
+from rl_coach.logger import screen
 
 # make sure you have $CARLA_ROOT/PythonClient in your PYTHONPATH
 from carla.driving_benchmark.experiment_suites import CoRL2017
@@ -23,6 +25,8 @@ from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
 from rl_coach.graph_managers.graph_manager import ScheduleParameters
 from rl_coach.schedules import ConstantSchedule
 from rl_coach.spaces import ImageObservationSpace
+from rl_coach.utilities.carla_dataset_to_replay_buffer import create_dataset
+
 
 ####################
 # Graph Scheduling #
@@ -100,14 +104,23 @@ agent_params.exploration.evaluation_noise_percentage = 0
 # no playing during the training phase
 agent_params.algorithm.num_consecutive_playing_steps = EnvironmentSteps(0)
 
-# the CARLA dataset should be downloaded through the following repository:
-# https://github.com/carla-simulator/imitation-learning
-# the dataset should then be converted to the Coach format using the script utils/carla_dataset_to_replay_buffer.py
-# the path to the converted dataset should be updated below
+# use the following command line to download and extract the CARLA dataset:
+# python rl_coach/utilities/carla_dataset_to_replay_buffer.py
 agent_params.memory.load_memory_from_file_path = "./datasets/carla_train_set_replay_buffer.p"
 agent_params.memory.state_key_with_the_class_index = 'high_level_command'
 agent_params.memory.num_classes = 4
 
+# download dataset if it doesn't exist
+if not os.path.exists(agent_params.memory.load_memory_from_file_path):
+    screen.log_title("The CARLA dataset is not present in the following path: {}"
+                     .format(agent_params.memory.load_memory_from_file_path))
+    result = screen.ask_yes_no("Do you want to download it now?")
+    if result:
+        create_dataset(None, agent_params.memory.load_memory_from_file_path)
+    else:
+        screen.error("Please update the path to the CARLA dataset in the CARLA_CIL preset", crash=True)
+
+
 ###############
 # Environment #
 ###############
@@ -116,12 +129,13 @@ env_params.level = 'town1'
 env_params.cameras = ['CameraRGB']
 env_params.camera_height = 600
 env_params.camera_width = 800
-env_params.allow_braking = False
+env_params.separate_actions_for_throttle_and_brake = True
+env_params.allow_braking = True
 env_params.quality = CarlaEnvironmentParameters.Quality.EPIC
 env_params.experiment_suite = CoRL2017('Town01')
 
 vis_params = VisualizationParameters()
-vis_params.video_dump_methods = [SelectedPhaseOnlyDumpMethod(RunPhase.TEST), MaxDumpMethod()]
+vis_params.video_dump_methods = [SelectedPhaseOnlyDumpMethod(RunPhase.TEST)]
 vis_params.dump_mp4 = True
 
 graph_manager = BasicRLGraphManager(agent_params=agent_params, env_params=env_params,
diff --git a/rl_coach/utilities/carla_dataset_to_replay_buffer.py b/rl_coach/utilities/carla_dataset_to_replay_buffer.py
index 207b57e..4f1bbb6 100644
--- a/rl_coach/utilities/carla_dataset_to_replay_buffer.py
+++ b/rl_coach/utilities/carla_dataset_to_replay_buffer.py
@@ -15,26 +15,67 @@
 #
 
 import argparse
-
-import h5py
 import os
 import sys
+
+import h5py
 import numpy as np
-from rl_coach.utils import ProgressBar
+
 from rl_coach.core_types import Transition
 from rl_coach.memories.memory import MemoryGranularity
 from rl_coach.memories.non_episodic.experience_replay import ExperienceReplay
+from rl_coach.utils import ProgressBar, start_shell_command_and_wait
+from rl_coach.logger import screen
 
-if __name__ == "__main__":
-    argparser = argparse.ArgumentParser(description=__doc__)
-    argparser.add_argument('-d', '--dataset_root', help='The path to the CARLA dataset root folder')
-    argparser.add_argument('-o', '--output_path', help='The path to save the resulting replay buffer',
-                           default='carla_train_set_replay_buffer.p')
-    args = argparser.parse_args()
 
+def maybe_download(dataset_root):
+    """
+    Download and extract the CARLA dataset archive if the dataset root is missing.
+
+    :param dataset_root: the directory that should hold the extracted dataset. If it is empty or None,
+                         the archive is extracted into the current working directory.
+    """
+    if not dataset_root or not os.path.exists(dataset_root):
+        screen.log_title("Downloading the CARLA dataset. This might take a while.")
 
-    train_set_root = os.path.join(args.dataset_root, 'SeqTrain')
-    validation_set_root = os.path.join(args.dataset_root, 'SeqVal')
+        google_drive_download_id = "1hloAeyamYn-H6MfV1dRtY1gJPhkR55sY"
+        filename_to_save = "datasets/CARLA_dataset.tar.gz"
+        # wget -O does not create missing directories
+        os.makedirs(os.path.dirname(filename_to_save), exist_ok=True)
+        download_command = 'wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=' \
+                           '$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies ' \
+                           '--no-check-certificate \"https://docs.google.com/uc?export=download&id={}\" -O- | ' \
+                           'sed -rn \'s/.*confirm=([0-9A-Za-z_]+).*/\\1\\n/p\')&id={}" -O {} && rm -rf /tmp/cookies.txt'\
+            .format(google_drive_download_id, google_drive_download_id, filename_to_save)
+
+        # start downloading and wait for it to finish
+        if start_shell_command_and_wait(download_command) != 0:
+            raise RuntimeError("Failed to download the CARLA dataset")
+
+        screen.log_title("Unzipping the dataset")
+        unzip_command = 'tar -xzf {}'.format(filename_to_save)
+        if dataset_root:
+            # tar -C requires the target directory to exist
+            os.makedirs(dataset_root, exist_ok=True)
+            unzip_command += " -C {}".format(dataset_root)
+
+        if start_shell_command_and_wait(unzip_command) != 0:
+            raise RuntimeError("Failed to extract the CARLA dataset")
+
+
+def create_dataset(dataset_root, output_path):
+    """
+    Convert the CARLA dataset into a replay buffer file, downloading the dataset first if it is missing.
+
+    :param dataset_root: the path to the CARLA dataset root folder (holding SeqTrain and SeqVal), or None
+    :param output_path: the path to save the resulting replay buffer to
+    """
+    maybe_download(dataset_root)
+
+    # without an explicit root the archive was extracted into the current working directory
+    dataset_root = dataset_root or '.'
+    train_set_root = os.path.join(dataset_root, 'SeqTrain')
+    validation_set_root = os.path.join(dataset_root, 'SeqVal')
 
     # training set extraction
     memory = ExperienceReplay(max_size=(MemoryGranularity.Transitions, sys.maxsize))
@@ -44,11 +85,9 @@ if __name__ == "__main__":
     for file_idx, file in enumerate(train_set_files[:3000]):
         progress_bar.update(file_idx, "extracting file {}".format(file))
         train_set = h5py.File(os.path.join(train_set_root, file), 'r')
-        observations = train_set['rgb'][:] # forward camera
-        measurements = np.expand_dims(train_set['targets'][:, 10], -1) # forward speed
-        actions = train_set['targets'][:, :3] # steer, gas, brake
-        actions[:, 1] -= actions[:, 2]
-        actions = actions[:, :2][:, ::-1]
+        observations = train_set['rgb'][:]  # forward camera
+        measurements = np.expand_dims(train_set['targets'][:, 10], -1)  # forward speed
+        actions = train_set['targets'][:, :3]  # steer, gas, brake
 
         high_level_commands = train_set['targets'][:, 24].astype('int') - 2 # follow lane, left, right, straight
 
@@ -67,5 +106,15 @@ if __name__ == "__main__":
             )
             memory.store(transition)
     progress_bar.close()
-    print("Saving pickle file to {}".format(args.output_path))
-    memory.save(args.output_path)
+    print("Saving pickle file to {}".format(output_path))
+    memory.save(output_path)
+
+
+if __name__ == "__main__":
+    argparser = argparse.ArgumentParser(description=__doc__)
+    argparser.add_argument('-d', '--dataset_root', help='The path to the CARLA dataset root folder')
+    argparser.add_argument('-o', '--output_path', help='The path to save the resulting replay buffer',
+                           default='carla_train_set_replay_buffer.p')
+    args = argparser.parse_args()
+
+    create_dataset(args.dataset_root, args.output_path)
diff --git a/rl_coach/utils.py b/rl_coach/utils.py
index db58baa..9b16c6e 100644
--- a/rl_coach/utils.py
+++ b/rl_coach/utils.py
@@ -27,6 +27,7 @@ from multiprocessing import Manager
 from subprocess import Popen
 from typing import List, Tuple
 
+import atexit
 import numpy as np
 
 killed_processes = []
@@ -571,3 +572,22 @@ class ProgressBar(object):
 
     def close(self):
         print("")
+
+
+def start_shell_command_and_wait(command):
+    """
+    Run a shell command in its own session / process group and block until it finishes.
+
+    :param command: the shell command line to run
+    :return: the exit code of the command
+    """
+    p = Popen(command, shell=True, preexec_fn=os.setsid)
+
+    def cleanup():
+        # terminate (SIGTERM) the whole process group if the interpreter exits while the command is running
+        os.killpg(os.getpgid(p.pid), 15)
+
+    atexit.register(cleanup)
+    return_code = p.wait()
+    atexit.unregister(cleanup)
+    return return_code