1
0
mirror of https://github.com/gryf/coach.git synced 2025-12-17 19:20:19 +01:00

updated CARLA to allow using actions of size 3 + automatic downloading of the CARLA imitation dataset

This commit is contained in:
itaicaspi-intel
2018-09-16 12:07:11 +03:00
parent d3c8a5d7c1
commit cf892463e2
4 changed files with 106 additions and 32 deletions

View File

@@ -113,6 +113,7 @@ class CarlaEnvironmentParameters(EnvironmentParameters):
self.verbose = True self.verbose = True
self.episode_max_time = 100000 # milliseconds for each episode self.episode_max_time = 100000 # milliseconds for each episode
self.allow_braking = False self.allow_braking = False
self.separate_actions_for_throttle_and_brake = False
self.num_speedup_steps = 30 self.num_speedup_steps = 30
self.max_speed = 35.0 # km/h self.max_speed = 35.0 # km/h
self.default_input_filter = CarlaInputFilter self.default_input_filter = CarlaInputFilter
@@ -131,6 +132,7 @@ class CarlaEnvironment(Environment):
verbose: bool, experiment_suite: ExperimentSuite, config: str, episode_max_time: int, verbose: bool, experiment_suite: ExperimentSuite, config: str, episode_max_time: int,
allow_braking: bool, quality: CarlaEnvironmentParameters.Quality, allow_braking: bool, quality: CarlaEnvironmentParameters.Quality,
cameras: List[CameraTypes], weather_id: List[int], experiment_path: str, cameras: List[CameraTypes], weather_id: List[int], experiment_path: str,
separate_actions_for_throttle_and_brake: bool,
num_speedup_steps: int, max_speed: float, **kwargs): num_speedup_steps: int, max_speed: float, **kwargs):
super().__init__(level, seed, frame_skip, human_control, custom_reward_threshold, visualization_parameters) super().__init__(level, seed, frame_skip, human_control, custom_reward_threshold, visualization_parameters)
@@ -150,6 +152,7 @@ class CarlaEnvironment(Environment):
self.weather_id = weather_id self.weather_id = weather_id
self.episode_max_time = episode_max_time self.episode_max_time = episode_max_time
self.allow_braking = allow_braking self.allow_braking = allow_braking
self.separate_actions_for_throttle_and_brake = separate_actions_for_throttle_and_brake
self.camera_width = camera_width self.camera_width = camera_width
self.camera_height = camera_height self.camera_height = camera_height
@@ -208,7 +211,12 @@ class CarlaEnvironment(Environment):
high=255) high=255)
# action space # action space
self.action_space = BoxActionSpace(shape=2, low=np.array([-1, -1]), high=np.array([1, 1])) if self.separate_actions_for_throttle_and_brake:
self.action_space = BoxActionSpace(shape=3, low=np.array([-1, 0, 0]), high=np.array([1, 1, 1]),
descriptions=["steer", "gas", "brake"])
else:
self.action_space = BoxActionSpace(shape=2, low=np.array([-1, -1]), high=np.array([1, 1]),
descriptions=["steer", "gas_and_brake"])
# human control # human control
if self.human_control: if self.human_control:
@@ -216,6 +224,7 @@ class CarlaEnvironment(Environment):
self.steering_strength = 0.5 self.steering_strength = 0.5
self.gas_strength = 1.0 self.gas_strength = 1.0
self.brake_strength = 0.5 self.brake_strength = 0.5
# TODO: reverse order of actions
self.action_space = PartialDiscreteActionSpaceMap( self.action_space = PartialDiscreteActionSpaceMap(
target_actions=[[0., 0.], target_actions=[[0., 0.],
[0., -self.steering_strength], [0., -self.steering_strength],
@@ -381,13 +390,18 @@ class CarlaEnvironment(Environment):
def _take_action(self, action): def _take_action(self, action):
self.control = VehicleControl() self.control = VehicleControl()
# transform the 2 value action (throttle - brake, steer) into a 3 value action (throttle, brake, steer) if self.separate_actions_for_throttle_and_brake:
self.control.throttle = np.clip(action[0], 0, 1) self.control.steer = np.clip(action[0], -1, 1)
self.control.steer = np.clip(action[1], -1, 1) self.control.throttle = np.clip(action[1], 0, 1)
self.control.brake = np.abs(np.clip(action[0], -1, 0)) self.control.brake = np.clip(action[2], 0, 1)
else:
# transform the 2 value action (steer, throttle - brake) into a 3 value action (steer, throttle, brake)
self.control.steer = np.clip(action[0], -1, 1)
self.control.throttle = np.clip(action[1], 0, 1)
self.control.brake = np.abs(np.clip(action[1], -1, 0))
# prevent braking # prevent braking
if not self.allow_braking or self.control.brake < 0.1: if not self.allow_braking or self.control.brake < 0.1 or self.control.throttle > self.control.brake:
self.control.brake = 0 self.control.brake = 0
# prevent over speeding # prevent over speeding
@@ -423,7 +437,7 @@ class CarlaEnvironment(Environment):
# go over all the possible positions in a cyclic manner # go over all the possible positions in a cyclic manner
self.current_start_position_idx = (self.current_start_position_idx + 1) % self.num_positions self.current_start_position_idx = (self.current_start_position_idx + 1) % self.num_positions
# choose a random goal destination TODO: follow the CoRL destinations and start positions # choose a random goal destination
self.current_goal = random.choice(self.positions) self.current_goal = random.choice(self.positions)
try: try:
@@ -434,7 +448,8 @@ class CarlaEnvironment(Environment):
# start the game with some initial speed # start the game with some initial speed
for i in range(self.num_speedup_steps): for i in range(self.num_speedup_steps):
self._take_action([1.0, 0]) self.control = VehicleControl(throttle=1.0, brake=0, steer=0, hand_brake=False, reverse=False)
self.game.send_control(VehicleControl())
def get_rendered_image(self) -> np.ndarray: def get_rendered_image(self) -> np.ndarray:
""" """

View File

@@ -1,4 +1,6 @@
import numpy as np import numpy as np
import os
from logger import screen
# make sure you have $CARLA_ROOT/PythonClient in your PYTHONPATH # make sure you have $CARLA_ROOT/PythonClient in your PYTHONPATH
from carla.driving_benchmark.experiment_suites import CoRL2017 from carla.driving_benchmark.experiment_suites import CoRL2017
@@ -23,6 +25,8 @@ from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
from rl_coach.graph_managers.graph_manager import ScheduleParameters from rl_coach.graph_managers.graph_manager import ScheduleParameters
from rl_coach.schedules import ConstantSchedule from rl_coach.schedules import ConstantSchedule
from rl_coach.spaces import ImageObservationSpace from rl_coach.spaces import ImageObservationSpace
from rl_coach.utilities.carla_dataset_to_replay_buffer import create_dataset
#################### ####################
# Graph Scheduling # # Graph Scheduling #
@@ -100,14 +104,23 @@ agent_params.exploration.evaluation_noise_percentage = 0
# no playing during the training phase # no playing during the training phase
agent_params.algorithm.num_consecutive_playing_steps = EnvironmentSteps(0) agent_params.algorithm.num_consecutive_playing_steps = EnvironmentSteps(0)
# the CARLA dataset should be downloaded through the following repository: # use the following command line to download and extract the CARLA dataset:
# https://github.com/carla-simulator/imitation-learning # python rl_coach/utilities/carla_dataset_to_replay_buffer.py
# the dataset should then be converted to the Coach format using the script utils/carla_dataset_to_replay_buffer.py
# the path to the converted dataset should be updated below
agent_params.memory.load_memory_from_file_path = "./datasets/carla_train_set_replay_buffer.p" agent_params.memory.load_memory_from_file_path = "./datasets/carla_train_set_replay_buffer.p"
agent_params.memory.state_key_with_the_class_index = 'high_level_command' agent_params.memory.state_key_with_the_class_index = 'high_level_command'
agent_params.memory.num_classes = 4 agent_params.memory.num_classes = 4
# download dataset if it doesn't exist
if not os.path.exists(agent_params.memory.load_memory_from_file_path):
screen.log_title("The CARLA dataset is not present in the following path: {}"
.format(agent_params.memory.load_memory_from_file_path))
result = screen.ask_yes_no("Do you want to download it now?")
if result:
create_dataset(None, "./datasets/carla_train_set_replay_buffer.p")
else:
screen.error("Please update the path to the CARLA dataset in the CARLA_CIL preset", crash=True)
############### ###############
# Environment # # Environment #
############### ###############
@@ -116,12 +129,13 @@ env_params.level = 'town1'
env_params.cameras = ['CameraRGB'] env_params.cameras = ['CameraRGB']
env_params.camera_height = 600 env_params.camera_height = 600
env_params.camera_width = 800 env_params.camera_width = 800
env_params.allow_braking = False env_params.separate_actions_for_throttle_and_brake = True
env_params.allow_braking = True
env_params.quality = CarlaEnvironmentParameters.Quality.EPIC env_params.quality = CarlaEnvironmentParameters.Quality.EPIC
env_params.experiment_suite = CoRL2017('Town01') env_params.experiment_suite = CoRL2017('Town01')
vis_params = VisualizationParameters() vis_params = VisualizationParameters()
vis_params.video_dump_methods = [SelectedPhaseOnlyDumpMethod(RunPhase.TEST), MaxDumpMethod()] vis_params.video_dump_methods = [SelectedPhaseOnlyDumpMethod(RunPhase.TEST)]
vis_params.dump_mp4 = True vis_params.dump_mp4 = True
graph_manager = BasicRLGraphManager(agent_params=agent_params, env_params=env_params, graph_manager = BasicRLGraphManager(agent_params=agent_params, env_params=env_params,

View File

@@ -15,26 +15,47 @@
# #
import argparse import argparse
import h5py
import os import os
import sys import sys
import h5py
import numpy as np import numpy as np
from rl_coach.utils import ProgressBar
from rl_coach.core_types import Transition from rl_coach.core_types import Transition
from rl_coach.memories.memory import MemoryGranularity from rl_coach.memories.memory import MemoryGranularity
from rl_coach.memories.non_episodic.experience_replay import ExperienceReplay from rl_coach.memories.non_episodic.experience_replay import ExperienceReplay
from rl_coach.utils import ProgressBar, start_shell_command_and_wait
from rl_coach.logger import screen
if __name__ == "__main__": def maybe_download(dataset_root):
argparser = argparse.ArgumentParser(description=__doc__) if not dataset_root or not os.path.exists(dataset_root):
argparser.add_argument('-d', '--dataset_root', help='The path to the CARLA dataset root folder') screen.log_title("Downloading the CARLA dataset. This might take a while.")
argparser.add_argument('-o', '--output_path', help='The path to save the resulting replay buffer',
default='carla_train_set_replay_buffer.p')
args = argparser.parse_args()
train_set_root = os.path.join(args.dataset_root, 'SeqTrain') google_drive_download_id = "1hloAeyamYn-H6MfV1dRtY1gJPhkR55sY"
validation_set_root = os.path.join(args.dataset_root, 'SeqVal') filename_to_save = "datasets/CARLA_dataset.tar.gz"
download_command = 'wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=' \
'$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies ' \
'--no-check-certificate \"https://docs.google.com/uc?export=download&id={}\" -O- | ' \
'sed -rn \'s/.*confirm=([0-9A-Za-z_]+).*/\\1\\n/p\')&id={}" -O {} && rm -rf /tmp/cookies.txt'\
.format(google_drive_download_id, google_drive_download_id, filename_to_save)
# start downloading and wait for it to finish
start_shell_command_and_wait(download_command)
screen.log_title("Unzipping the dataset")
unzip_command = 'tar -xzf {}'.format(filename_to_save)
if dataset_root is not None:
unzip_command += " -C {}".format(dataset_root)
start_shell_command_and_wait(unzip_command)
def create_dataset(dataset_root, output_path):
maybe_download(dataset_root)
train_set_root = os.path.join(dataset_root, 'SeqTrain')
validation_set_root = os.path.join(dataset_root, 'SeqVal')
# training set extraction # training set extraction
memory = ExperienceReplay(max_size=(MemoryGranularity.Transitions, sys.maxsize)) memory = ExperienceReplay(max_size=(MemoryGranularity.Transitions, sys.maxsize))
@@ -44,11 +65,13 @@ if __name__ == "__main__":
for file_idx, file in enumerate(train_set_files[:3000]): for file_idx, file in enumerate(train_set_files[:3000]):
progress_bar.update(file_idx, "extracting file {}".format(file)) progress_bar.update(file_idx, "extracting file {}".format(file))
train_set = h5py.File(os.path.join(train_set_root, file), 'r') train_set = h5py.File(os.path.join(train_set_root, file), 'r')
observations = train_set['rgb'][:] # forward camera observations = train_set['rgb'][:] # forward camera
measurements = np.expand_dims(train_set['targets'][:, 10], -1) # forward speed measurements = np.expand_dims(train_set['targets'][:, 10], -1) # forward speed
actions = train_set['targets'][:, :3] # steer, gas, brake actions = train_set['targets'][:, :3] # steer, gas, brake
actions[:, 1] -= actions[:, 2] # actions[:, :2] = actions[:, 1:3]
actions = actions[:, :2][:, ::-1] # actions[:, 2] = train_set['targets'][:, 0] # gas, brake, steer
# actions[:, 1] -= actions[:, 2]
# actions = actions[:, :2][:, ::-1]
high_level_commands = train_set['targets'][:, 24].astype('int') - 2 # follow lane, left, right, straight high_level_commands = train_set['targets'][:, 24].astype('int') - 2 # follow lane, left, right, straight
@@ -67,5 +90,15 @@ if __name__ == "__main__":
) )
memory.store(transition) memory.store(transition)
progress_bar.close() progress_bar.close()
print("Saving pickle file to {}".format(args.output_path)) print("Saving pickle file to {}".format(output_path))
memory.save(args.output_path) memory.save(output_path)
if __name__ == "__main__":
    # Command-line entry point: read the dataset location and the output
    # file from the arguments, then hand both to create_dataset().
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        '-d', '--dataset_root',
        help='The path to the CARLA dataset root folder',
    )
    parser.add_argument(
        '-o', '--output_path',
        default='carla_train_set_replay_buffer.p',
        help='The path to save the resulting replay buffer',
    )
    args = parser.parse_args()

    create_dataset(args.dataset_root, args.output_path)

View File

@@ -27,6 +27,7 @@ from multiprocessing import Manager
from subprocess import Popen from subprocess import Popen
from typing import List, Tuple from typing import List, Tuple
import atexit
import numpy as np import numpy as np
killed_processes = [] killed_processes = []
@@ -571,3 +572,14 @@ class ProgressBar(object):
def close(self): def close(self):
print("") print("")
def start_shell_command_and_wait(command):
    """
    Run a shell command in its own session and block until it finishes.

    While the command is running, an atexit handler is registered that sends
    SIGTERM to the command's whole process group, so that the command (and
    anything it spawned) does not outlive the interpreter.

    :param command: the command line to execute through the shell
    :return: the exit status of the command, as returned by Popen.wait()
    """
    import signal  # local import keeps this block independent of the file's import list

    # start_new_session=True performs the setsid() call in the child, which is
    # the documented thread-safe replacement for preexec_fn=os.setsid
    p = Popen(command, shell=True, start_new_session=True)

    def cleanup():
        # terminate the whole process group, not only the shell itself
        try:
            os.killpg(os.getpgid(p.pid), signal.SIGTERM)
        except ProcessLookupError:
            # the process has already exited - nothing left to terminate
            pass

    atexit.register(cleanup)
    try:
        return p.wait()
    except BaseException:
        # interrupted while waiting (e.g. Ctrl-C): the child runs in a separate
        # session and will not receive the terminal signal, so stop it here
        cleanup()
        raise
    finally:
        # the handler is only relevant while the command is being waited on
        atexit.unregister(cleanup)