diff --git a/rl_coach/agents/cil_agent.py b/rl_coach/agents/cil_agent.py
index b6ca57e..976d627 100644
--- a/rl_coach/agents/cil_agent.py
+++ b/rl_coach/agents/cil_agent.py
@@ -29,6 +29,7 @@ class CILAlgorithmParameters(AlgorithmParameters):
     def __init__(self):
         super().__init__()
         self.collect_new_data = False
+        self.state_key_with_the_class_index = 'high_level_command'
 
 
 class CILNetworkParameters(NetworkParameters):
@@ -63,7 +64,7 @@ class CILAgent(ImitationAgent):
         self.current_high_level_control = 0
 
     def choose_action(self, curr_state):
-        self.current_high_level_control = curr_state['high_level_command']
+        self.current_high_level_control = curr_state[self.ap.algorithm.state_key_with_the_class_index]
         return super().choose_action(curr_state)
 
     def extract_action_values(self, prediction):
@@ -74,7 +75,7 @@ class CILAgent(ImitationAgent):
 
         target_values = self.networks['main'].online_network.predict({**batch.states(network_keys)})
 
-        branch_to_update = batch.states(['high_level_command'])['high_level_command']
+        branch_to_update = batch.states([self.ap.algorithm.state_key_with_the_class_index])[self.ap.algorithm.state_key_with_the_class_index]
         for idx, branch in enumerate(branch_to_update):
             target_values[branch][idx] = batch.actions()[idx]
diff --git a/rl_coach/environments/carla_environment.py b/rl_coach/environments/carla_environment.py
index 2a2fb3d..c33e4ff 100644
--- a/rl_coach/environments/carla_environment.py
+++ b/rl_coach/environments/carla_environment.py
@@ -34,6 +34,7 @@ try:
     from carla.sensor import Camera
     from carla.client import VehicleControl
     from carla.planner.planner import Planner
+    from carla.driving_benchmark.experiment_suites.experiment_suite import ExperimentSuite
 except ImportError:
     from rl_coach.logger import failed_imports
     failed_imports.append("CARLA")
@@ -103,7 +104,8 @@ class CarlaEnvironmentParameters(EnvironmentParameters):
         self.server_width = 720
         self.camera_height = 128
         self.camera_width = 180
-        self.config = None #'environments/CarlaSettings.ini' # TODO: remove the config to prevent confusion
+        self.experiment_suite = None  # an optional CARLA experiment suite to use
+        self.config = None
         self.level = 'town1'
         self.quality = self.Quality.LOW
         self.cameras = [CameraTypes.FRONT]
@@ -126,7 +128,7 @@ class CarlaEnvironment(Environment):
                  seed: int, frame_skip: int, human_control: bool, custom_reward_threshold: Union[int, float],
                  visualization_parameters: VisualizationParameters,
                  server_height: int, server_width: int, camera_height: int, camera_width: int,
-                 verbose: bool, config: str, episode_max_time: int,
+                 verbose: bool, experiment_suite: ExperimentSuite, config: str, episode_max_time: int,
                  allow_braking: bool, quality: CarlaEnvironmentParameters.Quality,
                  cameras: List[CameraTypes], weather_id: List[int], experiment_path: str,
                  num_speedup_steps: int, max_speed: float, **kwargs):
@@ -161,6 +163,7 @@ class CarlaEnvironment(Environment):
                                                           high=255)
 
         # setup server settings
+        self.experiment_suite = experiment_suite
         self.config = config
         if self.config:
             # load settings from file
@@ -191,12 +194,17 @@ class CarlaEnvironment(Environment):
         # open the client
         self.game = CarlaClient(self.host, self.port, timeout=99999999)
         self.game.connect()
-        scene = self.game.load_settings(self.settings)
+        if self.experiment_suite:
+            self.current_experiment = self.experiment_suite.get_experiments()[0]
+            scene = self.game.load_settings(self.current_experiment.conditions)
+        else:
+            scene = self.game.load_settings(self.settings)
 
         # get available start positions
         self.positions = scene.player_start_spots
-        self.num_pos = len(self.positions)
-        self.iterator_start_positions = 0
+        self.num_positions = len(self.positions)
+        self.current_start_position_idx = 0
+        self.current_pose = 0
 
         # action space
         self.action_space = BoxActionSpace(shape=2, low=np.array([-1, -1]), high=np.array([1, 1]))
@@ -391,18 +399,24 @@ class CarlaEnvironment(Environment):
         self.game.send_control(self.control)
 
     def _restart_environment_episode(self, force_environment_reset=False):
-        self.iterator_start_positions += 1
-        if self.iterator_start_positions >= self.num_pos:
-            self.iterator_start_positions = 0
+        # select start and end positions
+        if self.experiment_suite:
+            # if an experiment suite is available, follow its given poses
+            self.current_start_position_idx = self.current_experiment.poses[self.current_pose][0]
+            self.current_goal = self.current_experiment.poses[self.current_pose][1]
+            self.current_pose += 1
+        else:
+            # go over all the possible positions in a cyclic manner
+            self.current_start_position_idx = (self.current_start_position_idx + 1) % self.num_positions
+
+            # choose a random goal destination TODO: follow the CoRL destinations and start positions
+            self.current_goal = random.choice(self.positions)
 
         try:
-            self.game.start_episode(self.iterator_start_positions)
+            self.game.start_episode(self.current_start_position_idx)
         except:
             self.game.connect()
-            self.game.start_episode(self.iterator_start_positions)
-
-        # choose a random goal destination TODO: follow the CoRL destinations and start positions
-        self.current_goal = random.choice(self.positions)
+            self.game.start_episode(self.current_start_position_idx)
 
         # start the game with some initial speed
         for i in range(self.num_speedup_steps):
diff --git a/rl_coach/presets/Carla_3_Cameras_DDPG.py b/rl_coach/presets/CARLA_3_Cameras_DDPG.py
similarity index 100%
rename from rl_coach/presets/Carla_3_Cameras_DDPG.py
rename to rl_coach/presets/CARLA_3_Cameras_DDPG.py
diff --git a/rl_coach/presets/CARLA_CIL.py b/rl_coach/presets/CARLA_CIL.py
index 147c0b7..cdc1f36 100644
--- a/rl_coach/presets/CARLA_CIL.py
+++ b/rl_coach/presets/CARLA_CIL.py
@@ -1,10 +1,17 @@
-import os
-import sys
-
 import numpy as np
+
+# make sure you have $CARLA_ROOT/PythonClient in your PYTHONPATH
+from carla.driving_benchmark.experiment_suites import CoRL2017
+
+from rl_coach.agents.cil_agent import CILAgentParameters
 from rl_coach.architectures.tensorflow_components.architecture import Conv2d, Dense
+from rl_coach.architectures.tensorflow_components.embedders.embedder import InputEmbedderParameters
+from rl_coach.architectures.tensorflow_components.heads.cil_head import RegressionHeadParameters
 from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters
+from rl_coach.base_parameters import VisualizationParameters
+from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase
 from rl_coach.environments.carla_environment import CarlaEnvironmentParameters, CameraTypes
+from rl_coach.environments.environment import MaxDumpMethod, SelectedPhaseOnlyDumpMethod
 from rl_coach.exploration_policies.additive_noise import AdditiveNoiseParameters
 from rl_coach.filters.filter import InputFilter
 from rl_coach.filters.observation.observation_crop_filter import ObservationCropFilter
@@ -12,17 +19,10 @@ from rl_coach.filters.observation.observation_reduction_by_sub_parts_name_filter import \
     ObservationReductionBySubPartsNameFilter
 from rl_coach.filters.observation.observation_rescale_to_size_filter import ObservationRescaleToSizeFilter
 from rl_coach.filters.observation.observation_to_uint8_filter import ObservationToUInt8Filter
-from rl_coach.schedules import ConstantSchedule
-from rl_coach.spaces import ImageObservationSpace
-
-from rl_coach.agents.cil_agent import CILAgentParameters
-from rl_coach.architectures.tensorflow_components.heads.cil_head import RegressionHeadParameters
 from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
 from rl_coach.graph_managers.graph_manager import ScheduleParameters
-from rl_coach.base_parameters import VisualizationParameters
-from rl_coach.architectures.tensorflow_components.embedders.embedder import InputEmbedderParameters
-from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase
-from rl_coach.environments.environment import MaxDumpMethod, SelectedPhaseOnlyDumpMethod
+from rl_coach.schedules import ConstantSchedule
+from rl_coach.spaces import ImageObservationSpace
 
 ####################
 # Graph Scheduling #
@@ -116,8 +116,9 @@ env_params.level = 'town1'
 env_params.cameras = [CameraTypes.FRONT]
 env_params.camera_height = 600
 env_params.camera_width = 800
-env_params.allow_braking = True
+env_params.allow_braking = False
 env_params.quality = CarlaEnvironmentParameters.Quality.EPIC
+env_params.experiment_suite = CoRL2017('Town01')
 
 vis_params = VisualizationParameters()
 vis_params.video_dump_methods = [SelectedPhaseOnlyDumpMethod(RunPhase.TEST), MaxDumpMethod()]
diff --git a/rl_coach/presets/Carla_DDPG.py b/rl_coach/presets/CARLA_DDPG.py
similarity index 100%
rename from rl_coach/presets/Carla_DDPG.py
rename to rl_coach/presets/CARLA_DDPG.py
diff --git a/rl_coach/presets/Carla_Dueling_DDQN.py b/rl_coach/presets/CARLA_Dueling_DDQN.py
similarity index 100%
rename from rl_coach/presets/Carla_Dueling_DDQN.py
rename to rl_coach/presets/CARLA_Dueling_DDQN.py
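
A minimal usage sketch of the new experiment_suite environment parameter, mirroring the CARLA_CIL.py preset changes above. It assumes $CARLA_ROOT/PythonClient is on PYTHONPATH so the carla package and the CoRL2017 suite resolve; the surrounding preset values are copied from this diff for illustration only, not a complete preset.

# illustrative sketch, not part of the diff: configuring CarlaEnvironmentParameters
# with a CARLA experiment suite (assumes $CARLA_ROOT/PythonClient is on PYTHONPATH)
from carla.driving_benchmark.experiment_suites import CoRL2017

from rl_coach.environments.carla_environment import CarlaEnvironmentParameters, CameraTypes

env_params = CarlaEnvironmentParameters()
env_params.level = 'town1'
env_params.cameras = [CameraTypes.FRONT]
env_params.allow_braking = False
env_params.quality = CarlaEnvironmentParameters.Quality.EPIC
# when set, CarlaEnvironment loads the first experiment's conditions and follows
# its start/goal poses instead of cycling through start positions with random goals
env_params.experiment_suite = CoRL2017('Town01')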
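
And a hedged sketch of overriding the new state_key_with_the_class_index algorithm parameter introduced in cil_agent.py; the default remains 'high_level_command', and 'directions' below is a hypothetical key name used only for illustration.

# illustrative sketch, not part of the diff: the state key whose value selects which
# regression branch CILAgent acts with and updates ('directions' is a hypothetical key)
from rl_coach.agents.cil_agent import CILAgentParameters

agent_params = CILAgentParameters()
agent_params.algorithm.state_key_with_the_class_index = 'directions'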