Mirror of https://github.com/gryf/coach.git (synced 2026-02-01 13:25:45 +01:00)
parameter noise exploration - using Noisy Nets
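Noisy Nets (Fortunato et al., 2017) drive exploration by perturbing the network's weights with learned noise, instead of dithering the chosen actions as epsilon-greedy does. As a rough, self-contained illustration of the idea only (this is not the code the commit adds; rl_coach wires the technique into its network heads and exploration parameters), here is a minimal numpy sketch of a factorized-Gaussian noisy linear layer; all names in it are illustrative:

import numpy as np

class NoisyLinear:
    """y = (w_mu + w_sigma * eps_w) x + (b_mu + b_sigma * eps_b)"""
    def __init__(self, in_size, out_size, sigma0=0.5, rng=None):
        self.rng = rng or np.random.default_rng()
        self.in_size, self.out_size = in_size, out_size
        bound = 1.0 / np.sqrt(in_size)
        # Learnable means, initialized like an ordinary linear layer.
        self.w_mu = self.rng.uniform(-bound, bound, (out_size, in_size))
        self.b_mu = self.rng.uniform(-bound, bound, out_size)
        # Learnable noise scales, initialized to sigma0 / sqrt(in_size).
        self.w_sigma = np.full((out_size, in_size), sigma0 * bound)
        self.b_sigma = np.full(out_size, sigma0 * bound)
        self.sample_noise()

    @staticmethod
    def _f(x):
        # Factorized-noise squashing from the paper: f(x) = sign(x) * sqrt(|x|).
        return np.sign(x) * np.sqrt(np.abs(x))

    def sample_noise(self):
        # Factorized Gaussian noise: one vector per input, one per output,
        # combined by an outer product instead of a full noise matrix.
        eps_in = self._f(self.rng.standard_normal(self.in_size))
        eps_out = self._f(self.rng.standard_normal(self.out_size))
        self.eps_w = np.outer(eps_out, eps_in)
        self.eps_b = eps_out

    def __call__(self, x):
        w = self.w_mu + self.w_sigma * self.eps_w
        b = self.b_mu + self.b_sigma * self.eps_b
        return w @ x + b

layer = NoisyLinear(4, 2)
layer.sample_noise()        # resample exploration noise, e.g. once per step
print(layer(np.ones(4)))

Resampling the noise once per step keeps a whole forward pass self-consistent, so exploration is state-dependent rather than a per-action coin flip.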
@@ -18,6 +18,7 @@ import random
import sys
from os import path, environ

from rl_coach.filters.action.partial_discrete_action_space_map import PartialDiscreteActionSpaceMap
from rl_coach.filters.observation.observation_rgb_to_y_filter import ObservationRGBToYFilter
from rl_coach.filters.observation.observation_to_uint8_filter import ObservationToUInt8Filter

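The two observation filters imported above sit in the environment's preprocessing pipeline: the RGB camera frame is reduced to a single luminance (Y) channel and then cast to uint8 so the replay memory stores one byte per pixel instead of a float. A minimal sketch of what that amounts to, assuming BT.601 luma weights (the real implementations live in rl_coach.filters.observation):

import numpy as np

def rgb_to_y(frame):
    # Collapse an (H, W, 3) RGB frame to one luminance channel (BT.601 weights).
    return frame @ np.array([0.299, 0.587, 0.114])

def to_uint8(frame, input_low=0.0, input_high=255.0):
    # Rescale to [0, 255] and cast, mirroring an observation-to-uint8 filter.
    scaled = (frame - input_low) * 255.0 / (input_high - input_low)
    return np.clip(scaled, 0, 255).astype(np.uint8)

obs = np.random.randint(0, 256, (84, 84, 3)).astype(np.float32)
processed = to_uint8(rgb_to_y(obs))   # shape (84, 84), dtype uint8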
@@ -208,7 +209,6 @@ class CarlaEnvironment(Environment):
                             [self.gas_strength, self.steering_strength],
                             [self.brake_strength, -self.steering_strength],
                             [self.brake_strength, self.steering_strength]],
             target_action_space=self.action_space,
             descriptions=['NO-OP', 'TURN_LEFT', 'TURN_RIGHT', 'GAS', 'BRAKE',
                           'GAS_AND_TURN_LEFT', 'GAS_AND_TURN_RIGHT',
                           'BRAKE_AND_TURN_LEFT', 'BRAKE_AND_TURN_RIGHT']

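The hunk above is the tail of a PartialDiscreteActionSpaceMap: CARLA's native control space is continuous, and the filter exposes only a handful of named points in it as a discrete action space. A minimal sketch of that mapping, with placeholder strength values standing in for the environment's gas_strength, brake_strength and steering_strength attributes (the [throttle, steering] ordering is inferred from the visible lines):

# Illustrative values; in CarlaEnvironment these come from the class itself.
gas_strength, brake_strength, steering_strength = 1.0, -0.5, 0.35

# Discrete index -> continuous [throttle, steering] control, matching the
# descriptions listed in the diff.
target_actions = [
    [0.0, 0.0],                            # NO-OP
    [0.0, -steering_strength],             # TURN_LEFT
    [0.0, steering_strength],              # TURN_RIGHT
    [gas_strength, 0.0],                   # GAS
    [brake_strength, 0.0],                 # BRAKE
    [gas_strength, -steering_strength],    # GAS_AND_TURN_LEFT
    [gas_strength, steering_strength],     # GAS_AND_TURN_RIGHT
    [brake_strength, -steering_strength],  # BRAKE_AND_TURN_LEFT
    [brake_strength, steering_strength],   # BRAKE_AND_TURN_RIGHT
]

def map_action(discrete_action: int):
    # The agent picks an index; the filter forwards the matching control.
    return target_actions[discrete_action]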
@@ -1,149 +0,0 @@
########################################################################################################################
####### Currently we are ignoring more complex cases including EnvironmentGroups - DO NOT USE THIS FILE ****************
########################################################################################################################




# #
# # Copyright (c) 2017 Intel Corporation
# #
# # Licensed under the Apache License, Version 2.0 (the "License");
# # you may not use this file except in compliance with the License.
# # You may obtain a copy of the License at
# #
# #      http://www.apache.org/licenses/LICENSE-2.0
# #
# # Unless required by applicable law or agreed to in writing, software
# # distributed under the License is distributed on an "AS IS" BASIS,
# # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# # See the License for the specific language governing permissions and
# # limitations under the License.
# #
#
# from typing import Union, List, Dict
# import numpy as np
# from environments import create_environment
# from environments.environment import Environment
# from environments.environment_interface import EnvironmentInterface, ActionType, ActionSpace
# from core_types import GoalType, Transition
#
#
# class EnvironmentGroup(EnvironmentInterface):
#     """
#     An EnvironmentGroup is a group of different environments.
#     In the simple case, it will contain a single environment. But it can also contain multiple environments,
#     where the agent can then act on them as a batch, such that the prediction of the action is more efficient.
#     """
#     def __init__(self, environments_parameters: List[Environment]):
#         self.environments_parameters = environments_parameters
#         self.environments = []
#         self.action_space = []
#         self.outgoing_control = []
#         self._last_env_response = []
#
#     @property
#     def action_space(self) -> Union[List[ActionSpace], ActionSpace]:
#         """
#         Get the action space of the environment
#         :return: the action space
#         """
#         return self.action_space
#
#     @action_space.setter
#     def action_space(self, val: Union[List[ActionSpace], ActionSpace]):
#         """
#         Set the action space of the environment
#         :return: None
#         """
#         self.action_space = val
#
#     @property
#     def phase(self) -> RunPhase:
#         """
#         Get the phase of the environments group
#         :return: the current phase
#         """
#         return self.phase
#
#     @phase.setter
#     def phase(self, val: RunPhase):
#         """
#         Change the phase of each one of the environments in the group
#         :param val: the new phase
#         :return: None
#         """
#         self.phase = val
#         call_method_for_all(self.environments, 'phase', val)
#
#     def _create_environments(self):
#         """
#         Create the environments using the given parameters and update the environments list
#         :return: None
#         """
#         for environment_parameters in self.environments_parameters:
#             environment = create_environment(environment_parameters)
#             self.action_space = self.action_space.append(environment.action_space)
#             self.environments.append(environment)
#
#     @property
#     def last_env_response(self) -> Union[List[Transition], Transition]:
#         """
#         Get the last environment response
#         :return: a dictionary that contains the state, reward, etc.
#         """
#         return squeeze_list(self._last_env_response)
#
#     @last_env_response.setter
#     def last_env_response(self, val: Union[List[Transition], Transition]):
#         """
#         Set the last environment response
#         :param val: the last environment response
#         """
#         self._last_env_response = force_list(val)
#
#     def step(self, actions: Union[List[ActionType], ActionType]) -> List[Transition]:
#         """
#         Act in all the environments in the group.
#         :param actions: can be either a single action if there is a single environment in the group, or a list of
#                         actions in case there are multiple environments in the group. Each action can be an action index
#                         or a numpy array representing a continuous action for example.
#         :return: The responses from all the environments in the group
#         """
#
#         actions = force_list(actions)
#         if len(actions) != len(self.environments):
#             raise ValueError("The number of actions does not match the number of environments in the group")
#
#         result = []
#         for environment, action in zip(self.environments, actions):
#             result.append(environment.step(action))
#
#         self.last_env_response = result
#
#         return result
#
#     def reset(self, force_environment_reset: bool=False) -> List[Transition]:
#         """
#         Reset all the environments in the group
#         :param force_environment_reset: force the reset of each one of the environments
#         :return: a list of the environments responses
#         """
#         return call_method_for_all(self.environments, 'reset', force_environment_reset)
#
#     def get_random_action(self) -> List[ActionType]:
#         """
#         Get a list of random action that can be applied on the environments in the group
#         :return: a list of random actions
#         """
#         return call_method_for_all(self.environments, 'get_random_action')
#
#     def set_goal(self, goal: GoalType) -> None:
#         """
#         Set the goal of each one of the environments in the group to be the given goal
#         :param goal: a goal vector
#         :return: None
#         """
#         # TODO: maybe enable setting multiple goals?
#         call_method_for_all(self.environments, 'set_goal', goal)
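Note that several of the commented-out properties in the deleted file shadow themselves: the action_space getter returns self.action_space and the phase getter returns self.phase, so reading either would recurse infinitely if the code were revived (the setters have the same problem, _create_environments assigns the None result of list.append, and RunPhase is referenced without an import). The standard fix, sketched here on the action_space pair only, is a private backing attribute:

class EnvironmentGroup:
    def __init__(self):
        self._action_space = []   # private backing field, distinct from the property

    @property
    def action_space(self):
        # Reading the property returns the backing field, so no recursion.
        return self._action_space

    @action_space.setter
    def action_space(self, val):
        self._action_space = val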