update of api docstrings across coach and tutorials [WIP] (#91)

* updating the documentation website * adding the built docs * update of api docstrings across coach and tutorials 0-2 * added some missing api documentation * New Sphinx based documentation
2026-07-09 02:46:33 +02:00 · 2018-11-15 15:00:13 +02:00
parent 524f8436a2
commit 6d40ad1650
517 changed files with 71034 additions and 12834 deletions
@@ -0,0 +1,14 @@
+from .attention_discretization import AttentionDiscretization
+from .box_discretization import BoxDiscretization
+from .box_masking import BoxMasking
+from .full_discrete_action_space_map import FullDiscreteActionSpaceMap
+from .linear_box_to_box_map import LinearBoxToBoxMap
+from .partial_discrete_action_space_map import PartialDiscreteActionSpaceMap
+__all__ = [
+    'AttentionDiscretization',
+    'BoxDiscretization',
+    'BoxMasking',
+    'FullDiscreteActionSpaceMap',
+    'LinearBoxToBoxMap',
+    'PartialDiscreteActionSpaceMap'
+]
@@ -25,11 +25,18 @@ from rl_coach.spaces import AttentionActionSpace, BoxActionSpace, DiscreteAction

 class AttentionDiscretization(PartialDiscreteActionSpaceMap):
    """
-    Given a box action space, this is used to discretize the space.
-    The discretization is achieved by creating a grid in the space with num_bins_per_dimension bins per dimension in the
-    space. Each discrete action is mapped to a single sub-box in the BoxActionSpace action space.
+    Discretizes an **AttentionActionSpace**. The attention action space defines the actions
+    as choosing sub-boxes in a given box. For example, consider an image of size 100x100, where the action is choosing
+    a crop window of size 20x20 to attend to in the image. AttentionDiscretization allows discretizing the possible crop
+    windows to choose into a finite number of options, and map a discrete action space into those crop windows.
+
+    Warning: this currently works only for attention spaces with 2 dimensions.
    """
    def __init__(self, num_bins_per_dimension: Union[int, List[int]], force_int_bins=False):
+        """
+        :param num_bins_per_dimension: Number of discrete bins to use for each dimension of the action space
+        :param force_int_bins: If set to True, all the bins will represent integer coordinates in space.
+        """
        # we allow specifying either a single number for all dimensions, or a single number per dimension in the target
        # action space
        self.num_bins_per_dimension = num_bins_per_dimension
@@ -25,9 +25,12 @@ from rl_coach.spaces import BoxActionSpace, DiscreteActionSpace

 class BoxDiscretization(PartialDiscreteActionSpaceMap):
    """
-    Given a box action space, this is used to discretize the space.
-    The discretization is achieved by creating a grid in the space with num_bins_per_dimension bins per dimension in the
-    space. Each discrete action is mapped to a single N dimensional action in the BoxActionSpace action space.
+    Discretizes a continuous action space into a discrete action space, allowing the usage of
+    agents such as DQN for continuous environments such as MuJoCo. Given the number of bins to discretize into, the
+    original continuous action space is uniformly separated into the given number of bins, each mapped to a discrete
+    action index. Each discrete action is mapped to a single N dimensional action in the BoxActionSpace action space.
+    For example, if the original action space is between -1 and 1 and 5 bins were selected, the new action
+    space will consist of 5 actions mapped to -1, -0.5, 0, 0.5 and 1.
    """
    def __init__(self, num_bins_per_dimension: Union[int, List[int]], force_int_bins=False):
        """
@@ -25,12 +25,10 @@ from rl_coach.spaces import BoxActionSpace

 class BoxMasking(ActionFilter):
    """
-    Masks a box action space by allowing only selecting a subset of the space
-    For example,
-    - the target action space has actions of shape 1 with values between 10 and 32
-    - we mask the target action space so that only the action 20 to 25 can be chosen
-    The actions will be between 0 to 5 and the mapping will add an offset of 20 to the incoming actions
-    The shape of the source and target action spaces is always the same
+    Masks part of the action space to force the agent to work in a defined space. For example,
+    if the original action space is between -1 and 1, then this filter can be used in order to constrain the agent actions
+    to the range between 0 and 1 instead. This essentially masks the range between -1 and 0 from the agent.
+    The resulting action space will be shifted and will always start from 0 and have the size of the unmasked area.
    """
    def __init__(self,
                 masked_target_space_low: Union[None, int, float, np.ndarray],
@@ -20,7 +20,9 @@ from rl_coach.spaces import ActionSpace, DiscreteActionSpace

 class FullDiscreteActionSpaceMap(PartialDiscreteActionSpaceMap):
    """
-    Maps all the actions in the output space to discrete actions in the action space.
+    Full map of two countable action spaces. This works in a similar way to the
+    PartialDiscreteActionSpaceMap, but maps the entire source action space into the entire target action space, without
+    masking any actions.
    For example, if there are 10 multiselect actions in the output space, the actions 0-9 will be mapped to those
    multiselect actions.
    """
@@ -25,17 +25,19 @@ from rl_coach.spaces import BoxActionSpace

 class LinearBoxToBoxMap(ActionFilter):
    """
-    Maps a box action space to a box action space.
-    For example,
-    - the source action space has actions of shape 1 with values between -42 and -10,
-    - the target action space has actions of shape 1 with values between 10 and 32
-    The mapping will add an offset of 52 to the incoming actions and then multiply them by 22/32 to scale them to the
-    target action space
-    The shape of the source and target action spaces is always the same
+    A linear mapping of two box action spaces. For example, if the action space of the
+    environment consists of continuous actions between 0 and 1, and we want the agent to choose actions between -1 and 1,
+    the LinearBoxToBoxMap can be used to map the range [-1, 1] to the range [0, 1] in a linear way. This means that the
+    action -1 will be mapped to 0, the action 1 will be mapped to 1, and the rest of the actions will be linearly mapped
+    between those values.
    """
    def __init__(self,
                 input_space_low: Union[None, int, float, np.ndarray],
                 input_space_high: Union[None, int, float, np.ndarray]):
+        """
+        :param input_space_low: the low values of the desired action space
+        :param input_space_high: the high values of the desired action space
+        """
        self.input_space_low = input_space_low
        self.input_space_high = input_space_high
        self.rescale = None
@@ -23,11 +23,17 @@ from rl_coach.spaces import DiscreteActionSpace, ActionSpace

 class PartialDiscreteActionSpaceMap(ActionFilter):
    """
-    Maps the given actions from the output space to discrete actions in the action space.
-    For example, if there are 10 multiselect actions in the output space, the actions 0-9 will be mapped to those
-    multiselect actions.
+    Partial map of two countable action spaces. For example, consider an environment
+    with a MultiSelect action space (select multiple actions at the same time, such as jump and go right), with 8 actual
+    MultiSelect actions. If we want the agent to be able to select only 5 of those actions by their index (0-4), we can
+    map a discrete action space with 5 actions into the 5 selected MultiSelect actions. This will both allow the agent to
+    use regular discrete actions, and mask 3 of the actions from the agent.
    """
    def __init__(self, target_actions: List[ActionType]=None, descriptions: List[str]=None):
+        """
+        :param target_actions: A partial list of actions from the target space to map to.
+        :param descriptions: a list of descriptions of each of the actions
+        """
        self.target_actions = target_actions
        self.descriptions = descriptions
        super().__init__()
@@ -0,0 +1,25 @@
+from .observation_clipping_filter import ObservationClippingFilter
+from .observation_crop_filter import ObservationCropFilter
+from .observation_move_axis_filter import ObservationMoveAxisFilter
+from .observation_normalization_filter import ObservationNormalizationFilter
+from .observation_reduction_by_sub_parts_name_filter import ObservationReductionBySubPartsNameFilter
+from .observation_rescale_size_by_factor_filter import ObservationRescaleSizeByFactorFilter
+from .observation_rescale_to_size_filter import ObservationRescaleToSizeFilter
+from .observation_rgb_to_y_filter import ObservationRGBToYFilter
+from .observation_squeeze_filter import ObservationSqueezeFilter
+from .observation_stacking_filter import ObservationStackingFilter
+from .observation_to_uint8_filter import ObservationToUInt8Filter
+
+__all__ = [
+    'ObservationClippingFilter',
+    'ObservationCropFilter',
+    'ObservationMoveAxisFilter',
+    'ObservationNormalizationFilter',
+    'ObservationReductionBySubPartsNameFilter',
+    'ObservationRescaleSizeByFactorFilter',
+    'ObservationRescaleToSizeFilter',
+    'ObservationRGBToYFilter',
+    'ObservationSqueezeFilter',
+    'ObservationStackingFilter',
+    'ObservationToUInt8Filter'
+]
@@ -24,7 +24,10 @@ from rl_coach.spaces import ObservationSpace

 class ObservationClippingFilter(ObservationFilter):
    """
-    Clip the observation values using the given ranges
+    Clips the observation values to a given range of values.
+    For example, if the observation consists of measurements in an arbitrary range,
+    and we want to control the minimum and maximum values of these observations,
+    we can define a range and clip the values of the measurements.
    """
    def __init__(self, clipping_low: float=-np.inf, clipping_high: float=np.inf):
        """
@@ -24,7 +24,9 @@ from rl_coach.spaces import ObservationSpace

 class ObservationCropFilter(ObservationFilter):
    """
-    Crops the current state observation to a given shape
+    Crops the size of the observation to a given crop window. For example, in Atari, the
+    observations are images with a shape of 210x160. Usually, we will want to crop the size of the observation to a
+    square of 160x160 before rescaling it.
    """
    def __init__(self, crop_low: np.ndarray=None, crop_high: np.ndarray=None):
        """
@@ -23,9 +23,14 @@ from rl_coach.spaces import ObservationSpace, PlanarMapsObservationSpace

 class ObservationMoveAxisFilter(ObservationFilter):
    """
-    Move an axis of the observation to a different place.
+    Reorders the axes of the observation. This can be useful when the observation is an
+    image, and we want to move the channel axis to be the last axis instead of the first axis.
    """
    def __init__(self, axis_origin: int = None, axis_target: int=None):
+        """
+        :param axis_origin: The axis to move
+        :param axis_target: Where to move the selected axis to
+        """
        super().__init__()
        self.axis_origin = axis_origin
        self.axis_target = axis_target
@@ -25,8 +25,9 @@ from rl_coach.spaces import ObservationSpace

 class ObservationNormalizationFilter(ObservationFilter):
    """
-    Normalize the observation with a running standard deviation and mean of the observations seen so far
-    If there is more than a single worker, the statistics of the observations are shared between all the workers
+    Normalizes the observation values with a running mean and standard deviation of
+    all the observations seen so far. The normalization is performed element-wise. Additionally, when working with
+    multiple workers, the statistics used for the normalization operation are accumulated over all the workers.
    """
    def __init__(self, clip_min: float=-5.0, clip_max: float=5.0, name='observation_stats'):
        """
@@ -26,9 +26,11 @@ from rl_coach.spaces import ObservationSpace, VectorObservationSpace

 class ObservationReductionBySubPartsNameFilter(ObservationFilter):
    """
-    Choose sub parts of the observation to remove or keep using their name.
-    This is useful when the environment has a measurements vector as observation which includes several different
+    Allows keeping only parts of the observation, by specifying their
+    name. This is useful when the environment has a measurements vector as observation which includes several different
    measurements, but you want the agent to only see some of the measurements and not all.
+    For example, the CARLA environment extracts multiple measurements that can be used by the agent, such as
+    speed and location. If we want to only use the speed, it can be done using this filter.
    This will currently work only for VectorObservationSpace observations
    """
    class ReductionMethod(Enum):
@@ -35,7 +35,8 @@ class RescaleInterpolationType(Enum):

 class ObservationRescaleSizeByFactorFilter(ObservationFilter):
    """
-    Scales the current state observation size by a given factor
+    Rescales an image observation by some factor. For example, the image size
+    can be reduced by a factor of 2.
    Warning: this requires the input observation to be of type uint8 due to scipy requirements!
    """
    def __init__(self, rescale_factor: float, rescaling_interpolation_type: RescaleInterpolationType):
@@ -37,7 +37,8 @@ class RescaleInterpolationType(Enum):

 class ObservationRescaleToSizeFilter(ObservationFilter):
    """
-    Scales the current state observation to a given shape
+    Rescales an image observation to a given size. The target size does not
+    necessarily keep the aspect ratio of the original observation.
    Warning: this requires the input observation to be of type uint8 due to scipy requirements!
    """
    def __init__(self, output_observation_space: PlanarMapsObservationSpace,
@@ -21,7 +21,9 @@ from rl_coach.spaces import ObservationSpace

 class ObservationRGBToYFilter(ObservationFilter):
    """
-    Converts the observation in the current state to gray scale (Y channel).
+    Converts a color image observation specified using the RGB encoding into a grayscale
+    image observation, by keeping only the luminance (Y) channel of the YUV encoding. This can be useful if the colors
+    in the original image are not relevant for solving the task at hand.
    The channels axis is assumed to be the last axis
    """
    def __init__(self):
@@ -23,9 +23,12 @@ from rl_coach.spaces import ObservationSpace

 class ObservationSqueezeFilter(ObservationFilter):
    """
-    Squeezes the observation so to eliminate redundant axes.
+    Removes redundant axes from the observation, which are axes with a dimension of 1.
    """
    def __init__(self, axis: int = None):
+        """
+        :param axis: Specifies which axis to remove. If set to None, all the axes of size 1 will be removed.
+        """
        super().__init__()
        self.axis = axis

@@ -43,7 +43,10 @@ class LazyStack(object):

 class ObservationStackingFilter(ObservationFilter):
    """
-    Stack the current state observation on top of several previous observations.
+    Stacks several observations on top of each other. For image observations, this will
+    create a 3D blob. The stacking is done in a lazy manner in order to reduce memory consumption. To achieve this,
+    a LazyStack object is used in order to wrap the observations in the stack. For this reason, the
+    ObservationStackingFilter **must** be the last filter in the input filters stack.
    This filter is stateful since it stores the previous step result and depends on it.
    The filter adds an additional dimension to the output observation.

@@ -23,10 +23,15 @@ from rl_coach.spaces import ObservationSpace

 class ObservationToUInt8Filter(ObservationFilter):
    """
-    Converts the observation values to be uint8 values between 0 and 255.
-    It first scales the observation values to fit in the range and then converts them to uint8.
+    Converts a floating point observation into an unsigned int 8 bit observation. This is
+    mostly useful for reducing memory consumption and is usually used for image observations. The filter will first
+    spread the observation values over the range 0-255 and then discretize them into integer values.
    """
    def __init__(self, input_low: float, input_high: float):
+        """
+        :param input_low: The lowest value currently present in the observation
+        :param input_high: The highest value currently present in the observation
+        """
        super().__init__()
        self.input_low = input_low
        self.input_high = input_high
@@ -0,0 +1,8 @@
+from .reward_rescale_filter import RewardRescaleFilter
+from .reward_clipping_filter import RewardClippingFilter
+from .reward_normalization_filter import RewardNormalizationFilter
+__all__ = [
+    'RewardRescaleFilter',
+    'RewardClippingFilter',
+    'RewardNormalizationFilter'
+]
@@ -23,7 +23,8 @@ from rl_coach.spaces import RewardSpace

 class RewardClippingFilter(RewardFilter):
    """
-    Clips the reward to some range
+    Clips the reward values into a given range. For example, in DQN, the Atari rewards are
+    clipped to the range between -1 and 1 in order to control the scale of the returns.
    """
    def __init__(self, clipping_low: float=-np.inf, clipping_high: float=np.inf):
        """
@@ -25,8 +25,9 @@ from rl_coach.spaces import RewardSpace

 class RewardNormalizationFilter(RewardFilter):
    """
-    Normalize the reward with a running standard deviation and mean of the rewards seen so far
-    If there is more than a single worker, the statistics of the rewards are shared between all the workers
+    Normalizes the reward values with a running mean and standard deviation of
+    all the rewards seen so far. When working with multiple workers, the statistics used for the normalization operation
+    are accumulated over all the workers.
    """
    def __init__(self, clip_min: float=-5.0, clip_max: float=5.0):
        """
@@ -21,7 +21,8 @@ from rl_coach.spaces import RewardSpace

 class RewardRescaleFilter(RewardFilter):
    """
-    Rescales the reward by multiplying with some factor
+    Rescales the reward by a given factor. Rescaling the rewards of the environment has been
+    observed to have a large effect (negative or positive) on the behavior of the learning process.
    """
    def __init__(self, rescale_factor: float):
        """