mirror of
https://github.com/gryf/coach.git
synced 2026-03-22 18:43:31 +01:00
update of api docstrings across coach and tutorials [WIP] (#91)
* updating the documentation website * adding the built docs * update of api docstrings across coach and tutorials 0-2 * added some missing api documentation * New Sphinx based documentation
This commit is contained in:
@@ -0,0 +1,14 @@
|
||||
from .attention_discretization import AttentionDiscretization
|
||||
from .box_discretization import BoxDiscretization
|
||||
from .box_masking import BoxMasking
|
||||
from .full_discrete_action_space_map import FullDiscreteActionSpaceMap
|
||||
from .linear_box_to_box_map import LinearBoxToBoxMap
|
||||
from .partial_discrete_action_space_map import PartialDiscreteActionSpaceMap
|
||||
__all__ = [
|
||||
'AttentionDiscretization',
|
||||
'BoxDiscretization',
|
||||
'BoxMasking',
|
||||
'FullDiscreteActionSpaceMap',
|
||||
'LinearBoxToBoxMap',
|
||||
'PartialDiscreteActionSpaceMap'
|
||||
]
|
||||
@@ -25,11 +25,18 @@ from rl_coach.spaces import AttentionActionSpace, BoxActionSpace, DiscreteAction
|
||||
|
||||
class AttentionDiscretization(PartialDiscreteActionSpaceMap):
|
||||
"""
|
||||
Given a box action space, this is used to discretize the space.
|
||||
The discretization is achieved by creating a grid in the space with num_bins_per_dimension bins per dimension in the
|
||||
space. Each discrete action is mapped to a single sub-box in the BoxActionSpace action space.
|
||||
Discretizes an **AttentionActionSpace**. The attention action space defines the actions
|
||||
as choosing sub-boxes in a given box. For example, consider an image of size 100x100, where the action is choosing
|
||||
a crop window of size 20x20 to attend to in the image. AttentionDiscretization allows discretizing the possible crop
|
||||
windows to choose into a finite number of options, and map a discrete action space into those crop windows.
|
||||
|
||||
Warning: this currently works only for attention spaces with 2 dimensions.
|
||||
"""
|
||||
def __init__(self, num_bins_per_dimension: Union[int, List[int]], force_int_bins=False):
|
||||
"""
|
||||
:param num_bins_per_dimension: Number of discrete bins to use for each dimension of the action space
|
||||
:param force_int_bins: If set to True, all the bins will represent integer coordinates in space.
|
||||
"""
|
||||
# we allow specifying either a single number for all dimensions, or a single number per dimension in the target
|
||||
# action space
|
||||
self.num_bins_per_dimension = num_bins_per_dimension
|
||||
|
||||
@@ -25,9 +25,12 @@ from rl_coach.spaces import BoxActionSpace, DiscreteActionSpace
|
||||
|
||||
class BoxDiscretization(PartialDiscreteActionSpaceMap):
|
||||
"""
|
||||
Given a box action space, this is used to discretize the space.
|
||||
The discretization is achieved by creating a grid in the space with num_bins_per_dimension bins per dimension in the
|
||||
space. Each discrete action is mapped to a single N dimensional action in the BoxActionSpace action space.
|
||||
Discretizes a continuous action space into a discrete action space, allowing the usage of
|
||||
agents such as DQN for continuous environments such as MuJoCo. Given the number of bins to discretize into, the
|
||||
original continuous action space is uniformly separated into the given number of bins, each mapped to a discrete
|
||||
action index. Each discrete action is mapped to a single N dimensional action in the BoxActionSpace action space.
|
||||
For example, if the original action space is between -1 and 1 and 5 bins are selected, the new action
|
||||
space will consist of 5 actions mapped to -1, -0.5, 0, 0.5 and 1.
|
||||
"""
|
||||
def __init__(self, num_bins_per_dimension: Union[int, List[int]], force_int_bins=False):
|
||||
"""
|
||||
|
||||
@@ -25,12 +25,10 @@ from rl_coach.spaces import BoxActionSpace
|
||||
|
||||
class BoxMasking(ActionFilter):
|
||||
"""
|
||||
Masks a box action space by allowing only selecting a subset of the space
|
||||
For example,
|
||||
- the target action space has actions of shape 1 with values between 10 and 32
|
||||
- we mask the target action space so that only the actions 20 to 25 can be chosen
|
||||
The actions will be between 0 to 5 and the mapping will add an offset of 20 to the incoming actions
|
||||
The shape of the source and target action spaces is always the same
|
||||
Masks part of the action space to force the agent to work within a defined space. For example,
|
||||
if the original action space is between -1 and 1, then this filter can be used in order to constrain the agent actions
|
||||
to the range between 0 and 1 instead. This essentially masks the range between -1 and 0 from the agent.
|
||||
The resulting action space will be shifted and will always start from 0 and have the size of the unmasked area.
|
||||
"""
|
||||
def __init__(self,
|
||||
masked_target_space_low: Union[None, int, float, np.ndarray],
|
||||
|
||||
@@ -20,7 +20,9 @@ from rl_coach.spaces import ActionSpace, DiscreteActionSpace
|
||||
|
||||
class FullDiscreteActionSpaceMap(PartialDiscreteActionSpaceMap):
|
||||
"""
|
||||
Maps all the actions in the output space to discrete actions in the action space.
|
||||
Full map of two countable action spaces. This works in a similar way to the
|
||||
PartialDiscreteActionSpaceMap, but maps the entire source action space into the entire target action space, without
|
||||
masking any actions.
|
||||
For example, if there are 10 multiselect actions in the output space, the actions 0-9 will be mapped to those
|
||||
multiselect actions.
|
||||
"""
|
||||
|
||||
@@ -25,17 +25,19 @@ from rl_coach.spaces import BoxActionSpace
|
||||
|
||||
class LinearBoxToBoxMap(ActionFilter):
|
||||
"""
|
||||
Maps a box action space to a box action space.
|
||||
For example,
|
||||
- the source action space has actions of shape 1 with values between -42 and -10,
|
||||
- the target action space has actions of shape 1 with values between 10 and 32
|
||||
The mapping will add an offset of 42 to the incoming actions, multiply them by 22/32 and then add 10, to scale them to the
|
||||
target action space
|
||||
The shape of the source and target action spaces is always the same
|
||||
A linear mapping of two box action spaces. For example, if the action space of the
|
||||
environment consists of continuous actions between 0 and 1, and we want the agent to choose actions between -1 and 1,
|
||||
the LinearBoxToBoxMap can be used to map the range between -1 and 1 to the range between 0 and 1 in a linear way. This means that the
|
||||
action -1 will be mapped to 0, the action 1 will be mapped to 1, and the rest of the actions will be linearly mapped
|
||||
between those values.
|
||||
"""
|
||||
def __init__(self,
|
||||
input_space_low: Union[None, int, float, np.ndarray],
|
||||
input_space_high: Union[None, int, float, np.ndarray]):
|
||||
"""
|
||||
:param input_space_low: the low values of the desired action space
|
||||
:param input_space_high: the high values of the desired action space
|
||||
"""
|
||||
self.input_space_low = input_space_low
|
||||
self.input_space_high = input_space_high
|
||||
self.rescale = None
|
||||
|
||||
@@ -23,11 +23,17 @@ from rl_coach.spaces import DiscreteActionSpace, ActionSpace
|
||||
|
||||
class PartialDiscreteActionSpaceMap(ActionFilter):
|
||||
"""
|
||||
Maps the given actions from the output space to discrete actions in the action space.
|
||||
For example, if there are 10 multiselect actions in the output space, the actions 0-9 will be mapped to those
|
||||
multiselect actions.
|
||||
Partial map of two countable action spaces. For example, consider an environment
|
||||
with a MultiSelect action space (select multiple actions at the same time, such as jump and go right), with 8 actual
|
||||
MultiSelect actions. If we want the agent to be able to select only 5 of those actions by their index (0-4), we can
|
||||
map a discrete action space with 5 actions into the 5 selected MultiSelect actions. This will both allow the agent to
|
||||
use regular discrete actions, and mask 3 of the actions from the agent.
|
||||
"""
|
||||
def __init__(self, target_actions: List[ActionType]=None, descriptions: List[str]=None):
|
||||
"""
|
||||
:param target_actions: A partial list of actions from the target space to map to.
|
||||
:param descriptions: a list of descriptions of each of the actions
|
||||
"""
|
||||
self.target_actions = target_actions
|
||||
self.descriptions = descriptions
|
||||
super().__init__()
|
||||
|
||||
@@ -0,0 +1,25 @@
|
||||
from .observation_clipping_filter import ObservationClippingFilter
|
||||
from .observation_crop_filter import ObservationCropFilter
|
||||
from .observation_move_axis_filter import ObservationMoveAxisFilter
|
||||
from .observation_normalization_filter import ObservationNormalizationFilter
|
||||
from .observation_reduction_by_sub_parts_name_filter import ObservationReductionBySubPartsNameFilter
|
||||
from .observation_rescale_size_by_factor_filter import ObservationRescaleSizeByFactorFilter
|
||||
from .observation_rescale_to_size_filter import ObservationRescaleToSizeFilter
|
||||
from .observation_rgb_to_y_filter import ObservationRGBToYFilter
|
||||
from .observation_squeeze_filter import ObservationSqueezeFilter
|
||||
from .observation_stacking_filter import ObservationStackingFilter
|
||||
from .observation_to_uint8_filter import ObservationToUInt8Filter
|
||||
|
||||
__all__ = [
|
||||
'ObservationClippingFilter',
|
||||
'ObservationCropFilter',
|
||||
'ObservationMoveAxisFilter',
|
||||
'ObservationNormalizationFilter',
|
||||
'ObservationReductionBySubPartsNameFilter',
|
||||
'ObservationRescaleSizeByFactorFilter',
|
||||
'ObservationRescaleToSizeFilter',
|
||||
'ObservationRGBToYFilter',
|
||||
'ObservationSqueezeFilter',
|
||||
'ObservationStackingFilter',
|
||||
'ObservationToUInt8Filter'
|
||||
]
|
||||
@@ -24,7 +24,10 @@ from rl_coach.spaces import ObservationSpace
|
||||
|
||||
class ObservationClippingFilter(ObservationFilter):
|
||||
"""
|
||||
Clip the observation values using the given ranges
|
||||
Clips the observation values to a given range of values.
|
||||
For example, if the observation consists of measurements in an arbitrary range,
|
||||
and we want to control the minimum and maximum values of these observations,
|
||||
we can define a range and clip the values of the measurements.
|
||||
"""
|
||||
def __init__(self, clipping_low: float=-np.inf, clipping_high: float=np.inf):
|
||||
"""
|
||||
|
||||
@@ -24,7 +24,9 @@ from rl_coach.spaces import ObservationSpace
|
||||
|
||||
class ObservationCropFilter(ObservationFilter):
|
||||
"""
|
||||
Crops the current state observation to a given shape
|
||||
Crops the size of the observation to a given crop window. For example, in Atari, the
|
||||
observations are images with a shape of 210x160. Usually, we will want to crop the size of the observation to a
|
||||
square of 160x160 before rescaling them.
|
||||
"""
|
||||
def __init__(self, crop_low: np.ndarray=None, crop_high: np.ndarray=None):
|
||||
"""
|
||||
|
||||
@@ -23,9 +23,14 @@ from rl_coach.spaces import ObservationSpace, PlanarMapsObservationSpace
|
||||
|
||||
class ObservationMoveAxisFilter(ObservationFilter):
|
||||
"""
|
||||
Move an axis of the observation to a different place.
|
||||
Reorders the axes of the observation. This can be useful when the observation is an
|
||||
image, and we want to move the channel axis to be the last axis instead of the first axis.
|
||||
"""
|
||||
def __init__(self, axis_origin: int = None, axis_target: int=None):
|
||||
"""
|
||||
:param axis_origin: The axis to move
|
||||
:param axis_target: Where to move the selected axis to
|
||||
"""
|
||||
super().__init__()
|
||||
self.axis_origin = axis_origin
|
||||
self.axis_target = axis_target
|
||||
|
||||
@@ -25,8 +25,9 @@ from rl_coach.spaces import ObservationSpace
|
||||
|
||||
class ObservationNormalizationFilter(ObservationFilter):
|
||||
"""
|
||||
Normalize the observation with a running standard deviation and mean of the observations seen so far
|
||||
If there is more than a single worker, the statistics of the observations are shared between all the workers
|
||||
Normalizes the observation values with a running mean and standard deviation of
|
||||
all the observations seen so far. The normalization is performed element-wise. Additionally, when working with
|
||||
multiple workers, the statistics used for the normalization operation are accumulated over all the workers.
|
||||
"""
|
||||
def __init__(self, clip_min: float=-5.0, clip_max: float=5.0, name='observation_stats'):
|
||||
"""
|
||||
|
||||
@@ -26,9 +26,11 @@ from rl_coach.spaces import ObservationSpace, VectorObservationSpace
|
||||
|
||||
class ObservationReductionBySubPartsNameFilter(ObservationFilter):
|
||||
"""
|
||||
Choose sub parts of the observation to remove or keep using their name.
|
||||
This is useful when the environment has a measurements vector as observation which includes several different
|
||||
Allows keeping only parts of the observation, by specifying their
|
||||
name. This is useful when the environment has a measurements vector as observation which includes several different
|
||||
measurements, but you want the agent to only see some of the measurements and not all.
|
||||
For example, the CARLA environment extracts multiple measurements that can be used by the agent, such as
|
||||
speed and location. If we want to only use the speed, it can be done using this filter.
|
||||
This will currently work only for VectorObservationSpace observations
|
||||
"""
|
||||
class ReductionMethod(Enum):
|
||||
|
||||
@@ -35,7 +35,8 @@ class RescaleInterpolationType(Enum):
|
||||
|
||||
class ObservationRescaleSizeByFactorFilter(ObservationFilter):
|
||||
"""
|
||||
Scales the current state observation size by a given factor
|
||||
Rescales an image observation by some factor. For example, the image size
|
||||
can be reduced by a factor of 2.
|
||||
Warning: this requires the input observation to be of type uint8 due to scipy requirements!
|
||||
"""
|
||||
def __init__(self, rescale_factor: float, rescaling_interpolation_type: RescaleInterpolationType):
|
||||
|
||||
@@ -37,7 +37,8 @@ class RescaleInterpolationType(Enum):
|
||||
|
||||
class ObservationRescaleToSizeFilter(ObservationFilter):
|
||||
"""
|
||||
Scales the current state observation to a given shape
|
||||
Rescales an image observation to a given size. The target size does not
|
||||
necessarily keep the aspect ratio of the original observation.
|
||||
Warning: this requires the input observation to be of type uint8 due to scipy requirements!
|
||||
"""
|
||||
def __init__(self, output_observation_space: PlanarMapsObservationSpace,
|
||||
|
||||
@@ -21,7 +21,9 @@ from rl_coach.spaces import ObservationSpace
|
||||
|
||||
class ObservationRGBToYFilter(ObservationFilter):
|
||||
"""
|
||||
Converts the observation in the current state to gray scale (Y channel).
|
||||
Converts a color image observation specified using the RGB encoding into a grayscale
|
||||
image observation, by keeping only the luminance (Y) channel of the YUV encoding. This can be useful if the colors
|
||||
in the original image are not relevant for solving the task at hand.
|
||||
The channels axis is assumed to be the last axis
|
||||
"""
|
||||
def __init__(self):
|
||||
|
||||
@@ -23,9 +23,12 @@ from rl_coach.spaces import ObservationSpace
|
||||
|
||||
class ObservationSqueezeFilter(ObservationFilter):
|
||||
"""
|
||||
Squeezes the observation so as to eliminate redundant axes.
|
||||
Removes redundant axes from the observation, which are axes with a dimension of 1.
|
||||
"""
|
||||
def __init__(self, axis: int = None):
|
||||
"""
|
||||
:param axis: Specifies which axis to remove. If set to None, all the axes of size 1 will be removed.
|
||||
"""
|
||||
super().__init__()
|
||||
self.axis = axis
|
||||
|
||||
|
||||
@@ -43,7 +43,10 @@ class LazyStack(object):
|
||||
|
||||
class ObservationStackingFilter(ObservationFilter):
|
||||
"""
|
||||
Stack the current state observation on top of several previous observations.
|
||||
Stacks several observations on top of each other. For image observations this will
|
||||
create a 3D blob. The stacking is done in a lazy manner in order to reduce memory consumption. To achieve this,
|
||||
a LazyStack object is used in order to wrap the observations in the stack. For this reason, the
|
||||
ObservationStackingFilter **must** be the last filter in the input filters stack.
|
||||
This filter is stateful since it stores the previous step result and depends on it.
|
||||
The filter adds an additional dimension to the output observation.
|
||||
|
||||
|
||||
@@ -23,10 +23,15 @@ from rl_coach.spaces import ObservationSpace
|
||||
|
||||
class ObservationToUInt8Filter(ObservationFilter):
|
||||
"""
|
||||
Converts the observation values to be uint8 values between 0 and 255.
|
||||
It first scales the observation values to fit in the range and then converts them to uint8.
|
||||
Converts a floating point observation into an unsigned int 8 bit observation. This is
|
||||
mostly useful for reducing memory consumption and is usually used for image observations. The filter will first
|
||||
spread the observation values over the range 0-255 and then discretize them into integer values.
|
||||
"""
|
||||
def __init__(self, input_low: float, input_high: float):
|
||||
"""
|
||||
:param input_low: The lowest value currently present in the observation
|
||||
:param input_high: The highest value currently present in the observation
|
||||
"""
|
||||
super().__init__()
|
||||
self.input_low = input_low
|
||||
self.input_high = input_high
|
||||
|
||||
@@ -0,0 +1,8 @@
|
||||
from .reward_rescale_filter import RewardRescaleFilter
|
||||
from .reward_clipping_filter import RewardClippingFilter
|
||||
from .reward_normalization_filter import RewardNormalizationFilter
|
||||
__all__ = [
|
||||
'RewardRescaleFilter',
|
||||
'RewardClippingFilter',
|
||||
'RewardNormalizationFilter'
|
||||
]
|
||||
@@ -23,7 +23,8 @@ from rl_coach.spaces import RewardSpace
|
||||
|
||||
class RewardClippingFilter(RewardFilter):
|
||||
"""
|
||||
Clips the reward to some range
|
||||
Clips the reward values into a given range. For example, in DQN, the Atari rewards are
|
||||
clipped into the range between -1 and 1 in order to control the scale of the returns.
|
||||
"""
|
||||
def __init__(self, clipping_low: float=-np.inf, clipping_high: float=np.inf):
|
||||
"""
|
||||
|
||||
@@ -25,8 +25,9 @@ from rl_coach.spaces import RewardSpace
|
||||
|
||||
class RewardNormalizationFilter(RewardFilter):
|
||||
"""
|
||||
Normalize the reward with a running standard deviation and mean of the rewards seen so far
|
||||
If there is more than a single worker, the statistics of the rewards are shared between all the workers
|
||||
Normalizes the reward values with a running mean and standard deviation of
|
||||
all the rewards seen so far. When working with multiple workers, the statistics used for the normalization operation
|
||||
are accumulated over all the workers.
|
||||
"""
|
||||
def __init__(self, clip_min: float=-5.0, clip_max: float=5.0):
|
||||
"""
|
||||
|
||||
@@ -21,7 +21,8 @@ from rl_coach.spaces import RewardSpace
|
||||
|
||||
class RewardRescaleFilter(RewardFilter):
|
||||
"""
|
||||
Rescales the reward by multiplying with some factor
|
||||
Rescales the reward by a given factor. Rescaling the rewards of the environment has been
|
||||
observed to have a large effect (negative or positive) on the behavior of the learning process.
|
||||
"""
|
||||
def __init__(self, rescale_factor: float):
|
||||
"""
|
||||
|
||||
Reference in New Issue
Block a user