update of api docstrings across coach and tutorials [WIP] (#91)

* updating the documentation website * adding the built docs * update of api docstrings across coach and tutorials 0-2 * added some missing api documentation * New Sphinx based documentation
2026-03-04 15:55:47 +01:00 · 2018-11-15 15:00:13 +02:00
parent 524f8436a2
commit 6d40ad1650
517 changed files with 71034 additions and 12834 deletions
--- a/rl_coach/agents/dfp_agent.py
+++ b/rl_coach/agents/dfp_agent.py
@@ -81,6 +81,35 @@ class DFPMemoryParameters(EpisodicExperienceReplayParameters):


 class DFPAlgorithmParameters(AlgorithmParameters):
+    """
+    :param num_predicted_steps_ahead: (int)
+        Number of future steps to predict measurements for. The future steps won't be sequential, but rather jump
+        in powers of 2. For example, if num_predicted_steps_ahead = 3, then the steps will be: t+1, t+2, t+4.
+
+    :param goal_vector: (List[float])
+        The goal vector will weight each of the measurements to form an optimization goal. The vector should have
+        the same length as the number of measurements, and it will be vector multiplied by the measurements.
+        Positive values correspond to trying to maximize the particular measurement, and negative values
+        correspond to trying to minimize the particular measurement.
+
+    :param future_measurements_weights: (List[float])
+        The future_measurements_weights weight the contribution of each of the predicted timesteps to the optimization
+        goal. For example, if there are 6 steps predicted ahead, and a future_measurements_weights vector with 3 values,
+        then only the 3 last timesteps will be taken into account, according to the weights in the
+        future_measurements_weights vector.
+
+    :param use_accumulated_reward_as_measurement: (bool)
+        If set to True, the accumulated reward from the beginning of the episode will be added as a measurement to
+        the measurements vector in the state. This can be useful in environments where the given measurements don't
+        include enough information for the particular goal the agent should achieve.
+
+    :param handling_targets_after_episode_end: (HandlingTargetsAfterEpisodeEnd)
+        Dictates how to handle measurements that are outside the episode length.
+
+    :param scale_measurements_targets: (Dict[str, float])
+        Allows rescaling the values of each of the measurements available. This can be useful when the measurements
+        have a different scale and you want to normalize them to the same scale.
+    """
    def __init__(self):
        super().__init__()
        self.num_predicted_steps_ahead = 6