update of api docstrings across coach and tutorials [WIP] (#91)

* updating the documentation website
* adding the built docs
* update of api docstrings across coach and tutorials 0-2
* added some missing api documentation
* New Sphinx based documentation
2026-02-26 12:15:50 +01:00 · 2018-11-15 15:00:13 +02:00
parent 524f8436a2
commit 6d40ad1650
517 changed files with 71034 additions and 12834 deletions
--- a/rl_coach/agents/ddpg_agent.py
+++ b/rl_coach/agents/ddpg_agent.py
@@ -65,6 +65,33 @@ class DDPGActorNetworkParameters(NetworkParameters):


 class DDPGAlgorithmParameters(AlgorithmParameters):
+    """
+    :param num_steps_between_copying_online_weights_to_target: (StepMethod)
+        The number of steps between copying the online network weights to the target network weights.
+
+    :param rate_for_copying_weights_to_target: (float)
+        When copying the online network weights to the target network weights, a soft update will be used, which
+        weights the new online network weights by rate_for_copying_weights_to_target.
+
+    :param num_consecutive_playing_steps: (StepMethod)
+        The number of consecutive steps to act between every two training iterations
+
+    :param use_target_network_for_evaluation: (bool)
+        If set to True, the target network will be used for predicting the actions when choosing actions to act.
+        Since the target network weights change more slowly, the predicted actions will be more consistent.
+
+    :param action_penalty: (float)
+        The amount by which to penalize the network on high action feature (pre-activation) values.
+        This can prevent the action features from saturating the TanH activation function, and therefore prevent the
+        gradients from becoming very small.
+
+    :param clip_critic_targets: (Tuple[float, float] or None)
+        The range to clip the critic target to in order to prevent overestimation of the action values.
+
+    :param use_non_zero_discount_for_terminal_states: (bool)
+        If set to True, the discount factor will be used for terminal states to bootstrap the next predicted state
+        values. If set to False, the terminal state's reward will be taken as the target return for the network.
+    """
    def __init__(self):
        super().__init__()
        self.num_steps_between_copying_online_weights_to_target = EnvironmentSteps(1)