mirror of
https://github.com/gryf/coach.git
synced 2025-12-17 11:10:20 +01:00
update of api docstrings across coach and tutorials [WIP] (#91)
* updating the documentation website * adding the built docs * update of api docstrings across coach and tutorials 0-2 * added some missing api documentation * New Sphinx based documentation
This commit is contained in:
@@ -43,6 +43,15 @@ class UCBParameters(EGreedyParameters):
|
||||
|
||||
|
||||
class UCB(EGreedy):
|
||||
"""
|
||||
UCB exploration policy follows the upper confidence bound heuristic to sample actions in discrete action spaces.
|
||||
It assumes that there are multiple network heads that are predicting action values, and that the standard deviation
|
||||
between the heads' predictions represents the uncertainty of the agent in each of the actions.
|
||||
It then updates the action value estimates to be mean(actions)+lambda*stdev(actions), where lambda is
|
||||
given by the user. This exploration policy aims to take advantage of the uncertainty of the agent in its predictions,
|
||||
and select the action according to the tradeoff between how uncertain the agent is, and how large it predicts
|
||||
the outcome from those actions to be.
|
||||
"""
|
||||
def __init__(self, action_space: ActionSpace, epsilon_schedule: Schedule, evaluation_epsilon: float,
|
||||
architecture_num_q_heads: int, lamb: int,
|
||||
continuous_exploration_policy_parameters: ExplorationParameters = AdditiveNoiseParameters()):
|
||||
|
||||
Reference in New Issue
Block a user