Mirror of https://github.com/gryf/coach.git, synced 2025-12-17 11:10:20 +01:00
update of api docstrings across coach and tutorials [WIP] (#91)
* updating the documentation website
* adding the built docs
* update of api docstrings across coach and tutorials 0-2
* added some missing api documentation
* New Sphinx based documentation
@@ -24,6 +24,19 @@ from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperi
class PALAlgorithmParameters(DQNAlgorithmParameters):
    """
    :param pal_alpha: (float)
        A factor that weights the amount by which the advantage learning update will be taken into account.

    :param persistent_advantage_learning: (bool)
        If set to True, the persistent mode of advantage learning will be used, which encourages the agent to take
        the same action one after the other instead of changing actions.

    :param monte_carlo_mixing_rate: (float)
        The amount of Monte Carlo values to mix into the targets of the network. The Monte Carlo values are just the
        total discounted returns, and they can help reduce the time it takes for the network to update to the newly
        seen values, since they are not based on bootstrapping the current network values.
    """
    def __init__(self):
        super().__init__()
        self.pal_alpha = 0.9
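For context, a minimal usage sketch of how the documented parameters might be overridden when configuring a PAL agent. It assumes rl_coach's PALAgentParameters exposes this PALAlgorithmParameters instance on its `algorithm` attribute, following the usual rl_coach agent-parameter convention; the attribute layout here is an assumption, not part of the diff above.

from rl_coach.agents.pal_agent import PALAgentParameters

# Assumed sketch: agent parameters hold the algorithm parameters on `.algorithm`.
agent_params = PALAgentParameters()

# Weight of the advantage learning correction applied to the targets.
agent_params.algorithm.pal_alpha = 0.9

# Encourage repeating the previous action (persistent advantage learning).
agent_params.algorithm.persistent_advantage_learning = True

# Fraction of the total discounted return mixed into the bootstrapped targets.
agent_params.algorithm.monte_carlo_mixing_rate = 0.1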