mirror of
https://github.com/gryf/coach.git
synced 2025-12-18 11:40:18 +01:00
N-step returns for rainbow (#67)
* n_step returns for rainbow
* Rename CartPole_PPO -> CartPole_ClippedPPO
This commit is contained in:
@@ -74,7 +74,7 @@ class PolicyGradientsAgent(PolicyOptimizationAgent):
         # batch contains a list of episodes to learn from
         network_keys = self.ap.network_wrappers['main'].input_embedders_parameters.keys()
 
-        total_returns = batch.total_returns()
+        total_returns = batch.n_step_discounted_rewards()
         for i in reversed(range(batch.size)):
             if self.policy_gradient_rescaler == PolicyGradientRescaler.TOTAL_RETURN:
                 total_returns[i] = total_returns[0]
||||
Reference in New Issue
Block a user