1
0
mirror of https://github.com/gryf/coach.git synced 2025-12-17 19:20:19 +01:00

N-step returns for rainbow (#67)

* n_step returns for rainbow
* Rename CartPole_PPO -> CartPole_ClippedPPO
This commit is contained in:
Gal Leibovich
2018-11-07 18:33:08 +02:00
committed by GitHub
parent 35c477c922
commit 49dea39d34
18 changed files with 173 additions and 117 deletions

View File

@@ -58,11 +58,13 @@ class MixedMonteCarloAgent(ValueOptimizationAgent):
(self.networks['main'].online_network, batch.states(network_keys))
])
total_returns = batch.n_step_discounted_rewards()
for i in range(self.ap.network_wrappers['main'].batch_size):
one_step_target = batch.rewards()[i] + \
(1.0 - batch.game_overs()[i]) * self.ap.algorithm.discount * \
q_st_plus_1[i][selected_actions[i]]
monte_carlo_target = batch.total_returns()[i]
monte_carlo_target = total_returns()[i]
TD_targets[i, batch.actions()[i]] = (1 - self.mixing_rate) * one_step_target + \
self.mixing_rate * monte_carlo_target