1
0
mirror of https://github.com/gryf/coach.git synced 2026-03-24 19:53:32 +01:00

N-step returns for rainbow (#67)

* n_step returns for rainbow
* Rename CartPole_PPO -> CartPole_ClippedPPO
This commit is contained in:
Gal Leibovich
2018-11-07 18:33:08 +02:00
committed by GitHub
parent 35c477c922
commit 49dea39d34
18 changed files with 173 additions and 117 deletions

View File

@@ -41,8 +41,8 @@ def test_store_and_get(buffer: SingleEpisodeBuffer):
# check that the episode is valid
episode = buffer.get(0)
assert episode.length() == 2
-assert episode.get_transition(0).total_return == 1 + 0.99
-assert episode.get_transition(1).total_return == 1
+assert episode.get_transition(0).n_step_discounted_rewards == 1 + 0.99
+assert episode.get_transition(1).n_step_discounted_rewards == 1
assert buffer.mean_reward() == 1
# only one episode in the replay buffer