1
0
mirror of https://github.com/gryf/coach.git synced 2025-12-17 11:10:20 +01:00

bug fixes for clippedppo and checkpoints

This commit is contained in:
Gal Novik
2018-04-30 15:13:29 +03:00
parent f31159aad6
commit dafdb05a7c
3 changed files with 17 additions and 6 deletions

View File

@@ -176,7 +176,7 @@ class ClippedPPOAgent(ActorCriticAgent):
dataset = dataset[:self.tp.agent.num_consecutive_playing_steps]
if self.tp.distributed and self.tp.agent.share_statistics_between_workers:
-self.running_observation_stats.push(np.array([t.state['observation'] for t in dataset]))
+self.running_observation_stats.push(np.array([np.array(t.state['observation']) for t in dataset]))
losses = self.train_network(dataset, 10)
self.value_loss.add_sample(losses[0])