Bug fixes for ClippedPPO agent and checkpoints

2026-02-16 05:55:46 +01:00 · 2018-04-30 15:13:29 +03:00
parent f31159aad6
commit dafdb05a7c
3 changed files with 17 additions and 6 deletions
--- a/agents/clipped_ppo_agent.py
+++ b/agents/clipped_ppo_agent.py
@@ -176,7 +176,7 @@ class ClippedPPOAgent(ActorCriticAgent):
        dataset = dataset[:self.tp.agent.num_consecutive_playing_steps]

        if self.tp.distributed and self.tp.agent.share_statistics_between_workers:
-            self.running_observation_stats.push(np.array([t.state['observation'] for t in dataset]))
+            self.running_observation_stats.push(np.array([np.array(t.state['observation']) for t in dataset]))

        losses = self.train_network(dataset, 10)
        self.value_loss.add_sample(losses[0])