
Parallel agents fixes (#95)

* Parallel agents related bug fixes: checkpoint restore, tensorboard integration.
Adding narrow networks support.
Reference code for unlimited number of checkpoints
Itai Caspi
2018-05-24 14:24:19 +03:00
committed by GitHub
parent 6c0b59b4de
commit d302168c8c
10 changed files with 75 additions and 41 deletions


@@ -550,9 +550,10 @@ class Agent(object):
             if current_snapshot_period > model_snapshots_periods_passed:
                 model_snapshots_periods_passed = current_snapshot_period
                 self.save_model(model_snapshots_periods_passed)
-                to_pickle(self.running_observation_stats,
-                          os.path.join(self.tp.save_model_dir,
-                                       "running_stats.p".format(model_snapshots_periods_passed)))
+                if self.running_observation_stats is not None:
+                    to_pickle(self.running_observation_stats,
+                              os.path.join(self.tp.save_model_dir,
+                                           "running_stats.p".format(model_snapshots_periods_passed)))
             # play and record in replay buffer
             if self.tp.agent.collect_new_data:
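
This hunk wraps the pickling of running_observation_stats in a None check, so agents that do not track observation statistics no longer fail when a checkpoint is written. Note that "running_stats.p" contains no format placeholder, so the .format(model_snapshots_periods_passed) call leaves the filename unchanged and each snapshot overwrites the same file. Below is a minimal sketch of the guarded dump, assuming to_pickle is a thin wrapper around pickle.dump; the to_pickle and save_snapshot helpers here are hypothetical stand-ins, not the project's actual functions.

import os
import pickle

def to_pickle(obj, path):
    # Serialize obj to disk at the given path (assumed behaviour of the helper).
    with open(path, 'wb') as f:
        pickle.dump(obj, f)

def save_snapshot(agent, snapshot_idx):
    # Always save the model weights for this snapshot period.
    agent.save_model(snapshot_idx)
    # Only pickle the running observation statistics when the agent actually
    # maintains them; agents without observation normalization keep this as None.
    if agent.running_observation_stats is not None:
        to_pickle(agent.running_observation_stats,
                  os.path.join(agent.tp.save_model_dir, "running_stats.p"))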


@@ -69,7 +69,7 @@ class ClippedPPOAgent(ActorCriticAgent):
             screen.warning("WARNING: The requested policy gradient rescaler is not available")
         # standardize
-        advantages = (advantages - np.mean(advantages)) / np.std(advantages)
+        advantages = (advantages - np.mean(advantages)) / (np.std(advantages) + 1e-8)
         for transition, advantage, value_target in zip(batch, advantages, value_targets):
             transition.info['advantage'] = advantage
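
The second hunk adds a small epsilon to the standard deviation before standardizing the advantages. When every advantage in a batch is identical (for example a degenerate batch early in training), np.std returns 0 and the old code divided by zero, filling the batch with NaNs that then propagate into the policy loss. A standalone NumPy illustration of the failure mode and the fix (not the agent code itself):

import numpy as np

advantages = np.array([0.5, 0.5, 0.5, 0.5])  # zero-variance batch

# Old behaviour: 0 / 0 -> nan for every element (with a RuntimeWarning).
unsafe = (advantages - np.mean(advantages)) / np.std(advantages)

# New behaviour: the epsilon keeps the denominator finite, yielding zeros.
safe = (advantages - np.mean(advantages)) / (np.std(advantages) + 1e-8)

print(unsafe)  # [nan nan nan nan]
print(safe)    # [0. 0. 0. 0.]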