Parallel agents fixes (#95)

* Bug fixes for parallel agents: checkpoint restore and TensorBoard integration. Adds support for narrow networks and reference code for an unlimited number of checkpoints.
2026-02-16 05:55:46 +01:00 · 2018-05-24 14:24:19 +03:00
parent 6c0b59b4de
commit d302168c8c
10 changed files with 75 additions and 41 deletions
--- a/agents/clipped_ppo_agent.py
+++ b/agents/clipped_ppo_agent.py
@@ -69,7 +69,7 @@ class ClippedPPOAgent(ActorCriticAgent):
            screen.warning("WARNING: The requested policy gradient rescaler is not available")

        # standardize
-        advantages = (advantages - np.mean(advantages)) / np.std(advantages)
+        advantages = (advantages - np.mean(advantages)) / (np.std(advantages) + 1e-8)

        for transition, advantage, value_target in zip(batch, advantages, value_targets):
            transition.info['advantage'] = advantage