mirror of
https://github.com/gryf/coach.git
synced 2025-12-17 19:20:19 +01:00
Parallel agents fixes (#95)
* Bug fixes for parallel agents: checkpoint restore and TensorBoard integration. Adds support for narrow networks, plus reference code for keeping an unlimited number of checkpoints.
This commit is contained in:
@@ -69,7 +69,7 @@ class ClippedPPOAgent(ActorCriticAgent):
|
||||
screen.warning("WARNING: The requested policy gradient rescaler is not available")
|
||||
|
||||
# standardize
|
||||
advantages = (advantages - np.mean(advantages)) / np.std(advantages)
|
||||
advantages = (advantages - np.mean(advantages)) / (np.std(advantages) + 1e-8)
|
||||
|
||||
for transition, advantage, value_target in zip(batch, advantages, value_targets):
|
||||
transition.info['advantage'] = advantage
|
||||
|
||||
Reference in New Issue
Block a user