1
0
mirror of https://github.com/gryf/coach.git synced 2025-12-17 11:10:20 +01:00

fix more agents

This commit is contained in:
Zach Dwiel
2018-02-16 20:06:51 -05:00
parent 98f57a0d87
commit 8248caf35e
6 changed files with 52 additions and 42 deletions

View File

@@ -114,7 +114,6 @@ class ClippedPPOAgent(ActorCriticAgent):
# otherwise, it has both a mean and standard deviation
for input_index, input in enumerate(old_policy_distribution):
inputs['output_0_{}'.format(input_index + 1)] = input
# print('old_policy_distribution.shape', len(old_policy_distribution))
total_loss, policy_losses, unclipped_grads, fetch_result =\
self.main_network.online_network.accumulate_gradients(
inputs, [total_return, advantages], additional_fetches=fetches)