1
0
mirror of https://github.com/gryf/coach.git synced 2025-12-18 11:40:18 +01:00
This commit is contained in:
Gal Leibovich
2019-03-19 18:07:09 +02:00
committed by GitHub
parent 4a8451ff02
commit e3c7e526c7
38 changed files with 1003 additions and 87 deletions

View File

@@ -100,6 +100,9 @@ class CategoricalDQNAgent(ValueOptimizationAgent):
(self.networks['main'].online_network, batch.states(network_keys))
])
# add Q value samples for logging
self.q_values.add_sample(self.distribution_prediction_to_q_values(TD_targets))
# select the optimal actions for the next state
target_actions = np.argmax(self.distribution_prediction_to_q_values(distributional_q_st_plus_1), axis=1)
m = np.zeros((self.ap.network_wrappers['main'].batch_size, self.z_values.size))