mirror of
https://github.com/gryf/coach.git
synced 2025-12-18 11:40:18 +01:00
Batch RL (#238)
This commit is contained in:
@@ -95,6 +95,9 @@ class RainbowDQNAgent(CategoricalDQNAgent):
|
||||
(self.networks['main'].online_network, batch.states(network_keys))
|
||||
])
|
||||
|
||||
# add Q value samples for logging
|
||||
self.q_values.add_sample(self.distribution_prediction_to_q_values(TD_targets))
|
||||
|
||||
# only update the action that we have actually done in this transition (using the Double-DQN selected actions)
|
||||
target_actions = ddqn_selected_actions
|
||||
m = np.zeros((self.ap.network_wrappers['main'].batch_size, self.z_values.size))
|
||||
|
||||
Reference in New Issue
Block a user