mirror of
https://github.com/gryf/coach.git
synced 2025-12-17 11:10:20 +01:00
Batch RL (#238)
This commit is contained in:
@@ -81,6 +81,9 @@ class DQNAgent(ValueOptimizationAgent):
|
||||
(self.networks['main'].online_network, batch.states(network_keys))
|
||||
])
|
||||
|
||||
# add Q value samples for logging
|
||||
self.q_values.add_sample(TD_targets)
|
||||
|
||||
# only update the action that we have actually done in this transition
|
||||
TD_errors = []
|
||||
for i in range(self.ap.network_wrappers['main'].batch_size):
|
||||
|
||||
Reference in New Issue
Block a user