mirror of
https://github.com/gryf/coach.git
synced 2025-12-18 03:30:19 +01:00
Batch RL (#238)
This commit is contained in:
@@ -50,6 +50,9 @@ class DDQNAgent(ValueOptimizationAgent):
|
||||
(self.networks['main'].online_network, batch.states(network_keys))
|
||||
])
|
||||
|
||||
# add Q value samples for logging
|
||||
self.q_values.add_sample(TD_targets)
|
||||
|
||||
# initialize with the current prediction so that we will
|
||||
# only update the action that we have actually done in this transition
|
||||
TD_errors = []
|
||||
|
||||
Reference in New Issue
Block a user