Batch RL (#238)

2026-04-27 03:11:28 +02:00 · 2019-03-19 18:07:09 +02:00
parent 4a8451ff02
commit e3c7e526c7
38 changed files with 1003 additions and 87 deletions
@@ -50,6 +50,9 @@ class DDQNAgent(ValueOptimizationAgent):
            (self.networks['main'].online_network, batch.states(network_keys))
        ])

+        # add Q value samples for logging
+        self.q_values.add_sample(TD_targets)
+
        # initialize with the current prediction so that only the action
        #  that was actually taken in this transition gets updated
        TD_errors = []