Batch RL (#238)

2026-02-27 12:45:52 +01:00 · 2019-03-19 18:07:09 +02:00
parent 4a8451ff02
commit e3c7e526c7
38 changed files with 1003 additions and 87 deletions
--- a/rl_coach/agents/n_step_q_agent.py
+++ b/rl_coach/agents/n_step_q_agent.py
@@ -123,6 +123,9 @@ class NStepQAgent(ValueOptimizationAgent, PolicyOptimizationAgent):
        else:
            assert True, 'The available values for targets_horizon are: 1-Step, N-Step'

+        # add the Q value targets of this batch as samples for logging
+        self.q_values.add_sample(state_value_head_targets)
+
        # train
        result = self.networks['main'].online_network.accumulate_gradients(batch.states(network_keys), [state_value_head_targets])