BCQ variant on top of DDQN (#276)

* kNN-based model for predicting which actions to drop
* fix for seeds with batch RL
2026-02-15 05:25:55 +01:00 · 2019-04-16 17:06:23 +03:00
parent bdb9b224a8
commit 4741b0b916
11 changed files with 451 additions and 62 deletions
--- a/rl_coach/graph_managers/batch_rl_graph_manager.py
+++ b/rl_coach/graph_managers/batch_rl_graph_manager.py
@@ -77,8 +77,9 @@ class BatchRLGraphManager(BasicRLGraphManager):
        self.agent_params.name = "agent"
        self.agent_params.is_batch_rl_training = True

-        # user hasn't defined params for the reward model. we will use the same params as used for the 'main' network.
        if 'reward_model' not in self.agent_params.network_wrappers:
+            # user hasn't defined params for the reward model. we will use the same params as used for the 'main'
+            # network.
            self.agent_params.network_wrappers['reward_model'] = deepcopy(self.agent_params.network_wrappers['main'])

        agent = short_dynamic_import(self.agent_params.path)(self.agent_params)