1
0
mirror of https://github.com/gryf/coach.git synced 2025-12-17 19:20:19 +01:00

BCQ variant on top of DDQN (#276)

* kNN-based model for predicting which actions to drop
* Fix for seeds with batch RL
This commit is contained in:
Gal Leibovich
2019-04-16 17:06:23 +03:00
committed by GitHub
parent bdb9b224a8
commit 4741b0b916
11 changed files with 451 additions and 62 deletions

View File

@@ -77,8 +77,9 @@ class BatchRLGraphManager(BasicRLGraphManager):
 self.agent_params.name = "agent"
 self.agent_params.is_batch_rl_training = True
-# user hasn't defined params for the reward model. we will use the same params as used for the 'main' network.
 if 'reward_model' not in self.agent_params.network_wrappers:
+# user hasn't defined params for the reward model. we will use the same params as used for the 'main'
+# network.
 self.agent_params.network_wrappers['reward_model'] = deepcopy(self.agent_params.network_wrappers['main'])
 agent = short_dynamic_import(self.agent_params.path)(self.agent_params)