bug fixes for OPE (#311)

2026-02-14 04:45:50 +01:00 · 2019-05-21 16:39:11 +03:00
parent 85d70dd7d5
commit acceb03ac0
8 changed files with 38 additions and 21 deletions
--- a/rl_coach/graph_managers/batch_rl_graph_manager.py
+++ b/rl_coach/graph_managers/batch_rl_graph_manager.py
@@ -173,12 +173,12 @@ class BatchRLGraphManager(BasicRLGraphManager):
        """
        agent = self.level_managers[0].agents['agent']

-        screen.log_title("Training a regression model for estimating MDP rewards")
-        agent.improve_reward_model(epochs=self.reward_model_num_epochs)
-
        # prepare dataset to be consumed in the expected formats for OPE
        agent.memory.prepare_evaluation_dataset()

+        screen.log_title("Training a regression model for estimating MDP rewards")
+        agent.improve_reward_model(epochs=self.reward_model_num_epochs)
+
        screen.log_title("Collecting static statistics for OPE")
        agent.ope_manager.gather_static_shared_stats(evaluation_dataset_as_transitions=
                                                     agent.memory.evaluation_dataset_as_transitions,