1
0
mirror of https://github.com/gryf/coach.git synced 2025-12-17 19:20:19 +01:00

bug fixes for OPE (#311)

This commit is contained in:
Gal Leibovich
2019-05-21 16:39:11 +03:00
committed by GitHub
parent 85d70dd7d5
commit acceb03ac0
8 changed files with 38 additions and 21 deletions

View File

@@ -173,12 +173,12 @@ class BatchRLGraphManager(BasicRLGraphManager):
 """
 agent = self.level_managers[0].agents['agent']
-screen.log_title("Training a regression model for estimating MDP rewards")
-agent.improve_reward_model(epochs=self.reward_model_num_epochs)
 # prepare dataset to be consumed in the expected formats for OPE
 agent.memory.prepare_evaluation_dataset()
+screen.log_title("Training a regression model for estimating MDP rewards")
+agent.improve_reward_model(epochs=self.reward_model_num_epochs)
 screen.log_title("Collecting static statistics for OPE")
 agent.ope_manager.gather_static_shared_stats(evaluation_dataset_as_transitions=
 agent.memory.evaluation_dataset_as_transitions,