update comment describing why the output filters don't modify Agent.last_action_info

2025-12-17 19:20:19 +01:00 · 2019-04-08 12:14:35 -04:00
parent fd2c210915
commit 88f9c926ab
1 changed files with 4 additions and 1 deletions
--- a/rl_coach/agents/agent.py
+++ b/rl_coach/agents/agent.py
@@ -818,7 +818,10 @@ class Agent(AgentInterface):

        self.last_action_info = action

-        # is it intentional that self.last_action_info is not filtered?
+        # output filters are explicitly applied after recording self.last_action_info. This is
+        # because the output filters may change the representation of the action so that the agent
+        # can no longer use the transition in its replay buffer. It is possible that these filters
+        # could be moved to the environment instead, but they are here now for historical reasons.
        filtered_action_info = self.output_filter.filter(self.last_action_info)

        return filtered_action_info