From 88f9c926ab4156bd6b2bcf47e6bdc9cf3e415794 Mon Sep 17 00:00:00 2001 From: zach dwiel Date: Mon, 8 Apr 2019 12:14:35 -0400 Subject: [PATCH] update comment describing why the output filters don't modify Agent.last_action_info --- rl_coach/agents/agent.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/rl_coach/agents/agent.py b/rl_coach/agents/agent.py index f4f0fc8..8517fc0 100644 --- a/rl_coach/agents/agent.py +++ b/rl_coach/agents/agent.py @@ -818,7 +818,10 @@ class Agent(AgentInterface): self.last_action_info = action - # is it intentional that self.last_action_info is not filtered? + # output filters are explicitly applied after recording self.last_action_info. This is + # because the output filters may change the representation of the action so that the agent + # can no longer use the transition in its replay buffer. It is possible that these filters + # could be moved to the environment instead, but they are here now for historical reasons. filtered_action_info = self.output_filter.filter(self.last_action_info) return filtered_action_info