1
0
mirror of https://github.com/gryf/coach.git synced 2025-12-17 19:20:19 +01:00

update comment describing why the output filters don't modify Agent.last_action_info

This commit is contained in:
zach dwiel
2019-04-08 12:14:35 -04:00
committed by Zach Dwiel
parent fd2c210915
commit 88f9c926ab

View File

@@ -818,7 +818,10 @@ class Agent(AgentInterface):
self.last_action_info = action
# is it intentional that self.last_action_info is not filtered?
# output filters are explicitly applied after recording self.last_action_info. This is
# because the output filters may change the representation of the action so that the agent
# can no longer use the transition in its replay buffer. It is possible that these filters
# could be moved to the environment instead, but they are here now for historical reasons.
filtered_action_info = self.output_filter.filter(self.last_action_info)
return filtered_action_info