mirror of https://github.com/gryf/coach.git synced 2025-12-17 19:20:19 +01:00

remove unused ActionInfo.action_intrinsic_reward

zach dwiel
2019-04-04 11:55:42 -04:00
committed by Zach Dwiel
parent 7d79433c05
commit f16cd3cb1e
2 changed files with 1 addition and 8 deletions


@@ -894,9 +894,6 @@ class Agent(AgentInterface):
         # make agent specific changes to the transition if needed
         transition = self.update_transition_before_adding_to_replay_buffer(transition)
-        # merge the intrinsic reward in
-        transition.reward = transition.reward + self.last_action_info.action_intrinsic_reward
         # sum up the total shaped reward
         self.total_shaped_reward_in_current_episode += transition.reward
         self.total_reward_in_current_episode += env_response.reward
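
To make the effect of this hunk concrete, below is a minimal, self-contained sketch of the reward bookkeeping that remains after the removal. `Transition`, `EnvResponse`, `AgentSketch`, and the `observe` method shown here are simplified stand-ins rather than Coach's actual classes and API; only the two accumulation lines mirror the diff.

```python
from dataclasses import dataclass


@dataclass
class Transition:
    reward: float


@dataclass
class EnvResponse:
    reward: float


class AgentSketch:
    def __init__(self):
        self.total_shaped_reward_in_current_episode = 0.0
        self.total_reward_in_current_episode = 0.0

    def observe(self, transition: Transition, env_response: EnvResponse) -> None:
        # Before this commit, the intrinsic reward carried on the last ActionInfo
        # was added into transition.reward at this point; after the change the
        # (possibly shaped) transition reward is accumulated as-is.
        self.total_shaped_reward_in_current_episode += transition.reward
        self.total_reward_in_current_episode += env_response.reward


agent = AgentSketch()
agent.observe(Transition(reward=1.5), EnvResponse(reward=1.0))
print(agent.total_shaped_reward_in_current_episode)  # 1.5
print(agent.total_reward_in_current_episode)         # 1.0
```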


@@ -344,8 +344,7 @@ class ActionInfo(object):
"""
def __init__(self, action: ActionType, all_action_probabilities: float=0,
action_value: float=0., state_value: float=0., max_action_value: float=None,
action_intrinsic_reward: float=0):
action_value: float=0., state_value: float=0., max_action_value: float=None):
"""
:param action: the action
:param all_action_probabilities: the probability that the action was given when selecting it
@@ -354,8 +353,6 @@ class ActionInfo(object):
         :param max_action_value: in case this is an action that was selected randomly, this is the value of the action
                                  that received the maximum value. if no value is given, the action is assumed to be the
                                  action with the maximum value
-        :param action_intrinsic_reward: can contain any intrinsic reward that the agent wants to add to this action
-                                        selection
         """
         self.action = action
         self.all_action_probabilities = all_action_probabilities
@@ -365,7 +362,6 @@ class ActionInfo(object):
             self.max_action_value = action_value
         else:
             self.max_action_value = max_action_value
-        self.action_intrinsic_reward = action_intrinsic_reward
 class Batch(object):
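
Putting the hunks of the second file together, `ActionInfo` after this commit reads roughly as sketched below. `ActionType` is stubbed as a plain type alias here, and the `action_value`/`state_value` assignments are assumptions since they are not visible in the hunks; the constructor signature, the shown assignments, and the `max_action_value` fallback come directly from the diff.

```python
from typing import Union

# Stand-in for Coach's ActionType alias, which is defined elsewhere in the codebase.
ActionType = Union[int, float, list]


class ActionInfo(object):
    def __init__(self, action: ActionType, all_action_probabilities: float=0,
                 action_value: float=0., state_value: float=0., max_action_value: float=None):
        self.action = action
        self.all_action_probabilities = all_action_probabilities
        self.action_value = action_value  # assumed; not visible in the hunks
        self.state_value = state_value    # assumed; not visible in the hunks
        # If no explicit maximum is given, the selected action is treated as the
        # action with the maximum value.
        if max_action_value is None:
            self.max_action_value = action_value
        else:
            self.max_action_value = max_action_value


# Usage example: with max_action_value left unset, it falls back to action_value.
info = ActionInfo(action=2, all_action_probabilities=0.25, action_value=1.3, state_value=1.1)
assert info.max_action_value == 1.3
```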