From f16cd3cb1e3c7579286b587ab5b655b663cc85fe Mon Sep 17 00:00:00 2001
From: zach dwiel
Date: Thu, 4 Apr 2019 11:55:42 -0400
Subject: [PATCH] remove unused ActionInfo.action_intrinsic_reward

---
 rl_coach/agents/agent.py | 3 ---
 rl_coach/core_types.py   | 6 +-----
 2 files changed, 1 insertion(+), 8 deletions(-)

diff --git a/rl_coach/agents/agent.py b/rl_coach/agents/agent.py
index f497e14..cc2665c 100644
--- a/rl_coach/agents/agent.py
+++ b/rl_coach/agents/agent.py
@@ -894,9 +894,6 @@ class Agent(AgentInterface):
         # make agent specific changes to the transition if needed
         transition = self.update_transition_before_adding_to_replay_buffer(transition)
 
-        # merge the intrinsic reward in
-        transition.reward = transition.reward + self.last_action_info.action_intrinsic_reward
-
         # sum up the total shaped reward
         self.total_shaped_reward_in_current_episode += transition.reward
         self.total_reward_in_current_episode += env_response.reward
diff --git a/rl_coach/core_types.py b/rl_coach/core_types.py
index 6321e9e..9fa07ef 100644
--- a/rl_coach/core_types.py
+++ b/rl_coach/core_types.py
@@ -344,8 +344,7 @@ class ActionInfo(object):
     """
     def __init__(self, action: ActionType, all_action_probabilities: float=0,
-                 action_value: float=0., state_value: float=0., max_action_value: float=None,
-                 action_intrinsic_reward: float=0):
+                 action_value: float=0., state_value: float=0., max_action_value: float=None):
         """
         :param action: the action
         :param all_action_probabilities: the probability that the action was given when selecting it
@@ -354,8 +353,6 @@ class ActionInfo(object):
         :param max_action_value: in case this is an action that was selected randomly, this is the value of the
                                  action that received the maximum value. if no value is given, the action is assumed
                                  to be the action with the maximum value
-        :param action_intrinsic_reward: can contain any intrinsic reward that the agent wants to add to this action
-                                        selection
         """
         self.action = action
         self.all_action_probabilities = all_action_probabilities
@@ -365,7 +362,6 @@ class ActionInfo(object):
             self.max_action_value = action_value
         else:
             self.max_action_value = max_action_value
-        self.action_intrinsic_reward = action_intrinsic_reward
 
 
 class Batch(object):
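
For context, a minimal usage sketch (not part of the patch, values are illustrative) of how ActionInfo is constructed once this change is applied, based only on the __init__ signature shown in the diff above. Any intrinsic bonus would now have to be folded into the transition reward elsewhere, for example in update_transition_before_adding_to_replay_buffer, which the patch leaves in place.

    # Illustrative sketch: ActionInfo after this patch (hypothetical values).
    from rl_coach.core_types import ActionInfo

    info = ActionInfo(action=2,
                      all_action_probabilities=0.25,
                      action_value=1.3,
                      state_value=1.1)

    # When max_action_value is not given, it falls back to action_value,
    # as in the __init__ body shown in the last hunk above.
    assert info.max_action_value == info.action_value

    # Passing the removed keyword would now raise a TypeError:
    # ActionInfo(action=2, action_intrinsic_reward=0.5)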