
remove unused ActionInfo.action_intrinsic_reward

zach dwiel
2019-04-04 11:55:42 -04:00
committed by Zach Dwiel
parent 7d79433c05
commit f16cd3cb1e
2 changed files with 1 addition and 8 deletions


@@ -894,9 +894,6 @@ class Agent(AgentInterface):
         # make agent specific changes to the transition if needed
         transition = self.update_transition_before_adding_to_replay_buffer(transition)
 
-        # merge the intrinsic reward in
-        transition.reward = transition.reward + self.last_action_info.action_intrinsic_reward
-
         # sum up the total shaped reward
         self.total_shaped_reward_in_current_episode += transition.reward
         self.total_reward_in_current_episode += env_response.reward
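With the intrinsic-reward merge gone from this code path, an agent that still wants to fold a bonus into the stored reward has to do so elsewhere. One natural place is the update_transition_before_adding_to_replay_buffer hook that survives this hunk. A minimal sketch of a subclass using it follows; the IntrinsicRewardAgent name, the compute_intrinsic_bonus helper, and the import path are assumptions for illustration, not part of this commit:

    # Hypothetical subclass: only update_transition_before_adding_to_replay_buffer
    # comes from the diff above; everything else is illustrative.
    from rl_coach.agents.agent import Agent  # assumed module path

    class IntrinsicRewardAgent(Agent):
        def compute_intrinsic_bonus(self, transition) -> float:
            # placeholder for e.g. a curiosity or count-based bonus
            return 0.0

        def update_transition_before_adding_to_replay_buffer(self, transition):
            # fold the intrinsic bonus into the extrinsic reward before the
            # transition reaches the replay buffer
            transition.reward = transition.reward + self.compute_intrinsic_bonus(transition)
            return transition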


@@ -344,8 +344,7 @@ class ActionInfo(object):
     """
     def __init__(self, action: ActionType, all_action_probabilities: float=0,
-                 action_value: float=0., state_value: float=0., max_action_value: float=None,
-                 action_intrinsic_reward: float=0):
+                 action_value: float=0., state_value: float=0., max_action_value: float=None):
         """
         :param action: the action
         :param all_action_probabilities: the probability that the action was given when selecting it
@@ -354,8 +353,6 @@ class ActionInfo(object):
         :param max_action_value: in case this is an action that was selected randomly, this is the value of the action
                                  that received the maximum value. if no value is given, the action is assumed to be the
                                  action with the maximum value
-        :param action_intrinsic_reward: can contain any intrinsic reward that the agent wants to add to this action
-                                        selection
         """
         self.action = action
         self.all_action_probabilities = all_action_probabilities
@@ -365,7 +362,6 @@ class ActionInfo(object):
             self.max_action_value = action_value
         else:
             self.max_action_value = max_action_value
-        self.action_intrinsic_reward = action_intrinsic_reward
 
 
 class Batch(object):
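After this change, ActionInfo neither accepts nor stores an intrinsic reward; callers pass at most the four remaining parameters. A small usage sketch, where the import path and the concrete values are assumptions and the max_action_value fallback follows the constructor logic shown above:

    from rl_coach.core_types import ActionInfo  # assumed module path

    info = ActionInfo(action=1, all_action_probabilities=0.25,
                      action_value=0.7, state_value=0.65)
    # when max_action_value is not given, it falls back to action_value
    assert info.max_action_value == 0.7
    # the removed attribute is simply gone now
    assert not hasattr(info, 'action_intrinsic_reward')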