Mirror of https://github.com/gryf/coach.git, synced 2025-12-17 19:20:19 +01:00
remove unused ActionInfo.action_intrinsic_reward
@@ -894,9 +894,6 @@ class Agent(AgentInterface):
         # make agent specific changes to the transition if needed
         transition = self.update_transition_before_adding_to_replay_buffer(transition)
 
-        # merge the intrinsic reward in
-        transition.reward = transition.reward + self.last_action_info.action_intrinsic_reward
-
         # sum up the total shaped reward
         self.total_shaped_reward_in_current_episode += transition.reward
         self.total_reward_in_current_episode += env_response.reward
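Note: with the generic intrinsic-reward merge removed from Agent.observe, an agent that still wants an intrinsic bonus has to fold it into the transition itself. A minimal sketch of one way to do that, assuming a hypothetical CuriosityAgent subclass and a hypothetical _compute_intrinsic_reward helper; only the override hook update_transition_before_adding_to_replay_buffer and the writable transition.reward attribute are taken from the hunk above, the rest is illustrative:

class CuriosityAgent(Agent):
    def update_transition_before_adding_to_replay_buffer(self, transition):
        # hypothetical helper: derive an intrinsic bonus from the transition
        intrinsic_reward = self._compute_intrinsic_reward(transition)
        # merge it into the extrinsic reward before the transition is stored
        transition.reward = transition.reward + intrinsic_reward
        return transition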
@@ -344,8 +344,7 @@ class ActionInfo(object):
     """
 
     def __init__(self, action: ActionType, all_action_probabilities: float=0,
-                 action_value: float=0., state_value: float=0., max_action_value: float=None,
-                 action_intrinsic_reward: float=0):
+                 action_value: float=0., state_value: float=0., max_action_value: float=None):
         """
         :param action: the action
         :param all_action_probabilities: the probability that the action was given when selecting it
@@ -354,8 +353,6 @@ class ActionInfo(object):
         :param max_action_value: in case this is an action that was selected randomly, this is the value of the action
                                  that received the maximum value. if no value is given, the action is assumed to be the
                                  action with the maximum value
-        :param action_intrinsic_reward: can contain any intrinsic reward that the agent wants to add to this action
-                                        selection
         """
         self.action = action
         self.all_action_probabilities = all_action_probabilities
@@ -365,7 +362,6 @@ class ActionInfo(object):
             self.max_action_value = action_value
         else:
             self.max_action_value = max_action_value
-        self.action_intrinsic_reward = action_intrinsic_reward
 
 
 class Batch(object):
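Note: after this commit, callers can no longer pass action_intrinsic_reward to ActionInfo. A minimal usage sketch under the trimmed signature; the parameter list comes from the diff, while the import path is an assumption about where ActionInfo lives in this repository:

from rl_coach.core_types import ActionInfo  # assumed module path

# construct ActionInfo with the signature left after this commit
info = ActionInfo(action=2,
                  all_action_probabilities=0.25,
                  action_value=1.3,
                  state_value=1.1,
                  max_action_value=None)  # None -> action assumed to have the maximum value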