From 7d79433c05a578d2c2835756498048923f363313 Mon Sep 17 00:00:00 2001
From: zach dwiel
Date: Thu, 4 Apr 2019 11:54:18 -0400
Subject: [PATCH] remove unused parameter
 scale_external_reward_by_intrinsic_reward_value

---
 .gitignore                  | 2 +-
 rl_coach/agents/agent.py    | 7 ++-----
 rl_coach/base_parameters.py | 3 ---
 3 files changed, 3 insertions(+), 9 deletions(-)

diff --git a/.gitignore b/.gitignore
index 50bcf54..c0ace1d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -15,6 +15,7 @@ roboschool
 *.orig
 docs/site
 coach_env
+venv
 build
 rl_coach.egg*
 rl_coach_slim.egg*
@@ -32,4 +33,3 @@ trace_test*
 .cache/
 *.pyc
 coachenv
-
diff --git a/rl_coach/agents/agent.py b/rl_coach/agents/agent.py
index bbd9004..f497e14 100644
--- a/rl_coach/agents/agent.py
+++ b/rl_coach/agents/agent.py
@@ -895,10 +895,7 @@ class Agent(AgentInterface):
         transition = self.update_transition_before_adding_to_replay_buffer(transition)
 
         # merge the intrinsic reward in
-        if self.ap.algorithm.scale_external_reward_by_intrinsic_reward_value:
-            transition.reward = transition.reward * (1 + self.last_action_info.action_intrinsic_reward)
-        else:
-            transition.reward = transition.reward + self.last_action_info.action_intrinsic_reward
+        transition.reward = transition.reward + self.last_action_info.action_intrinsic_reward
 
         # sum up the total shaped reward
         self.total_shaped_reward_in_current_episode += transition.reward
@@ -1026,7 +1023,7 @@ class Agent(AgentInterface):
         self.total_reward_in_current_episode += transition.reward
         self.shaped_reward.add_sample(transition.reward)
         self.reward.add_sample(transition.reward)
-
+
         # create and store the transition
         if self.phase in [RunPhase.TRAIN, RunPhase.HEATUP]:
             # for episodic memories we keep the transitions in a local buffer until the episode is ended.
diff --git a/rl_coach/base_parameters.py b/rl_coach/base_parameters.py
index e462e2b..815a78a 100644
--- a/rl_coach/base_parameters.py
+++ b/rl_coach/base_parameters.py
@@ -200,9 +200,6 @@ class AlgorithmParameters(Parameters):
         # distributed agents params
         self.share_statistics_between_workers = True
 
-        # intrinsic reward
-        self.scale_external_reward_by_intrinsic_reward_value = False
-
         # n-step returns
         self.n_step = -1  # calculate the total return (no bootstrap, by default)
 
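
Reviewer note (not part of the patch): the agent.py hunk drops the branch guarded by
scale_external_reward_by_intrinsic_reward_value, which AlgorithmParameters initialised
to False, so the additive merge that remains is the path the default configuration
already took; the subject line states the parameter was otherwise unused. A minimal
sketch of the before/after behaviour, using a stand-in Transition class rather than
the rl_coach one:

# Stand-in sketch only; names mirror the diff but this is not rl_coach code.

class Transition:
    def __init__(self, reward: float):
        self.reward = reward


def merge_intrinsic_reward_before(transition: Transition, intrinsic_reward: float,
                                  scale_external_by_intrinsic: bool) -> float:
    """Pre-patch behaviour: a flag selected multiplicative scaling of the reward."""
    if scale_external_by_intrinsic:
        transition.reward = transition.reward * (1 + intrinsic_reward)
    else:
        transition.reward = transition.reward + intrinsic_reward
    return transition.reward


def merge_intrinsic_reward_after(transition: Transition, intrinsic_reward: float) -> float:
    """Post-patch behaviour: the intrinsic reward is always added."""
    transition.reward = transition.reward + intrinsic_reward
    return transition.reward


if __name__ == "__main__":
    # With the flag at its False default, both versions produce the same reward,
    # which is why removing the parameter does not change default behaviour.
    assert merge_intrinsic_reward_before(Transition(1.0), 0.5, False) == \
           merge_intrinsic_reward_after(Transition(1.0), 0.5) == 1.5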