diff --git a/rl_coach/agents/clipped_ppo_agent.py b/rl_coach/agents/clipped_ppo_agent.py
index 84f428c..362dd4c 100644
--- a/rl_coach/agents/clipped_ppo_agent.py
+++ b/rl_coach/agents/clipped_ppo_agent.py
@@ -194,7 +194,9 @@ class ClippedPPOAgent(ActorCriticAgent):
                 for input_index, input in enumerate(old_policy_distribution):
                     inputs['output_1_{}'.format(input_index + 1)] = input
 
-                inputs['output_1_3'] = self.ap.algorithm.clipping_decay_schedule.current_value
+                # update the clipping decay schedule value
+                inputs['output_1_{}'.format(len(old_policy_distribution)+1)] = \
+                    self.ap.algorithm.clipping_decay_schedule.current_value
 
                 total_loss, losses, unclipped_grads, fetch_result = \
                     self.networks['main'].train_and_sync_networks(
diff --git a/rl_coach/exploration_policies/exploration_policy.py b/rl_coach/exploration_policies/exploration_policy.py
index b4b5a6d..679f8b8 100644
--- a/rl_coach/exploration_policies/exploration_policy.py
+++ b/rl_coach/exploration_policies/exploration_policy.py
@@ -52,7 +52,12 @@ class ExplorationPolicy(object):
         :param action_values: A list of action values
         :return: The chosen action
         """
-        pass
+        if self.__class__ == ExplorationPolicy:
+            raise ValueError("The ExplorationPolicy class is an abstract class and should not be used directly. "
+                             "Please set the exploration parameters to point to an inheriting class like EGreedy or "
+                             "AdditiveNoise")
+        else:
+            raise ValueError("The get_action function should be overridden in the inheriting exploration class")
 
     def change_phase(self, phase):
         """
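
For context, a minimal standalone sketch of what the first hunk changes, assuming a two-parameter old policy distribution; the dict keys follow the diff, while the tensors are replaced by placeholder strings and the schedule value by a constant:

    # Toy illustration of the dynamic input key; values are placeholders.
    old_policy_distribution = ['means', 'stds']  # stand-in for e.g. a Gaussian head's two tensors
    inputs = {}
    for input_index, input in enumerate(old_policy_distribution):
        inputs['output_1_{}'.format(input_index + 1)] = input

    # The clipping decay value now always takes the next free slot:
    # 'output_1_3' here, but 'output_1_2' for a one-parameter distribution,
    # where the previously hardcoded 'output_1_3' key would have been wrong.
    inputs['output_1_{}'.format(len(old_policy_distribution) + 1)] = 0.2

And a sketch of the abstract-method guard introduced in the second hunk, with a simplified, hypothetical EGreedy subclass standing in for rl_coach's real one:

    import numpy as np

    class ExplorationPolicy(object):
        def get_action(self, action_values):
            # Distinguish "the abstract base was instantiated directly" from
            # "a subclass forgot to override get_action", as the patch does.
            if self.__class__ == ExplorationPolicy:
                raise ValueError("ExplorationPolicy is abstract; use an inheriting class such as EGreedy")
            else:
                raise ValueError("get_action should be overridden in the inheriting exploration class")

    class EGreedy(ExplorationPolicy):
        def __init__(self, epsilon=0.1):
            self.epsilon = epsilon

        def get_action(self, action_values):
            # With probability epsilon take a random action, otherwise the greedy one.
            if np.random.rand() < self.epsilon:
                return np.random.randint(len(action_values))
            return int(np.argmax(action_values))

    # EGreedy().get_action([0.1, 0.9]) returns an action index, while
    # ExplorationPolicy().get_action([0.1, 0.9]) now raises an informative
    # error instead of silently returning None as the old `pass` did.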