From 188b86369a459f2fb3f042b1a0d52d587833f7e4 Mon Sep 17 00:00:00 2001 From: shadiendrawis Date: Sun, 10 Nov 2019 17:20:44 +0200 Subject: [PATCH] fix e-greedy in case action values were equal (#423) --- rl_coach/exploration_policies/e_greedy.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rl_coach/exploration_policies/e_greedy.py b/rl_coach/exploration_policies/e_greedy.py index b9cb885..72624e2 100644 --- a/rl_coach/exploration_policies/e_greedy.py +++ b/rl_coach/exploration_policies/e_greedy.py @@ -90,7 +90,8 @@ class EGreedy(ExplorationPolicy): probabilities = np.full(len(self.action_space.actions), 1. / (self.action_space.high[0] - self.action_space.low[0] + 1)) else: - chosen_action = np.argmax(action_values) + chosen_action = np.argmax(np.random.random(action_values.shape) * + (np.isclose(action_values, action_values.max()))) # one-hot probabilities vector probabilities = np.zeros(len(self.action_space.actions))