1
0
mirror of https://github.com/gryf/coach.git synced 2025-12-17 11:10:20 +01:00

Fix e-greedy action selection when several action values are equal (#423)

This commit is contained in:
shadiendrawis
2019-11-10 17:20:44 +02:00
committed by GitHub
parent 6ca91b9090
commit 188b86369a

View File

@@ -90,7 +90,8 @@ class EGreedy(ExplorationPolicy):
probabilities = np.full(len(self.action_space.actions),
1. / (self.action_space.high[0] - self.action_space.low[0] + 1))
else:
-chosen_action = np.argmax(action_values)
+chosen_action = np.argmax(np.random.random(action_values.shape) *
+(np.isclose(action_values, action_values.max())))
# one-hot probabilities vector
probabilities = np.zeros(len(self.action_space.actions))