mirror of
https://github.com/gryf/coach.git
synced 2025-12-17 11:10:20 +01:00
Fix e-greedy to break ties randomly when several action values are equal to the maximum (#423)
This commit is contained in:
@@ -90,7 +90,8 @@ class EGreedy(ExplorationPolicy):
|
|||||||
probabilities = np.full(len(self.action_space.actions),
|
probabilities = np.full(len(self.action_space.actions),
|
||||||
1. / (self.action_space.high[0] - self.action_space.low[0] + 1))
|
1. / (self.action_space.high[0] - self.action_space.low[0] + 1))
|
||||||
else:
|
else:
|
||||||
chosen_action = np.argmax(action_values)
|
chosen_action = np.argmax(np.random.random(action_values.shape) *
|
||||||
|
(np.isclose(action_values, action_values.max())))
|
||||||
|
|
||||||
# one-hot probabilities vector
|
# one-hot probabilities vector
|
||||||
probabilities = np.zeros(len(self.action_space.actions))
|
probabilities = np.zeros(len(self.action_space.actions))
|
||||||
|
|||||||
Reference in New Issue
Block a user