mirror of
https://github.com/gryf/coach.git
synced 2025-12-17 11:10:20 +01:00
Distiller's AMC induced changes (#359)
* override episode rewards with the last transition reward
* EWMA normalization filter
* allowing control over when the pre_network filter runs
This commit is contained in:
@@ -88,9 +88,6 @@ class TruncatedNormal(ContinuousActionExplorationPolicy):
|
||||
else:
|
||||
action_values_std = current_noise
|
||||
|
||||
# scale the noise to the action space range
|
||||
action_values_std = current_noise * (self.action_space.high - self.action_space.low)
|
||||
|
||||
# extract the mean values
|
||||
if isinstance(action_values, list):
|
||||
# the action values are expected to be a list with the action mean and optionally the action stdev
|
||||
|
||||
Reference in New Issue
Block a user