mirror of
https://github.com/gryf/coach.git
synced 2026-03-30 08:33:33 +02:00
Distiller's AMC induced changes (#359)
* override episode rewards with the last transition reward
* EWMA normalization filter
* allow control over when the pre_network filter runs
This commit is contained in:
@@ -88,9 +88,6 @@ class TruncatedNormal(ContinuousActionExplorationPolicy):
|
||||
else:
|
||||
action_values_std = current_noise
|
||||
|
||||
# scale the noise to the action space range
|
||||
action_values_std = current_noise * (self.action_space.high - self.action_space.low)
|
||||
|
||||
# extract the mean values
|
||||
if isinstance(action_values, list):
|
||||
# the action values are expected to be a list with the action mean and optionally the action stdev
|
||||
|
||||
Reference in New Issue
Block a user