Mirror of https://github.com/gryf/coach.git, synced 2026-02-17 23:05:51 +01:00
bug-fix for l2_regularization not in use (#230)
* bug-fix for l2_regularization not being used
* remove the unused TF REGULARIZATION_LOSSES collection
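For context before the hunks below: a head's regularization terms accumulate in the Python list self.regularizations, and the surrounding network code presumably sums that list straight into the training loss, so nothing ever reads the tf.GraphKeys.REGULARIZATION_LOSSES collection that the removed add_to_collection calls wrote to. A minimal sketch of that assumed aggregation (total_loss is a hypothetical helper, not Coach's actual API):

    import tensorflow as tf  # TensorFlow 1.x, matching the code in this diff

    def total_loss(head):
        # head.loss is the head's main objective; head.regularizations is the
        # Python list the hunks below keep appending to with `+= [...]`.
        if head.regularizations:
            return head.loss + tf.add_n(head.regularizations)
        return head.loss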
@@ -56,7 +56,6 @@ class ACERPolicyHead(Head):
         if self.beta:
             self.entropy = tf.reduce_mean(self.policy_distribution.entropy())
             self.regularizations += [-tf.multiply(self.beta, self.entropy, name='entropy_regularization')]
-            tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, self.regularizations)
 
         # Truncated importance sampling with bias corrections
         importance_sampling_weight = tf.placeholder(tf.float32, [None, self.num_actions],
@@ -78,8 +78,6 @@ class PolicyHead(Head):
             self.entropy = tf.add_n([tf.reduce_mean(dist.entropy()) for dist in self.policy_distributions])
             self.regularizations += [-tf.multiply(self.beta, self.entropy, name='entropy_regularization')]
-
-            tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, self.regularizations)
 
         # calculate loss
         self.action_log_probs_wrt_policy = \
             tf.add_n([dist.log_prob(action) for dist, action in zip(self.policy_distributions, self.actions)])
@@ -68,9 +68,8 @@ class PPOHead(Head):
         if self.use_kl_regularization:
             # no clipping => use kl regularization
             self.weighted_kl_divergence = tf.multiply(self.kl_coefficient, self.kl_divergence)
-            self.regularizations = self.weighted_kl_divergence + self.high_kl_penalty_coefficient * \
-                                   tf.square(tf.maximum(0.0, self.kl_divergence - self.kl_cutoff))
-            tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, self.regularizations)
+            self.regularizations += [self.weighted_kl_divergence + self.high_kl_penalty_coefficient * \
+                                     tf.square(tf.maximum(0.0, self.kl_divergence - self.kl_cutoff))]
 
         # calculate surrogate loss
         self.advantages = tf.placeholder(tf.float32, [None], name="advantages")
@@ -93,8 +92,7 @@ class PPOHead(Head):
         # add entropy regularization
         if self.beta:
             self.entropy = tf.reduce_mean(self.policy_distribution.entropy())
-            self.regularizations = -tf.multiply(self.beta, self.entropy, name='entropy_regularization')
-            tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, self.regularizations)
+            self.regularizations += [-tf.multiply(self.beta, self.entropy, name='entropy_regularization')]
 
         self.loss = self.surrogate_loss
         tf.losses.add_loss(self.loss)
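The two PPOHead hunks also switch plain assignment to list-append. Assuming the L2 weight-decay term referenced in the commit title is collected into self.regularizations elsewhere, the old assignment silently discarded it, so l2_regularization never reached the loss; appending keeps every term. A tiny illustration with hypothetical numeric stand-ins (not Coach code):

    l2_term = 0.01           # L2 weight decay already collected into the list
    entropy_term = -0.005    # -beta * entropy, as built in the hunks above

    # Old behaviour: assignment throws the list away, and the L2 term with it.
    old_regularizations = entropy_term

    # Fixed behaviour: append, so both terms survive and reach the loss.
    new_regularizations = [l2_term]
    new_regularizations += [entropy_term]
    assert sum(new_regularizations) == l2_term + entropy_term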