load and save function for non-episodic replay buffers + carla improvements + network bug fixes

2026-03-18 15:53:35 +01:00 · 2018-09-06 16:46:57 +03:00
parent d59a700248
commit a9bd1047c4
8 changed files with 50 additions and 18 deletions
--- a/rl_coach/architectures/tensorflow_components/heads/policy_head.py
+++ b/rl_coach/architectures/tensorflow_components/heads/policy_head.py
@@ -49,7 +49,7 @@ class PolicyHead(Head):
        # a scalar weight that penalizes low entropy values to encourage exploration
        if hasattr(agent_parameters.algorithm, 'beta_entropy'):
            # we set the beta value as a tf variable so it can be updated later if needed
-            self.beta = tf.Variable(agent_parameters.algorithm.beta_entropy,
+            self.beta = tf.Variable(float(agent_parameters.algorithm.beta_entropy),
                                    trainable=False, collections=[tf.GraphKeys.LOCAL_VARIABLES])
            self.beta_placeholder = tf.placeholder('float')
            self.set_beta = tf.assign(self.beta, self.beta_placeholder)