Mirror of https://github.com/gryf/coach.git
Enabling more agents for Batch RL and cleanup (#258)
Allowing the last training batch drawn to be smaller than batch_size; adding support for more agents in Batch RL by adding a softmax with temperature to the corresponding heads; adding a CartPole_QR_DQN preset with a golden test; plus cleanups.
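On the first point, a minimal sketch of the described batching behavior, assuming a simple in-memory list of transitions (illustrative only, not Coach's actual replay-buffer code): the final slice is yielded even when fewer than batch_size samples remain, instead of being dropped.

def iterate_batches(transitions, batch_size):
    # Yield consecutive slices of the dataset; the last batch may be
    # smaller than batch_size rather than being discarded.
    for start in range(0, len(transitions), batch_size):
        yield transitions[start:start + batch_size]

print([len(b) for b in iterate_batches(list(range(10)), batch_size=4)])  # [4, 4, 2]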
@@ -48,15 +48,19 @@ class QHead(Head):
 
     def _build_module(self, input_layer):
         # Standard Q Network
-        self.output = self.dense_layer(self.num_actions)(input_layer, name='output')
+        self.q_values = self.output = self.dense_layer(self.num_actions)(input_layer, name='output')
 
-        # TODO add this to other Q heads. e.g. dueling.
-        temperature = self.ap.network_wrappers[self.network_name].softmax_temperature
-        temperature_scaled_outputs = self.output / temperature
-        self.softmax = tf.nn.softmax(temperature_scaled_outputs, name="softmax")
+        # used in batch-rl to estimate a probability distribution over actions
+        self.softmax = self.add_softmax_with_temperature()
 
     def __str__(self):
         result = [
             "Dense (num outputs = {})".format(self.num_actions)
         ]
         return '\n'.join(result)
+
+    def add_softmax_with_temperature(self):
+        temperature = self.ap.network_wrappers[self.network_name].softmax_temperature
+        temperature_scaled_outputs = self.q_values / temperature
+        return tf.nn.softmax(temperature_scaled_outputs, name="softmax")
+
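The new add_softmax_with_temperature helper divides the Q-values by a configurable temperature before applying a softmax. Below is a minimal, self-contained sketch of that computation in plain NumPy (illustrative, not Coach's TensorFlow head code), showing how the temperature shapes the resulting action distribution.

import numpy as np

def softmax_with_temperature(q_values, temperature):
    # Divide the Q-values by the temperature, then normalize.
    # Subtracting the max before exponentiating is a standard
    # numerical-stability trick; it does not change the result.
    scaled = np.asarray(q_values, dtype=np.float64) / temperature
    scaled -= scaled.max()
    exp = np.exp(scaled)
    return exp / exp.sum()

q = [1.0, 2.0, 3.0]
print(softmax_with_temperature(q, 1.0))   # ~[0.09 0.24 0.67]: sharp, favors the greedy action
print(softmax_with_temperature(q, 10.0))  # ~[0.30 0.33 0.37]: near-uniform

As the in-code comment notes, batch RL uses this softmax to estimate a probability distribution over actions; the temperature then controls how far that estimated policy is from greedy.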