Batch RL (#238)

2026-07-08 10:26:32 +02:00 · 2019-03-19 18:07:09 +02:00
parent 4a8451ff02
commit e3c7e526c7
38 changed files with 1003 additions and 87 deletions
@@ -50,6 +50,11 @@ class QHead(Head):
        # Standard Q Network
        self.output = self.dense_layer(self.num_actions)(input_layer, name='output')

+        # TODO: add this softmax output to the other Q heads, e.g. dueling.
+        temperature = self.ap.network_wrappers[self.network_name].softmax_temperature
+        temperature_scaled_outputs = self.output / temperature
+        self.softmax = tf.nn.softmax(temperature_scaled_outputs, name="softmax")
+
    def __str__(self):
        result = [
            "Dense (num outputs = {})".format(self.num_actions)
@@ -42,7 +42,7 @@ class GlobalVariableSaver(Saver):
            self._variable_placeholders.append(variable_placeholder)
            self._variable_update_ops.append(v.assign(variable_placeholder))

-        self._saver = tf.train.Saver(self._variables)
+        self._saver = tf.train.Saver(self._variables, max_to_keep=None)

    @property
    def path(self):