mirror of
https://github.com/gryf/coach.git
synced 2025-12-18 11:40:18 +01:00
Batch RL (#238)
This commit is contained in:
@@ -50,6 +50,11 @@ class QHead(Head):
|
||||
# Standard Q Network
|
||||
self.output = self.dense_layer(self.num_actions)(input_layer, name='output')
|
||||
|
||||
# TODO add this to other Q heads. e.g. dueling.
|
||||
temperature = self.ap.network_wrappers[self.network_name].softmax_temperature
|
||||
temperature_scaled_outputs = self.output / temperature
|
||||
self.softmax = tf.nn.softmax(temperature_scaled_outputs, name="softmax")
|
||||
|
||||
def __str__(self):
|
||||
result = [
|
||||
"Dense (num outputs = {})".format(self.num_actions)
|
||||
|
||||
@@ -42,7 +42,7 @@ class GlobalVariableSaver(Saver):
|
||||
self._variable_placeholders.append(variable_placeholder)
|
||||
self._variable_update_ops.append(v.assign(variable_placeholder))
|
||||
|
||||
self._saver = tf.train.Saver(self._variables)
|
||||
self._saver = tf.train.Saver(self._variables, max_to_keep=None)
|
||||
|
||||
@property
|
||||
def path(self):
|
||||
|
||||
Reference in New Issue
Block a user