Mirror of https://github.com/gryf/coach.git
Enabling more agents for Batch RL and cleanup (#258)
Allowing the last training batch drawn to be smaller than batch_size; adding support for more agents in Batch RL by adding a softmax with temperature to the corresponding heads; adding a CartPole_QR_DQN preset with a golden test; plus cleanups.
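On the first point, a minimal sketch of the described batching behavior, assuming a simple in-memory list of transitions (illustrative only, not Coach's actual replay-buffer code): the final slice is yielded even when fewer than batch_size samples remain, instead of being dropped.

def iterate_batches(transitions, batch_size):
    # Yield consecutive slices of the dataset; the last batch may be
    # smaller than batch_size rather than being discarded.
    for start in range(0, len(transitions), batch_size):
        yield transitions[start:start + batch_size]

print([len(b) for b in iterate_batches(list(range(10)), batch_size=4)])  # [4, 4, 2]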
@@ -48,15 +48,19 @@ class QHead(Head):
 
     def _build_module(self, input_layer):
         # Standard Q Network
-        self.output = self.dense_layer(self.num_actions)(input_layer, name='output')
+        self.q_values = self.output = self.dense_layer(self.num_actions)(input_layer, name='output')
 
-        # TODO add this to other Q heads. e.g. dueling.
-        temperature = self.ap.network_wrappers[self.network_name].softmax_temperature
-        temperature_scaled_outputs = self.output / temperature
-        self.softmax = tf.nn.softmax(temperature_scaled_outputs, name="softmax")
+        # used in batch-rl to estimate a probability distribution over actions
+        self.softmax = self.add_softmax_with_temperature()
 
     def __str__(self):
         result = [
             "Dense (num outputs = {})".format(self.num_actions)
         ]
         return '\n'.join(result)
+
+    def add_softmax_with_temperature(self):
+        temperature = self.ap.network_wrappers[self.network_name].softmax_temperature
+        temperature_scaled_outputs = self.q_values / temperature
+        return tf.nn.softmax(temperature_scaled_outputs, name="softmax")
+
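The new add_softmax_with_temperature helper divides the Q-values by a configurable temperature before applying a softmax. Below is a minimal, self-contained sketch of that computation in plain NumPy (illustrative, not Coach's TensorFlow head code), showing how the temperature shapes the resulting action distribution.

import numpy as np

def softmax_with_temperature(q_values, temperature):
    # Divide the Q-values by the temperature, then normalize.
    # Subtracting the max before exponentiating is a standard
    # numerical-stability trick; it does not change the result.
    scaled = np.asarray(q_values, dtype=np.float64) / temperature
    scaled -= scaled.max()
    exp = np.exp(scaled)
    return exp / exp.sum()

q = [1.0, 2.0, 3.0]
print(softmax_with_temperature(q, 1.0))   # ~[0.09 0.24 0.67]: sharp, favors the greedy action
print(softmax_with_temperature(q, 10.0))  # ~[0.30 0.33 0.37]: near-uniform

As the in-code comment notes, batch RL uses this softmax to estimate a probability distribution over actions; the temperature then controls how far that estimated policy is from greedy.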