mirror of
https://github.com/gryf/coach.git
synced 2025-12-17 11:10:20 +01:00
Enabling-more-agents-for-Batch-RL-and-cleanup (#258)
Allow the last training batch drawn to be smaller than batch_size; add support for more agents in BatchRL by adding softmax with temperature to the corresponding heads; add a CartPole_QR_DQN preset with a golden test; and apply miscellaneous cleanups.
This commit is contained in:
@@ -207,6 +207,8 @@ class ClippedPPOAgent(ActorCriticAgent):
|
||||
self.networks['main'].online_network.output_heads[1].likelihood_ratio,
|
||||
self.networks['main'].online_network.output_heads[1].clipped_likelihood_ratio]
|
||||
|
||||
# TODO-fixme if batch.size / self.ap.network_wrappers['main'].batch_size is not an integer, we do not train on
|
||||
# some of the data
|
||||
for i in range(int(batch.size / self.ap.network_wrappers['main'].batch_size)):
|
||||
start = i * self.ap.network_wrappers['main'].batch_size
|
||||
end = (i + 1) * self.ap.network_wrappers['main'].batch_size
|
||||
|
||||
Reference in New Issue
Block a user