mirror of
https://github.com/gryf/coach.git
synced 2026-03-15 14:13:35 +01:00
Enabling-more-agents-for-Batch-RL-and-cleanup (#258)
Allow the last training batch drawn to be smaller than batch_size; add support for more agents in BatchRL by adding softmax with temperature to the corresponding heads; add a CartPole_QR_DQN preset with a golden test; assorted cleanups.
This commit is contained in:
@@ -15,6 +15,7 @@
|
||||
# limitations under the License.
|
||||
#
|
||||
import ast
|
||||
import math
|
||||
|
||||
import pandas as pd
|
||||
from typing import List, Tuple, Union
|
||||
@@ -163,9 +164,8 @@ class EpisodicExperienceReplay(Memory):
|
||||
shuffled_transition_indices = list(range(self.last_training_set_transition_id))
|
||||
random.shuffle(shuffled_transition_indices)
|
||||
|
||||
# we deliberately drop some of the ending data which is left after dividing to batches of size `size`
|
||||
# for i in range(math.ceil(len(shuffled_transition_indices) / size)):
|
||||
for i in range(int(len(shuffled_transition_indices) / size)):
|
||||
# The last batch drawn will usually be < batch_size (=the size variable)
|
||||
for i in range(math.ceil(len(shuffled_transition_indices) / size)):
|
||||
sample_data = [self.transitions[j] for j in shuffled_transition_indices[i * size: (i + 1) * size]]
|
||||
self.reader_writer_lock.release_writing()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user