mirror of
https://github.com/gryf/coach.git
synced 2026-02-28 21:35:46 +01:00
Enabling-more-agents-for-Batch-RL-and-cleanup (#258)
Allow the last training batch drawn to be smaller than batch_size; add support for more agents in BatchRL by adding softmax with temperature to the corresponding heads; add a CartPole_QR_DQN preset with a golden test; and perform general cleanups.
This commit is contained in:
@@ -15,6 +15,7 @@
|
||||
# limitations under the License.
|
||||
#
|
||||
import ast
|
||||
import math
|
||||
|
||||
import pandas as pd
|
||||
from typing import List, Tuple, Union
|
||||
@@ -163,9 +164,8 @@ class EpisodicExperienceReplay(Memory):
|
||||
shuffled_transition_indices = list(range(self.last_training_set_transition_id))
|
||||
random.shuffle(shuffled_transition_indices)
|
||||
|
||||
# we deliberately drop some of the ending data which is left after dividing to batches of size `size`
|
||||
# for i in range(math.ceil(len(shuffled_transition_indices) / size)):
|
||||
for i in range(int(len(shuffled_transition_indices) / size)):
|
||||
# The last batch drawn will usually be < batch_size (=the size variable)
|
||||
for i in range(math.ceil(len(shuffled_transition_indices) / size)):
|
||||
sample_data = [self.transitions[j] for j in shuffled_transition_indices[i * size: (i + 1) * size]]
|
||||
self.reader_writer_lock.release_writing()
|
||||
|
||||
|
||||
@@ -113,10 +113,6 @@ class ExperienceReplay(Memory):
|
||||
|
||||
yield sample_data
|
||||
|
||||
## usage example
|
||||
# for o in random_seq_generator(list(range(10)), 4):
|
||||
# print(o)
|
||||
|
||||
def _enforce_max_length(self) -> None:
|
||||
"""
|
||||
Make sure that the size of the replay buffer does not pass the maximum size allowed.
|
||||
|
||||
Reference in New Issue
Block a user