1
0
mirror of https://github.com/gryf/coach.git synced 2026-02-28 21:35:46 +01:00

Integrate coach.py params with distributed Coach. (#42)

* Integrate coach.py params with distributed Coach.
* Minor improvements
- Use enums instead of constants.
- Reduce code duplication.
- Ask for the experiment name with a timeout.
This commit is contained in:
Balaji Subramaniam
2018-11-05 09:33:30 -08:00
committed by GitHub
parent 95b4fc6888
commit 7e7006305a
13 changed files with 263 additions and 285 deletions

View File

@@ -53,6 +53,18 @@ class EmbeddingMergerType(Enum):
#Multiply = 3
# DistributedCoachSynchronizationType provides the synchronization type for distributed Coach.
# The default value is None, which means the algorithm or preset cannot be used with distributed Coach.
class DistributedCoachSynchronizationType(Enum):
    """Synchronization modes for distributed Coach.

    SYNC: the trainer waits for all the experiences to be gathered from the
    distributed rollout workers before training a new policy, and the rollout
    workers wait for a new policy before gathering more experiences.

    ASYNC: the trainer does not wait for any set of experiences from the
    distributed rollout workers, and the rollout workers continuously gather
    experiences, loading new policies whenever they become available.
    """

    SYNC = "sync"
    ASYNC = "async"
def iterable_to_items(obj):
if isinstance(obj, dict) or isinstance(obj, OrderedDict) or isinstance(obj, types.MappingProxyType):
items = obj.items()
@@ -154,6 +166,9 @@ class AlgorithmParameters(Parameters):
# intrinsic reward
self.scale_external_reward_by_intrinsic_reward_value = False
# Distributed Coach params
self.distributed_coach_synchronization_type = None
class PresetValidationParameters(Parameters):
def __init__(self):