Mirror of https://github.com/gryf/coach.git (1 watcher, 0 stars) — synced 2025-12-17 11:10:20 +01:00

Simulating the act on the trainer. (#65)

* Remove the use of daemon threads for Redis subscribe.
* Emulate act and observe on trainer side to update internal vars.
This commit is contained in:
Ajay Deshpande
2018-11-15 08:38:58 -08:00
committed by Balaji Subramaniam
parent fe6857eabd
commit fde73ced13
13 changed files with 221 additions and 55 deletions

View File

@@ -112,6 +112,7 @@ class ClippedPPOAlgorithmParameters(AlgorithmParameters):
self.optimization_epochs = 10
self.normalization_stats = None
self.clipping_decay_schedule = ConstantSchedule(1)
self.act_for_full_episodes = True
class ClippedPPOAgentParameters(AgentParameters):
@@ -294,11 +295,8 @@ class ClippedPPOAgent(ActorCriticAgent):
# clean memory
self.call_memory('clean')
def _should_train_helper(self, wait_for_full_episode=True):
    # Clipped PPO only trains on complete episodes, so the caller-supplied
    # flag is deliberately ignored and `True` is always forwarded to the
    # parent implementation.
    return super()._should_train_helper(True)
def train(self):
if self._should_train(wait_for_full_episode=True):
if self._should_train():
for network in self.networks.values():
network.set_is_training(True)
@@ -334,3 +332,4 @@ class ClippedPPOAgent(ActorCriticAgent):
def choose_action(self, curr_state):
    """Step the clipping-decay schedule, then delegate action selection
    to the parent actor-critic agent."""
    decay_schedule = self.ap.algorithm.clipping_decay_schedule
    decay_schedule.step()
    return super().choose_action(curr_state)