Mirror of https://github.com/gryf/coach.git (synced 2025-12-17 11:10:20 +01:00)
Simulating the act on the trainer. (#65)

* Remove the use of daemon threads for Redis subscribe.
* Emulate act and observe on trainer side to update internal vars.
committed by Balaji Subramaniam
parent fe6857eabd
commit fde73ced13
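The first change listed in the commit message (dropping daemon threads for the Redis subscribe) is not shown in the diff below. The following is a minimal sketch of the idea, assuming the redis-py client; the channel name, handler, and shutdown logic are illustrative only and are not the actual coach code:

# Sketch only: subscribe to Redis from a regular (non-daemon) thread that can
# be signalled to stop and joined on shutdown, rather than a daemon thread
# that is killed abruptly when the process exits.
import threading

import redis


def handle(payload):
    # Hypothetical message handler.
    print('received', payload)


def subscribe_loop(stop_event):
    client = redis.Redis(host='localhost', port=6379)
    pubsub = client.pubsub()
    pubsub.subscribe('policy_updates')  # hypothetical channel name
    while not stop_event.is_set():
        message = pubsub.get_message(timeout=1.0)
        if message and message['type'] == 'message':
            handle(message['data'])
    pubsub.close()


stop_event = threading.Event()
worker = threading.Thread(target=subscribe_loop, args=(stop_event,), daemon=False)
worker.start()
# ... later, on shutdown:
stop_event.set()
worker.join()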
@@ -121,6 +121,7 @@ class PPOAlgorithmParameters(AlgorithmParameters):
         self.use_kl_regularization = True
         self.beta_entropy = 0.01
         self.num_consecutive_playing_steps = EnvironmentSteps(5000)
+        self.act_for_full_episodes = True


 class PPOAgentParameters(AgentParameters):
@@ -354,12 +355,9 @@ class PPOAgent(ActorCriticAgent):
         # clean memory
         self.call_memory('clean')

-    def _should_train_helper(self, wait_for_full_episode=True):
-        return super()._should_train_helper(True)
-
     def train(self):
         loss = 0
-        if self._should_train(wait_for_full_episode=True):
+        if self._should_train():
             for network in self.networks.values():
                 network.set_is_training(True)

@@ -391,3 +389,4 @@ class PPOAgent(ActorCriticAgent):
     def get_prediction(self, states):
         tf_input_state = self.prepare_batch_for_inference(states, "actor")
         return self.networks['actor'].online_network.predict(tf_input_state)
+
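The second change ("emulate act and observe on trainer side to update internal vars") is likewise only partly visible in these hunks. The sketch below is a hypothetical illustration of the general idea, in which the trainer replays transitions received from rollout workers to keep its own step and episode counters in sync instead of stepping a real environment; none of these names come from the coach code base:

# Hypothetical illustration (not coach's actual API): the trainer replays
# transitions produced by rollout workers through an act/observe-like path so
# that its internal counters stay consistent with what the workers did.
from dataclasses import dataclass


@dataclass
class Transition:
    state: object
    action: int
    reward: float
    done: bool


class TrainerSideAgent:
    def __init__(self):
        self.total_steps = 0
        self.current_episode = 0

    def emulate_act_and_observe(self, transition):
        # Touch the same counters a real act()/observe() call would update.
        self.total_steps += 1
        if transition.done:
            self.current_episode += 1


# Usage: consume transitions pulled from the shared memory backend (e.g. Redis)
# and replay them on the trainer.
agent = TrainerSideAgent()
for t in (Transition(None, 0, 1.0, False), Transition(None, 1, 0.5, True)):
    agent.emulate_act_and_observe(t)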