
Simulating the act on the trainer. (#65)

* Remove the use of daemon threads for Redis subscribe.
* Emulate act and observe on trainer side to update internal vars.
Author: Ajay Deshpande
Date: 2018-11-15 08:38:58 -08:00
Committed by: Balaji Subramaniam
parent fe6857eabd
commit fde73ced13
13 changed files with 221 additions and 55 deletions
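
The intent of the change, read from the trainer's side: instead of stepping a live environment, the training worker replays transitions received from rollout workers, so the agent's internal bookkeeping (episode buffers, step counters, phase) stays in sync with what the rollout workers actually did. A minimal sketch of that loop, assuming a hypothetical memory_backend.fetch_transitions() helper and using end_episode()/train() as stand-ins for the agent's real episode-end and training hooks; none of these names are taken from this commit:

def trainer_loop(agent, memory_backend, training_iterations):
    for _ in range(training_iterations):
        # Transitions are produced by rollout workers and delivered through a
        # memory backend (e.g. Redis), not by stepping an environment locally.
        for transition in memory_backend.fetch_transitions():
            # Replay the rollout worker's step so the trainer-side agent updates
            # its internal state as if it had acted and observed itself.
            agent.emulate_act_on_trainer(transition)
            episode_done = agent.emulate_observe_on_trainer(transition)
            if episode_done:
                agent.end_episode()
        agent.train()

Returning the done signal from emulate_observe_on_trainer keeps the trainer's notion of episode boundaries aligned with what the rollout workers saw.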


@@ -18,7 +18,7 @@ from typing import Union, List, Dict
 import numpy as np
-from rl_coach.core_types import EnvResponse, ActionInfo, RunPhase, PredictionType, ActionType
+from rl_coach.core_types import EnvResponse, ActionInfo, RunPhase, PredictionType, ActionType, Transition
 class AgentInterface(object):
@@ -123,3 +123,33 @@ class AgentInterface(object):
         :return: None
         """
         raise NotImplementedError("")
+
+    # TODO-remove - this is a temporary flow, used by the trainer worker, duplicated from observe() - need to create
+    # an external trainer flow reusing the existing flow and methods [e.g. observe(), step(), act()]
+    def emulate_observe_on_trainer(self, transition: Transition) -> bool:
+        """
+        Emulates observe() on the training worker, in the case of distributed training, using a transition
+        obtained from a rollout worker instead of a response from the environment.
+        Processes this information for later use. For example, store the transition in memory.
+
+        :param transition: a Transition obtained from the rollout worker
+        :return: a done signal based on the agent's knowledge. This can differ from the done signal returned
+                 by the environment. For example, an agent can decide to finish the episode each time it gets
+                 some intrinsic reward
+        """
+        raise NotImplementedError("")
+
+    # TODO-remove - this is a temporary flow, used by the trainer worker, duplicated from act() - need to create
+    # an external trainer flow reusing the existing flow and methods [e.g. observe(), step(), act()]
+    def emulate_act_on_trainer(self, transition: Transition) -> ActionInfo:
+        """
+        Emulates act() on the training worker, in the case of distributed training, using a transition
+        obtained from a rollout worker instead of deciding on a new action locally.
+        The action is dependent on the current state which the agent holds from resetting the environment or
+        from the observe function.
+
+        :param transition: a Transition obtained from the rollout worker
+        :return: an ActionInfo containing the action and any additional info about the action decision process
+        """
+        raise NotImplementedError("")
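
For a concrete agent, the two new hooks would typically mirror the bookkeeping that observe() and act() perform in the regular single-worker flow. A minimal sketch of one possible implementation, assuming hypothetical internal fields (current_episode_buffer, total_steps_counter) and an assumed import path for AgentInterface; the actual Agent implementation touched by this commit may track different state:

from rl_coach.core_types import ActionInfo, Transition
from rl_coach.agents.agent_interface import AgentInterface  # import path assumed

class SketchAgent(AgentInterface):
    def __init__(self):
        super().__init__()
        self.current_episode_buffer = []   # hypothetical per-episode storage
        self.total_steps_counter = 0       # hypothetical step counter

    def emulate_observe_on_trainer(self, transition: Transition) -> bool:
        # Store the rollout worker's transition and advance the counters,
        # mirroring what observe() would have done if the trainer had stepped
        # the environment itself.
        self.current_episode_buffer.append(transition)
        self.total_steps_counter += 1
        return transition.game_over

    def emulate_act_on_trainer(self, transition: Transition) -> ActionInfo:
        # No action is actually chosen here; the action recorded by the rollout
        # worker is wrapped so downstream bookkeeping sees a normal ActionInfo.
        return ActionInfo(action=transition.action)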