cleanup graph_manager:act

2026-02-15 13:35:55 +01:00 · 2018-10-01 17:01:03 -04:00
parent b5305bd075
commit 9f92064e67
1 changed files with 7 additions and 21 deletions
--- a/rl_coach/graph_managers/graph_manager.py
+++ b/rl_coach/graph_managers/graph_manager.py
@@ -336,20 +336,7 @@ class GraphManager(object):
        """
        self.verify_graph_was_created()

-        # perform several steps of training interleaved with acting
-        count_end = self.total_steps_counters[RunPhase.TRAIN][TrainingSteps] + steps.num_steps
-        while self.total_steps_counters[RunPhase.TRAIN][TrainingSteps] < count_end:
-            self.total_steps_counters[RunPhase.TRAIN][TrainingSteps] += 1
-            [manager.train() for manager in self.level_managers]
-
-        # # option 1
-        # for _ in StepsLoop(self.total_steps_counters, RunPhase.TRAIN, steps):
-        #     [manager.train() for manager in self.level_managers]
-        #
-        # # option 2
-        # steps_loop = StepsLoop(self.total_steps_counters, RunPhase.TRAIN, steps)
-        # while steps_loop or other:
-        #     [manager.train() for manager in self.level_managers]
+        [manager.train() for manager in self.level_managers]


    def reset_internal_state(self, force_environment_reset=False) -> None:
@@ -383,21 +370,21 @@ class GraphManager(object):
        # perform several steps of playing
        result = None

-        hold_until_a_full_episode = True if continue_until_game_over else False
        initial_count = self.total_steps_counters[self.phase][steps.__class__]
        count_end = initial_count + steps.num_steps

        # The assumption here is that each of the total_steps_counters is updated when its corresponding event
        #  takes place (e.g. an episode ends)
        # TODO - The counter of frames is not updated correctly. Need to fix that.
-        while self.total_steps_counters[self.phase][steps.__class__] < count_end or hold_until_a_full_episode:
+        while self.total_steps_counters[self.phase][steps.__class__] < count_end or continue_until_game_over:
            # reset the environment if the previous episode was terminated
            if self.reset_required:
                self.reset_internal_state()

-            current_steps = self.environments[0].total_steps_counter
-
+            steps_begin = self.environments[0].total_steps_counter
            result = self.top_level_manager.step(None)
+            steps_end = self.environments[0].total_steps_counter
+
            # result will be None if at least one level_manager decided not to play (i.e. none of its agents played),
            # causing the rest of the level_managers down the stack not to play either, so the entire graph did
            # not act
@@ -408,11 +395,10 @@ class GraphManager(object):
            # (like in Atari) will not be counted.
            # We add at least one step so that even if no steps were made (in case no actions are taken in the training
            # phase), the loop will end eventually.
-            self.total_steps_counters[self.phase][EnvironmentSteps] += \
-                max(1, self.environments[0].total_steps_counter - current_steps)
+            self.total_steps_counters[self.phase][EnvironmentSteps] += max(1, steps_end - steps_begin)

            if result.game_over:
-                hold_until_a_full_episode = False
+                continue_until_game_over = False
                self.handle_episode_ended()
                # TODO: why not just reset right now?
                self.reset_required = True