diff --git a/rl_coach/agents/agent.py b/rl_coach/agents/agent.py
index a6b5297..7d6bf92 100644
--- a/rl_coach/agents/agent.py
+++ b/rl_coach/agents/agent.py
@@ -609,35 +609,35 @@ class Agent(AgentInterface):
         :return: boolean: True if we should start a training phase
         """

-        should_update = self._should_train_helper()
+        should_update = self._should_update()

-        step_method = self.ap.algorithm.num_consecutive_playing_steps
+        steps = self.ap.algorithm.num_consecutive_playing_steps

         if should_update:
-            if step_method.__class__ == EnvironmentEpisodes:
+            if steps.__class__ == EnvironmentEpisodes:
                 self.last_training_phase_step = self.current_episode
-            if step_method.__class__ == EnvironmentSteps:
+            if steps.__class__ == EnvironmentSteps:
                 self.last_training_phase_step = self.total_steps_counter

         return should_update

-    def _should_train_helper(self):
+    def _should_update(self):
         wait_for_full_episode = self.ap.algorithm.act_for_full_episodes
-        step_method = self.ap.algorithm.num_consecutive_playing_steps
+        steps = self.ap.algorithm.num_consecutive_playing_steps

-        if step_method.__class__ == EnvironmentEpisodes:
-            should_update = (self.current_episode - self.last_training_phase_step) >= step_method.num_steps
+        if steps.__class__ == EnvironmentEpisodes:
+            should_update = (self.current_episode - self.last_training_phase_step) >= steps.num_steps
             should_update = should_update and self.call_memory('length') > 0

-        elif step_method.__class__ == EnvironmentSteps:
-            should_update = (self.total_steps_counter - self.last_training_phase_step) >= step_method.num_steps
+        elif steps.__class__ == EnvironmentSteps:
+            should_update = (self.total_steps_counter - self.last_training_phase_step) >= steps.num_steps
             should_update = should_update and self.call_memory('num_transitions') > 0

             if wait_for_full_episode:
                 should_update = should_update and self.current_episode_buffer.is_complete
         else:
             raise ValueError("The num_consecutive_playing_steps parameter should be either "
-                             "EnvironmentSteps or Episodes. Instead it is {}".format(step_method.__class__))
+                             "EnvironmentSteps or Episodes. Instead it is {}".format(steps.__class__))

         return should_update

diff --git a/rl_coach/agents/dfp_agent.py b/rl_coach/agents/dfp_agent.py
index 604fb5c..bc989bc 100644
--- a/rl_coach/agents/dfp_agent.py
+++ b/rl_coach/agents/dfp_agent.py
@@ -84,7 +84,8 @@ class DFPAlgorithmParameters(AlgorithmParameters):
         """
         :param num_predicted_steps_ahead: (int)
             Number of future steps to predict measurements for. The future steps won't be sequential, but rather jump
-            in multiples of 2. For example, if num_predicted_steps_ahead = 3, then the steps will be: t+1, t+2, t+4
+            in multiples of 2. For example, if num_predicted_steps_ahead = 3, then the steps will be: t+1, t+2, t+4.
+            The predicted steps will be [t + 2**i for i in range(num_predicted_steps_ahead)]
         :param goal_vector: (List[float])
             The goal vector will weight each of the measurements to form an optimization goal. The vector should have
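For reference, here is a minimal standalone sketch (not part of the patch) of the offset rule the new docstring line describes; the variable names and the example timestep are illustrative only.

    # Illustrative only: the DFP prediction offsets grow as powers of two.
    # For num_predicted_steps_ahead = 3 the predicted timesteps are t+1, t+2, t+4.
    num_predicted_steps_ahead = 3
    t = 10  # an arbitrary current timestep, chosen for the example

    offsets = [2 ** i for i in range(num_predicted_steps_ahead)]
    predicted_steps = [t + offset for offset in offsets]

    assert offsets == [1, 2, 4]
    assert predicted_steps == [11, 12, 14]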
diff --git a/rl_coach/architectures/tensorflow_components/general_network.py b/rl_coach/architectures/tensorflow_components/general_network.py
index 5c70707..1a657b3 100644
--- a/rl_coach/architectures/tensorflow_components/general_network.py
+++ b/rl_coach/architectures/tensorflow_components/general_network.py
@@ -125,22 +125,25 @@ class GeneralTensorFlowNetwork(TensorFlowArchitecture):
         super().__init__(agent_parameters, spaces, name, global_network,
                          network_is_local, network_is_trainable)

-        def fill_return_types():
-            ret_dict = {}
-            for cls in get_all_subclasses(PredictionType):
-                ret_dict[cls] = []
-            components = self.input_embedders + [self.middleware] + self.output_heads
-            for component in components:
-                if not hasattr(component, 'return_type'):
-                    raise ValueError("{} has no return_type attribute. This should not happen.")
-                if component.return_type is not None:
-                    ret_dict[component.return_type].append(component)
-
-            return ret_dict
-
-        self.available_return_types = fill_return_types()
+        self.available_return_types = self._available_return_types()
         self.is_training = None

+    def _available_return_types(self):
+        ret_dict = {cls: [] for cls in get_all_subclasses(PredictionType)}
+
+        components = self.input_embedders + [self.middleware] + self.output_heads
+        for component in components:
+            if not hasattr(component, 'return_type'):
+                raise ValueError((
+                    "{} has no return_type attribute. Without this, it is "
+                    "unclear how this component should be used."
+                ).format(component))
+
+            if component.return_type is not None:
+                ret_dict[component.return_type].append(component)
+
+        return ret_dict
+
     def predict_with_prediction_type(self, states: Dict[str, np.ndarray],
                                      prediction_type: PredictionType) -> Dict[str, np.ndarray]:
         """
diff --git a/rl_coach/core_types.py b/rl_coach/core_types.py
index d615b48..726f536 100644
--- a/rl_coach/core_types.py
+++ b/rl_coach/core_types.py
@@ -98,6 +98,10 @@ class Embedding(PredictionType):
     pass


+class Measurements(PredictionType):
+    pass
+
+
 class InputEmbedding(Embedding):
     pass

@@ -126,10 +130,6 @@ class Middleware_LSTM_Embedding(MiddlewareEmbedding):
     pass


-class Measurements(PredictionType):
-    pass
-
-
 PlayingStepsType = Union[EnvironmentSteps, EnvironmentEpisodes, Frames]
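For reference, a toy, self-contained sketch (not part of the patch) of the grouping pattern the new _available_return_types() helper uses: seed a dict with one empty list per prediction type, then bucket each component under its return_type. The classes below are illustrative stand-ins, not Coach's real hierarchy, and the toy uses __subclasses__() where the real code uses the recursive get_all_subclasses().

    # Toy stand-ins for PredictionType subclasses and network components.
    class PredictionType:
        pass

    class QActionStateValue(PredictionType):
        pass

    class Measurements(PredictionType):
        pass

    class Component:
        def __init__(self, name, return_type):
            self.name = name
            self.return_type = return_type  # a PredictionType subclass, or None

    def available_return_types(components):
        # seed every known prediction type with an empty list, so lookups
        # never fail for types that no component produces
        ret_dict = {cls: [] for cls in PredictionType.__subclasses__()}
        for component in components:
            if not hasattr(component, 'return_type'):
                raise ValueError("{} has no return_type attribute.".format(component))
            if component.return_type is not None:
                ret_dict[component.return_type].append(component)
        return ret_dict

    components = [
        Component("q_head", QActionStateValue),
        Component("observation_embedder", None),
        Component("measurements_head", Measurements),
    ]
    grouped = available_return_types(components)
    assert [c.name for c in grouped[QActionStateValue]] == ["q_head"]
    assert [c.name for c in grouped[Measurements]] == ["measurements_head"]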
diff --git a/rl_coach/graph_managers/graph_manager.py b/rl_coach/graph_managers/graph_manager.py
index d13a59b..7c7b9d4 100644
--- a/rl_coach/graph_managers/graph_manager.py
+++ b/rl_coach/graph_managers/graph_manager.py
@@ -346,6 +346,10 @@ class GraphManager(object):

     @contextlib.contextmanager
     def phase_context(self, phase):
+        """
+        Create a context which temporarily sets the phase to the provided phase.
+        The previous phase is restored afterwards.
+        """
         old_phase = self.phase
         self.phase = phase
         yield
@@ -464,8 +468,11 @@ class GraphManager(object):

         count_end = self.current_step_counter + steps
         while self.current_step_counter < count_end:
-            # The actual steps being done on the environment are decided by the agents themselves.
-            # This is just an high-level controller.
+            # The actual number of steps being done on the environment
+            # is decided by the agent, though this inner loop always
+            # takes at least one step in the environment. Depending on
+            # internal counters and parameters, it doesn't always train
+            # or save checkpoints.
             self.act(EnvironmentSteps(1))
             self.train()
             self.occasionally_save_checkpoint()
diff --git a/rl_coach/level_manager.py b/rl_coach/level_manager.py
index 91a7a19..2bbfad2 100644
--- a/rl_coach/level_manager.py
+++ b/rl_coach/level_manager.py
@@ -271,7 +271,7 @@ class LevelManager(EnvironmentInterface):
         [agent.sync() for agent in self.agents.values()]

     def should_train(self) -> bool:
-        return any([agent._should_train_helper() for agent in self.agents.values()])
+        return any([agent._should_update() for agent in self.agents.values()])

     # TODO-remove - this is a temporary flow, used by the trainer worker, duplicated from observe() - need to create
     # an external trainer flow reusing the existing flow and methods [e.g. observe(), step(), act()]
diff --git a/rl_coach/rollout_worker.py b/rl_coach/rollout_worker.py
index 6d53e02..52b6bc4 100644
--- a/rl_coach/rollout_worker.py
+++ b/rl_coach/rollout_worker.py
@@ -76,6 +76,7 @@ def rollout_worker(graph_manager, data_store, num_workers, task_parameters):
     chkpt_state_reader = CheckpointStateReader(checkpoint_dir, checkpoint_state_optional=False)
     last_checkpoint = 0

+    # this worker should play a fraction of the total playing steps per rollout
     act_steps = math.ceil((graph_manager.agent_params.algorithm.num_consecutive_playing_steps.num_steps)/num_workers)
     for i in range(int(graph_manager.improve_steps.num_steps/act_steps)):
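For reference, a small arithmetic sketch (not part of the patch) of how the rollout_worker comment and code split work across workers: each worker plays ceil(num_consecutive_playing_steps / num_workers) environment steps per rollout and runs improve_steps / act_steps rollouts. The numbers below are hypothetical, chosen only to make the rounding visible.

    import math

    # Hypothetical values, for illustration only.
    num_consecutive_playing_steps = 100  # the algorithm parameter shared by all workers
    improve_steps = 10000                # total improvement steps for the run
    num_workers = 8

    # each worker plays its share of the playing steps per rollout (rounded up)
    act_steps = math.ceil(num_consecutive_playing_steps / num_workers)
    # and keeps rolling out until it has covered its share of improve_steps
    num_rollouts = int(improve_steps / act_steps)

    assert act_steps == 13
    assert num_rollouts == 769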