network_imporvements branch merge

2026-02-23 02:25:50 +01:00 · 2018-10-02 13:41:46 +03:00
parent 72ea933384
commit 51726a5b80
110 changed files with 1639 additions and 1161 deletions
--- a/rl_coach/agents/policy_optimization_agent.py
+++ b/rl_coach/agents/policy_optimization_agent.py
@@ -93,6 +93,8 @@ class PolicyOptimizationAgent(Agent):

        total_loss = 0
        if num_steps_passed_since_last_update > 0:
+            for network in self.networks.values():
+                network.set_is_training(True)

            # we need to update the returns of the episode until now
            episode.update_returns()
@@ -124,6 +126,9 @@ class PolicyOptimizationAgent(Agent):
                    network.apply_gradients_and_sync_networks()
            self.training_iteration += 1

+            for network in self.networks.values():
+                network.set_is_training(False)
+
            # run additional commands after the training is done
            self.post_training_commands()