From 6ca91b9090ad2718b9e2296759a98a650db920f2 Mon Sep 17 00:00:00 2001
From: shadiendrawis <shadi.endrawis@intel.com>
Date: Sun, 3 Nov 2019 14:42:51 +0200
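Subject: [PATCH] Reset internal state in rollout worker (#421)

Call graph_manager.reset_internal_state(force_environment_reset=True)
inside the rollout worker's phase context, import RunPhase from
rl_coach.core_types, and drop the wait_for_trainer_ready() call at the
start of rollout_worker().

Also lower the CPU limits in the Kubernetes orchestrator container
specs from "40" to "24" and from "8" to "4".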

---
 rl_coach/orchestrators/kubernetes_orchestrator.py | 4 ++--
 rl_coach/rollout_worker.py                        | 3 ++-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/rl_coach/orchestrators/kubernetes_orchestrator.py b/rl_coach/orchestrators/kubernetes_orchestrator.py
index 5d8a8ad..caf6a71 100644
--- a/rl_coach/orchestrators/kubernetes_orchestrator.py
+++ b/rl_coach/orchestrators/kubernetes_orchestrator.py
@@ -208,7 +208,7 @@ class Kubernetes(Deploy):
                 tty=True,
                 resources=k8sclient.V1ResourceRequirements(
                     limits={
-                        "cpu": "40",
+                        "cpu": "24",
                         "memory": "4Gi",
                         "nvidia.com/gpu": "1",
                     }
@@ -322,7 +322,7 @@ class Kubernetes(Deploy):
                 tty=True,
                 resources=k8sclient.V1ResourceRequirements(
                     limits={
-                        "cpu": "8",
+                        "cpu": "4",
                         "memory": "4Gi",
                         # "nvidia.com/gpu": "0",
                     }
diff --git a/rl_coach/rollout_worker.py b/rl_coach/rollout_worker.py
index eb71c65..f9e3a5a 100644
--- a/rl_coach/rollout_worker.py
+++ b/rl_coach/rollout_worker.py
@@ -31,6 +31,7 @@ import os
 from rl_coach.base_parameters import TaskParameters, DistributedCoachSynchronizationType
 from rl_coach.checkpoint import CheckpointStateFile, CheckpointStateReader
 from rl_coach.data_stores.data_store import SyncFiles
+from rl_coach.core_types import RunPhase
 
 
 def wait_for(wait_func, data_store=None, timeout=10):
@@ -71,7 +72,6 @@ def rollout_worker(graph_manager, data_store, num_workers, task_parameters):
     """
     wait for first checkpoint then perform rollouts using the model
     """
-    wait_for_trainer_ready(checkpoint_dir, data_store)
     if (
         graph_manager.agent_params.algorithm.distributed_coach_synchronization_type
         == DistributedCoachSynchronizationType.SYNC
@@ -87,6 +87,7 @@ def rollout_worker(graph_manager, data_store, num_workers, task_parameters):
 
     with graph_manager.phase_context(RunPhase.TRAIN):
         # this worker should play a fraction of the total playing steps per rollout
+        graph_manager.reset_internal_state(force_environment_reset=True)
 
         act_steps = (
             graph_manager.agent_params.algorithm.num_consecutive_playing_steps