mirror of
https://github.com/gryf/coach.git
synced 2025-12-17 19:20:19 +01:00
Prevent long-job CI timeouts caused by the lack of EKS token refresh (#183)
* Add additional info when exceptions occur during EKS runs. * Ensure we refresh the k8s config after long calls. The Kubernetes client on EKS uses tokens with a 10-minute time to live, so long jobs fail with unauthorized errors if the tokens are not refreshed.
This commit is contained in:
committed by
Ajay Deshpande
parent
0fa9d8e602
commit
053adf0ca9
@@ -15,12 +15,17 @@ class EKSHandler():
|
|||||||
self.image = image
|
self.image = image
|
||||||
self.cpu = cpu
|
self.cpu = cpu
|
||||||
self.memory = memory
|
self.memory = memory
|
||||||
config.load_kube_config()
|
self.refresh_config()
|
||||||
self.namespace = '{}-{}'.format(test_name, build_num)
|
self.namespace = '{}-{}'.format(test_name, build_num)
|
||||||
self.corev1_api = client.CoreV1Api()
|
|
||||||
self.create_namespace()
|
self.create_namespace()
|
||||||
self.working_dir = working_dir
|
self.working_dir = working_dir
|
||||||
|
|
||||||
|
def refresh_config(self):
|
||||||
|
# on AWS tokens only last 10 minutes so this must periodically be
|
||||||
|
# called to prevent auth related errors
|
||||||
|
config.load_kube_config()
|
||||||
|
self.corev1_api = client.CoreV1Api()
|
||||||
|
|
||||||
def create_namespace(self):
|
def create_namespace(self):
|
||||||
namespace = client.V1Namespace(
|
namespace = client.V1Namespace(
|
||||||
api_version='v1',
|
api_version='v1',
|
||||||
@@ -73,13 +78,17 @@ class EKSHandler():
|
|||||||
_preload_content=False
|
_preload_content=False
|
||||||
):
|
):
|
||||||
print(line.decode('utf-8'), flush=True, end='')
|
print(line.decode('utf-8'), flush=True, end='')
|
||||||
|
# above call blocks for pod lifetime, so we may need to refresh tokens
|
||||||
|
self.refresh_config()
|
||||||
|
|
||||||
except client.rest.ApiException as e:
|
except client.rest.ApiException as e:
|
||||||
|
print("Got exception: {} while reading pod logs".format(e))
|
||||||
pass
|
pass
|
||||||
|
|
||||||
try:
|
try:
|
||||||
pod = self.corev1_api.read_namespaced_pod(self.test_name, self.namespace)
|
pod = self.corev1_api.read_namespaced_pod(self.test_name, self.namespace)
|
||||||
except client.rest.ApiException as e:
|
except client.rest.ApiException as e:
|
||||||
|
print("Got exception: {} while reading pod".format(e))
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if not hasattr(pod, 'status') or not pod.status:
|
if not hasattr(pod, 'status') or not pod.status:
|
||||||
@@ -104,6 +113,7 @@ class EKSHandler():
|
|||||||
try:
|
try:
|
||||||
pod = self.corev1_api.read_namespaced_pod(self.test_name, self.namespace)
|
pod = self.corev1_api.read_namespaced_pod(self.test_name, self.namespace)
|
||||||
except client.rest.ApiException as e:
|
except client.rest.ApiException as e:
|
||||||
|
print("Got exception: {} while reading pod".format(e))
|
||||||
return 1
|
return 1
|
||||||
|
|
||||||
if not hasattr(pod, 'status') or not pod.status:
|
if not hasattr(pod, 'status') or not pod.status:
|
||||||
|
|||||||
Reference in New Issue
Block a user