summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: rope252 <gareth.roper@est.tech> 2021-08-31 21:11:24 +0100
committer: Gareth <Gareth.roper@est.tech> 2021-09-07 16:54:53 +0100
commit: d8ce8e5b3ac7ffdb6b7f77c73f0f53f1ea0f92ec (patch)
tree: f5b07f592d0b69dc4054707a0caa3348a27af6ca
parent: f3d32d5467d57a835c82a519ac92b610d1cd5ded (diff)
Update ready.py script to enable checking container status within a Job (4.0.0)
This new function will check the status of the main container within a Job, waiting for it to finish before sending a call to the quitquitquit API.

Change-Id: If0f1859183ff5239b74e416b97d0464a3e88bc9d
Issue-ID: OOM-2816
Signed-off-by: rope252 <gareth.roper@est.tech>
-rwxr-xr-x ready.py 81
1 files changed, 78 insertions, 3 deletions
diff --git a/ready.py b/ready.py
index 3956049..85c7f8a 100755
--- a/ready.py
+++ b/ready.py
@@ -216,6 +216,40 @@ def is_ready(container_name):
log.error("Exception when calling list_namespaced_pod: %s\n", exc)
return ready
+def service_mesh_job_check(container_name):
+    """
+    Check if a Job's primary container is complete. Used for ensuring the
+    sidecar can be killed after Job completion.
+
+    Every pod in the namespace is listed and each container status whose
+    name matches ``container_name`` is inspected. The container counts as
+    complete as soon as one matching status reports a terminated state
+    with reason 'Completed'.
+
+    Args:
+        container_name (str): the name of the Job's primary container.
+
+    Returns:
+        True if job's container is in the completed state, False otherwise
+        (including when listing the pods raises an ApiException).
+    """
+    complete = False
+    log.info("Checking if %s is complete", container_name)
+    try:
+        response = coreV1Api.list_namespaced_pod(namespace=namespace, watch=False)
+        for item in response.items:
+            # container_statuses can be None, which is non-iterable.
+            if item.status.container_statuses is None:
+                continue
+            for container in item.status.container_statuses:
+                if container.name != container_name:
+                    continue
+                log.info("Container Details %s ", container)
+                log.info("Container Status %s ", container.state.terminated)
+                # No termination record yet: the container has not finished.
+                if container.state.terminated is None:
+                    continue
+                log.info("Container Status Reason %s ", container.state.terminated.reason)
+                if container.state.terminated.reason == 'Completed':
+                    complete = True
+                    log.info("%s is complete", container_name)
+                else:
+                    log.info("%s is NOT complete", container_name)
+    except ApiException as exc:
+        # Name the API call that is actually made above, so the log points
+        # at the right request when it fails.
+        log.error("Exception when calling list_namespaced_pod: %s\n", exc)
+    return complete
def read_name(item):
"""
@@ -245,8 +279,22 @@ def get_deployment_name(replicaset):
deployment_name = read_name(api_response)
return deployment_name
+def quitquitquit_post(apiurl, request_timeout=10):
+    """
+    Send a POST request to the sidecar's quitquitquit endpoint.
+
+    Args:
+        apiurl (str): the URL to send the POST request to.
+        request_timeout (float): seconds to wait for the HTTP call before
+            giving up, so an unresponsive endpoint cannot block forever.
+
+    Returns:
+        True if the response reports success (``response.ok``), False if it
+        reports an error status or the request itself failed.
+    """
+    # The request is the only statement that can raise, so it is the only
+    # one guarded; failures are reported as False rather than escaping.
+    try:
+        response = requests.post(url=apiurl, timeout=request_timeout)
+    except requests.exceptions.RequestException as exc:
+        log.error("quitquitquit call failed with exception: %s", exc)
+        return False
+    if response.ok:
+        log.info("quitquitquit returned True")
+        return True
+    log.info("quitquitquit returned False")
+    return False
DEF_TIMEOUT = 10
+DEF_URL = "http://127.0.0.1:15020/quitquitquit"
DESCRIPTION = "Kubernetes container readiness check utility"
USAGE = "Usage: ready.py [-t <timeout>] -c <container_name> .. | -j <job_name> .. \n" \
"where\n" \
@@ -258,7 +306,7 @@ USAGE = "Usage: ready.py [-t <timeout>] -c <container_name> .. | -j <job_name> .
def main(argv):
"""
- Checks if a container is ready or if a job is finished.
+ Checks if a container is ready, if a job is finished or if the main container of a job has completed.
The check is done according to the name of the container, not the name of
its parent (Job, Deployment, StatefulSet, DaemonSet).
@@ -268,10 +316,14 @@ def main(argv):
# args are a list of container names
container_names = []
job_names = []
+ service_mesh_job_container_names = []
timeout = DEF_TIMEOUT
+ url = DEF_URL
try:
- opts, _args = getopt.getopt(argv, "hj:c:t:", ["container-name=",
+ opts, _args = getopt.getopt(argv, "hj:c:t:s:u:", ["container-name=",
"timeout=",
+ "service-mesh-check=",
+ "url=",
"job-name=",
"help"])
for opt, arg in opts:
@@ -282,13 +334,17 @@ def main(argv):
container_names.append(arg)
elif opt in ("-j", "--job-name"):
job_names.append(arg)
+ elif opt in ("-s", "--service-mesh-check"):
+ service_mesh_job_container_names.append(arg)
+ elif opt in ("-u", "--url"):
+ url = arg
elif opt in ("-t", "--timeout"):
timeout = float(arg)
except (getopt.GetoptError, ValueError) as exc:
print("Error parsing input parameters: {}\n".format(exc))
print(USAGE)
sys.exit(2)
- if container_names.__len__() == 0 and job_names.__len__() == 0:
+ if container_names.__len__() == 0 and job_names.__len__() == 0 and service_mesh_job_container_names.__len__() == 0:
print("Missing required input parameter(s)\n")
print(USAGE)
sys.exit(2)
@@ -321,6 +377,25 @@ def main(argv):
# spread in time potentially parallel execution in multiple
# containers
time.sleep(random.randint(5, 11))
+    # Wait for each service-mesh Job container to complete, then ask the
+    # sidecar to shut down through the quitquitquit API.
+    for service_mesh_job_container_name in service_mesh_job_container_names:
+        # Keep the deadline in its own variable: overwriting `timeout` (in
+        # minutes) with an absolute timestamp would inflate the deadline of
+        # every container processed after the first one.
+        deadline = time.time() + timeout * 60
+        while True:
+            ready = service_mesh_job_check(service_mesh_job_container_name)
+            if ready is True:
+                side_car_killed = quitquitquit_post(url)
+                if side_car_killed is True:
+                    log.info("Side Car Killed through QuitQuitQuit API")
+                else:
+                    log.info("Side Car Failed to be Killed through QuitQuitQuit API")
+                break
+            if time.time() > deadline:
+                # Report the container this loop is waiting on; `job_name`
+                # belongs to a different loop and may not even be defined.
+                log.warning("timed out waiting for '%s' to be ready",
+                            service_mesh_job_container_name)
+                sys.exit(1)
+            else:
+                # spread in time potentially parallel execution in multiple
+                # containers
+                time.sleep(random.randint(5, 11))
if __name__ == "__main__":
main(sys.argv[1:])