From 06e6b4ba3e0eecbe88093bce00aa64ab7aff6a60 Mon Sep 17 00:00:00 2001 From: Praveen Kumar Date: Wed, 19 Jun 2024 12:35:02 +0530 Subject: [PATCH] snc-library: Delete the failed pods before check for available one Sometimes pods go into the `ContainerStatusUnknown` state, where they are not able to send their status to the kubelet, and they stay there until manually deleted, which causes our snc script to fail. In this PR we delete the pods which are in the failed state (which is also the state reported for the ContainerStatusUnknown ones) and then check the pods' availability. ``` + sleep 256 + all_pods_are_running_completed none + local ignoreNamespace=none + ./openshift-clients/linux/oc get pod --no-headers --all-namespaces '--field-selector=metadata.namespace!=none' + grep -v Running + grep -v Completed openshift-kube-apiserver installer-11-crc 0/1 ContainerStatusUnknown 1 19m + exit=1 + wait=512 + count=10 + '[' 10 -lt 10 ']' + echo 'Retry 10/10 exited 1, no more retries left.' Retry 10/10 exited 1, no more retries left. ``` fixes: #920 --- snc-library.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/snc-library.sh b/snc-library.sh index 8a3b2f96..e3310ae2 100755 --- a/snc-library.sh +++ b/snc-library.sh @@ -241,8 +241,13 @@ function no_operators_degraded() { ${OC} get co -ojsonpath='{.items[*].status.conditions[?(@.type=="Degraded")].status}' | grep -v True } +function retry_failed_pods() { + ${OC} delete pods --field-selector=status.phase=Failed -A +} + function all_pods_are_running_completed() { local ignoreNamespace=$1 + retry_failed_pods ! ${OC} get pod --no-headers --all-namespaces --field-selector=metadata.namespace!="${ignoreNamespace}" | grep -v Running | grep -v Completed }