@@ -116,9 +116,38 @@ function util::wait_ip_reachable {
116116# Check Pod status in a namespace.
117117function util::check_pods_status {
118118 local kubeconfig=${1:- " " }
119- local namespace=${2:- " hami-system" }
119+ local namespace=${2:- " " }
120+ local retries=${3:- 10}
121+ local interval=${4:- 30}
122+
123+ local attempt=0
120124 local unhealthy_pods
121- unhealthy_pods=$( kubectl get po -n " $namespace " --kubeconfig " $kubeconfig " --no-headers | awk ' !/Running|Succeeded/ {print $1}' )
125+
126+ while (( attempt < retries )) ; do
127+ echo " Checking Pod status (Attempt $(( attempt + 1 )) /$retries )..."
128+
129+ # Collect unhealthy pods in the target namespace (or in all namespaces when none is given), ignoring pods whose status is Running, Succeeded, or Completed.
130+ if [[ -z " $namespace " ]]; then
131+ unhealthy_pods=$( kubectl get po -A --kubeconfig " $kubeconfig " --no-headers --ignore-not-found | awk ' !/Running|Succeeded|Completed/ {print $2}' )
132+ else
133+ unhealthy_pods=$( kubectl get po -n " $namespace " --kubeconfig " $kubeconfig " --no-headers --ignore-not-found | awk ' !/Running|Succeeded|Completed/ {print $1}' )
134+ fi
135+
136+ if [[ -z " $unhealthy_pods " ]]; then
137+ echo " PASS: All Pods are in Running or Succeeded state."
138+ return 0
139+ fi
140+
141+ echo " Found unhealthy pods:"
142+ echo " $unhealthy_pods "
143+
144+ if (( attempt < retries - 1 )) ; then
145+ echo " Retrying pod check in ${interval} s..."
146+ sleep " $interval "
147+ fi
148+
149+ (( attempt++ ))
150+ done
122151
123152 if [[ -n " $unhealthy_pods " ]]; then
124153 echo " Found unhealthy pods in namespace $namespace :"
@@ -134,8 +163,5 @@ function util::check_pods_status {
134163 done
135164
136165 return 1
137- else
138- echo " PASS: All Pods are in Running state."
139- return 0
140166 fi
141- }
167+ }
0 commit comments