2828from ocp_resources .pod import Pod
2929from ocp_resources .project_project_openshift_io import Project
3030from ocp_resources .project_request import ProjectRequest
31- from ocp_resources .resource import ResourceEditor , get_client
31+ from ocp_resources .resource import Resource , ResourceEditor , get_client
3232from ocp_resources .role import Role
3333from ocp_resources .route import Route
3434from ocp_resources .secret import Secret
@@ -640,6 +640,16 @@ def verify_no_failed_pods(
640640 ready_pods = 0
641641 failed_pods : dict [str , Any ] = {}
642642
643+ container_wait_base_errors = ["InvalidImageName" ]
644+ container_terminated_base_errors = [Resource .Status .ERROR ]
645+
646+ # For Model Mesh, if image pulling takes a long time, the pod may enter CrashLoopBackOff but recover after retries, so CrashLoopBackOff is only treated as an error for other deployment modes.
647+ if (
648+ deployment_mode := isvc .instance .metadata .annotations .get ("serving.kserve.io/deploymentMode" )
649+ ) and deployment_mode != KServeDeploymentType .MODEL_MESH :
650+ container_wait_base_errors .append (Resource .Status .CRASH_LOOPBACK_OFF )
651+ container_terminated_base_errors .append (Resource .Status .CRASH_LOOPBACK_OFF )
652+
643653 if pods :
644654 for pod in pods :
645655 for condition in pod .instance .status .conditions :
@@ -658,17 +668,11 @@ def verify_no_failed_pods(
658668 ):
659669 is_waiting_pull_back_off = (
660670 wait_state := container_status .state .waiting
661- ) and wait_state .reason in (
662- pod .Status .CRASH_LOOPBACK_OFF ,
663- "InvalidImageName" ,
664- )
671+ ) and wait_state .reason in container_wait_base_errors
665672
666673 is_terminated_error = (
667674 terminate_state := container_status .state .terminated
668- ) and terminate_state .reason in (
669- pod .Status .ERROR ,
670- pod .Status .CRASH_LOOPBACK_OFF ,
671- )
675+ ) and terminate_state .reason in container_terminated_base_errors
672676
673677 if is_waiting_pull_back_off or is_terminated_error :
674678 failed_pods [pod .name ] = pod_status
0 commit comments