opendatahub-io · dbasunag · May 20, 2025 · Apr 25, 2025 · Apr 25, 2025 · Apr 28, 2025
@@ -1,5 +1,4 @@
 import pytest
-from ocp_resources.deployment import Deployment
 
 from tests.model_serving.model_server.serverless.utils import verify_no_inference_pods
 from tests.model_serving.model_server.utils import verify_inference_response
@@ -10,9 +9,9 @@
     Protocols,
     RunTimeConfigs,
 )
-from utilities.exceptions import DeploymentValidationError
 from utilities.inference_utils import Inference
 from utilities.manifests.onnx import ONNX_INFERENCE_CONFIG
+from utilities.infra import wait_for_inference_deployment_replicas
 
 pytestmark = [
     pytest.mark.serverless,
@@ -95,14 +94,9 @@ def test_no_serverless_pods_when_no_traffic(self, unprivileged_client, inference
     @pytest.mark.order(5)
     def test_serverless_pods_after_scale_to_one_replica(self, unprivileged_client, inference_service_patched_replicas):
         """Verify pod is running after scaling to 1 replica"""
-        for deployment in Deployment.get(
+        wait_for_inference_deployment_replicas(
             client=unprivileged_client,
-            namespace=inference_service_patched_replicas.namespace,
-        ):
-            if deployment.labels["serving.knative.dev/configurationGeneration"] == "3":
-                deployment.wait_for_replicas()
-                return
-
-        raise DeploymentValidationError(
-            f"Inference Service {inference_service_patched_replicas.name} new deployment not found"
+            isvc=inference_service_patched_replicas,
+            expected_num_deployments=1,
+            labels="serving.knative.dev/configurationGeneration=3",
         )
@@ -53,8 +53,9 @@
     FailedPodsError,
     ResourceNotReadyError,
 )
-from timeout_sampler import TimeoutExpiredError, TimeoutSampler, retry
+from timeout_sampler import TimeoutExpiredError, TimeoutSampler, TimeoutWatch, retry
 import utilities.general
+from ocp_resources.utils.constants import DEFAULT_CLUSTER_RETRY_EXCEPTIONS
 
 LOGGER = get_logger(name=__name__)
 
@@ -151,13 +152,14 @@ def create_ns(
                 wait_for_serverless_pods_deletion(resource=ns, admin_client=client)
 
 
-def wait_for_replicas_in_deployment(deployment: Deployment, replicas: int) -> None:
+def wait_for_replicas_in_deployment(deployment: Deployment, replicas: int, timeout: int = Timeout.TIMEOUT_2MIN) -> None:
     """
     Wait for replicas in deployment to updated in spec.
 
     Args:
         deployment (Deployment): Deployment object
         replicas (int): number of replicas to be set in spec.replicas
+        timeout (int): Time to wait for the model deployment.
 
     Raises:
         TimeoutExpiredError: If replicas are not updated in spec.
@@ -167,7 +169,7 @@ def wait_for_replicas_in_deployment(deployment: Deployment, replicas: int) -> No
 
     try:
         for sample in TimeoutSampler(
-            wait_timeout=Timeout.TIMEOUT_2MIN,
+            wait_timeout=timeout,
             sleep=5,
             func=lambda: deployment.instance,
         ):
@@ -186,6 +188,8 @@ def wait_for_inference_deployment_replicas(
     isvc: InferenceService,
     runtime_name: str | None = None,
     expected_num_deployments: int = 1,
+    labels: str = "",
+    deployed: bool = True,
     timeout: int = Timeout.TIMEOUT_5MIN,
 ) -> list[Deployment]:
     """
@@ -196,49 +200,75 @@ def wait_for_inference_deployment_replicas(
         isvc (InferenceService): InferenceService object
         runtime_name (str): ServingRuntime name.
         expected_num_deployments (int): Expected number of deployments per InferenceService.
+        labels (str): Comma-separated list of labels, in key=value format, used to filter deployments.
+        deployed (bool): True for replicas deployed, False for no replicas.
         timeout (int): Time to wait for the model deployment.
 
     Returns:
         list[Deployment]: List of Deployment objects for InferenceService.
 
+    Raises:
+        TimeoutExpiredError: If an exception is raised when retrieving deployments or
+                             timeout expires when checking replicas.
+        ResourceNotUniqueError: If a greater number of deployments exist than expected after timeout.
+        ResourceNotFoundError: If fewer deployments exist than expected after timeout.
+
     """
+    timeout_watcher = TimeoutWatch(timeout=timeout)
     ns = isvc.namespace
     label_selector = utilities.general.create_isvc_label_selector_str(
         isvc=isvc, resource_type="deployment", runtime_name=runtime_name
     )
+    if labels:
+        label_selector += f",{labels}"
 
-    deployments = list(
-        Deployment.get(
+    deployment_list = []
+    try:
+        for deployments in TimeoutSampler(
+            wait_timeout=timeout_watcher.remaining_time(),
+            sleep=5,
+            exceptions_dict=DEFAULT_CLUSTER_RETRY_EXCEPTIONS,
+            func=Deployment.get,
             label_selector=label_selector,
             client=client,
-            namespace=isvc.namespace,
-        )
-    )
+            namespace=ns,
+        ):
+            deployment_list = list(deployments)
+            if len(deployment_list) == expected_num_deployments:
+                break
+    except TimeoutExpiredError as e:
+        # If the last exception raised prior to the timeout expiring is None, this means that
+        # the deployments were successfully retrieved, but the expected number was not found.
+        if e.last_exp is None:
+            if len(deployment_list) > expected_num_deployments:
+                raise ResourceNotUniqueError(
+                    f"Too many predictor deployments found in namespace {ns} after timeout. "
+                    f"Expected {expected_num_deployments}, but found {len(deployment_list)}."
+                )
+            raise ResourceNotFoundError(
+                f"Predictor deployment(s) not found in namespace {ns} after timeout. "
+                f"Expected {expected_num_deployments}, but found {len(deployment_list)}."
+            )
+        raise
 
     LOGGER.info("Waiting for inference deployment replicas to complete")
-    if len(deployments) == expected_num_deployments:
-        for deployment in deployments:
-            if deployment.exists:
-                # Raw deployment: if min replicas is more than 1, wait for min replicas
-                # to be set in deployment spec by HPA
-                if (
-                    isvc.instance.metadata.annotations.get("serving.kserve.io/deploymentMode")
-                    == KServeDeploymentType.RAW_DEPLOYMENT
-                ):
-                    wait_for_replicas_in_deployment(
-                        deployment=deployments[0],
-                        replicas=isvc.instance.spec.predictor.get("minReplicas", 1),
-                    )
-
-                deployment.wait_for_replicas(timeout=timeout)
-
-        return deployments
+    for deployment in deployment_list:
+        if deployment.exists:
+            # Raw deployment: if min replicas is more than 1, wait for min replicas
+            # to be set in deployment spec by HPA
+            if (
+                isvc.instance.metadata.annotations.get("serving.kserve.io/deploymentMode")
+                == KServeDeploymentType.RAW_DEPLOYMENT
+            ):
+                wait_for_replicas_in_deployment(
+                    deployment=deployment,
+                    replicas=isvc.instance.spec.predictor.get("minReplicas", 1),
+                    timeout=timeout_watcher.remaining_time(),
+                )
 
-    elif len(deployments) > expected_num_deployments:
-        raise ResourceNotUniqueError(f"Multiple predictor deployments found in namespace {ns}")
+            deployment.wait_for_replicas(deployed=deployed, timeout=timeout_watcher.remaining_time())
 
-    else:
-        raise ResourceNotFoundError(f"Predictor deployment not found in namespace {ns}")
+    return deployment_list
 
 
 @contextmanager