[RHOAIENG-25148] Addressed PR comments. Added more checks. Fixed teardown failure.

andresllh · andresllh · commit 699fc214204b · 2025-06-04T13:30:24.000-04:00
Signed-off-by: Andres Llausas <allausas@redhat.com>
diff --git a/tests/model_serving/model_server/kueue/test_kueue_isvc_raw.py b/tests/model_serving/model_server/kueue/test_kueue_isvc_raw.py
@@ -10,6 +10,7 @@
 from utilities.exceptions import DeploymentValidationError
 from utilities.constants import RunTimeConfigs, KServeDeploymentType, ModelVersion, Timeout
 from utilities.general import create_isvc_label_selector_str
+from ocp_resources.pod import Pod
 
 
 pytestmark = [
@@ -18,16 +19,17 @@
     pytest.mark.usefixtures("valid_aws_config"),
 ]
 
-local_queue_name = "local-queue-raw"
-cluster_queue_name = "cluster-queue-raw"
-resource_flavor_name = "default-flavor-raw"
-cpu_quota = 2
-memory_quota = "10Gi"
-isvc_resources = {"requests": {"cpu": "1", "memory": "8Gi"}, "limits": {"cpu": cpu_quota, "memory": memory_quota}}
-min_replicas = (
+NAMESPACE_NAME = "kueue-isvc-raw-test"
+LOCAL_QUEUE_NAME = "local-queue-raw"
+CLUSTER_QUEUE_NAME = "cluster-queue-raw"
+RESOURCE_FLAVOR_NAME = "default-flavor-raw"
+CPU_QUOTA = 2
+MEMORY_QUOTA = "10Gi"
+ISVC_RESOURCES = {"requests": {"cpu": "1", "memory": "8Gi"}, "limits": {"cpu": CPU_QUOTA, "memory": MEMORY_QUOTA}}
+MIN_REPLICAS = (
     1  # min_replicas needs to be 1 or you need to change the test to check for the number of available replicas
 )
-max_replicas = 2
+MAX_REPLICAS = 2
 
 
 @pytest.mark.rawdeployment
@@ -36,27 +38,28 @@
     "kueue_cluster_queue_from_template, kueue_resource_flavor_from_template, kueue_local_queue_from_template",
     [
         pytest.param(
-            {"name": "kueue-isvc-raw-test", "add-kueue-label": True},
+            {"name": NAMESPACE_NAME, "add-kueue-label": True},
             RunTimeConfigs.ONNX_OPSET13_RUNTIME_CONFIG,
             {
                 "name": "kueue-isvc-raw",
-                "min-replicas": min_replicas,
-                "max-replicas": max_replicas,
-                "labels": {"kueue.x-k8s.io/queue-name": local_queue_name},
+                "min-replicas": MIN_REPLICAS,
+                "max-replicas": MAX_REPLICAS,
+                "labels": {"kueue.x-k8s.io/queue-name": LOCAL_QUEUE_NAME},
                 "deployment-mode": KServeDeploymentType.RAW_DEPLOYMENT,
                 "model-dir": "test-dir",
                 "model-version": ModelVersion.OPSET13,
-                "resources": isvc_resources,
+                "resources": ISVC_RESOURCES,
             },
             {
-                "name": cluster_queue_name,
-                "resource_flavor_name": resource_flavor_name,
-                "cpu_quota": cpu_quota,
-                "memory_quota": memory_quota,
+                "name": CLUSTER_QUEUE_NAME,
+                "resource_flavor_name": RESOURCE_FLAVOR_NAME,
+                "cpu_quota": CPU_QUOTA,
+                "memory_quota": MEMORY_QUOTA,
+                # TODO(review): confirm {} (all namespaces) is intended; serverless test selects NAMESPACE_NAME only
                 "namespace_selector": {},
             },
-            {"name": resource_flavor_name},
-            {"name": local_queue_name, "cluster_queue": cluster_queue_name},
+            {"name": RESOURCE_FLAVOR_NAME},
+            {"name": LOCAL_QUEUE_NAME, "cluster_queue": CLUSTER_QUEUE_NAME},
         )
     ],
     indirect=True,
@@ -67,8 +70,8 @@ class TestKueueInferenceServiceRaw:
     def test_kueue_inference_service_raw(
         self,
         admin_client,
-        kueue_cluster_queue_from_template,
         kueue_resource_flavor_from_template,
+        kueue_cluster_queue_from_template,
         kueue_local_queue_from_template,
         kueue_raw_inference_service,
         kueue_kserve_serving_runtime,
@@ -89,12 +92,14 @@ def test_kueue_inference_service_raw(
             )
         )
         if len(deployments) != 1:
-            raise DeploymentValidationError("Too many deployments found")
+            deployment_names = [deployment.instance.metadata.name for deployment in deployments]
+            raise DeploymentValidationError(f"Expected 1 deployment, got {len(deployments)}: {deployment_names}")
 
         deployment = deployments[0]
         deployment.wait_for_replicas(deployed=True)
-        if deployment.instance.spec.replicas != 1:
-            raise DeploymentValidationError("Deployment should have 1 replica")
+        replicas = deployment.instance.spec.replicas
+        if replicas != 1:
+            raise DeploymentValidationError(f"Deployment should have 1 replica, got {replicas}")
 
         # Update inference service to request 2 replicas
         isvc_to_update = kueue_raw_inference_service.instance.to_dict()
@@ -113,11 +118,55 @@ def test_kueue_inference_service_raw(
             )
         )
         if len(deployments) != 1:
-            raise DeploymentValidationError("Too many deployments found")
+            deployment_names = [deployment.instance.metadata.name for deployment in deployments]
+            raise DeploymentValidationError(f"Expected 1 deployment, got {len(deployments)}: {deployment_names}")
 
         deployment = deployments[0]
         try:
             deployment.wait_for_replicas(deployed=True, timeout=Timeout.TIMEOUT_30SEC)
-        except TimeoutExpiredError:
-            if deployment.instance.status.availableReplicas != 1:
-                raise DeploymentValidationError("Deployment should have 1 available replica") from None
+        except TimeoutExpiredError as e:
+            available_replicas = deployment.instance.status.availableReplicas
+            if available_replicas != 1:
+                raise DeploymentValidationError(
+                    f"Deployment should have 1 available replica, got {available_replicas}"
+                ) from None
+            # Get pods that match isvc labels and verify their status
+            pods = list(
+                Pod.get(
+                    label_selector=",".join(labels),
+                    namespace=kueue_raw_inference_service.namespace,
+                    dyn_client=admin_client,
+                )
+            )
+
+            if len(pods) != 2:
+                pod_names = [pod.instance.metadata.name for pod in pods]
+                raise DeploymentValidationError(f"Expected 2 pods, got {len(pods)}: {pod_names}") from e
+
+            # Tally pods by state: Running vs. Pending behind a scheduling gate.
+            running_pods = gated_pods = 0
+            for pod in pods:
+                pod_status = pod.instance.status  # one read for both phase and conditions
+                if pod_status.phase == "Running":
+                    running_pods += 1
+                elif pod_status.phase == "Pending" and any(  # a single gated PodScheduled condition suffices
+                    condition.type == "PodScheduled"
+                    and condition.status == "False"
+                    and condition.reason == "SchedulingGated"
+                    for condition in pod_status.conditions or []  # guard against conditions being unset
+                ):
+                    gated_pods += 1
+
+            if running_pods != 1 or gated_pods != 1:
+                raise DeploymentValidationError(
+                    f"Expected 1 Running pod and 1 SchedulingGated pod, "
+                    f"got {running_pods} Running and {gated_pods} SchedulingGated"
+                ) from e
+            # Check InferenceService status for total model copies.
+            # Refresh the isvc instance to get the latest status first.
+            kueue_raw_inference_service.get()
+            isvc = kueue_raw_inference_service.instance
+            if isvc.status.modelStatus.copies.totalCopies != 1:
+                raise DeploymentValidationError(
+                    f"InferenceService should have 1 total model copy, got {isvc.status.modelStatus.copies.totalCopies}"
+                ) from e
diff --git a/tests/model_serving/model_server/kueue/test_kueue_isvc_serverless.py b/tests/model_serving/model_server/kueue/test_kueue_isvc_serverless.py
@@ -5,6 +5,7 @@
 
 import pytest
 from ocp_resources.deployment import Deployment
+from ocp_resources.pod import Pod
 from timeout_sampler import TimeoutExpiredError
 from utilities.exceptions import DeploymentValidationError
 from utilities.constants import RunTimeConfigs, KServeDeploymentType, Timeout
@@ -18,16 +19,17 @@
     pytest.mark.usefixtures("valid_aws_config"),
 ]
 
-local_queue_name = "local-queue-serverless"
-cluster_queue_name = "cluster-queue-serverless"
-resource_flavor_name = "default-flavor-serverless"
-cpu_quota = 2
-memory_quota = "10Gi"
-isvc_resources = {"requests": {"cpu": "1", "memory": "8Gi"}, "limits": {"cpu": cpu_quota, "memory": memory_quota}}
-min_replicas = (
+NAMESPACE_NAME = "kueue-isvc-serverless-test"
+LOCAL_QUEUE_NAME = "local-queue-serverless"
+CLUSTER_QUEUE_NAME = "cluster-queue-serverless"
+RESOURCE_FLAVOR_NAME = "default-flavor-serverless"
+CPU_QUOTA = 2
+MEMORY_QUOTA = "10Gi"
+ISVC_RESOURCES = {"requests": {"cpu": "1", "memory": "8Gi"}, "limits": {"cpu": CPU_QUOTA, "memory": MEMORY_QUOTA}}
+MIN_REPLICAS = (
     1  # min_replicas needs to be 1 or you need to change the test to check for the number of available replicas
 )
-max_replicas = 2
+MAX_REPLICAS = 2
 
 
 @pytest.mark.serverless
@@ -36,26 +38,26 @@
     "kueue_cluster_queue_from_template, kueue_resource_flavor_from_template, kueue_local_queue_from_template",
     [
         pytest.param(
-            {"name": "kueue-isvc-serverless-test", "add-kueue-label": True},
+            {"name": NAMESPACE_NAME, "add-kueue-label": True},
             RunTimeConfigs.ONNX_OPSET13_RUNTIME_CONFIG,
             {
                 **ONNX_SERVERLESS_INFERENCE_SERVICE_CONFIG,
                 "name": "kueue",
-                "min-replicas": min_replicas,
-                "max-replicas": max_replicas,
-                "labels": {"kueue.x-k8s.io/queue-name": local_queue_name},
+                "min-replicas": MIN_REPLICAS,
+                "max-replicas": MAX_REPLICAS,
+                "labels": {"kueue.x-k8s.io/queue-name": LOCAL_QUEUE_NAME},
                 "deployment-mode": KServeDeploymentType.SERVERLESS,
-                "resources": isvc_resources,
+                "resources": ISVC_RESOURCES,
             },
             {
-                "name": cluster_queue_name,
-                "resource_flavor_name": resource_flavor_name,
-                "cpu_quota": cpu_quota,
-                "memory_quota": memory_quota,
-                "namespace_selector": {},
+                "name": CLUSTER_QUEUE_NAME,
+                "resource_flavor_name": RESOURCE_FLAVOR_NAME,
+                "cpu_quota": CPU_QUOTA,
+                "memory_quota": MEMORY_QUOTA,
+                "namespace_selector": {"matchLabels": {"kubernetes.io/metadata.name": NAMESPACE_NAME}},
             },
-            {"name": resource_flavor_name},
-            {"name": local_queue_name, "cluster_queue": cluster_queue_name},
+            {"name": RESOURCE_FLAVOR_NAME},
+            {"name": LOCAL_QUEUE_NAME, "cluster_queue": CLUSTER_QUEUE_NAME},
         )
     ],
     indirect=True,
@@ -66,8 +68,8 @@ class TestKueueInferenceServiceServerless:
     def test_kueue_inference_service_serverless(
         self,
         admin_client,
-        kueue_cluster_queue_from_template,
         kueue_resource_flavor_from_template,
+        kueue_cluster_queue_from_template,
         kueue_local_queue_from_template,
         kueue_kserve_inference_service,
         kueue_kserve_serving_runtime,
@@ -89,12 +91,14 @@ def test_kueue_inference_service_serverless(
             )
         )
         if len(deployments) != 1:
-            raise DeploymentValidationError("Too many deployments found")
+            deployment_names = [deployment.instance.metadata.name for deployment in deployments]
+            raise DeploymentValidationError(f"Expected 1 deployment, got {len(deployments)}: {deployment_names}")
 
         deployment = deployments[0]
         deployment.wait_for_replicas(deployed=True)
-        if deployment.instance.spec.replicas != 1:
-            raise DeploymentValidationError("Deployment should have 1 replica")
+        replicas = deployment.instance.spec.replicas
+        if replicas != 1:
+            raise DeploymentValidationError(f"Deployment should have 1 replica, got {replicas}")
 
         # Update inference service to request 2 replicas
         isvc_to_update = kueue_kserve_inference_service.instance.to_dict()
@@ -119,8 +123,49 @@ def test_kueue_inference_service_serverless(
                 # it means spec.replicas == status.replicas == status.updatedReplicas ==
                 # status.availableReplicas == status.readyReplicas
                 deployment.wait_for_replicas(deployed=True, timeout=Timeout.TIMEOUT_30SEC)
-                total_available_replicas += deployment.instance.spec.replicas
+                total_available_replicas += deployment.instance.status.availableReplicas
         except TimeoutExpiredError:
             pass
         if total_available_replicas != 1:
-            raise DeploymentValidationError("Total available replicas across all deployments should be 1")
+            raise DeploymentValidationError(
+                f"Total available replicas across all deployments should be 1, got {total_available_replicas}"
+            )
+        # Get pods that match isvc labels and verify their status
+        pods = list(
+            Pod.get(
+                label_selector=",".join(labels),
+                namespace=kueue_kserve_inference_service.namespace,
+                dyn_client=admin_client,
+            )
+        )
+
+        if len(pods) != 3:
+            pod_names = [pod.instance.metadata.name for pod in pods]
+            raise DeploymentValidationError(f"Expected 3 pods, got {len(pods)}: {pod_names}")
+
+        # Tally pods by state: Running vs. Pending behind a scheduling gate.
+        running_pods = gated_pods = 0
+        for pod in pods:
+            pod_status = pod.instance.status  # one read for both phase and conditions
+            if pod_status.phase == "Running":
+                running_pods += 1
+            elif pod_status.phase == "Pending" and any(  # a single gated PodScheduled condition suffices
+                condition.type == "PodScheduled"
+                and condition.status == "False"
+                and condition.reason == "SchedulingGated"
+                for condition in pod_status.conditions or []  # guard against conditions being unset
+            ):
+                gated_pods += 1
+
+        if running_pods != 1 or gated_pods != 2:
+            raise DeploymentValidationError(
+                f"Expected 1 Running pod and 2 SchedulingGated pods, "
+                f"got {running_pods} Running and {gated_pods} SchedulingGated"
+            )
+        # Refresh the isvc instance to get the latest status
+        kueue_kserve_inference_service.get()
+        isvc = kueue_kserve_inference_service.instance
+        if isvc.status.modelStatus.copies.totalCopies != 1:
+            raise DeploymentValidationError(
+                f"InferenceService should have 1 total model copy, got {isvc.status.modelStatus.copies.totalCopies}"
+            ) 
diff --git a/utilities/constants.py b/utilities/constants.py
@@ -187,6 +187,11 @@ class Kserve:
     class Nvidia:
         NVIDIA_COM_GPU: str = "nvidia.com/gpu"
 
+    class Kueue:
+        # TODO: Change to kueue.openshift.io/managed once it's working
+        MANAGED: str = "kueue-managed"
+        # MANAGED: str = "kueue.openshift.io/managed"
+
 
 class Timeout:
     TIMEOUT_30SEC: int = 30
diff --git a/utilities/infra.py b/utilities/infra.py
@@ -127,7 +127,7 @@ def create_ns(
         namespace_kwargs["label"][Labels.OpenDataHub.DASHBOARD] = "true"  # type: ignore
 
     if add_kueue_label:
-        namespace_kwargs["label"]["kueue-managed"] = "true"  # type: ignore
+        namespace_kwargs["label"][Labels.Kueue.MANAGED] = "true"  # type: ignore
 
     if unprivileged_client:
         with ProjectRequest(name=name, client=unprivileged_client, teardown=teardown):