Skip to content

Commit c6503d9

Browse files
committed
Add basic InferenceGraph deployment check
This adds a test that deploys an InferenceGraph (IG), sends an inference request to the IG, and verifies that the request succeeds. The deployed InferenceGraph is based on the example in the KServe documentation available at the following URL: https://kserve.github.io/website/0.15/modelserving/inference_graph/image_pipeline/. The example was adapted to run on OpenVINO (which is a supported server in ODH), rather than TorchServe.
1 parent ac5a130 commit c6503d9

File tree

6 files changed

+21670
-37
lines changed

6 files changed

+21670
-37
lines changed
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
import time
2+
from typing import Generator, Any
3+
4+
import pytest
5+
from _pytest.fixtures import FixtureRequest
6+
from kubernetes.dynamic import DynamicClient
7+
from ocp_resources.inference_graph import InferenceGraph
8+
from ocp_resources.inference_service import InferenceService
9+
from ocp_resources.namespace import Namespace
10+
from ocp_resources.serving_runtime import ServingRuntime
11+
12+
from utilities.constants import ModelFormat, KServeDeploymentType
13+
from utilities.inference_utils import create_isvc
14+
15+
16+
@pytest.fixture
def dog_breed_inference_graph(
    admin_client: DynamicClient,
    model_namespace: Namespace,
    dog_cat_inference_service: InferenceService,
    dog_breed_inference_service: InferenceService,
) -> Generator[InferenceGraph, Any, Any]:
    """Deploy a two-step Sequence InferenceGraph and yield it once ready.

    The graph first routes requests to the dog/cat classifier; if the
    condition on the classifier output matches, the request payload is
    forwarded to the dog-breed classifier.

    Yields:
        InferenceGraph: the deployed graph, with its READY condition True.
    """
    classifier_step = {
        "name": "dog-cat-classifier",
        "serviceName": dog_cat_inference_service.name,
    }
    breed_step = {
        "name": "dog-breed-classifier",
        "serviceName": dog_breed_inference_service.name,
        "data": "$request",
        "condition": "[@this].#(outputs.0.data.1>=0)",
    }
    graph_nodes = {
        "root": {
            "routerType": "Sequence",
            "steps": [classifier_step, breed_step],
        }
    }
    with InferenceGraph(
        client=admin_client,
        name="dog-breed-pipeline",
        namespace=model_namespace.name,
        nodes=graph_nodes,
    ) as inference_graph:
        inference_graph.wait_for_condition(inference_graph.Condition.READY, "True")
        # Give OpenShift a moment to fully enable the Route, if the InferenceGraph is exposed.
        time.sleep(5)
        yield inference_graph
41+
42+
43+
@pytest.fixture
def dog_cat_inference_service(
    admin_client: DynamicClient,
    model_namespace: Namespace,
    ovms_kserve_serving_runtime: ServingRuntime,
) -> Generator[InferenceService, Any, Any]:
    """Deploy a serverless ONNX dog/cat classifier InferenceService.

    Fix vs. original: the name was an f-string with no placeholders (F541),
    and the injected ``request`` fixture was never used, so both are removed.

    Yields:
        InferenceService: the deployed dog-cat-classifier service.
    """
    with create_isvc(
        client=admin_client,
        name="dog-cat-classifier",
        namespace=model_namespace.name,
        runtime=ovms_kserve_serving_runtime.name,
        storage_uri="oci://quay.io/edgarhz/oci-model-images:dog-cat-classifier-202504051400",
        model_format=ModelFormat.ONNX,
        deployment_mode=KServeDeploymentType.SERVERLESS,
        protocol_version="v2",
    ) as isvc:
        yield isvc
61+
62+
63+
@pytest.fixture
def dog_breed_inference_service(
    admin_client: DynamicClient,
    model_namespace: Namespace,
    ovms_kserve_serving_runtime: ServingRuntime,
) -> Generator[InferenceService, Any, Any]:
    """Deploy a serverless ONNX dog-breed classifier InferenceService.

    Fix vs. original: the name was an f-string with no placeholders (F541),
    and the injected ``request`` fixture was never used, so both are removed.

    Yields:
        InferenceService: the deployed dog-breed-classifier service.
    """
    with create_isvc(
        client=admin_client,
        name="dog-breed-classifier",
        namespace=model_namespace.name,
        runtime=ovms_kserve_serving_runtime.name,
        storage_uri="oci://quay.io/edgarhz/oci-model-images:dog-breed-classifier-202504051400",
        model_format=ModelFormat.ONNX,
        deployment_mode=KServeDeploymentType.SERVERLESS,
        protocol_version="v2",
    ) as isvc:
        yield isvc
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
import pytest
2+
3+
from tests.model_serving.model_server.inference_graph.conftest import dog_breed_inference_graph
4+
from tests.model_serving.model_server.utils import verify_inference_response
5+
from utilities.inference_utils import Inference
6+
from utilities.constants import ModelInferenceRuntime, Protocols
7+
from utilities.manifests.onnx import ONNX_INFERENCE_CONFIG
8+
9+
@pytest.mark.parametrize(
    "model_namespace,ovms_kserve_serving_runtime",
    [
        pytest.param(
            {"name": "kserve-inference-graph-deploy"},
            {"runtime-name": ModelInferenceRuntime.ONNX_RUNTIME},
        )
    ],
    indirect=True,
)
class TestInferenceGraphDeployment:
    """Basic InferenceGraph deployment check.

    Deploys the dog-breed InferenceGraph (via its fixture) and verifies that
    an inference request against the graph endpoint succeeds.
    """

    def test_inference_graph_deployment(self, dog_breed_inference_graph):
        """Send a default ONNX inference query to the graph over HTTPS and
        verify the response."""
        verify_inference_response(
            inference_service=dog_breed_inference_graph,
            inference_config=ONNX_INFERENCE_CONFIG,
            inference_type=Inference.GRAPH,
            protocol=Protocols.HTTPS,
            use_default_query=True,
        )

tests/model_serving/model_server/utils.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from string import Template
77
from typing import Any, Optional
88

9+
from ocp_resources.inference_graph import InferenceGraph
910
from ocp_resources.inference_service import InferenceService
1011
from simple_logger.logger import get_logger
1112

@@ -19,7 +20,7 @@
1920

2021

2122
def verify_inference_response(
22-
inference_service: InferenceService,
23+
inference_service: InferenceService | InferenceGraph,
2324
inference_config: dict[str, Any],
2425
inference_type: str,
2526
protocol: str,

utilities/inference_utils.py

Lines changed: 19 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from urllib.parse import urlparse
1111

1212
from kubernetes.dynamic import DynamicClient
13+
from ocp_resources.inference_graph import InferenceGraph
1314
from ocp_resources.inference_service import InferenceService
1415
from ocp_resources.resource import get_client
1516
from ocp_resources.service import Service
@@ -46,15 +47,17 @@ class Inference:
4647
STREAMING: str = "streaming"
4748
INFER: str = "infer"
4849
MNIST: str = f"infer-{ModelName.MNIST}"
50+
GRAPH: str = "graph"
4951

50-
def __init__(self, inference_service: InferenceService):
52+
def __init__(self, inference_service: InferenceService | InferenceGraph):
5153
"""
5254
Args:
5355
inference_service: InferenceService object
5456
"""
5557
self.inference_service = inference_service
5658
self.deployment_mode = self.get_deployment_type()
57-
self.runtime = get_inference_serving_runtime(isvc=self.inference_service)
59+
if isinstance(self.inference_service, InferenceService):
60+
self.runtime = get_inference_serving_runtime(isvc=self.inference_service)
5861
self.visibility_exposed = self.is_service_exposed()
5962

6063
self.inference_url = self.get_inference_url()
@@ -71,7 +74,10 @@ def get_deployment_type(self) -> str:
7174
):
7275
return deployment_type
7376

74-
return self.inference_service.instance.status.deploymentMode
77+
if isinstance(self.inference_service, InferenceService):
78+
return self.inference_service.instance.status.deploymentMode
79+
80+
return KServeDeploymentType.SERVERLESS
7581

7682
def get_inference_url(self) -> str:
7783
"""
@@ -85,20 +91,13 @@ def get_inference_url(self) -> str:
8591
8692
"""
8793
if self.visibility_exposed:
88-
if self.deployment_mode == KServeDeploymentType.SERVERLESS and (
89-
url := self.inference_service.instance.status.components.predictor.url
90-
):
91-
return urlparse(url=url).netloc
92-
93-
elif self.deployment_mode == KServeDeploymentType.RAW_DEPLOYMENT and (
94-
url := self.inference_service.instance.status.url
95-
):
96-
return urlparse(url=url).netloc
97-
98-
elif self.deployment_mode == KServeDeploymentType.MODEL_MESH:
94+
if self.deployment_mode == KServeDeploymentType.MODEL_MESH:
9995
route = get_model_route(client=self.inference_service.client, isvc=self.inference_service)
10096
return route.instance.spec.host
10197

98+
elif url := self.inference_service.instance.status.url:
99+
return urlparse(url=url).netloc
100+
102101
else:
103102
raise ValueError(f"{self.inference_service.name}: No url found for inference")
104103

@@ -115,7 +114,7 @@ def is_service_exposed(self) -> bool:
115114
"""
116115
labels = self.inference_service.labels
117116

118-
if self.deployment_mode in KServeDeploymentType.RAW_DEPLOYMENT:
117+
if isinstance(self.inference_service, InferenceService) and self.deployment_mode in KServeDeploymentType.RAW_DEPLOYMENT:
119118
return labels and labels.get(Labels.Kserve.NETWORKING_KSERVE_IO) == Labels.Kserve.EXPOSED
120119

121120
if self.deployment_mode == KServeDeploymentType.SERVERLESS:
@@ -529,6 +528,7 @@ def create_isvc(
529528
scale_metric: str | None = None,
530529
scale_target: int | None = None,
531530
model_env_variables: list[dict[str, str]] | None = None,
531+
protocol_version: str | None = None,
532532
) -> Generator[InferenceService, Any, Any]:
533533
"""
534534
Create InferenceService object.
@@ -561,6 +561,7 @@ def create_isvc(
561561
scale_metric (str): Scale metric
562562
scale_target (int): Scale target
563563
model_env_variables (list[dict[str, str]]): Model environment variables
564+
protocol_version (str): Protocol version of the model server
564565
565566
Yields:
566567
InferenceService: InferenceService object
@@ -610,12 +611,6 @@ def create_isvc(
610611
if deployment_mode:
611612
_annotations = {Annotations.KserveIo.DEPLOYMENT_MODE: deployment_mode}
612613

613-
if deployment_mode == KServeDeploymentType.SERVERLESS:
614-
_annotations.update({
615-
"serving.knative.openshift.io/enablePassthrough": "true",
616-
"sidecar.istio.io/inject": "true",
617-
"sidecar.istio.io/rewriteAppHTTPProbers": "true",
618-
})
619614
if enable_auth:
620615
# model mesh auth is set in ServingRuntime
621616
if deployment_mode == KServeDeploymentType.SERVERLESS:
@@ -646,6 +641,9 @@ def create_isvc(
646641
if scale_target is not None:
647642
predictor_dict["scaleTarget"] = scale_target
648643

644+
if protocol_version is not None:
645+
predictor_dict["model"]["protocolVersion"] = protocol_version
646+
649647
with InferenceService(
650648
client=client,
651649
name=name,

0 commit comments

Comments
 (0)