Skip to content

Commit 32ee5f8

Browse files
israel-hdez, pre-commit-ci[bot], and rnetser
authored and committed
Add basic InferenceGraph deployment check (opendatahub-io#233)
* Add basic InferenceGraph deployment check This adds a test that deploys an InferenceGraph (IG), sends an inference request to the IG and verifies that the request succeeds. The deployed InferenceGraph is based on the example on the KServe documentation available in the following URL: https://kserve.github.io/website/0.15/modelserving/inference_graph/image_pipeline/. The example was adapted to run in openvino (which is a supported server in ODH), rather than TorchServe. * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Use cloud storage in InferenceGraph test Use cloud storage for the models, instead of OCI * Feedback: Ruth * Feedback: Ruth * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Apply Ruth suggestions Acknowledgement to @rnester for these changes. * More feedback: Ruth * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Ruth Netser <rnetser@redhat.com>
1 parent f9b785f commit 32ee5f8

File tree

7 files changed

+21684
-37
lines changed

7 files changed

+21684
-37
lines changed
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
from typing import Generator, Any
2+
3+
import pytest
4+
from _pytest.fixtures import FixtureRequest
5+
from kubernetes.dynamic import DynamicClient
6+
from ocp_resources.inference_graph import InferenceGraph
7+
from ocp_resources.inference_service import InferenceService
8+
from ocp_resources.namespace import Namespace
9+
from ocp_resources.secret import Secret
10+
from ocp_resources.serving_runtime import ServingRuntime
11+
12+
from utilities.constants import ModelFormat, KServeDeploymentType, ModelStoragePath
13+
from utilities.inference_utils import create_isvc
14+
15+
16+
@pytest.fixture
def dog_breed_inference_graph(
    admin_client: DynamicClient,
    model_namespace: Namespace,
    dog_cat_inference_service: InferenceService,
    dog_breed_inference_service: InferenceService,
) -> Generator[InferenceGraph, Any, Any]:
    """
    Deploy a two-step sequential InferenceGraph and yield it once Ready.

    The graph routes each request first to the dog/cat classifier; the
    dog-breed classifier step then receives the original request
    ("$request") and only runs when its "condition" expression over the
    first step's response holds.

    Yields:
        InferenceGraph: the deployed graph, waited on until Ready=True.
        Deleted automatically when the fixture finalizes (context manager).
    """
    # "Sequence" router: steps execute in order within the single "root" node.
    nodes = {
        "root": {
            "routerType": "Sequence",
            "steps": [
                {"name": "dog-cat-classifier", "serviceName": dog_cat_inference_service.name},
                {
                    "name": "dog-breed-classifier",
                    "serviceName": dog_breed_inference_service.name,
                    # Forward the original request body, not the previous step's output.
                    "data": "$request",
                    # NOTE(review): condition gates on outputs[0].data[1] >= 0 of the
                    # previous step's response — presumably "classified as dog"; confirm
                    # against the cat/dog model's output contract.
                    "condition": "[@this].#(outputs.0.data.1>=0)",
                },
            ],
        }
    }
    with InferenceGraph(
        client=admin_client,
        name="dog-breed-pipeline",
        namespace=model_namespace.name,
        nodes=nodes,
    ) as inference_graph:
        # Block until KServe reports the graph Ready before handing it to tests.
        inference_graph.wait_for_condition(condition=inference_graph.Condition.READY, status="True")
        yield inference_graph
45+
46+
47+
@pytest.fixture
def dog_cat_inference_service(
    request: FixtureRequest,  # NOTE(review): unused here — remove unless indirect parametrization is planned
    admin_client: DynamicClient,
    model_namespace: Namespace,
    ovms_kserve_serving_runtime: ServingRuntime,
    models_endpoint_s3_secret: Secret,
) -> Generator[InferenceService, Any, Any]:
    """
    Deploy the dog/cat classifier InferenceService (first graph step).

    Serves the ONNX model at ModelStoragePath.CAT_DOG_ONNX from S3-backed
    storage on the OVMS serving runtime, in Serverless deployment mode.

    Yields:
        InferenceService: the deployed service; torn down when the
        create_isvc context manager exits.
    """
    with create_isvc(
        client=admin_client,
        name="dog-cat-classifier",
        namespace=model_namespace.name,
        runtime=ovms_kserve_serving_runtime.name,
        storage_key=models_endpoint_s3_secret.name,
        storage_path=ModelStoragePath.CAT_DOG_ONNX,
        model_format=ModelFormat.ONNX,
        deployment_mode=KServeDeploymentType.SERVERLESS,
        # Serve over the v2 inference protocol.
        protocol_version="v2",
    ) as isvc:
        yield isvc
67+
68+
69+
@pytest.fixture
def dog_breed_inference_service(
    request: FixtureRequest,  # NOTE(review): unused here — remove unless indirect parametrization is planned
    admin_client: DynamicClient,
    model_namespace: Namespace,
    ovms_kserve_serving_runtime: ServingRuntime,
    models_endpoint_s3_secret: Secret,
) -> Generator[InferenceService, Any, Any]:
    """
    Deploy the dog-breed classifier InferenceService (second graph step).

    Serves the ONNX model at ModelStoragePath.DOG_BREED_ONNX from S3-backed
    storage on the OVMS serving runtime, in Serverless deployment mode.

    Yields:
        InferenceService: the deployed service; torn down when the
        create_isvc context manager exits.
    """
    with create_isvc(
        client=admin_client,
        name="dog-breed-classifier",
        namespace=model_namespace.name,
        runtime=ovms_kserve_serving_runtime.name,
        storage_key=models_endpoint_s3_secret.name,
        storage_path=ModelStoragePath.DOG_BREED_ONNX,
        model_format=ModelFormat.ONNX,
        deployment_mode=KServeDeploymentType.SERVERLESS,
        # Serve over the v2 inference protocol.
        protocol_version="v2",
    ) as isvc:
        yield isvc
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
import pytest
2+
3+
from tests.model_serving.model_server.utils import verify_inference_response
4+
from utilities.inference_utils import Inference
5+
from utilities.constants import ModelInferenceRuntime, Protocols
6+
from utilities.manifests.onnx import ONNX_INFERENCE_CONFIG
7+
8+
9+
# Indirect parametrization: the dicts are consumed by the model_namespace and
# ovms_kserve_serving_runtime fixtures, not passed to the test directly.
@pytest.mark.parametrize(
    "model_namespace,ovms_kserve_serving_runtime",
    [pytest.param({"name": "kserve-inference-graph-deploy"}, {"runtime-name": ModelInferenceRuntime.ONNX_RUNTIME})],
    indirect=True,
)
class TestInferenceGraphDeployment:
    """Basic InferenceGraph deployment check: deploy a graph and query it end to end."""

    def test_inference_graph_deployment(self, dog_breed_inference_graph):
        """
        Send a default inference request to the deployed InferenceGraph over
        HTTPS and verify the response matches the ONNX inference config.
        """
        verify_inference_response(
            inference_service=dog_breed_inference_graph,
            inference_config=ONNX_INFERENCE_CONFIG,
            inference_type=Inference.GRAPH,
            model_name="dog-breed-classifier",
            protocol=Protocols.HTTPS,
            use_default_query=True,
        )

tests/model_serving/model_server/utils.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from string import Template
55
from typing import Any, Optional
66

7+
from ocp_resources.inference_graph import InferenceGraph
78
from ocp_resources.inference_service import InferenceService
89
from simple_logger.logger import get_logger
910

@@ -17,7 +18,7 @@
1718

1819

1920
def verify_inference_response(
20-
inference_service: InferenceService,
21+
inference_service: InferenceService | InferenceGraph,
2122
inference_config: dict[str, Any],
2223
inference_type: str,
2324
protocol: str,

utilities/constants.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@ class ModelStoragePath:
4545
FLAN_T5_SMALL_HF: str = f"{ModelName.FLAN_T5_SMALL}/{ModelName.FLAN_T5_SMALL_HF}"
4646
BLOOM_560M_CAIKIT: str = f"{ModelName.BLOOM_560M}/{ModelAndFormat.BLOOM_560M_CAIKIT}"
4747
MNIST_8_ONNX: str = f"{ModelName.MNIST}-8.onnx"
48+
DOG_BREED_ONNX: str = "dog_breed_classification"
49+
CAT_DOG_ONNX: str = "cat_dog_classification"
4850

4951

5052
class CurlOutput:

utilities/inference_utils.py

Lines changed: 27 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from urllib.parse import urlparse
99

1010
from kubernetes.dynamic import DynamicClient
11+
from ocp_resources.inference_graph import InferenceGraph
1112
from ocp_resources.inference_service import InferenceService
1213
from ocp_resources.resource import get_client
1314
from ocp_resources.service import Service
@@ -44,15 +45,17 @@ class Inference:
4445
STREAMING: str = "streaming"
4546
INFER: str = "infer"
4647
MNIST: str = f"infer-{ModelName.MNIST}"
48+
GRAPH: str = "graph"
4749

48-
def __init__(self, inference_service: InferenceService):
50+
def __init__(self, inference_service: InferenceService | InferenceGraph):
4951
"""
5052
Args:
5153
inference_service: InferenceService object
5254
"""
5355
self.inference_service = inference_service
5456
self.deployment_mode = self.get_deployment_type()
55-
self.runtime = get_inference_serving_runtime(isvc=self.inference_service)
57+
if isinstance(self.inference_service, InferenceService):
58+
self.runtime = get_inference_serving_runtime(isvc=self.inference_service)
5659
self.visibility_exposed = self.is_service_exposed()
5760

5861
self.inference_url = self.get_inference_url()
@@ -69,7 +72,15 @@ def get_deployment_type(self) -> str:
6972
):
7073
return deployment_type
7174

72-
return self.inference_service.instance.status.deploymentMode
75+
if isinstance(self.inference_service, InferenceService):
76+
return self.inference_service.instance.status.deploymentMode
77+
78+
elif isinstance(self.inference_service, InferenceGraph):
79+
# TODO: Get deployment type from InferenceGraph once it is supported and added as `status.deploymentMode`
80+
return KServeDeploymentType.SERVERLESS
81+
82+
else:
83+
raise ValueError(f"Unknown inference service type: {self.inference_service.name}")
7384

7485
def get_inference_url(self) -> str:
7586
"""
@@ -83,20 +94,13 @@ def get_inference_url(self) -> str:
8394
8495
"""
8596
if self.visibility_exposed:
86-
if self.deployment_mode == KServeDeploymentType.SERVERLESS and (
87-
url := self.inference_service.instance.status.components.predictor.url
88-
):
89-
return urlparse(url=url).netloc
90-
91-
elif self.deployment_mode == KServeDeploymentType.RAW_DEPLOYMENT and (
92-
url := self.inference_service.instance.status.url
93-
):
94-
return urlparse(url=url).netloc
95-
96-
elif self.deployment_mode == KServeDeploymentType.MODEL_MESH:
97+
if self.deployment_mode == KServeDeploymentType.MODEL_MESH:
9798
route = get_model_route(client=self.inference_service.client, isvc=self.inference_service)
9899
return route.instance.spec.host
99100

101+
elif url := self.inference_service.instance.status.url:
102+
return urlparse(url=url).netloc
103+
100104
else:
101105
raise ValueError(f"{self.inference_service.name}: No url found for inference")
102106

@@ -113,7 +117,10 @@ def is_service_exposed(self) -> bool:
113117
"""
114118
labels = self.inference_service.labels
115119

116-
if self.deployment_mode in KServeDeploymentType.RAW_DEPLOYMENT:
120+
if (
121+
isinstance(self.inference_service, InferenceService)
122+
and self.deployment_mode in KServeDeploymentType.RAW_DEPLOYMENT
123+
):
117124
return labels and labels.get(Labels.Kserve.NETWORKING_KSERVE_IO) == Labels.Kserve.EXPOSED
118125

119126
if self.deployment_mode == KServeDeploymentType.SERVERLESS:
@@ -528,6 +535,7 @@ def create_isvc(
528535
scale_target: int | None = None,
529536
model_env_variables: list[dict[str, str]] | None = None,
530537
teardown: bool = True,
538+
protocol_version: str | None = None,
531539
) -> Generator[InferenceService, Any, Any]:
532540
"""
533541
Create InferenceService object.
@@ -561,6 +569,7 @@ def create_isvc(
561569
scale_target (int): Scale target
562570
model_env_variables (list[dict[str, str]]): Model environment variables
563571
teardown (bool): Teardown
572+
protocol_version (str): Protocol version of the model server
564573
565574
Yields:
566575
InferenceService: InferenceService object
@@ -610,12 +619,6 @@ def create_isvc(
610619
if deployment_mode:
611620
_annotations = {Annotations.KserveIo.DEPLOYMENT_MODE: deployment_mode}
612621

613-
if deployment_mode == KServeDeploymentType.SERVERLESS:
614-
_annotations.update({
615-
"serving.knative.openshift.io/enablePassthrough": "true",
616-
"sidecar.istio.io/inject": "true",
617-
"sidecar.istio.io/rewriteAppHTTPProbers": "true",
618-
})
619622
if enable_auth:
620623
# model mesh auth is set in ServingRuntime
621624
if deployment_mode == KServeDeploymentType.SERVERLESS:
@@ -646,6 +649,9 @@ def create_isvc(
646649
if scale_target is not None:
647650
predictor_dict["scaleTarget"] = scale_target
648651

652+
if protocol_version is not None:
653+
predictor_dict["model"]["protocolVersion"] = protocol_version
654+
649655
with InferenceService(
650656
client=client,
651657
name=name,

0 commit comments

Comments
 (0)