88 changes: 88 additions & 0 deletions tests/model_serving/model_server/inference_graph/conftest.py
@@ -0,0 +1,88 @@
from typing import Generator, Any

import pytest
from _pytest.fixtures import FixtureRequest
from kubernetes.dynamic import DynamicClient
from ocp_resources.inference_graph import InferenceGraph
from ocp_resources.inference_service import InferenceService
from ocp_resources.namespace import Namespace
from ocp_resources.secret import Secret
from ocp_resources.serving_runtime import ServingRuntime

from utilities.constants import ModelFormat, KServeDeploymentType, ModelStoragePath
from utilities.inference_utils import create_isvc


@pytest.fixture
def dog_breed_inference_graph(
    admin_client: DynamicClient,
    model_namespace: Namespace,
    dog_cat_inference_service: InferenceService,
    dog_breed_inference_service: InferenceService,
) -> Generator[InferenceGraph, Any, Any]:
    nodes = {
        "root": {
            "routerType": "Sequence",
            "steps": [
                {"name": "dog-cat-classifier", "serviceName": dog_cat_inference_service.name},
                {
                    "name": "dog-breed-classifier",
                    "serviceName": dog_breed_inference_service.name,
                    "data": "$request",
                    "condition": "[@this].#(outputs.0.data.1>=0)",
                },
            ],
        }
    }
    with InferenceGraph(
        client=admin_client,
        name="dog-breed-pipeline",
        namespace=model_namespace.name,
        nodes=nodes,
    ) as inference_graph:
        inference_graph.wait_for_condition(condition=inference_graph.Condition.READY, status="True")
        yield inference_graph


@pytest.fixture
def dog_cat_inference_service(
    request: FixtureRequest,
    admin_client: DynamicClient,
    model_namespace: Namespace,
    ovms_kserve_serving_runtime: ServingRuntime,
    models_endpoint_s3_secret: Secret,
) -> Generator[InferenceService, Any, Any]:
    with create_isvc(
        client=admin_client,
        name="dog-cat-classifier",
        namespace=model_namespace.name,
        runtime=ovms_kserve_serving_runtime.name,
        storage_key=models_endpoint_s3_secret.name,
        storage_path=ModelStoragePath.CAT_DOG_ONNX,
        model_format=ModelFormat.ONNX,
        deployment_mode=KServeDeploymentType.SERVERLESS,
        protocol_version="v2",
    ) as isvc:
        yield isvc


@pytest.fixture
def dog_breed_inference_service(
    request: FixtureRequest,
    admin_client: DynamicClient,
    model_namespace: Namespace,
    ovms_kserve_serving_runtime: ServingRuntime,
    models_endpoint_s3_secret: Secret,
) -> Generator[InferenceService, Any, Any]:
    with create_isvc(
        client=admin_client,
        name="dog-breed-classifier",
        namespace=model_namespace.name,
        runtime=ovms_kserve_serving_runtime.name,
        storage_key=models_endpoint_s3_secret.name,
        storage_path=ModelStoragePath.DOG_BREED_ONNX,
        model_format=ModelFormat.ONNX,
        deployment_mode=KServeDeploymentType.SERVERLESS,
        protocol_version="v2",
    ) as isvc:
        yield isvc
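
For context, the "root" Sequence node above chains the two classifiers: the dog/cat classifier always runs first, and the dog-breed classifier runs only when the condition on the first response holds, receiving the original request because of data: "$request". A rough Python sketch of that routing behaviour (illustration only, not KServe's router implementation; treating outputs[0].data[1] as the "dog" score is an assumption):

from typing import Any, Callable

def run_sequence(
    original_request: dict[str, Any],
    dog_cat_predict: Callable[[dict[str, Any]], dict[str, Any]],
    dog_breed_predict: Callable[[dict[str, Any]], dict[str, Any]],
) -> dict[str, Any]:
    # Step 1 always receives the incoming request.
    first_response = dog_cat_predict(original_request)

    # condition "[@this].#(outputs.0.data.1>=0)": run step 2 only when the
    # second value of the first output tensor is non-negative.
    if first_response["outputs"][0]["data"][1] >= 0:
        # data "$request": step 2 gets the original request, not step 1's response.
        return dog_breed_predict(original_request)

    return first_response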
@@ -0,0 +1,23 @@
import pytest

from tests.model_serving.model_server.utils import verify_inference_response
from utilities.inference_utils import Inference
from utilities.constants import ModelInferenceRuntime, Protocols
from utilities.manifests.onnx import ONNX_INFERENCE_CONFIG


@pytest.mark.parametrize(
    "model_namespace,ovms_kserve_serving_runtime",
    [pytest.param({"name": "kserve-inference-graph-deploy"}, {"runtime-name": ModelInferenceRuntime.ONNX_RUNTIME})],
    indirect=True,
)
class TestInferenceGraphDeployment:
    def test_inference_graph_deployment(self, dog_breed_inference_graph):
        verify_inference_response(
            inference_service=dog_breed_inference_graph,
            inference_config=ONNX_INFERENCE_CONFIG,
            inference_type=Inference.GRAPH,
            model_name="dog-breed-classifier",
            protocol=Protocols.HTTPS,
            use_default_query=True,
        )
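
    # Illustrative extension (an assumption, not part of this change): the fixture
    # yields the live InferenceGraph, so a test in this class can also assert on
    # the resource itself, e.g. that a routable URL is published once it is Ready.
    def test_inference_graph_has_url(self, dog_breed_inference_graph):
        assert dog_breed_inference_graph.instance.status.url, "InferenceGraph has no status.url"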
3 changes: 2 additions & 1 deletion tests/model_serving/model_server/utils.py
@@ -4,6 +4,7 @@
from string import Template
from typing import Any, Optional

from ocp_resources.inference_graph import InferenceGraph
from ocp_resources.inference_service import InferenceService
from simple_logger.logger import get_logger

@@ -17,7 +18,7 @@


def verify_inference_response(
inference_service: InferenceService,
inference_service: InferenceService | InferenceGraph,
inference_config: dict[str, Any],
inference_type: str,
protocol: str,
2 changes: 2 additions & 0 deletions utilities/constants.py
@@ -45,6 +45,8 @@ class ModelStoragePath:
FLAN_T5_SMALL_HF: str = f"{ModelName.FLAN_T5_SMALL}/{ModelName.FLAN_T5_SMALL_HF}"
BLOOM_560M_CAIKIT: str = f"{ModelName.BLOOM_560M}/{ModelAndFormat.BLOOM_560M_CAIKIT}"
MNIST_8_ONNX: str = f"{ModelName.MNIST}-8.onnx"
DOG_BREED_ONNX: str = "dog_breed_classification"
CAT_DOG_ONNX: str = "cat_dog_classification"


class CurlOutput:
48 changes: 27 additions & 21 deletions utilities/inference_utils.py
@@ -8,6 +8,7 @@
from urllib.parse import urlparse

from kubernetes.dynamic import DynamicClient
from ocp_resources.inference_graph import InferenceGraph
from ocp_resources.inference_service import InferenceService
from ocp_resources.resource import get_client
from ocp_resources.service import Service
@@ -44,15 +45,17 @@ class Inference:
STREAMING: str = "streaming"
INFER: str = "infer"
MNIST: str = f"infer-{ModelName.MNIST}"
GRAPH: str = "graph"

def __init__(self, inference_service: InferenceService):
def __init__(self, inference_service: InferenceService | InferenceGraph):
"""
Args:
inference_service: InferenceService object
"""
self.inference_service = inference_service
self.deployment_mode = self.get_deployment_type()
self.runtime = get_inference_serving_runtime(isvc=self.inference_service)
if isinstance(self.inference_service, InferenceService):
self.runtime = get_inference_serving_runtime(isvc=self.inference_service)
self.visibility_exposed = self.is_service_exposed()

self.inference_url = self.get_inference_url()
@@ -69,7 +72,15 @@ def get_deployment_type(self) -> str:
):
return deployment_type

return self.inference_service.instance.status.deploymentMode
if isinstance(self.inference_service, InferenceService):
return self.inference_service.instance.status.deploymentMode

elif isinstance(self.inference_service, InferenceGraph):
# TODO: Get deployment type from InferenceGraph once it is supported and added as `status.deploymentMode`
return KServeDeploymentType.SERVERLESS

else:
raise ValueError(f"Unknown inference service type: {self.inference_service.name}")

def get_inference_url(self) -> str:
"""
@@ -83,20 +94,13 @@

"""
if self.visibility_exposed:
if self.deployment_mode == KServeDeploymentType.SERVERLESS and (
url := self.inference_service.instance.status.components.predictor.url
):
return urlparse(url=url).netloc

elif self.deployment_mode == KServeDeploymentType.RAW_DEPLOYMENT and (
url := self.inference_service.instance.status.url
):
return urlparse(url=url).netloc

elif self.deployment_mode == KServeDeploymentType.MODEL_MESH:
if self.deployment_mode == KServeDeploymentType.MODEL_MESH:
route = get_model_route(client=self.inference_service.client, isvc=self.inference_service)
return route.instance.spec.host

elif url := self.inference_service.instance.status.url:
return urlparse(url=url).netloc

else:
raise ValueError(f"{self.inference_service.name}: No url found for inference")

@@ -113,7 +117,10 @@ def is_service_exposed(self) -> bool:
"""
labels = self.inference_service.labels

if self.deployment_mode in KServeDeploymentType.RAW_DEPLOYMENT:
if (
isinstance(self.inference_service, InferenceService)
and self.deployment_mode in KServeDeploymentType.RAW_DEPLOYMENT
):
return labels and labels.get(Labels.Kserve.NETWORKING_KSERVE_IO) == Labels.Kserve.EXPOSED

if self.deployment_mode == KServeDeploymentType.SERVERLESS:
@@ -528,6 +535,7 @@ def create_isvc(
scale_target: int | None = None,
model_env_variables: list[dict[str, str]] | None = None,
teardown: bool = True,
protocol_version: str | None = None,
) -> Generator[InferenceService, Any, Any]:
"""
Create InferenceService object.
@@ -561,6 +569,7 @@
scale_target (int): Scale target
model_env_variables (list[dict[str, str]]): Model environment variables
teardown (bool): Teardown
protocol_version (str): Protocol version of the model server

Yields:
InferenceService: InferenceService object
@@ -610,12 +619,6 @@
if deployment_mode:
_annotations = {Annotations.KserveIo.DEPLOYMENT_MODE: deployment_mode}

if deployment_mode == KServeDeploymentType.SERVERLESS:
_annotations.update({
"serving.knative.openshift.io/enablePassthrough": "true",
"sidecar.istio.io/inject": "true",
"sidecar.istio.io/rewriteAppHTTPProbers": "true",
})
if enable_auth:
# model mesh auth is set in ServingRuntime
if deployment_mode == KServeDeploymentType.SERVERLESS:
@@ -646,6 +649,9 @@
if scale_target is not None:
predictor_dict["scaleTarget"] = scale_target

if protocol_version is not None:
predictor_dict["model"]["protocolVersion"] = protocol_version

with InferenceService(
client=client,
name=name,
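
For reference, the new protocol_version argument is written onto the predictor's model spec (see predictor_dict["model"]["protocolVersion"] above). A rough sketch of the predictor fragment this yields for the fixtures in this PR (field names follow KServe's v1beta1 predictor.model spec; the runtime and storage values are illustrative):

predictor = {
    "model": {
        "modelFormat": {"name": "onnx"},
        "runtime": "kserve-ovms",  # illustrative runtime name
        "protocolVersion": "v2",  # set only when protocol_version is passed
        "storage": {"key": "models-endpoint-s3-secret", "path": "cat_dog_classification"},  # illustrative
    }
}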