Skip to content

Commit c6503d9

Browse files
committed
Add basic InferenceGraph deployment check
This adds a test that deploys an InferenceGraph (IG), sends an inference request to the IG, and verifies that the request succeeds. The deployed InferenceGraph is based on the example in the KServe documentation available at the following URL: https://kserve.github.io/website/0.15/modelserving/inference_graph/image_pipeline/. The example was adapted to run on OpenVINO (which is a supported server in ODH), rather than TorchServe.
1 parent ac5a130 commit c6503d9

File tree

6 files changed

+21670
-37
lines changed

6 files changed

+21670
-37
lines changed
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
import time
2+
from typing import Generator, Any
3+
4+
import pytest
5+
from _pytest.fixtures import FixtureRequest
6+
from kubernetes.dynamic import DynamicClient
7+
from ocp_resources.inference_graph import InferenceGraph
8+
from ocp_resources.inference_service import InferenceService
9+
from ocp_resources.namespace import Namespace
10+
from ocp_resources.serving_runtime import ServingRuntime
11+
12+
from utilities.constants import ModelFormat, KServeDeploymentType
13+
from utilities.inference_utils import create_isvc
14+
15+
16+
@pytest.fixture
def dog_breed_inference_graph(
    admin_client: DynamicClient,
    model_namespace: Namespace,
    dog_cat_inference_service: InferenceService,
    dog_breed_inference_service: InferenceService,
) -> Generator[InferenceGraph, Any, Any]:
    """Deploy a two-step Sequence InferenceGraph and yield it once ready.

    The graph first routes requests to the dog/cat classifier; if the
    condition on the classifier output matches, the request payload is
    forwarded to the dog-breed classifier.

    Yields:
        InferenceGraph: the deployed graph, with its READY condition True.
    """
    classifier_step = {
        "name": "dog-cat-classifier",
        "serviceName": dog_cat_inference_service.name,
    }
    breed_step = {
        "name": "dog-breed-classifier",
        "serviceName": dog_breed_inference_service.name,
        "data": "$request",
        "condition": "[@this].#(outputs.0.data.1>=0)",
    }
    graph_nodes = {
        "root": {
            "routerType": "Sequence",
            "steps": [classifier_step, breed_step],
        }
    }
    with InferenceGraph(
        client=admin_client,
        name="dog-breed-pipeline",
        namespace=model_namespace.name,
        nodes=graph_nodes,
    ) as inference_graph:
        inference_graph.wait_for_condition(inference_graph.Condition.READY, "True")
        # Give OpenShift a moment to fully enable the Route, if the InferenceGraph is exposed.
        time.sleep(5)
        yield inference_graph
41+
42+
43+
@pytest.fixture
def dog_cat_inference_service(
    admin_client: DynamicClient,
    model_namespace: Namespace,
    ovms_kserve_serving_runtime: ServingRuntime,
) -> Generator[InferenceService, Any, Any]:
    """Deploy a serverless ONNX dog/cat classifier InferenceService.

    Fix vs. original: the name was an f-string with no placeholders (F541),
    and the injected ``request`` fixture was never used, so both are removed.

    Yields:
        InferenceService: the deployed dog-cat-classifier service.
    """
    with create_isvc(
        client=admin_client,
        name="dog-cat-classifier",
        namespace=model_namespace.name,
        runtime=ovms_kserve_serving_runtime.name,
        storage_uri="oci://quay.io/edgarhz/oci-model-images:dog-cat-classifier-202504051400",
        model_format=ModelFormat.ONNX,
        deployment_mode=KServeDeploymentType.SERVERLESS,
        protocol_version="v2",
    ) as isvc:
        yield isvc
61+
62+
63+
@pytest.fixture
def dog_breed_inference_service(
    admin_client: DynamicClient,
    model_namespace: Namespace,
    ovms_kserve_serving_runtime: ServingRuntime,
) -> Generator[InferenceService, Any, Any]:
    """Deploy a serverless ONNX dog-breed classifier InferenceService.

    Fix vs. original: the name was an f-string with no placeholders (F541),
    and the injected ``request`` fixture was never used, so both are removed.

    Yields:
        InferenceService: the deployed dog-breed-classifier service.
    """
    with create_isvc(
        client=admin_client,
        name="dog-breed-classifier",
        namespace=model_namespace.name,
        runtime=ovms_kserve_serving_runtime.name,
        storage_uri="oci://quay.io/edgarhz/oci-model-images:dog-breed-classifier-202504051400",
        model_format=ModelFormat.ONNX,
        deployment_mode=KServeDeploymentType.SERVERLESS,
        protocol_version="v2",
    ) as isvc:
        yield isvc
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
import pytest
2+
3+
from tests.model_serving.model_server.inference_graph.conftest import dog_breed_inference_graph
4+
from tests.model_serving.model_server.utils import verify_inference_response
5+
from utilities.inference_utils import Inference
6+
from utilities.constants import ModelInferenceRuntime, Protocols
7+
from utilities.manifests.onnx import ONNX_INFERENCE_CONFIG
8+
9+
@pytest.mark.parametrize(
    "model_namespace,ovms_kserve_serving_runtime",
    [
        pytest.param(
            {"name": "kserve-inference-graph-deploy"},
            {"runtime-name": ModelInferenceRuntime.ONNX_RUNTIME},
        )
    ],
    indirect=True,
)
class TestInferenceGraphDeployment:
    """Basic InferenceGraph deployment check.

    Deploys the dog-breed InferenceGraph (via its fixture) and verifies that
    an inference request against the graph endpoint succeeds.
    """

    def test_inference_graph_deployment(self, dog_breed_inference_graph):
        """Send a default ONNX inference query to the graph over HTTPS and
        verify the response."""
        verify_inference_response(
            inference_service=dog_breed_inference_graph,
            inference_config=ONNX_INFERENCE_CONFIG,
            inference_type=Inference.GRAPH,
            protocol=Protocols.HTTPS,
            use_default_query=True,
        )

tests/model_serving/model_server/utils.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from string import Template
77
from typing import Any, Optional
88

9+
from ocp_resources.inference_graph import InferenceGraph
910
from ocp_resources.inference_service import InferenceService
1011
from simple_logger.logger import get_logger
1112

@@ -19,7 +20,7 @@
1920

2021

2122
def verify_inference_response(
22-
inference_service: InferenceService,
23+
inference_service: InferenceService | InferenceGraph,
2324
inference_config: dict[str, Any],
2425
inference_type: str,
2526
protocol: str,

utilities/inference_utils.py

Lines changed: 19 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from urllib.parse import urlparse
1111

1212
from kubernetes.dynamic import DynamicClient
13+
from ocp_resources.inference_graph import InferenceGraph
1314
from ocp_resources.inference_service import InferenceService
1415
from ocp_resources.resource import get_client
1516
from ocp_resources.service import Service
@@ -46,15 +47,17 @@ class Inference:
4647
STREAMING: str = "streaming"
4748
INFER: str = "infer"
4849
MNIST: str = f"infer-{ModelName.MNIST}"
50+
GRAPH: str = "graph"
4951

50-
def __init__(self, inference_service: InferenceService):
52+
def __init__(self, inference_service: InferenceService | InferenceGraph):
5153
"""
5254
Args:
5355
inference_service: InferenceService object
5456
"""
5557
self.inference_service = inference_service
5658
self.deployment_mode = self.get_deployment_type()
57-
self.runtime = get_inference_serving_runtime(isvc=self.inference_service)
59+
if isinstance(self.inference_service, InferenceService):
60+
self.runtime = get_inference_serving_runtime(isvc=self.inference_service)
5861
self.visibility_exposed = self.is_service_exposed()
5962

6063
self.inference_url = self.get_inference_url()
@@ -71,7 +74,10 @@ def get_deployment_type(self) -> str:
7174
):
7275
return deployment_type
7376

74-
return self.inference_service.instance.status.deploymentMode
77+
if isinstance(self.inference_service, InferenceService):
78+
return self.inference_service.instance.status.deploymentMode
79+
80+
return KServeDeploymentType.SERVERLESS
7581

7682
def get_inference_url(self) -> str:
7783
"""
@@ -85,20 +91,13 @@ def get_inference_url(self) -> str:
8591
8692
"""
8793
if self.visibility_exposed:
88-
if self.deployment_mode == KServeDeploymentType.SERVERLESS and (
89-
url := self.inference_service.instance.status.components.predictor.url
90-
):
91-
return urlparse(url=url).netloc
92-
93-
elif self.deployment_mode == KServeDeploymentType.RAW_DEPLOYMENT and (
94-
url := self.inference_service.instance.status.url
95-
):
96-
return urlparse(url=url).netloc
97-
98-
elif self.deployment_mode == KServeDeploymentType.MODEL_MESH:
94+
if self.deployment_mode == KServeDeploymentType.MODEL_MESH:
9995
route = get_model_route(client=self.inference_service.client, isvc=self.inference_service)
10096
return route.instance.spec.host
10197

98+
elif url := self.inference_service.instance.status.url:
99+
return urlparse(url=url).netloc
100+
102101
else:
103102
raise ValueError(f"{self.inference_service.name}: No url found for inference")
104103

@@ -115,7 +114,7 @@ def is_service_exposed(self) -> bool:
115114
"""
116115
labels = self.inference_service.labels
117116

118-
if self.deployment_mode in KServeDeploymentType.RAW_DEPLOYMENT:
117+
if isinstance(self.inference_service, InferenceService) and self.deployment_mode in KServeDeploymentType.RAW_DEPLOYMENT:
119118
return labels and labels.get(Labels.Kserve.NETWORKING_KSERVE_IO) == Labels.Kserve.EXPOSED
120119

121120
if self.deployment_mode == KServeDeploymentType.SERVERLESS:
@@ -529,6 +528,7 @@ def create_isvc(
529528
scale_metric: str | None = None,
530529
scale_target: int | None = None,
531530
model_env_variables: list[dict[str, str]] | None = None,
531+
protocol_version: str | None = None,
532532
) -> Generator[InferenceService, Any, Any]:
533533
"""
534534
Create InferenceService object.
@@ -561,6 +561,7 @@ def create_isvc(
561561
scale_metric (str): Scale metric
562562
scale_target (int): Scale target
563563
model_env_variables (list[dict[str, str]]): Model environment variables
564+
protocol_version (str): Protocol version of the model server
564565
565566
Yields:
566567
InferenceService: InferenceService object
@@ -610,12 +611,6 @@ def create_isvc(
610611
if deployment_mode:
611612
_annotations = {Annotations.KserveIo.DEPLOYMENT_MODE: deployment_mode}
612613

613-
if deployment_mode == KServeDeploymentType.SERVERLESS:
614-
_annotations.update({
615-
"serving.knative.openshift.io/enablePassthrough": "true",
616-
"sidecar.istio.io/inject": "true",
617-
"sidecar.istio.io/rewriteAppHTTPProbers": "true",
618-
})
619614
if enable_auth:
620615
# model mesh auth is set in ServingRuntime
621616
if deployment_mode == KServeDeploymentType.SERVERLESS:
@@ -646,6 +641,9 @@ def create_isvc(
646641
if scale_target is not None:
647642
predictor_dict["scaleTarget"] = scale_target
648643

644+
if protocol_version is not None:
645+
predictor_dict["model"]["protocolVersion"] = protocol_version
646+
649647
with InferenceService(
650648
client=client,
651649
name=name,

0 commit comments

Comments
 (0)