Skip to content

Commit 32ee5f8

Browse files
israel-hdez, pre-commit-ci[bot], and rnetser
authored and committed
Add basic InferenceGraph deployment check (opendatahub-io#233)
* Add basic InferenceGraph deployment check This adds a test that deploys an InferenceGraph (IG), sends an inference request to the IG and verifies that the request succeeds. The deployed InferenceGraph is based on the example on the KServe documentation available in the following URL: https://kserve.github.io/website/0.15/modelserving/inference_graph/image_pipeline/. The example was adapted to run in openvino (which is a supported server in ODH), rather than TorchServe. * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Use cloud storage in InferenceGraph test Use cloud storage for the models, instead of OCI * Feedback: Ruth * Feedback: Ruth * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Apply Ruth suggestions Acknowledgement to @rnester for these changes. * More feedback: Ruth * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Ruth Netser <rnetser@redhat.com>
1 parent f9b785f commit 32ee5f8

File tree

7 files changed

+21684
-37
lines changed

7 files changed

+21684
-37
lines changed
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
from typing import Generator, Any
2+
3+
import pytest
4+
from _pytest.fixtures import FixtureRequest
5+
from kubernetes.dynamic import DynamicClient
6+
from ocp_resources.inference_graph import InferenceGraph
7+
from ocp_resources.inference_service import InferenceService
8+
from ocp_resources.namespace import Namespace
9+
from ocp_resources.secret import Secret
10+
from ocp_resources.serving_runtime import ServingRuntime
11+
12+
from utilities.constants import ModelFormat, KServeDeploymentType, ModelStoragePath
13+
from utilities.inference_utils import create_isvc
14+
15+
16+
@pytest.fixture
def dog_breed_inference_graph(
    admin_client: DynamicClient,
    model_namespace: Namespace,
    dog_cat_inference_service: InferenceService,
    dog_breed_inference_service: InferenceService,
) -> Generator[InferenceGraph, Any, Any]:
    """
    Deploy a two-step sequential InferenceGraph and yield it once Ready.

    The graph routes each request first to the dog/cat classifier; the
    dog-breed classifier step then receives the original request
    ("$request") and only runs when its "condition" expression over the
    first step's response holds.

    Yields:
        InferenceGraph: the deployed graph, waited on until Ready=True.
        Deleted automatically when the fixture finalizes (context manager).
    """
    # "Sequence" router: steps execute in order within the single "root" node.
    nodes = {
        "root": {
            "routerType": "Sequence",
            "steps": [
                {"name": "dog-cat-classifier", "serviceName": dog_cat_inference_service.name},
                {
                    "name": "dog-breed-classifier",
                    "serviceName": dog_breed_inference_service.name,
                    # Forward the original request body, not the previous step's output.
                    "data": "$request",
                    # NOTE(review): condition gates on outputs[0].data[1] >= 0 of the
                    # previous step's response — presumably "classified as dog"; confirm
                    # against the cat/dog model's output contract.
                    "condition": "[@this].#(outputs.0.data.1>=0)",
                },
            ],
        }
    }
    with InferenceGraph(
        client=admin_client,
        name="dog-breed-pipeline",
        namespace=model_namespace.name,
        nodes=nodes,
    ) as inference_graph:
        # Block until KServe reports the graph Ready before handing it to tests.
        inference_graph.wait_for_condition(condition=inference_graph.Condition.READY, status="True")
        yield inference_graph
45+
46+
47+
@pytest.fixture
def dog_cat_inference_service(
    request: FixtureRequest,  # NOTE(review): unused here — remove unless indirect parametrization is planned
    admin_client: DynamicClient,
    model_namespace: Namespace,
    ovms_kserve_serving_runtime: ServingRuntime,
    models_endpoint_s3_secret: Secret,
) -> Generator[InferenceService, Any, Any]:
    """
    Deploy the dog/cat classifier InferenceService (first graph step).

    Serves the ONNX model at ModelStoragePath.CAT_DOG_ONNX from S3-backed
    storage on the OVMS serving runtime, in Serverless deployment mode.

    Yields:
        InferenceService: the deployed service; torn down when the
        create_isvc context manager exits.
    """
    with create_isvc(
        client=admin_client,
        name="dog-cat-classifier",
        namespace=model_namespace.name,
        runtime=ovms_kserve_serving_runtime.name,
        storage_key=models_endpoint_s3_secret.name,
        storage_path=ModelStoragePath.CAT_DOG_ONNX,
        model_format=ModelFormat.ONNX,
        deployment_mode=KServeDeploymentType.SERVERLESS,
        # Serve over the v2 inference protocol.
        protocol_version="v2",
    ) as isvc:
        yield isvc
67+
68+
69+
@pytest.fixture
def dog_breed_inference_service(
    request: FixtureRequest,  # NOTE(review): unused here — remove unless indirect parametrization is planned
    admin_client: DynamicClient,
    model_namespace: Namespace,
    ovms_kserve_serving_runtime: ServingRuntime,
    models_endpoint_s3_secret: Secret,
) -> Generator[InferenceService, Any, Any]:
    """
    Deploy the dog-breed classifier InferenceService (second graph step).

    Serves the ONNX model at ModelStoragePath.DOG_BREED_ONNX from S3-backed
    storage on the OVMS serving runtime, in Serverless deployment mode.

    Yields:
        InferenceService: the deployed service; torn down when the
        create_isvc context manager exits.
    """
    with create_isvc(
        client=admin_client,
        name="dog-breed-classifier",
        namespace=model_namespace.name,
        runtime=ovms_kserve_serving_runtime.name,
        storage_key=models_endpoint_s3_secret.name,
        storage_path=ModelStoragePath.DOG_BREED_ONNX,
        model_format=ModelFormat.ONNX,
        deployment_mode=KServeDeploymentType.SERVERLESS,
        # Serve over the v2 inference protocol.
        protocol_version="v2",
    ) as isvc:
        yield isvc
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
import pytest
2+
3+
from tests.model_serving.model_server.utils import verify_inference_response
4+
from utilities.inference_utils import Inference
5+
from utilities.constants import ModelInferenceRuntime, Protocols
6+
from utilities.manifests.onnx import ONNX_INFERENCE_CONFIG
7+
8+
9+
# Indirect parametrization: the dicts are consumed by the model_namespace and
# ovms_kserve_serving_runtime fixtures, not passed to the test directly.
@pytest.mark.parametrize(
    "model_namespace,ovms_kserve_serving_runtime",
    [pytest.param({"name": "kserve-inference-graph-deploy"}, {"runtime-name": ModelInferenceRuntime.ONNX_RUNTIME})],
    indirect=True,
)
class TestInferenceGraphDeployment:
    """Basic InferenceGraph deployment check: deploy a graph and query it end to end."""

    def test_inference_graph_deployment(self, dog_breed_inference_graph):
        """
        Send a default inference request to the deployed InferenceGraph over
        HTTPS and verify the response matches the ONNX inference config.
        """
        verify_inference_response(
            inference_service=dog_breed_inference_graph,
            inference_config=ONNX_INFERENCE_CONFIG,
            inference_type=Inference.GRAPH,
            model_name="dog-breed-classifier",
            protocol=Protocols.HTTPS,
            use_default_query=True,
        )

tests/model_serving/model_server/utils.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from string import Template
55
from typing import Any, Optional
66

7+
from ocp_resources.inference_graph import InferenceGraph
78
from ocp_resources.inference_service import InferenceService
89
from simple_logger.logger import get_logger
910

@@ -17,7 +18,7 @@
1718

1819

1920
def verify_inference_response(
20-
inference_service: InferenceService,
21+
inference_service: InferenceService | InferenceGraph,
2122
inference_config: dict[str, Any],
2223
inference_type: str,
2324
protocol: str,

utilities/constants.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@ class ModelStoragePath:
4545
FLAN_T5_SMALL_HF: str = f"{ModelName.FLAN_T5_SMALL}/{ModelName.FLAN_T5_SMALL_HF}"
4646
BLOOM_560M_CAIKIT: str = f"{ModelName.BLOOM_560M}/{ModelAndFormat.BLOOM_560M_CAIKIT}"
4747
MNIST_8_ONNX: str = f"{ModelName.MNIST}-8.onnx"
48+
DOG_BREED_ONNX: str = "dog_breed_classification"
49+
CAT_DOG_ONNX: str = "cat_dog_classification"
4850

4951

5052
class CurlOutput:

utilities/inference_utils.py

Lines changed: 27 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from urllib.parse import urlparse
99

1010
from kubernetes.dynamic import DynamicClient
11+
from ocp_resources.inference_graph import InferenceGraph
1112
from ocp_resources.inference_service import InferenceService
1213
from ocp_resources.resource import get_client
1314
from ocp_resources.service import Service
@@ -44,15 +45,17 @@ class Inference:
4445
STREAMING: str = "streaming"
4546
INFER: str = "infer"
4647
MNIST: str = f"infer-{ModelName.MNIST}"
48+
GRAPH: str = "graph"
4749

48-
def __init__(self, inference_service: InferenceService):
50+
def __init__(self, inference_service: InferenceService | InferenceGraph):
4951
"""
5052
Args:
5153
inference_service: InferenceService object
5254
"""
5355
self.inference_service = inference_service
5456
self.deployment_mode = self.get_deployment_type()
55-
self.runtime = get_inference_serving_runtime(isvc=self.inference_service)
57+
if isinstance(self.inference_service, InferenceService):
58+
self.runtime = get_inference_serving_runtime(isvc=self.inference_service)
5659
self.visibility_exposed = self.is_service_exposed()
5760

5861
self.inference_url = self.get_inference_url()
@@ -69,7 +72,15 @@ def get_deployment_type(self) -> str:
6972
):
7073
return deployment_type
7174

72-
return self.inference_service.instance.status.deploymentMode
75+
if isinstance(self.inference_service, InferenceService):
76+
return self.inference_service.instance.status.deploymentMode
77+
78+
elif isinstance(self.inference_service, InferenceGraph):
79+
# TODO: Get deployment type from InferenceGraph once it is supported and added as `status.deploymentMode`
80+
return KServeDeploymentType.SERVERLESS
81+
82+
else:
83+
raise ValueError(f"Unknown inference service type: {self.inference_service.name}")
7384

7485
def get_inference_url(self) -> str:
7586
"""
@@ -83,20 +94,13 @@ def get_inference_url(self) -> str:
8394
8495
"""
8596
if self.visibility_exposed:
86-
if self.deployment_mode == KServeDeploymentType.SERVERLESS and (
87-
url := self.inference_service.instance.status.components.predictor.url
88-
):
89-
return urlparse(url=url).netloc
90-
91-
elif self.deployment_mode == KServeDeploymentType.RAW_DEPLOYMENT and (
92-
url := self.inference_service.instance.status.url
93-
):
94-
return urlparse(url=url).netloc
95-
96-
elif self.deployment_mode == KServeDeploymentType.MODEL_MESH:
97+
if self.deployment_mode == KServeDeploymentType.MODEL_MESH:
9798
route = get_model_route(client=self.inference_service.client, isvc=self.inference_service)
9899
return route.instance.spec.host
99100

101+
elif url := self.inference_service.instance.status.url:
102+
return urlparse(url=url).netloc
103+
100104
else:
101105
raise ValueError(f"{self.inference_service.name}: No url found for inference")
102106

@@ -113,7 +117,10 @@ def is_service_exposed(self) -> bool:
113117
"""
114118
labels = self.inference_service.labels
115119

116-
if self.deployment_mode in KServeDeploymentType.RAW_DEPLOYMENT:
120+
if (
121+
isinstance(self.inference_service, InferenceService)
122+
and self.deployment_mode in KServeDeploymentType.RAW_DEPLOYMENT
123+
):
117124
return labels and labels.get(Labels.Kserve.NETWORKING_KSERVE_IO) == Labels.Kserve.EXPOSED
118125

119126
if self.deployment_mode == KServeDeploymentType.SERVERLESS:
@@ -528,6 +535,7 @@ def create_isvc(
528535
scale_target: int | None = None,
529536
model_env_variables: list[dict[str, str]] | None = None,
530537
teardown: bool = True,
538+
protocol_version: str | None = None,
531539
) -> Generator[InferenceService, Any, Any]:
532540
"""
533541
Create InferenceService object.
@@ -561,6 +569,7 @@ def create_isvc(
561569
scale_target (int): Scale target
562570
model_env_variables (list[dict[str, str]]): Model environment variables
563571
teardown (bool): Teardown
572+
protocol_version (str): Protocol version of the model server
564573
565574
Yields:
566575
InferenceService: InferenceService object
@@ -610,12 +619,6 @@ def create_isvc(
610619
if deployment_mode:
611620
_annotations = {Annotations.KserveIo.DEPLOYMENT_MODE: deployment_mode}
612621

613-
if deployment_mode == KServeDeploymentType.SERVERLESS:
614-
_annotations.update({
615-
"serving.knative.openshift.io/enablePassthrough": "true",
616-
"sidecar.istio.io/inject": "true",
617-
"sidecar.istio.io/rewriteAppHTTPProbers": "true",
618-
})
619622
if enable_auth:
620623
# model mesh auth is set in ServingRuntime
621624
if deployment_mode == KServeDeploymentType.SERVERLESS:
@@ -646,6 +649,9 @@ def create_isvc(
646649
if scale_target is not None:
647650
predictor_dict["scaleTarget"] = scale_target
648651

652+
if protocol_version is not None:
653+
predictor_dict["model"]["protocolVersion"] = protocol_version
654+
649655
with InferenceService(
650656
client=client,
651657
name=name,

0 commit comments

Comments
 (0)