88 changes: 88 additions & 0 deletions tests/model_serving/model_server/inference_graph/conftest.py
@@ -0,0 +1,88 @@
from typing import Generator, Any

import pytest
from _pytest.fixtures import FixtureRequest
from kubernetes.dynamic import DynamicClient
from ocp_resources.inference_graph import InferenceGraph
from ocp_resources.inference_service import InferenceService
from ocp_resources.namespace import Namespace
from ocp_resources.secret import Secret
from ocp_resources.serving_runtime import ServingRuntime

from utilities.constants import ModelFormat, KServeDeploymentType, ModelStoragePath
from utilities.inference_utils import create_isvc


@pytest.fixture
def dog_breed_inference_graph(
    admin_client: DynamicClient,
    model_namespace: Namespace,
    dog_cat_inference_service: InferenceService,
    dog_breed_inference_service: InferenceService,
) -> Generator[InferenceGraph, Any, Any]:
    nodes = {
        "root": {
            "routerType": "Sequence",
            "steps": [
                {"name": "dog-cat-classifier", "serviceName": dog_cat_inference_service.name},
                {
                    "name": "dog-breed-classifier",
                    "serviceName": dog_breed_inference_service.name,
                    "data": "$request",
                    "condition": "[@this].#(outputs.0.data.1>=0)",
                },
            ],
        }
    }
    with InferenceGraph(
        client=admin_client,
        name="dog-breed-pipeline",
        namespace=model_namespace.name,
        nodes=nodes,
    ) as inference_graph:
        inference_graph.wait_for_condition(condition=inference_graph.Condition.READY, status="True")
        yield inference_graph


@pytest.fixture
def dog_cat_inference_service(
    request: FixtureRequest,
    admin_client: DynamicClient,
    model_namespace: Namespace,
    ovms_kserve_serving_runtime: ServingRuntime,
    models_endpoint_s3_secret: Secret,
) -> Generator[InferenceService, Any, Any]:
    with create_isvc(
        client=admin_client,
        name="dog-cat-classifier",
        namespace=model_namespace.name,
        runtime=ovms_kserve_serving_runtime.name,
        storage_key=models_endpoint_s3_secret.name,
        storage_path=ModelStoragePath.CAT_DOG_ONNX,
        model_format=ModelFormat.ONNX,
        deployment_mode=KServeDeploymentType.SERVERLESS,
        protocol_version="v2",
    ) as isvc:
        yield isvc


@pytest.fixture
def dog_breed_inference_service(
    request: FixtureRequest,
    admin_client: DynamicClient,
    model_namespace: Namespace,
    ovms_kserve_serving_runtime: ServingRuntime,
    models_endpoint_s3_secret: Secret,
) -> Generator[InferenceService, Any, Any]:
    with create_isvc(
        client=admin_client,
        name="dog-breed-classifier",
        namespace=model_namespace.name,
        runtime=ovms_kserve_serving_runtime.name,
        storage_key=models_endpoint_s3_secret.name,
        storage_path=ModelStoragePath.DOG_BREED_ONNX,
        model_format=ModelFormat.ONNX,
        deployment_mode=KServeDeploymentType.SERVERLESS,
        protocol_version="v2",
    ) as isvc:
        yield isvc
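
For context, the "root" Sequence node above chains the two classifiers: the dog/cat classifier always runs first, and the dog-breed classifier runs only when the condition on the first response holds, receiving the original request because of data: "$request". A rough Python sketch of that routing behaviour (illustration only, not KServe's router implementation; treating outputs[0].data[1] as the "dog" score is an assumption):

from typing import Any, Callable

def run_sequence(
    original_request: dict[str, Any],
    dog_cat_predict: Callable[[dict[str, Any]], dict[str, Any]],
    dog_breed_predict: Callable[[dict[str, Any]], dict[str, Any]],
) -> dict[str, Any]:
    # Step 1 always receives the incoming request.
    first_response = dog_cat_predict(original_request)

    # condition "[@this].#(outputs.0.data.1>=0)": run step 2 only when the
    # second value of the first output tensor is non-negative.
    if first_response["outputs"][0]["data"][1] >= 0:
        # data "$request": step 2 gets the original request, not step 1's response.
        return dog_breed_predict(original_request)

    return first_response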
@@ -0,0 +1,23 @@
import pytest

from tests.model_serving.model_server.utils import verify_inference_response
from utilities.inference_utils import Inference
from utilities.constants import ModelInferenceRuntime, Protocols
from utilities.manifests.onnx import ONNX_INFERENCE_CONFIG


@pytest.mark.parametrize(
    "model_namespace,ovms_kserve_serving_runtime",
    [pytest.param({"name": "kserve-inference-graph-deploy"}, {"runtime-name": ModelInferenceRuntime.ONNX_RUNTIME})],
    indirect=True,
)
class TestInferenceGraphDeployment:
    def test_inference_graph_deployment(self, dog_breed_inference_graph):
        verify_inference_response(
            inference_service=dog_breed_inference_graph,
            inference_config=ONNX_INFERENCE_CONFIG,
            inference_type=Inference.GRAPH,
            model_name="dog-breed-classifier",
            protocol=Protocols.HTTPS,
            use_default_query=True,
        )
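
    # Illustrative extension (an assumption, not part of this change): the fixture
    # yields the live InferenceGraph, so a test in this class can also assert on
    # the resource itself, e.g. that a routable URL is published once it is Ready.
    def test_inference_graph_has_url(self, dog_breed_inference_graph):
        assert dog_breed_inference_graph.instance.status.url, "InferenceGraph has no status.url"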
3 changes: 2 additions & 1 deletion tests/model_serving/model_server/utils.py
@@ -4,6 +4,7 @@
from string import Template
from typing import Any, Optional

from ocp_resources.inference_graph import InferenceGraph
from ocp_resources.inference_service import InferenceService
from simple_logger.logger import get_logger

@@ -17,7 +18,7 @@


def verify_inference_response(
inference_service: InferenceService,
inference_service: InferenceService | InferenceGraph,
inference_config: dict[str, Any],
inference_type: str,
protocol: str,
2 changes: 2 additions & 0 deletions utilities/constants.py
@@ -45,6 +45,8 @@ class ModelStoragePath:
FLAN_T5_SMALL_HF: str = f"{ModelName.FLAN_T5_SMALL}/{ModelName.FLAN_T5_SMALL_HF}"
BLOOM_560M_CAIKIT: str = f"{ModelName.BLOOM_560M}/{ModelAndFormat.BLOOM_560M_CAIKIT}"
MNIST_8_ONNX: str = f"{ModelName.MNIST}-8.onnx"
DOG_BREED_ONNX: str = "dog_breed_classification"
CAT_DOG_ONNX: str = "cat_dog_classification"


class CurlOutput:
48 changes: 27 additions & 21 deletions utilities/inference_utils.py
@@ -8,6 +8,7 @@
from urllib.parse import urlparse

from kubernetes.dynamic import DynamicClient
from ocp_resources.inference_graph import InferenceGraph
from ocp_resources.inference_service import InferenceService
from ocp_resources.resource import get_client
from ocp_resources.service import Service
@@ -44,15 +45,17 @@ class Inference:
STREAMING: str = "streaming"
INFER: str = "infer"
MNIST: str = f"infer-{ModelName.MNIST}"
GRAPH: str = "graph"

def __init__(self, inference_service: InferenceService):
def __init__(self, inference_service: InferenceService | InferenceGraph):
"""
Args:
inference_service: InferenceService object
"""
self.inference_service = inference_service
self.deployment_mode = self.get_deployment_type()
self.runtime = get_inference_serving_runtime(isvc=self.inference_service)
if isinstance(self.inference_service, InferenceService):
self.runtime = get_inference_serving_runtime(isvc=self.inference_service)
self.visibility_exposed = self.is_service_exposed()

self.inference_url = self.get_inference_url()
@@ -69,7 +72,15 @@ def get_deployment_type(self) -> str:
):
return deployment_type

return self.inference_service.instance.status.deploymentMode
if isinstance(self.inference_service, InferenceService):
return self.inference_service.instance.status.deploymentMode

elif isinstance(self.inference_service, InferenceGraph):
# TODO: Get deployment type from InferenceGraph once it is supported and added as `status.deploymentMode`
return KServeDeploymentType.SERVERLESS

else:
raise ValueError(f"Unknown inference service type: {self.inference_service.name}")

def get_inference_url(self) -> str:
"""
@@ -83,20 +94,13 @@

"""
if self.visibility_exposed:
if self.deployment_mode == KServeDeploymentType.SERVERLESS and (
url := self.inference_service.instance.status.components.predictor.url
):
return urlparse(url=url).netloc

elif self.deployment_mode == KServeDeploymentType.RAW_DEPLOYMENT and (
url := self.inference_service.instance.status.url
):
return urlparse(url=url).netloc

elif self.deployment_mode == KServeDeploymentType.MODEL_MESH:
if self.deployment_mode == KServeDeploymentType.MODEL_MESH:
route = get_model_route(client=self.inference_service.client, isvc=self.inference_service)
return route.instance.spec.host

elif url := self.inference_service.instance.status.url:
return urlparse(url=url).netloc

else:
raise ValueError(f"{self.inference_service.name}: No url found for inference")

@@ -113,7 +117,10 @@ def is_service_exposed(self) -> bool:
"""
labels = self.inference_service.labels

if self.deployment_mode in KServeDeploymentType.RAW_DEPLOYMENT:
if (
isinstance(self.inference_service, InferenceService)
and self.deployment_mode in KServeDeploymentType.RAW_DEPLOYMENT
):
return labels and labels.get(Labels.Kserve.NETWORKING_KSERVE_IO) == Labels.Kserve.EXPOSED

if self.deployment_mode == KServeDeploymentType.SERVERLESS:
@@ -528,6 +535,7 @@ def create_isvc(
scale_target: int | None = None,
model_env_variables: list[dict[str, str]] | None = None,
teardown: bool = True,
protocol_version: str | None = None,
) -> Generator[InferenceService, Any, Any]:
"""
Create InferenceService object.
@@ -561,6 +569,7 @@
scale_target (int): Scale target
model_env_variables (list[dict[str, str]]): Model environment variables
teardown (bool): Teardown
protocol_version (str): Protocol version of the model server

Yields:
InferenceService: InferenceService object
@@ -610,12 +619,6 @@
if deployment_mode:
_annotations = {Annotations.KserveIo.DEPLOYMENT_MODE: deployment_mode}

if deployment_mode == KServeDeploymentType.SERVERLESS:
_annotations.update({
"serving.knative.openshift.io/enablePassthrough": "true",
"sidecar.istio.io/inject": "true",
"sidecar.istio.io/rewriteAppHTTPProbers": "true",
})
if enable_auth:
# model mesh auth is set in ServingRuntime
if deployment_mode == KServeDeploymentType.SERVERLESS:
@@ -646,6 +649,9 @@
if scale_target is not None:
predictor_dict["scaleTarget"] = scale_target

if protocol_version is not None:
predictor_dict["model"]["protocolVersion"] = protocol_version

with InferenceService(
client=client,
name=name,
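
For reference, the new protocol_version argument is written onto the predictor's model spec (see predictor_dict["model"]["protocolVersion"] above). A rough sketch of the predictor fragment this yields for the fixtures in this PR (field names follow KServe's v1beta1 predictor.model spec; the runtime and storage values are illustrative):

predictor = {
    "model": {
        "modelFormat": {"name": "onnx"},
        "runtime": "kserve-ovms",  # illustrative runtime name
        "protocolVersion": "v2",  # set only when protocol_version is passed
        "storage": {"key": "models-endpoint-s3-secret", "path": "cat_dog_classification"},  # illustrative
    }
}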