Skip to content

Commit 85a8518

Browse files
brettmthompson and pre-commit-ci[bot]
authored and committed
Adding Test For InferenceService Zero Initial Scale (opendatahub-io#262)
* adding test for zero initial scale
  Signed-off-by: Brett Thompson <[email protected]>
* [pre-commit.ci] auto fixes from pre-commit.com hooks
  for more information, see https://pre-commit.ci
* fixing precommit error
  Signed-off-by: Brett Thompson <[email protected]>
* using label_selectors when getting deployment
  Signed-off-by: Brett Thompson <[email protected]>
* [pre-commit.ci] auto fixes from pre-commit.com hooks
  for more information, see https://pre-commit.ci
* adding argument names to func call and running pre-commit on all files
  Signed-off-by: Brett Thompson <[email protected]>
* fixing bug in ovms_kserve_inference_service function that was preventing isvcs from being created with 0 min-replicas
  Signed-off-by: Brett Thompson <[email protected]>
---------
Signed-off-by: Brett Thompson <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent a5d05cb commit 85a8518

File tree

2 files changed

+97
-2
lines changed

2 files changed

+97
-2
lines changed

tests/model_serving/model_server/conftest.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -368,16 +368,18 @@ def ovms_kserve_inference_service(
368368
if env_vars := request.param.get("env-vars"):
369369
isvc_kwargs["model_env_variables"] = env_vars
370370

371-
if min_replicas := request.param.get("min-replicas"):
371+
if (min_replicas := request.param.get("min-replicas")) is not None:
372372
isvc_kwargs["min_replicas"] = min_replicas
373+
if min_replicas == 0:
374+
isvc_kwargs["wait_for_predictor_pods"] = False
373375

374376
if max_replicas := request.param.get("max-replicas"):
375377
isvc_kwargs["max_replicas"] = max_replicas
376378

377379
if scale_metric := request.param.get("scale-metric"):
378380
isvc_kwargs["scale_metric"] = scale_metric
379381

380-
if scale_target := request.param.get("scale-target"):
382+
if (scale_target := request.param.get("scale-target")) is not None:
381383
isvc_kwargs["scale_target"] = scale_target
382384

383385
with create_isvc(**isvc_kwargs) as isvc:
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
import pytest
2+
from ocp_resources.deployment import Deployment
3+
4+
from tests.model_serving.model_server.serverless.constants import (
5+
ONNX_SERVERLESS_INFERENCE_SERVICE_CONFIG,
6+
)
7+
from tests.model_serving.model_server.serverless.utils import verify_no_inference_pods
8+
from tests.model_serving.model_server.utils import verify_inference_response
9+
from utilities.constants import (
10+
Protocols,
11+
RunTimeConfigs,
12+
)
13+
from utilities.exceptions import DeploymentValidationError
14+
from utilities.general import create_isvc_label_selector_str
15+
from utilities.inference_utils import Inference
16+
from utilities.manifests.onnx import ONNX_INFERENCE_CONFIG
17+
18+
# Module-wide pytest marks: every test collected from this file carries the
# "serverless" and "sanity" marks and requests the "valid_aws_config" fixture.
pytestmark = [pytest.mark.serverless, pytest.mark.sanity, pytest.mark.usefixtures("valid_aws_config")]
23+
24+
25+
@pytest.mark.serverless
@pytest.mark.parametrize(
    "model_namespace, ovms_kserve_serving_runtime, ovms_kserve_inference_service",
    [
        pytest.param(
            {"name": "serverless-initial-scale-zero"},
            RunTimeConfigs.ONNX_OPSET13_RUNTIME_CONFIG,
            # Base serverless ONNX config, with the minimum replica count set to zero.
            {**ONNX_SERVERLESS_INFERENCE_SERVICE_CONFIG, "min-replicas": 0},
        )
    ],
    indirect=True,
)
class TestServerlessInitialScaleZero:
    """Checks for an inference service that is parametrized with ``min-replicas`` set to 0.

    The first two tests inspect the pods and the deployment of the service; the
    third test is declared (via ``pytest.mark.dependency``) as depending on both
    of them and sends an inference request to the service.
    """

    @pytest.mark.dependency(name="test_no_serverless_pods_created_for_zero_initial_scale")
    def test_no_serverless_pods_created_for_zero_initial_scale(self, admin_client, ovms_kserve_inference_service):
        """Check that the inference service has no inference pods when zero min-replicas were requested."""
        verify_no_inference_pods(client=admin_client, isvc=ovms_kserve_inference_service)

    @pytest.mark.dependency(name="test_no_serverless_replicas_created_for_zero_initial_scale")
    def test_no_serverless_replicas_created_for_zero_initial_scale(
        self, admin_client, ovms_kserve_inference_service, ovms_kserve_serving_runtime
    ):
        """Check that the inference service deployment reports zero replicas.

        Raises:
            DeploymentValidationError: If no matching deployment is found, or if the
                first matching deployment has a ``spec.replicas`` value other than 0.
        """
        isvc_selector = create_isvc_label_selector_str(
            isvc=ovms_kserve_inference_service,
            resource_type="deployment",
            runtime_name=ovms_kserve_serving_runtime.name,
        )
        # NOTE(review): configurationGeneration=1 presumably targets the first
        # Knative revision of the service - confirm.
        selector = ",".join(("serving.knative.dev/configurationGeneration=1", isvc_selector))

        matching_deployments = list(
            Deployment.get(
                label_selector=selector,
                client=admin_client,
                namespace=ovms_kserve_inference_service.namespace,
            )
        )

        if not matching_deployments:
            raise DeploymentValidationError(
                f"Inference Service {ovms_kserve_inference_service.name} new deployment not found"
            )

        # Only the first deployment returned by the query is inspected.
        deployment = matching_deployments[0]

        if deployment.instance.spec.replicas != 0:
            raise DeploymentValidationError(
                f"Inference Service {ovms_kserve_inference_service.name} deployment should have 0 replicas when created"
            )

        # The spec asks for zero replicas; additionally wait with deployed=False.
        deployment.wait_for_replicas(deployed=False)

    @pytest.mark.dependency(
        depends=[
            "test_no_serverless_pods_created_for_zero_initial_scale",
            "test_no_serverless_replicas_created_for_zero_initial_scale",
        ]
    )
    def test_serverless_inference_after_zero_initial_scale(self, ovms_kserve_inference_service):
        """Check that the model answers an inference request after being created with an initial scale of zero."""
        verify_inference_response(
            inference_service=ovms_kserve_inference_service,
            inference_config=ONNX_INFERENCE_CONFIG,
            inference_type=Inference.INFER,
            protocol=Protocols.HTTPS,
            use_default_query=True,
        )

0 commit comments

Comments
 (0)