
Commit 38fc0de

[model server] add scale to zero tests (#172)
* Create size-labeler.yml
* Delete .github/workflows/size-labeler.yml
* model mesh - add auth tests
* xx
* feat: add scale to zero tests
* feat: add scale to zero tests
1 parent 9c29239 commit 38fc0de

6 files changed: +162 -1 lines changed


tests/model_serving/model_server/serverless/__init__.py

Whitespace-only changes.
Lines changed: 21 additions & 0 deletions
@@ -0,0 +1,21 @@
import pytest
from _pytest.fixtures import FixtureRequest
from ocp_resources.inference_service import InferenceService
from ocp_resources.resource import ResourceEditor


@pytest.fixture(scope="class")
def inference_service_patched_replicas(
    request: FixtureRequest, ovms_serverless_inference_service: InferenceService
) -> InferenceService:
    ResourceEditor(
        patches={
            ovms_serverless_inference_service: {
                "spec": {
                    "predictor": {"minReplicas": request.param["min-replicas"]},
                }
            }
        }
    ).update()

    return ovms_serverless_inference_service
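
For context (not part of this commit): the fixture reads request.param, which is only populated when a test parametrizes the fixture with indirect=True, as the test class below does. A minimal standalone sketch of that pytest mechanism, using hypothetical names unrelated to this commit:

import pytest


@pytest.fixture
def patched_replicas(request):
    # With indirect=True, request.param is the dict passed via pytest.param(...).
    return request.param["min-replicas"]


@pytest.mark.parametrize("patched_replicas", [pytest.param({"min-replicas": 0})], indirect=True)
def test_min_replicas_value(patched_replicas):
    assert patched_replicas == 0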
Lines changed: 96 additions & 0 deletions
@@ -0,0 +1,96 @@
import pytest
from ocp_resources.deployment import Deployment

from tests.model_serving.model_server.serverless.utils import verify_no_inference_pods
from tests.model_serving.model_server.utils import verify_inference_response
from utilities.constants import (
    ModelFormat,
    ModelInferenceRuntime,
    ModelVersion,
    Protocols,
)
from utilities.exceptions import DeploymentValidationError
from utilities.inference_utils import Inference
from utilities.manifests.onnx import ONNX_INFERENCE_CONFIG

pytestmark = [
    pytest.mark.serverless,
    pytest.mark.sanity,
    pytest.mark.usefixtures("valid_aws_config"),
]


@pytest.mark.serverless
@pytest.mark.parametrize(
    "model_namespace, openvino_kserve_serving_runtime, ovms_serverless_inference_service",
    [
        pytest.param(
            {"name": "serverless-scale-zero"},
            {
                "runtime-name": ModelInferenceRuntime.ONNX_RUNTIME,
                "model-format": {ModelFormat.ONNX: ModelVersion.OPSET13},
            },
            {
                "name": ModelFormat.ONNX,
                "model-version": ModelVersion.OPSET13,
                "model-dir": "test-dir",
            },
        )
    ],
    indirect=True,
)
class TestServerlessScaleToZero:
    def test_serverless_before_scale_to_zero(self, ovms_serverless_inference_service):
        """Verify model can be queried before scaling to zero"""
        verify_inference_response(
            inference_service=ovms_serverless_inference_service,
            inference_config=ONNX_INFERENCE_CONFIG,
            inference_type=Inference.INFER,
            protocol=Protocols.HTTPS,
            use_default_query=True,
        )

    @pytest.mark.parametrize(
        "inference_service_patched_replicas",
        [pytest.param({"min-replicas": 0})],
        indirect=True,
    )
    @pytest.mark.dependency(name="test_no_serverless_pods_after_scale_to_zero")
    def test_no_serverless_pods_after_scale_to_zero(self, admin_client, inference_service_patched_replicas):
        """Verify pods are scaled to zero"""
        verify_no_inference_pods(client=admin_client, isvc=inference_service_patched_replicas)

    @pytest.mark.dependency(depends=["test_no_serverless_pods_after_scale_to_zero"])
    def test_serverless_inference_after_scale_to_zero(self, ovms_serverless_inference_service):
        """Verify model can be queried after scaling to zero"""
        verify_inference_response(
            inference_service=ovms_serverless_inference_service,
            inference_config=ONNX_INFERENCE_CONFIG,
            inference_type=Inference.INFER,
            protocol=Protocols.HTTPS,
            use_default_query=True,
        )

    @pytest.mark.dependency(depends=["test_no_serverless_pods_after_scale_to_zero"])
    def test_no_serverless_pods_when_no_traffic(self, admin_client, ovms_serverless_inference_service):
        """Verify pods are scaled to zero when no traffic is sent"""
        verify_no_inference_pods(client=admin_client, isvc=ovms_serverless_inference_service)

    @pytest.mark.parametrize(
        "inference_service_patched_replicas",
        [pytest.param({"min-replicas": 1})],
        indirect=True,
    )
    def test_serverless_pods_after_scale_to_one_replica(self, admin_client, inference_service_patched_replicas):
        """Verify pod is running after scaling to 1 replica"""
        for deployment in Deployment.get(
            client=admin_client,
            namespace=inference_service_patched_replicas.namespace,
        ):
            if deployment.labels["serving.knative.dev/configurationGeneration"] == "3":
                deployment.wait_for_replicas()
                return

        raise DeploymentValidationError(
            f"Inference Service {inference_service_patched_replicas.name} new deployment not found"
        )
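
For context (not part of this commit): ordering between the scale-to-zero check and the two follow-up tests is enforced with the pytest-dependency plugin, which these markers imply is installed; if the named test fails, the dependent tests are skipped rather than run against a service that never scaled down. A minimal standalone sketch with hypothetical test names:

import pytest


@pytest.mark.dependency(name="scaled_down")
def test_scale_down():
    # If this fails, the dependent test below is reported as skipped, not failed.
    assert True


@pytest.mark.dependency(depends=["scaled_down"])
def test_query_after_scale_down():
    # Runs only when the "scaled_down" test passed.
    assert True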
tests/model_serving/model_server/serverless/utils.py

Lines changed: 40 additions & 0 deletions
@@ -0,0 +1,40 @@
from kubernetes.dynamic import DynamicClient
from ocp_resources.inference_service import InferenceService
from simple_logger.logger import get_logger
from timeout_sampler import TimeoutSampler

from utilities.constants import Timeout
from utilities.infra import get_pods_by_isvc_label


LOGGER = get_logger(name=__name__)


def verify_no_inference_pods(client: DynamicClient, isvc: InferenceService) -> None:
    """
    Verify that no inference pods are running for the given InferenceService.

    Args:
        client (DynamicClient): DynamicClient object
        isvc (InferenceService): InferenceService object

    Raises:
        TimeoutError: If pods still exist after the timeout.

    """
    pods = []

    try:
        for pods in TimeoutSampler(
            wait_timeout=Timeout.TIMEOUT_4MIN,
            sleep=5,
            func=get_pods_by_isvc_label,
            client=client,
            isvc=isvc,
        ):
            if not pods:
                return

    except TimeoutError:
        LOGGER.error(f"{[pod.name for pod in pods]} were not deleted")
        raise
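
For context (not part of this commit): the helper leans on TimeoutSampler's iteration protocol, where each loop pass re-invokes the supplied func with the given kwargs and the sampler stops the polling by raising once wait_timeout elapses, which the except block above logs and re-raises. A condensed sketch of the same polling shape, with a hypothetical check function standing in for get_pods_by_isvc_label:

from timeout_sampler import TimeoutSampler


def hypothetical_remaining_pods() -> list:
    # Stand-in for get_pods_by_isvc_label(client=..., isvc=...).
    return []


# Poll every 5 seconds for up to 240 seconds; break as soon as nothing remains.
for remaining in TimeoutSampler(wait_timeout=240, sleep=5, func=hypothetical_remaining_pods):
    if not remaining:
        break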

utilities/exceptions.py

Lines changed: 4 additions & 0 deletions
@@ -68,3 +68,7 @@ def __init__(self, type: str):
 
     def __str__(self) -> str:
         return f"The {self.type} is not supported"
+
+
+class DeploymentValidationError(Exception):
+    pass

utilities/infra.py

Lines changed: 1 addition & 1 deletion
@@ -91,7 +91,7 @@ def create_ns(
 def wait_for_inference_deployment_replicas(
     client: DynamicClient,
     isvc: InferenceService,
-    runtime_name: str | None,
+    runtime_name: str | None = None,
     expected_num_deployments: int = 1,
     timeout: int = Timeout.TIMEOUT_5MIN,
 ) -> list[Deployment]:
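
For context (not part of this commit): with runtime_name now defaulting to None, callers that do not need to filter on a specific serving runtime can omit the argument. A hypothetical call site; admin_client is an assumed client fixture and isvc an InferenceService object, neither taken from this diff:

# runtime_name is omitted here and falls back to its new default of None.
deployments = wait_for_inference_deployment_replicas(
    client=admin_client,
    isvc=isvc,
)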
