Skip to content
Merged
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
import pytest
from ocp_resources.deployment import Deployment

from tests.model_serving.model_server.serverless.constants import (
ONNX_SERVERLESS_INFERENCE_SERVICE_CONFIG,
)
from tests.model_serving.model_server.serverless.utils import verify_no_inference_pods
from tests.model_serving.model_server.utils import verify_inference_response
from utilities.constants import (
Protocols,
RunTimeConfigs,
)
from utilities.exceptions import DeploymentValidationError
from utilities.general import create_isvc_label_selector_str
from utilities.inference_utils import Inference
from utilities.manifests.onnx import ONNX_INFERENCE_CONFIG

# Module-wide marks: every test here belongs to the serverless suite, runs at
# sanity level, and requires the "valid_aws_config" fixture (model storage
# credentials) to be applied before the tests execute.
pytestmark = [
pytest.mark.serverless,
pytest.mark.sanity,
pytest.mark.usefixtures("valid_aws_config"),
]


@pytest.mark.parametrize(
    "model_namespace, ovms_kserve_serving_runtime, ovms_kserve_inference_service",
    [
        pytest.param(
            {"name": "serverless-initial-scale-zero"},
            RunTimeConfigs.ONNX_OPSET13_RUNTIME_CONFIG,
            {
                **ONNX_SERVERLESS_INFERENCE_SERVICE_CONFIG,
                # Zero min-replicas: the service must come up with no pods until
                # first inference traffic arrives.
                "min-replicas": 0,
            },
        )
    ],
    indirect=True,
)
class TestServerlessInitialScaleZero:
    """Validate a serverless InferenceService created with min-replicas=0.

    Flow: (1) no pods exist after creation, (2) the backing Deployment is
    created with 0 replicas, (3) the model still answers inference requests
    (scale-from-zero) afterwards. Steps 1-2 gate step 3 via pytest-dependency.

    Note: the ``serverless`` mark is already applied module-wide via
    ``pytestmark``, so it is not repeated on the class.
    """

    @pytest.mark.dependency(name="test_no_serverless_pods_created_for_zero_initial_scale")
    def test_no_serverless_pods_created_for_zero_initial_scale(self, admin_client, ovms_kserve_inference_service):
        """Verify no pods are created when inference service initial scale is zero, i.e. zero min-replicas requested."""
        verify_no_inference_pods(client=admin_client, isvc=ovms_kserve_inference_service)

    @pytest.mark.dependency(name="test_no_serverless_replicas_created_for_zero_initial_scale")
    def test_no_serverless_replicas_created_for_zero_initial_scale(
        self, admin_client, ovms_kserve_inference_service, ovms_kserve_serving_runtime
    ):
        """Verify replica count is zero when inference service initial scale is zero.

        Raises:
            DeploymentValidationError: if the Deployment is missing, or was
                created with a non-zero replica count.
        """
        labels = [
            # Knative stamps generation 1 on the first revision's resources.
            "serving.knative.dev/configurationGeneration=1",
            create_isvc_label_selector_str(
                isvc=ovms_kserve_inference_service,
                # Fixed typo: was "depoyment". With the misspelled value the
                # selector could not target the deployment resources.
                resource_type="deployment",
                runtime_name=ovms_kserve_serving_runtime.name,
            ),
        ]

        deployments = list(
            Deployment.get(
                label_selector=",".join(labels), client=admin_client, namespace=ovms_kserve_inference_service.namespace
            )
        )

        if not deployments:
            raise DeploymentValidationError(
                f"Inference Service {ovms_kserve_inference_service.name} new deployment not found"
            )

        if deployments[0].instance.spec.replicas == 0:
            # NOTE(review): presumably waits until the deployment's observed
            # replica count settles to match the 0-replica spec — confirm
            # wait_for_replicas() semantics in ocp_resources.
            deployments[0].wait_for_replicas()
            return

        raise DeploymentValidationError(
            f"Inference Service {ovms_kserve_inference_service.name} deployment should have 0 replicas when created"
        )

    @pytest.mark.dependency(
        depends=[
            "test_no_serverless_pods_created_for_zero_initial_scale",
            "test_no_serverless_replicas_created_for_zero_initial_scale",
        ]
    )
    def test_serverless_inference_after_zero_initial_scale(self, ovms_kserve_inference_service):
        """Verify model can be queried after being created with an initial scale of zero."""
        verify_inference_response(
            inference_service=ovms_kserve_inference_service,
            inference_config=ONNX_INFERENCE_CONFIG,
            inference_type=Inference.INFER,
            protocol=Protocols.HTTPS,
            use_default_query=True,
        )