# Patch summary: let the ovms_kserve_inference_service fixture honour explicit zero values for
# "min-replicas" and "scale-target", and add serverless tests for an InferenceService created with zero min-replicas.
diff --git a/tests/model_serving/model_server/conftest.py b/tests/model_serving/model_server/conftest.py
index ef6092fef..9e2da511a 100644
--- a/tests/model_serving/model_server/conftest.py
+++ b/tests/model_serving/model_server/conftest.py
@@ -368,8 +368,10 @@ def ovms_kserve_inference_service(
     if env_vars := request.param.get("env-vars"):
         isvc_kwargs["model_env_variables"] = env_vars
 
-    if min_replicas := request.param.get("min-replicas"):
+    if (min_replicas := request.param.get("min-replicas")) is not None:  # explicit 0 must not be skipped as falsy
         isvc_kwargs["min_replicas"] = min_replicas
+        if min_replicas == 0:
+            isvc_kwargs["wait_for_predictor_pods"] = False  # no pods are expected at zero min-replicas
 
     if max_replicas := request.param.get("max-replicas"):
         isvc_kwargs["max_replicas"] = max_replicas
@@ -377,7 +379,7 @@ def ovms_kserve_inference_service(
     if scale_metric := request.param.get("scale-metric"):
         isvc_kwargs["scale_metric"] = scale_metric
 
-    if scale_target := request.param.get("scale-target"):
+    if (scale_target := request.param.get("scale-target")) is not None:  # pass any non-None value, including 0
         isvc_kwargs["scale_target"] = scale_target
 
     with create_isvc(**isvc_kwargs) as isvc:
diff --git a/tests/model_serving/model_server/serverless/test_zero_initial_scale.py b/tests/model_serving/model_server/serverless/test_zero_initial_scale.py
new file mode 100644
index 000000000..5133fb187
--- /dev/null
+++ b/tests/model_serving/model_server/serverless/test_zero_initial_scale.py
@@ -0,0 +1,93 @@
+import pytest
+from ocp_resources.deployment import Deployment
+
+from tests.model_serving.model_server.serverless.constants import (
+    ONNX_SERVERLESS_INFERENCE_SERVICE_CONFIG,
+)
+from tests.model_serving.model_server.serverless.utils import verify_no_inference_pods
+from tests.model_serving.model_server.utils import verify_inference_response
+from utilities.constants import (
+    Protocols,
+    RunTimeConfigs,
+)
+from utilities.exceptions import DeploymentValidationError
+from utilities.general import create_isvc_label_selector_str
+from utilities.inference_utils import Inference
+from utilities.manifests.onnx import ONNX_INFERENCE_CONFIG
+
+pytestmark = [  # marks applied to every test in this module
+    pytest.mark.serverless,
+    pytest.mark.sanity,
+    pytest.mark.usefixtures("valid_aws_config"),
+]
+
+
+@pytest.mark.serverless  # NOTE(review): duplicates the serverless mark already listed in pytestmark
+@pytest.mark.parametrize(
+    "model_namespace, ovms_kserve_serving_runtime, ovms_kserve_inference_service",
+    [
+        pytest.param(
+            {"name": "serverless-initial-scale-zero"},
+            RunTimeConfigs.ONNX_OPSET13_RUNTIME_CONFIG,
+            {
+                **ONNX_SERVERLESS_INFERENCE_SERVICE_CONFIG,
+                "min-replicas": 0,  # zero initial scale: the case under test
+            },
+        )
+    ],
+    indirect=True,  # each value is handed to the fixture of the same name via request.param
+)
+class TestServerlessInitialScaleZero:
+    @pytest.mark.dependency(name="test_no_serverless_pods_created_for_zero_initial_scale")
+    def test_no_serverless_pods_created_for_zero_initial_scale(self, admin_client, ovms_kserve_inference_service):
+        """Verify no pods are created when inference service initial scale is zero, i.e. zero min-replicas requested."""
+        verify_no_inference_pods(client=admin_client, isvc=ovms_kserve_inference_service)
+
+    @pytest.mark.dependency(name="test_no_serverless_replicas_created_for_zero_initial_scale")
+    def test_no_serverless_replicas_created_for_zero_initial_scale(
+        self, admin_client, ovms_kserve_inference_service, ovms_kserve_serving_runtime
+    ):
+        """Verify replica count is zero when inference service initial scale is zero"""
+        labels = [  # joined with "," below into a single label selector
+            "serving.knative.dev/configurationGeneration=1",  # presumably the first Knative generation - confirm
+            create_isvc_label_selector_str(
+                isvc=ovms_kserve_inference_service,
+                resource_type="deployment",
+                runtime_name=ovms_kserve_serving_runtime.name,
+            ),
+        ]
+
+        deployments = list(
+            Deployment.get(
+                label_selector=",".join(labels), client=admin_client, namespace=ovms_kserve_inference_service.namespace
+            )
+        )
+
+        if not deployments:  # nothing matched the selector
+            raise DeploymentValidationError(
+                f"Inference Service {ovms_kserve_inference_service.name} new deployment not found"
+            )
+
+        if deployments[0].instance.spec.replicas == 0:  # only the first matching deployment is inspected
+            deployments[0].wait_for_replicas(deployed=False)  # presumably waits for zero running replicas - confirm
+            return
+
+        raise DeploymentValidationError(
+            f"Inference Service {ovms_kserve_inference_service.name} deployment should have 0 replicas when created"
+        )
+
+    @pytest.mark.dependency(  # depends on both zero-scale checks defined above
+        depends=[
+            "test_no_serverless_pods_created_for_zero_initial_scale",
+            "test_no_serverless_replicas_created_for_zero_initial_scale",
+        ]
+    )
+    def test_serverless_inference_after_zero_initial_scale(self, ovms_kserve_inference_service):
+        """Verify model can be queried after being created with an initial scale of zero."""
+        verify_inference_response(
+            inference_service=ovms_kserve_inference_service,
+            inference_config=ONNX_INFERENCE_CONFIG,
+            inference_type=Inference.INFER,
+            protocol=Protocols.HTTPS,
+            use_default_query=True,
+        )