Adding Test For InferenceService Zero Initial Scale (opendatahub-io#262)

brettmthompson · pre-commit-ci[bot] · dbasunag · commit 85a8518d2e60 · 2025-05-08T09:15:05.000-04:00
* adding test for zero initial scale Signed-off-by: Brett Thompson <196701379+brettmthompson@users.noreply.github.com> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fixing precommit error Signed-off-by: Brett Thompson <196701379+brettmthompson@users.noreply.github.com> * using label_selectors when getting deployment Signed-off-by: Brett Thompson <196701379+brettmthompson@users.noreply.github.com> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * adding argument names to func call and running pre-commit on all files Signed-off-by: Brett Thompson <196701379+brettmthompson@users.noreply.github.com> * fixing bug in ovms_kserve_inference_service function that was preventing isvcs from being created with 0 min-replicas Signed-off-by: Brett Thompson <196701379+brettmthompson@users.noreply.github.com> --------- Signed-off-by: Brett Thompson <196701379+brettmthompson@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
diff --git a/tests/model_serving/model_server/conftest.py b/tests/model_serving/model_server/conftest.py
@@ -368,16 +368,18 @@ def ovms_kserve_inference_service(
     if env_vars := request.param.get("env-vars"):
         isvc_kwargs["model_env_variables"] = env_vars
 
-    if min_replicas := request.param.get("min-replicas"):
+    if (min_replicas := request.param.get("min-replicas")) is not None:
         isvc_kwargs["min_replicas"] = min_replicas
+        if min_replicas == 0:
+            isvc_kwargs["wait_for_predictor_pods"] = False
 
     if max_replicas := request.param.get("max-replicas"):
         isvc_kwargs["max_replicas"] = max_replicas
 
     if scale_metric := request.param.get("scale-metric"):
         isvc_kwargs["scale_metric"] = scale_metric
 
-    if scale_target := request.param.get("scale-target"):
+    if (scale_target := request.param.get("scale-target")) is not None:
         isvc_kwargs["scale_target"] = scale_target
 
     with create_isvc(**isvc_kwargs) as isvc:
diff --git a/tests/model_serving/model_server/serverless/test_zero_initial_scale.py b/tests/model_serving/model_server/serverless/test_zero_initial_scale.py
@@ -0,0 +1,93 @@
+import pytest
+from ocp_resources.deployment import Deployment
+
+from tests.model_serving.model_server.serverless.constants import (
+    ONNX_SERVERLESS_INFERENCE_SERVICE_CONFIG,
+)
+from tests.model_serving.model_server.serverless.utils import verify_no_inference_pods
+from tests.model_serving.model_server.utils import verify_inference_response
+from utilities.constants import (
+    Protocols,
+    RunTimeConfigs,
+)
+from utilities.exceptions import DeploymentValidationError
+from utilities.general import create_isvc_label_selector_str
+from utilities.inference_utils import Inference
+from utilities.manifests.onnx import ONNX_INFERENCE_CONFIG
+
+pytestmark = [  # pytest marks/fixtures applied to every test in this module
+    pytest.mark.serverless,
+    pytest.mark.sanity,
+    pytest.mark.usefixtures("valid_aws_config"),
+]
+
+
+@pytest.mark.serverless  # NOTE(review): module-level pytestmark already lists this mark; looks redundant
+@pytest.mark.parametrize(
+    "model_namespace, ovms_kserve_serving_runtime, ovms_kserve_inference_service",
+    [
+        pytest.param(
+            {"name": "serverless-initial-scale-zero"},
+            RunTimeConfigs.ONNX_OPSET13_RUNTIME_CONFIG,
+            {
+                **ONNX_SERVERLESS_INFERENCE_SERVICE_CONFIG,
+                "min-replicas": 0,  # scenario under test: ISVC requested with zero minimum replicas
+            },
+        )
+    ],
+    indirect=True,  # values are delivered to the same-named fixtures via request.param
+)
+class TestServerlessInitialScaleZero:  # ISVC created with min-replicas=0: no pods, zero replicas, then inference
+    @pytest.mark.dependency(name="test_no_serverless_pods_created_for_zero_initial_scale")
+    def test_no_serverless_pods_created_for_zero_initial_scale(self, admin_client, ovms_kserve_inference_service):
+        """Verify no pods are created when inference service initial scale is zero, i.e. zero min-replicas requested."""
+        verify_no_inference_pods(client=admin_client, isvc=ovms_kserve_inference_service)
+
+    @pytest.mark.dependency(name="test_no_serverless_replicas_created_for_zero_initial_scale")
+    def test_no_serverless_replicas_created_for_zero_initial_scale(
+        self, admin_client, ovms_kserve_inference_service, ovms_kserve_serving_runtime
+    ):
+        """Verify replica count is zero when inference service initial scale is zero"""
+        labels = [  # selector fragments; joined with "," when querying deployments below
+            "serving.knative.dev/configurationGeneration=1",  # presumably the first Knative revision; TODO confirm
+            create_isvc_label_selector_str(
+                isvc=ovms_kserve_inference_service,
+                resource_type="deployment",
+                runtime_name=ovms_kserve_serving_runtime.name,
+            ),
+        ]
+
+        deployments = list(
+            Deployment.get(
+                label_selector=",".join(labels), client=admin_client, namespace=ovms_kserve_inference_service.namespace
+            )
+        )
+
+        if not deployments:  # no deployment matched the selector in the ISVC namespace
+            raise DeploymentValidationError(
+                f"Inference Service {ovms_kserve_inference_service.name} new deployment not found"
+            )
+
+        if deployments[0].instance.spec.replicas == 0:  # only the first matching deployment is inspected
+            deployments[0].wait_for_replicas(deployed=False)  # presumably waits until no replicas run; confirm
+            return  # success path; anything else falls through to the error below
+
+        raise DeploymentValidationError(
+            f"Inference Service {ovms_kserve_inference_service.name} deployment should have 0 replicas when created"
+        )
+
+    @pytest.mark.dependency(  # declared dependency on both zero-scale checks defined earlier in this class
+        depends=[
+            "test_no_serverless_pods_created_for_zero_initial_scale",
+            "test_no_serverless_replicas_created_for_zero_initial_scale",
+        ]
+    )
+    def test_serverless_inference_after_zero_initial_scale(self, ovms_kserve_inference_service):
+        """Verify model can be queried after being created with an initial scale of zero."""
+        verify_inference_response(
+            inference_service=ovms_kserve_inference_service,
+            inference_config=ONNX_INFERENCE_CONFIG,
+            inference_type=Inference.INFER,
+            protocol=Protocols.HTTPS,
+            use_default_query=True,
+        )