Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
342a5e4
Rough draft for adding tests for stopping and resuming an ISVC
hdefazio Jun 20, 2025
5f652d9
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jun 20, 2025
1ac50f0
RHOAIENG-19886-created raw stop_resume tests. Fixed stop_resume serve…
andresllh Jun 20, 2025
2d4dd0d
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jun 20, 2025
1faaafa
Update utilities/inference_utils.py
hdefazio Jun 20, 2025
b0d5fd6
RHOAIENG-19886-created raw stop_resume tests. Created fixture for pat…
andresllh Jun 25, 2025
662895f
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jun 25, 2025
bb18da3
Merge branch 'main' into feat/stop_resume_model
andresllh Jun 25, 2025
0fa989f
Merge branch 'main' into feat/stop_resume_model
hdefazio Jun 25, 2025
32a885e
RHOAIENG-19886-created raw stop_resume tests. Addressed Edgar's PR co…
andresllh Jun 27, 2025
c5fa6c0
Merge branch 'feat/stop_resume_model' of github.com:hdefazio/opendata…
andresllh Jun 27, 2025
b236e00
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jun 27, 2025
772c4de
Merge branch 'main' into feat/stop_resume_model
andresllh Jul 1, 2025
0184d7c
RHOAIENG-19886-created raw stop_resume tests. Addressed Brett and Edg…
andresllh Jul 1, 2025
76e5215
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jul 1, 2025
1e71542
RHOAIENG-19886-created raw stop_resume tests. Addressed Brett and Edg…
andresllh Jul 1, 2025
f7071fa
Merge branch 'feat/stop_resume_model' of github.com:hdefazio/opendata…
andresllh Jul 1, 2025
ba27c5c
RHOAIENG-19886-created raw stop_resume tests. Addressed Brett and Edg…
andresllh Jul 1, 2025
011d0b8
RHOAIENG-19886-created raw stop_resume tests. Addressed Milind and Br…
andresllh Jul 2, 2025
eb202d8
Merge branch 'main' into feat/stop_resume_model
andresllh Jul 2, 2025
800b496
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jul 2, 2025
edce9a7
RHOAIENG-19886-created raw stop_resume tests. Fix precommit issues.
andresllh Jul 2, 2025
1571e56
Merge branch 'feat/stop_resume_model' of github.com:hdefazio/opendata…
andresllh Jul 2, 2025
7f4867f
RHOAIENG-19886-created raw stop_resume tests. Cleaning up.
andresllh Jul 2, 2025
53cb343
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jul 2, 2025
85fb2f0
RHOAIENG-19886-created raw stop_resume tests. Reworked nested time sa…
andresllh Jul 2, 2025
69f8e4a
Merge branch 'feat/stop_resume_model' of github.com:hdefazio/opendata…
andresllh Jul 2, 2025
f9a3091
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jul 2, 2025
9ce8022
RHOAIENG-19886-created raw stop_resume tests. Fix precommit issues.
andresllh Jul 2, 2025
6fb0c3b
Merge branch 'feat/stop_resume_model' of github.com:hdefazio/opendata…
andresllh Jul 2, 2025
3af8532
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jul 2, 2025
861e86e
RHOAIENG-19886-created raw stop_resume tests. Fix precommit issues.
andresllh Jul 2, 2025
3344265
Merge branch 'feat/stop_resume_model' of github.com:hdefazio/opendata…
andresllh Jul 2, 2025
47889cd
Merge branch 'main' into feat/stop_resume_model
andresllh Jul 7, 2025
5ae2f5c
Merge branch 'main' into feat/stop_resume_model
andresllh Jul 7, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions tests/model_serving/model_server/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,8 @@ def ovms_kserve_inference_service(
if (scale_target := request.param.get("scale-target")) is not None:
isvc_kwargs["scale_target"] = scale_target

isvc_kwargs["stop_resume"] = request.param.get("stop", False)

with create_isvc(**isvc_kwargs) as isvc:
yield isvc

Expand All @@ -382,6 +384,7 @@ def ovms_raw_inference_service(
model_format=ModelAndFormat.OPENVINO_IR,
deployment_mode=KServeDeploymentType.RAW_DEPLOYMENT,
model_version=request.param["model-version"],
stop_resume=request.param.get("stop", False),
) as isvc:
yield isvc

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def test_serverless_before_scale_to_zero(self, ovms_kserve_inference_service):
@pytest.mark.dependency(name=NO_PODS_AFTER_SCALE_TEST_NAME)
def test_no_serverless_pods_after_scale_to_zero(self, unprivileged_client, inference_service_patched_replicas):
    """Verify pods are scaled to zero after patching replicas."""
    # The helper returns a bool; assert on it so a lingering pod fails the test.
    assert verify_no_inference_pods(client=unprivileged_client, isvc=inference_service_patched_replicas)

@pytest.mark.dependency(
name=INFERENCE_AFTER_SCALE_TEST_NAME,
Expand All @@ -84,7 +84,7 @@ def test_serverless_inference_after_scale_to_zero(self, inference_service_patche
@pytest.mark.order(4)
def test_no_serverless_pods_when_no_traffic(self, unprivileged_client, inference_service_patched_replicas):
    """Verify pods are scaled to zero when no traffic is sent."""
    # The helper returns a bool; assert on it so a lingering pod fails the test.
    assert verify_no_inference_pods(client=unprivileged_client, isvc=inference_service_patched_replicas)

@pytest.mark.parametrize(
"inference_service_patched_replicas",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ class TestServerlessInitialScaleZero:
@pytest.mark.dependency(name="test_no_serverless_pods_created_for_zero_initial_scale")
def test_no_serverless_pods_created_for_zero_initial_scale(self, admin_client, ovms_kserve_inference_service):
    """Verify no pods are created when inference service initial scale is zero, i.e. zero min-replicas requested."""
    # The helper returns a bool; assert on it so a lingering pod fails the test.
    assert verify_no_inference_pods(client=admin_client, isvc=ovms_kserve_inference_service)

@pytest.mark.dependency(name="test_no_serverless_replicas_created_for_zero_initial_scale")
def test_no_serverless_replicas_created_for_zero_initial_scale(
Expand Down
18 changes: 12 additions & 6 deletions tests/model_serving/model_server/serverless/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,37 +17,43 @@
LOGGER = get_logger(name=__name__)


def verify_no_inference_pods(
    client: DynamicClient, isvc: InferenceService, wait_timeout: int = Timeout.TIMEOUT_4MIN
) -> bool:
    """
    Verify that no inference pods are running for the given InferenceService.

    Samples the cluster every 5 seconds until either no pods carrying the
    InferenceService label remain (success) or ``wait_timeout`` expires.

    Args:
        client (DynamicClient): DynamicClient object
        isvc (InferenceService): InferenceService object
        wait_timeout (int): Timeout in seconds, default is 4 minutes

    Returns:
        bool: True if no pods are running (including the case where the pod
            resources are gone entirely), False if pods still exist after the
            timeout.
    """
    pods = []

    try:
        for pods in TimeoutSampler(
            wait_timeout=wait_timeout,
            sleep=5,
            exceptions_dict=DEFAULT_CLUSTER_RETRY_EXCEPTIONS,
            func=get_pods_by_isvc_label,
            client=client,
            isvc=isvc,
        ):
            if not pods:
                return True

    except TimeoutExpiredError as e:
        # A ResourceNotFoundError as the sampler's last exception means the
        # pod resources no longer exist, which also satisfies "no pods".
        if isinstance(e.last_exp, ResourceNotFoundError):
            return True
        LOGGER.error(f"{[pod.name for pod in pods]} were not deleted")
        return False
    # Unreachable in practice (the sampler loops until return or timeout),
    # kept so every path has an explicit bool return.
    return True


def wait_for_canary_rollout(isvc: InferenceService, percentage: int, timeout: int = Timeout.TIMEOUT_5MIN) -> None:
Expand Down
40 changes: 40 additions & 0 deletions tests/model_serving/model_server/stop_resume/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
from typing import Generator, Any

import pytest
from ocp_resources.inference_service import InferenceService
from ocp_resources.resource import ResourceEditor
from utilities.constants import Annotations


@pytest.fixture(scope="function")
def patched_inference_service_stop_annotation(
    request: pytest.FixtureRequest,
    ovms_kserve_inference_service: InferenceService,
) -> Generator[InferenceService, Any, Any]:
    """Patch the InferenceService stop annotation for the duration of a test.

    The annotation value comes from ``request.param["stop"]`` (defaulting to
    "false"); ResourceEditor reverts the patch when the context exits.
    """
    stop_value = request.param.get("stop", "false")
    annotation_patch = {
        "metadata": {
            "annotations": {Annotations.KserveIo.FORCE_STOP_RUNTIME: stop_value},
        }
    }
    with ResourceEditor(patches={ovms_kserve_inference_service: annotation_patch}):
        yield ovms_kserve_inference_service


@pytest.fixture(scope="function")
def patched_raw_inference_service_stop_annotation(
    request: pytest.FixtureRequest,
    ovms_raw_inference_service: InferenceService,
) -> Generator[InferenceService, Any, Any]:
    """Patch the raw-deployment InferenceService stop annotation for a test.

    The annotation value comes from ``request.param["stop"]`` (defaulting to
    "false"); ResourceEditor reverts the patch when the context exits.
    """
    stop_value = request.param.get("stop", "false")
    annotation_patch = {
        "metadata": {
            "annotations": {Annotations.KserveIo.FORCE_STOP_RUNTIME: stop_value},
        }
    }
    with ResourceEditor(patches={ovms_raw_inference_service: annotation_patch}):
        yield ovms_raw_inference_service
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
import pytest

from tests.model_serving.model_server.utils import verify_inference_response
from utilities.constants import (
ModelFormat,
ModelVersion,
Protocols,
RunTimeConfigs,
)
from utilities.inference_utils import Inference
from utilities.manifests.onnx import ONNX_INFERENCE_CONFIG
from tests.model_serving.model_server.stop_resume.utils import consistently_verify_no_pods_exist

# NOTE(review): this module's test classes are marked `rawdeployment`, yet the
# module-level mark here is `serverless` — looks like a copy-paste from the
# serverless variant; confirm whether `rawdeployment` was intended.
pytestmark = [pytest.mark.serverless, pytest.mark.usefixtures("valid_aws_config")]


@pytest.mark.rawdeployment
@pytest.mark.smoke
@pytest.mark.parametrize(
    "unprivileged_model_namespace, ovms_kserve_serving_runtime, ovms_raw_inference_service",
    [
        pytest.param(
            {"name": "kserve-raw-stop-resume"},
            RunTimeConfigs.ONNX_OPSET13_RUNTIME_CONFIG,
            {
                "name": ModelFormat.ONNX,
                "model-version": ModelVersion.OPSET13,
                "model-dir": "test-dir",
                "stop": False,
            },
        )
    ],
    indirect=True,
)
class TestStopRaw:
    """Tests for a raw-deployment InferenceService created with stop=False."""

    def test_raw_onnx_rest_inference(
        self, unprivileged_model_namespace, ovms_kserve_serving_runtime, ovms_raw_inference_service
    ):
        """Verify that kserve Raw ONNX model can be queried using REST"""
        verify_inference_response(
            inference_service=ovms_raw_inference_service,
            inference_config=ONNX_INFERENCE_CONFIG,
            inference_type=Inference.INFER,
            protocol=Protocols.HTTPS,
            use_default_query=True,
        )

    @pytest.mark.parametrize(
        "patched_raw_inference_service_stop_annotation",
        [pytest.param({"stop": "true"})],
        indirect=True,
    )
    def test_stop_and_update_to_true_delete_pod_rollout(
        self,
        unprivileged_client,
        unprivileged_model_namespace,
        ovms_kserve_serving_runtime,
        ovms_raw_inference_service,
        patched_raw_inference_service_stop_annotation,
    ):
        """Verify the pod rollout is deleted (no pods exist) after the stop
        annotation is updated to true."""
        # Was two consecutive bare strings; only the first is a docstring,
        # the second was a dead statement — merged into one docstring.
        result = consistently_verify_no_pods_exist(
            client=unprivileged_client,
            isvc=patched_raw_inference_service_stop_annotation,
        )
        assert result, "Verification failed: pods were found when none should exist"


@pytest.mark.rawdeployment
@pytest.mark.smoke
@pytest.mark.parametrize(
    "unprivileged_model_namespace, ovms_kserve_serving_runtime, ovms_raw_inference_service",
    [
        pytest.param(
            {"name": "kserve-raw-stop-resume"},
            RunTimeConfigs.ONNX_OPSET13_RUNTIME_CONFIG,
            {
                "name": ModelFormat.ONNX,
                "model-version": ModelVersion.OPSET13,
                "model-dir": "test-dir",
                "stop": True,
            },
        )
    ],
    indirect=True,
)
class TestStoppedResumeRaw:
    """Tests for a raw-deployment InferenceService created with stop=True,
    then resumed by flipping the stop annotation back to false."""

    def test_stop_and_true_no_pod_rollout(
        self,
        unprivileged_client,
        unprivileged_model_namespace,
        ovms_kserve_serving_runtime,
        ovms_raw_inference_service,
    ):
        """Verify no pod rollout (no pods exist) while the stop annotation
        is true."""
        # Was two consecutive bare strings; only the first is a docstring,
        # the second was a dead statement — merged into one docstring.
        result = consistently_verify_no_pods_exist(
            client=unprivileged_client,
            isvc=ovms_raw_inference_service,
        )
        assert result, "Verification failed: pods were found when none should exist"

    @pytest.mark.parametrize(
        "patched_raw_inference_service_stop_annotation",
        [pytest.param({"stop": "false"})],
        indirect=True,
    )
    def test_stop_and_update_to_false_pod_rollout(
        self,
        unprivileged_client,
        unprivileged_model_namespace,
        ovms_kserve_serving_runtime,
        ovms_raw_inference_service,
        patched_raw_inference_service_stop_annotation,
    ):
        """Verify pods roll out and the kserve Raw ONNX model can be queried
        using REST after the stop annotation is updated to false."""
        verify_inference_response(
            inference_service=patched_raw_inference_service_stop_annotation,
            inference_config=ONNX_INFERENCE_CONFIG,
            inference_type=Inference.INFER,
            protocol=Protocols.HTTPS,
            use_default_query=True,
        )
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
import pytest

from tests.model_serving.model_server.utils import verify_inference_response
from utilities.constants import (
KServeDeploymentType,
ModelFormat,
ModelVersion,
Protocols,
RunTimeConfigs,
)
from utilities.inference_utils import Inference
from utilities.manifests.onnx import ONNX_INFERENCE_CONFIG
from tests.model_serving.model_server.stop_resume.utils import consistently_verify_no_pods_exist

pytestmark = [pytest.mark.serverless, pytest.mark.usefixtures("valid_aws_config")]


@pytest.mark.serverless
@pytest.mark.smoke
@pytest.mark.parametrize(
    "unprivileged_model_namespace, ovms_kserve_serving_runtime, ovms_kserve_inference_service",
    [
        pytest.param(
            {"name": "kserve-serverless-stop-resume"},
            RunTimeConfigs.ONNX_OPSET13_RUNTIME_CONFIG,
            {
                "name": ModelFormat.ONNX,
                "model-version": ModelVersion.OPSET13,
                "model-dir": "test-dir",
                "deployment-mode": KServeDeploymentType.SERVERLESS,
                "stop": False,
            },
        )
    ],
    indirect=True,
)
class TestStopServerless:
    """Tests for a serverless InferenceService created with stop=False."""

    def test_serverless_onnx_rest_inference(
        self, unprivileged_model_namespace, ovms_kserve_serving_runtime, ovms_kserve_inference_service
    ):
        """Verify that kserve Serverless ONNX model can be queried using REST"""
        verify_inference_response(
            inference_service=ovms_kserve_inference_service,
            inference_config=ONNX_INFERENCE_CONFIG,
            inference_type=Inference.INFER,
            protocol=Protocols.HTTPS,
            use_default_query=True,
        )

    @pytest.mark.parametrize(
        "patched_inference_service_stop_annotation",
        [pytest.param({"stop": "true"})],
        indirect=True,
    )
    def test_stop_and_update_to_true_delete_pod_rollout(
        self,
        unprivileged_client,
        unprivileged_model_namespace,
        ovms_kserve_serving_runtime,
        ovms_kserve_inference_service,
        patched_inference_service_stop_annotation,
    ):
        """Verify the pod rollout is deleted (no pods exist) after the stop
        annotation is updated to true."""
        # Was two consecutive bare strings; only the first is a docstring,
        # the second was a dead statement — merged into one docstring.
        result = consistently_verify_no_pods_exist(
            client=unprivileged_client,
            isvc=patched_inference_service_stop_annotation,
        )
        assert result, "Verification failed: pods were found when none should exist"


@pytest.mark.serverless
@pytest.mark.smoke
@pytest.mark.parametrize(
    "unprivileged_model_namespace, ovms_kserve_serving_runtime, ovms_kserve_inference_service",
    [
        pytest.param(
            {"name": "kserve-serverless-stop-resume"},
            RunTimeConfigs.ONNX_OPSET13_RUNTIME_CONFIG,
            {
                "name": ModelFormat.ONNX,
                "model-version": ModelVersion.OPSET13,
                "model-dir": "test-dir",
                "deployment-mode": KServeDeploymentType.SERVERLESS,
                "stop": True,
            },
        )
    ],
    indirect=True,
)
class TestStoppedResumeServerless:
    """Tests for a serverless InferenceService created with stop=True,
    then resumed by flipping the stop annotation back to false."""

    def test_stop_and_true_no_pod_rollout(
        self,
        unprivileged_client,
        unprivileged_model_namespace,
        ovms_kserve_serving_runtime,
        ovms_kserve_inference_service,
    ):
        """Verify no pod rollout (no pods exist) while the stop annotation
        is true."""
        # Was two consecutive bare strings; only the first is a docstring,
        # the second was a dead statement — merged into one docstring.
        result = consistently_verify_no_pods_exist(
            client=unprivileged_client,
            isvc=ovms_kserve_inference_service,
        )
        assert result, "Verification failed: pods were found when none should exist"

    @pytest.mark.parametrize(
        "patched_inference_service_stop_annotation",
        [pytest.param({"stop": "false"})],
        indirect=True,
    )
    def test_stop_and_update_to_false_pod_rollout(
        self,
        unprivileged_client,
        unprivileged_model_namespace,
        ovms_kserve_serving_runtime,
        ovms_kserve_inference_service,
        patched_inference_service_stop_annotation,
    ):
        """Verify pods roll out and the kserve Serverless ONNX model can be
        queried using REST after the stop annotation is updated to false."""
        verify_inference_response(
            inference_service=patched_inference_service_stop_annotation,
            inference_config=ONNX_INFERENCE_CONFIG,
            inference_type=Inference.INFER,
            protocol=Protocols.HTTPS,
            use_default_query=True,
        )
Loading