15 changes: 8 additions & 7 deletions tests/model_serving/model_server/serverless/conftest.py
@@ -22,15 +22,16 @@
def inference_service_patched_replicas(
request: FixtureRequest, ovms_kserve_inference_service: InferenceService
) -> InferenceService:
ResourceEditor(
patches={
ovms_kserve_inference_service: {
"spec": {
"predictor": {"minReplicas": request.param["min-replicas"]},
if hasattr(request, "param"):
ResourceEditor(
patches={
ovms_kserve_inference_service: {
"spec": {
"predictor": {"minReplicas": request.param["min-replicas"]},
}
}
}
}
).update()
).update()

return ovms_kserve_inference_service
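For context, the hasattr(request, "param") guard above lets one fixture serve both tests that parametrize it indirectly and tests that request it with no parameters. A minimal, self-contained sketch of the pattern (hypothetical fixture and test names, not from this repository):

import pytest
from _pytest.fixtures import FixtureRequest


@pytest.fixture
def min_replicas(request: FixtureRequest) -> int:
    # request.param exists only when a test parametrizes this fixture with
    # indirect=True; fall back to a default otherwise.
    if hasattr(request, "param"):
        return request.param["min-replicas"]
    return 1


def test_default_replicas(min_replicas: int):
    assert min_replicas == 1


@pytest.mark.parametrize("min_replicas", [{"min-replicas": 0}], indirect=True)
def test_scaled_to_zero(min_replicas: int):
    assert min_replicas == 0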

@@ -20,6 +20,9 @@
pytest.mark.usefixtures("valid_aws_config"),
]

NO_PODS_AFTER_SCALE_TEST_NAME: str = "test_no_serverless_pods_after_scale_to_zero"
INFERENCE_AFTER_SCALE_TEST_NAME: str = "test_serverless_inference_after_scale_to_zero"


@pytest.mark.serverless
@pytest.mark.parametrize(
@@ -39,6 +42,7 @@
indirect=True,
)
class TestServerlessScaleToZero:
@pytest.mark.order(1)
def test_serverless_before_scale_to_zero(self, ovms_kserve_inference_service):
"""Verify model can be queried before scaling to zero"""
verify_inference_response(
@@ -54,32 +58,41 @@ def test_serverless_before_scale_to_zero(self, ovms_kserve_inference_service):
[pytest.param({"min-replicas": 0})],
indirect=True,
)
@pytest.mark.dependency(name="test_no_serverless_pods_after_scale_to_zero")
@pytest.mark.order(2)
@pytest.mark.dependency(name=NO_PODS_AFTER_SCALE_TEST_NAME)
def test_no_serverless_pods_after_scale_to_zero(self, admin_client, inference_service_patched_replicas):
"""Verify pods are scaled to zero"""
verify_no_inference_pods(client=admin_client, isvc=inference_service_patched_replicas)

@pytest.mark.dependency(depends=["test_no_serverless_pods_after_scale_to_zero"])
def test_serverless_inference_after_scale_to_zero(self, ovms_kserve_inference_service):
@pytest.mark.dependency(
name=INFERENCE_AFTER_SCALE_TEST_NAME,
depends=[NO_PODS_AFTER_SCALE_TEST_NAME],
)
@pytest.mark.order(3)
def test_serverless_inference_after_scale_to_zero(self, inference_service_patched_replicas):
"""Verify model can be queried after scaling to zero"""
verify_inference_response(
inference_service=ovms_kserve_inference_service,
inference_service=inference_service_patched_replicas,
inference_config=ONNX_INFERENCE_CONFIG,
inference_type=Inference.INFER,
protocol=Protocols.HTTPS,
use_default_query=True,
)

@pytest.mark.dependency(depends=["test_no_serverless_pods_after_scale_to_zero"])
def test_no_serverless_pods_when_no_traffic(self, admin_client, ovms_kserve_inference_service):
@pytest.mark.dependency(
depends=[INFERENCE_AFTER_SCALE_TEST_NAME],
)
@pytest.mark.order(4)
def test_no_serverless_pods_when_no_traffic(self, admin_client, inference_service_patched_replicas):
"""Verify pods are scaled to zero when no traffic is sent"""
verify_no_inference_pods(client=admin_client, isvc=ovms_kserve_inference_service)
verify_no_inference_pods(client=admin_client, isvc=inference_service_patched_replicas)

@pytest.mark.parametrize(
"inference_service_patched_replicas",
[pytest.param({"min-replicas": 1})],
indirect=True,
)
@pytest.mark.order(5)
def test_serverless_pods_after_scale_to_one_replica(self, admin_client, inference_service_patched_replicas):
"""Verify pod is running after scaling to 1 replica"""
for deployment in Deployment.get(
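A minimal sketch of how the order and dependency markers above chain tests through module-level name constants (hypothetical test names; assumes the pytest-order and pytest-dependency plugins are installed):

import pytest

SCALE_DOWN_TEST_NAME: str = "test_scale_down"


class TestScaleFlow:
    @pytest.mark.order(1)
    @pytest.mark.dependency(name=SCALE_DOWN_TEST_NAME)
    def test_scale_down(self):
        assert True

    @pytest.mark.order(2)
    @pytest.mark.dependency(depends=[SCALE_DOWN_TEST_NAME])
    def test_query_after_scale_down(self):
        # pytest-dependency skips this test automatically if test_scale_down
        # failed or was skipped; pytest-order guarantees it runs second.
        assert True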
@@ -29,7 +29,11 @@
},
MinIo.PodConfig.KSERVE_MINIO_CONFIG,
MINIO_DATA_CONNECTION_CONFIG,
{"runtime_image": MinIo.PodConfig.KSERVE_MINIO_IMAGE, **RunTimeConfigs.ONNX_OPSET13_RUNTIME_CONFIG},
{
"runtime_image": MinIo.PodConfig.KSERVE_MINIO_IMAGE,
"external-route": True,
**RunTimeConfigs.ONNX_OPSET13_RUNTIME_CONFIG,
},
{
"name": f"{ModelName.MNIST}-model",
"model-format": ModelAndFormat.OPENVINO_IR,
2 changes: 1 addition & 1 deletion tests/model_serving/model_server/upgrade/conftest.py
@@ -290,7 +290,7 @@ def caikit_raw_inference_service_scope_session(
deployment_mode=KServeDeploymentType.RAW_DEPLOYMENT,
storage_key=models_endpoint_s3_secret_scope_session.name,
storage_path=ModelStoragePath.EMBEDDING_MODEL,
external_route=True,
external_route=False,
teardown=teardown_resources,
**isvc_kwargs,
) as isvc:
4 changes: 2 additions & 2 deletions tests/model_serving/model_server/upgrade/test_upgrade.py
@@ -42,7 +42,7 @@ def test_raw_caikit_bge_pre_upgrade_inference(self, caikit_raw_inference_service
inference_service=caikit_raw_inference_service_scope_session,
inference_config=CAIKIT_STANDALONE_INFERENCE_CONFIG,
inference_type="embedding",
protocol=Protocols.HTTPS,
protocol=Protocols.HTTP,
model_name=ModelName.CAIKIT_BGE_LARGE_EN,
use_default_query=True,
)
@@ -146,7 +146,7 @@ def test_raw_caikit_bge_post_upgrade_inference(self, caikit_raw_inference_servic
inference_service=caikit_raw_inference_service_scope_session,
inference_config=CAIKIT_STANDALONE_INFERENCE_CONFIG,
inference_type="embedding",
protocol=Protocols.HTTPS,
protocol=Protocols.HTTP,
model_name=ModelName.CAIKIT_BGE_LARGE_EN,
use_default_query=True,
)
15 changes: 11 additions & 4 deletions tests/model_serving/model_server/utils.py
@@ -152,10 +152,17 @@ def verify_inference_response(
if isinstance(response, list):
response = response[0]

response_text = response[inference.inference_response_text_key_name]
assert response_text == expected_response_text, (
f"Expected: {expected_response_text} does not mathc response: {response_text}"
)
if isinstance(response, dict):
response_text = response[inference.inference_response_text_key_name]
assert response_text == expected_response_text, (
f"Expected: {expected_response_text} does not match response: {response_text}"
)

else:
raise InferenceResponseError(
"Inference response output does not match expected output format."
f"Expected: {expected_response_text}.\nResponse: {res}"
)

else:
raise InferenceResponseError(f"Inference response output not found in response. Response: {res}")
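The isinstance checks above guard against indexing into a payload of an unexpected shape. A standalone sketch of the same pattern (hypothetical key name; not the repository's actual helper):

class InferenceResponseError(Exception):
    pass


def extract_output(response: dict | list, key: str = "outputs") -> object:
    # Some model servers wrap the payload in a single-element list; unwrap it.
    if isinstance(response, list):
        response = response[0]

    # Index only when the payload is a mapping; anything else fails loudly
    # instead of surfacing as a bare KeyError or TypeError.
    if isinstance(response, dict):
        return response[key]

    raise InferenceResponseError(f"Unexpected response format: {response!r}")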
24 changes: 12 additions & 12 deletions utilities/infra.py
@@ -9,7 +9,7 @@
import pytest
from _pytest.fixtures import FixtureRequest
from kubernetes.dynamic import DynamicClient
from kubernetes.dynamic.exceptions import ResourceNotFoundError, ResourceNotUniqueError
from kubernetes.dynamic.exceptions import NotFoundError, ResourceNotFoundError, ResourceNotUniqueError
from ocp_resources.catalog_source import CatalogSource
from ocp_resources.cluster_service_version import ClusterServiceVersion
from ocp_resources.config_map import ConfigMap
@@ -645,9 +645,7 @@ def verify_no_failed_pods(
is_waiting_pull_back_off = (
wait_state := container_status.state.waiting
) and wait_state.reason in (
pod.Status.IMAGE_PULL_BACK_OFF,
pod.Status.CRASH_LOOPBACK_OFF,
pod.Status.ERR_IMAGE_PULL,
"InvalidImageName",
)

@@ -664,8 +662,6 @@
elif pod_status.phase in (
pod.Status.CRASH_LOOPBACK_OFF,
pod.Status.FAILED,
pod.Status.IMAGE_PULL_BACK_OFF,
pod.Status.ERR_IMAGE_PULL,
):
failed_pods[pod.name] = pod_status

@@ -800,13 +796,17 @@ def wait_for_serverless_pods_deletion(resource: Project | Namespace, admin_clien
"""
client = admin_client or get_client()
for pod in Pod.get(dyn_client=client, namespace=resource.name):
if (
pod.exists
and pod.instance.metadata.annotations.get(Annotations.KserveIo.DEPLOYMENT_MODE)
== KServeDeploymentType.SERVERLESS
):
LOGGER.info(f"Waiting for {KServeDeploymentType.SERVERLESS} pod {pod.name} to be deleted")
pod.wait_deleted(timeout=Timeout.TIMEOUT_1MIN)
try:
if (
pod.exists
and pod.instance.metadata.annotations.get(Annotations.KserveIo.DEPLOYMENT_MODE)
== KServeDeploymentType.SERVERLESS
):
LOGGER.info(f"Waiting for {KServeDeploymentType.SERVERLESS} pod {pod.name} to be deleted")
pod.wait_deleted(timeout=Timeout.TIMEOUT_1MIN)

except (ResourceNotFoundError, NotFoundError):
LOGGER.info(f"Pod {pod.name} is deleted")


@retry(
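The try/except added around the pod check tolerates the race in which a pod returned by the listing is garbage-collected before it is inspected. A standalone sketch of that pattern (hypothetical classes; the real code uses ocp_resources.Pod and the kubernetes dynamic-client exceptions):

import logging

LOGGER = logging.getLogger(__name__)


class NotFoundError(Exception):
    """Stand-in for the client error raised when a resource has vanished."""


def wait_for_pods_deletion(pods: list) -> None:
    for pod in pods:
        try:
            # pod.exists and pod.wait_deleted can still raise if the pod was
            # deleted right after the listing returned it.
            if pod.exists:
                LOGGER.info(f"Waiting for pod {pod.name} to be deleted")
                pod.wait_deleted(timeout=60)
        except NotFoundError:
            # Already gone, which is exactly the state being waited for.
            LOGGER.info(f"Pod {pod.name} is deleted")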