Skip to content

Commit 1dce0c5

Browse files
authored
[model server] Remove pod pre-checks for image pull and fix TestServerlessScaleToZero (#256)
* fix: update tests * fix: update tests * fix: update tests * fix: save test dep name * fix: minio mm external route * fix: address comment * fix: address comment * fix: address comment
1 parent 23da254 commit 1dce0c5

File tree

7 files changed

+59
-34
lines changed

7 files changed

+59
-34
lines changed

tests/model_serving/model_server/serverless/conftest.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -22,15 +22,16 @@
2222
def inference_service_patched_replicas(
2323
request: FixtureRequest, ovms_kserve_inference_service: InferenceService
2424
) -> InferenceService:
25-
ResourceEditor(
26-
patches={
27-
ovms_kserve_inference_service: {
28-
"spec": {
29-
"predictor": {"minReplicas": request.param["min-replicas"]},
25+
if hasattr(request, "param"):
26+
ResourceEditor(
27+
patches={
28+
ovms_kserve_inference_service: {
29+
"spec": {
30+
"predictor": {"minReplicas": request.param["min-replicas"]},
31+
}
3032
}
3133
}
32-
}
33-
).update()
34+
).update()
3435

3536
return ovms_kserve_inference_service
3637

tests/model_serving/model_server/serverless/test_scale_to_zero.py

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@
2020
pytest.mark.usefixtures("valid_aws_config"),
2121
]
2222

23+
NO_PODS_AFTER_SCALE_TEST_NAME: str = "test_no_serverless_pods_after_scale_to_zero"
24+
INFERENCE_AFTER_SCALE_TEST_NAME: str = "test_serverless_inference_after_scale_to_zero"
25+
2326

2427
@pytest.mark.serverless
2528
@pytest.mark.parametrize(
@@ -39,6 +42,7 @@
3942
indirect=True,
4043
)
4144
class TestServerlessScaleToZero:
45+
@pytest.mark.order(1)
4246
def test_serverless_before_scale_to_zero(self, ovms_kserve_inference_service):
4347
"""Verify model can be queried before scaling to zero"""
4448
verify_inference_response(
@@ -54,32 +58,41 @@ def test_serverless_before_scale_to_zero(self, ovms_kserve_inference_service):
5458
[pytest.param({"min-replicas": 0})],
5559
indirect=True,
5660
)
57-
@pytest.mark.dependency(name="test_no_serverless_pods_after_scale_to_zero")
61+
@pytest.mark.order(2)
62+
@pytest.mark.dependency(name=NO_PODS_AFTER_SCALE_TEST_NAME)
5863
def test_no_serverless_pods_after_scale_to_zero(self, admin_client, inference_service_patched_replicas):
5964
"""Verify pods are scaled to zero"""
6065
verify_no_inference_pods(client=admin_client, isvc=inference_service_patched_replicas)
6166

62-
@pytest.mark.dependency(depends=["test_no_serverless_pods_after_scale_to_zero"])
63-
def test_serverless_inference_after_scale_to_zero(self, ovms_kserve_inference_service):
67+
@pytest.mark.dependency(
68+
name=INFERENCE_AFTER_SCALE_TEST_NAME,
69+
depends=[NO_PODS_AFTER_SCALE_TEST_NAME],
70+
)
71+
@pytest.mark.order(3)
72+
def test_serverless_inference_after_scale_to_zero(self, inference_service_patched_replicas):
6473
"""Verify model can be queried after scaling to zero"""
6574
verify_inference_response(
66-
inference_service=ovms_kserve_inference_service,
75+
inference_service=inference_service_patched_replicas,
6776
inference_config=ONNX_INFERENCE_CONFIG,
6877
inference_type=Inference.INFER,
6978
protocol=Protocols.HTTPS,
7079
use_default_query=True,
7180
)
7281

73-
@pytest.mark.dependency(depends=["test_no_serverless_pods_after_scale_to_zero"])
74-
def test_no_serverless_pods_when_no_traffic(self, admin_client, ovms_kserve_inference_service):
82+
@pytest.mark.dependency(
83+
depends=[INFERENCE_AFTER_SCALE_TEST_NAME],
84+
)
85+
@pytest.mark.order(4)
86+
def test_no_serverless_pods_when_no_traffic(self, admin_client, inference_service_patched_replicas):
7587
"""Verify pods are scaled to zero when no traffic is sent"""
76-
verify_no_inference_pods(client=admin_client, isvc=ovms_kserve_inference_service)
88+
verify_no_inference_pods(client=admin_client, isvc=inference_service_patched_replicas)
7789

7890
@pytest.mark.parametrize(
7991
"inference_service_patched_replicas",
8092
[pytest.param({"min-replicas": 1})],
8193
indirect=True,
8294
)
95+
@pytest.mark.order(5)
8396
def test_serverless_pods_after_scale_to_one_replica(self, admin_client, inference_service_patched_replicas):
8497
"""Verify pod is running after scaling to 1 replica"""
8598
for deployment in Deployment.get(

tests/model_serving/model_server/storage/minio/test_minio_model_mesh.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,11 @@
2929
},
3030
MinIo.PodConfig.KSERVE_MINIO_CONFIG,
3131
MINIO_DATA_CONNECTION_CONFIG,
32-
{"runtime_image": MinIo.PodConfig.KSERVE_MINIO_IMAGE, **RunTimeConfigs.ONNX_OPSET13_RUNTIME_CONFIG},
32+
{
33+
"runtime_image": MinIo.PodConfig.KSERVE_MINIO_IMAGE,
34+
"external-route": True,
35+
**RunTimeConfigs.ONNX_OPSET13_RUNTIME_CONFIG,
36+
},
3337
{
3438
"name": f"{ModelName.MNIST}-model",
3539
"model-format": ModelAndFormat.OPENVINO_IR,

tests/model_serving/model_server/upgrade/conftest.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -290,7 +290,7 @@ def caikit_raw_inference_service_scope_session(
290290
deployment_mode=KServeDeploymentType.RAW_DEPLOYMENT,
291291
storage_key=models_endpoint_s3_secret_scope_session.name,
292292
storage_path=ModelStoragePath.EMBEDDING_MODEL,
293-
external_route=True,
293+
external_route=False,
294294
teardown=teardown_resources,
295295
**isvc_kwargs,
296296
) as isvc:

tests/model_serving/model_server/upgrade/test_upgrade.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ def test_raw_caikit_bge_pre_upgrade_inference(self, caikit_raw_inference_service
4242
inference_service=caikit_raw_inference_service_scope_session,
4343
inference_config=CAIKIT_STANDALONE_INFERENCE_CONFIG,
4444
inference_type="embedding",
45-
protocol=Protocols.HTTPS,
45+
protocol=Protocols.HTTP,
4646
model_name=ModelName.CAIKIT_BGE_LARGE_EN,
4747
use_default_query=True,
4848
)
@@ -146,7 +146,7 @@ def test_raw_caikit_bge_post_upgrade_inference(self, caikit_raw_inference_servic
146146
inference_service=caikit_raw_inference_service_scope_session,
147147
inference_config=CAIKIT_STANDALONE_INFERENCE_CONFIG,
148148
inference_type="embedding",
149-
protocol=Protocols.HTTPS,
149+
protocol=Protocols.HTTP,
150150
model_name=ModelName.CAIKIT_BGE_LARGE_EN,
151151
use_default_query=True,
152152
)

tests/model_serving/model_server/utils.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -152,10 +152,17 @@ def verify_inference_response(
152152
if isinstance(response, list):
153153
response = response[0]
154154

155-
response_text = response[inference.inference_response_text_key_name]
156-
assert response_text == expected_response_text, (
157-
f"Expected: {expected_response_text} does not mathc response: {response_text}"
158-
)
155+
if isinstance(response, dict):
156+
response_text = response[inference.inference_response_text_key_name]
157+
assert response_text == expected_response_text, (
158+
f"Expected: {expected_response_text} does not match response: {response_text}"
159+
)
160+
161+
else:
162+
raise InferenceResponseError(
163+
"Inference response output does not match expected output format."
164+
f"Expected: {expected_response_text}.\nResponse: {res}"
165+
)
159166

160167
else:
161168
raise InferenceResponseError(f"Inference response output not found in response. Response: {res}")

utilities/infra.py

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
import pytest
1010
from _pytest.fixtures import FixtureRequest
1111
from kubernetes.dynamic import DynamicClient
12-
from kubernetes.dynamic.exceptions import ResourceNotFoundError, ResourceNotUniqueError
12+
from kubernetes.dynamic.exceptions import NotFoundError, ResourceNotFoundError, ResourceNotUniqueError
1313
from ocp_resources.catalog_source import CatalogSource
1414
from ocp_resources.cluster_service_version import ClusterServiceVersion
1515
from ocp_resources.config_map import ConfigMap
@@ -645,9 +645,7 @@ def verify_no_failed_pods(
645645
is_waiting_pull_back_off = (
646646
wait_state := container_status.state.waiting
647647
) and wait_state.reason in (
648-
pod.Status.IMAGE_PULL_BACK_OFF,
649648
pod.Status.CRASH_LOOPBACK_OFF,
650-
pod.Status.ERR_IMAGE_PULL,
651649
"InvalidImageName",
652650
)
653651

@@ -664,8 +662,6 @@ def verify_no_failed_pods(
664662
elif pod_status.phase in (
665663
pod.Status.CRASH_LOOPBACK_OFF,
666664
pod.Status.FAILED,
667-
pod.Status.IMAGE_PULL_BACK_OFF,
668-
pod.Status.ERR_IMAGE_PULL,
669665
):
670666
failed_pods[pod.name] = pod_status
671667

@@ -800,13 +796,17 @@ def wait_for_serverless_pods_deletion(resource: Project | Namespace, admin_clien
800796
"""
801797
client = admin_client or get_client()
802798
for pod in Pod.get(dyn_client=client, namespace=resource.name):
803-
if (
804-
pod.exists
805-
and pod.instance.metadata.annotations.get(Annotations.KserveIo.DEPLOYMENT_MODE)
806-
== KServeDeploymentType.SERVERLESS
807-
):
808-
LOGGER.info(f"Waiting for {KServeDeploymentType.SERVERLESS} pod {pod.name} to be deleted")
809-
pod.wait_deleted(timeout=Timeout.TIMEOUT_1MIN)
799+
try:
800+
if (
801+
pod.exists
802+
and pod.instance.metadata.annotations.get(Annotations.KserveIo.DEPLOYMENT_MODE)
803+
== KServeDeploymentType.SERVERLESS
804+
):
805+
LOGGER.info(f"Waiting for {KServeDeploymentType.SERVERLESS} pod {pod.name} to be deleted")
806+
pod.wait_deleted(timeout=Timeout.TIMEOUT_1MIN)
807+
808+
except (ResourceNotFoundError, NotFoundError):
809+
LOGGER.info(f"Pod {pod.name} is deleted")
810810

811811

812812
@retry(

0 commit comments

Comments
 (0)