diff --git a/tests/model_serving/model_server/serverless/conftest.py b/tests/model_serving/model_server/serverless/conftest.py index dc1b09175..c56c804ac 100644 --- a/tests/model_serving/model_server/serverless/conftest.py +++ b/tests/model_serving/model_server/serverless/conftest.py @@ -22,15 +22,16 @@ def inference_service_patched_replicas( request: FixtureRequest, ovms_kserve_inference_service: InferenceService ) -> InferenceService: - ResourceEditor( - patches={ - ovms_kserve_inference_service: { - "spec": { - "predictor": {"minReplicas": request.param["min-replicas"]}, + if hasattr(request, "param"): + ResourceEditor( + patches={ + ovms_kserve_inference_service: { + "spec": { + "predictor": {"minReplicas": request.param["min-replicas"]}, + } } } - } - ).update() + ).update() return ovms_kserve_inference_service diff --git a/tests/model_serving/model_server/serverless/test_scale_to_zero.py b/tests/model_serving/model_server/serverless/test_scale_to_zero.py index 43304a651..10a67f322 100644 --- a/tests/model_serving/model_server/serverless/test_scale_to_zero.py +++ b/tests/model_serving/model_server/serverless/test_scale_to_zero.py @@ -20,6 +20,9 @@ pytest.mark.usefixtures("valid_aws_config"), ] +NO_PODS_AFTER_SCALE_TEST_NAME: str = "test_no_serverless_pods_after_scale_to_zero" +INFERENCE_AFTER_SCALE_TEST_NAME: str = "test_serverless_inference_after_scale_to_zero" + @pytest.mark.serverless @pytest.mark.parametrize( @@ -39,6 +42,7 @@ indirect=True, ) class TestServerlessScaleToZero: + @pytest.mark.order(1) def test_serverless_before_scale_to_zero(self, ovms_kserve_inference_service): """Verify model can be queried before scaling to zero""" verify_inference_response( @@ -54,32 +58,41 @@ def test_serverless_before_scale_to_zero(self, ovms_kserve_inference_service): [pytest.param({"min-replicas": 0})], indirect=True, ) - @pytest.mark.dependency(name="test_no_serverless_pods_after_scale_to_zero") + @pytest.mark.order(2) + @pytest.mark.dependency(name=NO_PODS_AFTER_SCALE_TEST_NAME) def test_no_serverless_pods_after_scale_to_zero(self, admin_client, inference_service_patched_replicas): """Verify pods are scaled to zero""" verify_no_inference_pods(client=admin_client, isvc=inference_service_patched_replicas) - @pytest.mark.dependency(depends=["test_no_serverless_pods_after_scale_to_zero"]) - def test_serverless_inference_after_scale_to_zero(self, ovms_kserve_inference_service): + @pytest.mark.dependency( + name=INFERENCE_AFTER_SCALE_TEST_NAME, + depends=[NO_PODS_AFTER_SCALE_TEST_NAME], + ) + @pytest.mark.order(3) + def test_serverless_inference_after_scale_to_zero(self, inference_service_patched_replicas): """Verify model can be queried after scaling to zero""" verify_inference_response( - inference_service=ovms_kserve_inference_service, + inference_service=inference_service_patched_replicas, inference_config=ONNX_INFERENCE_CONFIG, inference_type=Inference.INFER, protocol=Protocols.HTTPS, use_default_query=True, ) - @pytest.mark.dependency(depends=["test_no_serverless_pods_after_scale_to_zero"]) - def test_no_serverless_pods_when_no_traffic(self, admin_client, ovms_kserve_inference_service): + @pytest.mark.dependency( + depends=[INFERENCE_AFTER_SCALE_TEST_NAME], + ) + @pytest.mark.order(4) + def test_no_serverless_pods_when_no_traffic(self, admin_client, inference_service_patched_replicas): """Verify pods are scaled to zero when no traffic is sent""" - verify_no_inference_pods(client=admin_client, isvc=ovms_kserve_inference_service) + verify_no_inference_pods(client=admin_client, isvc=inference_service_patched_replicas) @pytest.mark.parametrize( "inference_service_patched_replicas", [pytest.param({"min-replicas": 1})], indirect=True, ) + @pytest.mark.order(5) def test_serverless_pods_after_scale_to_one_replica(self, admin_client, inference_service_patched_replicas): """Verify pod is running after scaling to 1 replica""" for deployment in Deployment.get( diff --git a/tests/model_serving/model_server/storage/minio/test_minio_model_mesh.py b/tests/model_serving/model_server/storage/minio/test_minio_model_mesh.py index 4c98c718f..7bd8b6937 100644 --- a/tests/model_serving/model_server/storage/minio/test_minio_model_mesh.py +++ b/tests/model_serving/model_server/storage/minio/test_minio_model_mesh.py @@ -29,7 +29,11 @@ }, MinIo.PodConfig.KSERVE_MINIO_CONFIG, MINIO_DATA_CONNECTION_CONFIG, - {"runtime_image": MinIo.PodConfig.KSERVE_MINIO_IMAGE, **RunTimeConfigs.ONNX_OPSET13_RUNTIME_CONFIG}, + { + "runtime_image": MinIo.PodConfig.KSERVE_MINIO_IMAGE, + "external-route": True, + **RunTimeConfigs.ONNX_OPSET13_RUNTIME_CONFIG, + }, { "name": f"{ModelName.MNIST}-model", "model-format": ModelAndFormat.OPENVINO_IR, diff --git a/tests/model_serving/model_server/upgrade/conftest.py b/tests/model_serving/model_server/upgrade/conftest.py index 210672480..672be5a49 100644 --- a/tests/model_serving/model_server/upgrade/conftest.py +++ b/tests/model_serving/model_server/upgrade/conftest.py @@ -290,7 +290,7 @@ def caikit_raw_inference_service_scope_session( deployment_mode=KServeDeploymentType.RAW_DEPLOYMENT, storage_key=models_endpoint_s3_secret_scope_session.name, storage_path=ModelStoragePath.EMBEDDING_MODEL, - external_route=True, + external_route=False, teardown=teardown_resources, **isvc_kwargs, ) as isvc: diff --git a/tests/model_serving/model_server/upgrade/test_upgrade.py b/tests/model_serving/model_server/upgrade/test_upgrade.py index ee54d822b..6dc36f2f4 100644 --- a/tests/model_serving/model_server/upgrade/test_upgrade.py +++ b/tests/model_serving/model_server/upgrade/test_upgrade.py @@ -42,7 +42,7 @@ def test_raw_caikit_bge_pre_upgrade_inference(self, caikit_raw_inference_service inference_service=caikit_raw_inference_service_scope_session, inference_config=CAIKIT_STANDALONE_INFERENCE_CONFIG, inference_type="embedding", - protocol=Protocols.HTTPS, + protocol=Protocols.HTTP, model_name=ModelName.CAIKIT_BGE_LARGE_EN, use_default_query=True, ) @@ -146,7 +146,7 @@ def test_raw_caikit_bge_post_upgrade_inference(self, caikit_raw_inference_servic inference_service=caikit_raw_inference_service_scope_session, inference_config=CAIKIT_STANDALONE_INFERENCE_CONFIG, inference_type="embedding", - protocol=Protocols.HTTPS, + protocol=Protocols.HTTP, model_name=ModelName.CAIKIT_BGE_LARGE_EN, use_default_query=True, ) diff --git a/tests/model_serving/model_server/utils.py b/tests/model_serving/model_server/utils.py index 44b9601f4..47f35498a 100644 --- a/tests/model_serving/model_server/utils.py +++ b/tests/model_serving/model_server/utils.py @@ -152,10 +152,17 @@ def verify_inference_response( if isinstance(response, list): response = response[0] - response_text = response[inference.inference_response_text_key_name] - assert response_text == expected_response_text, ( - f"Expected: {expected_response_text} does not mathc response: {response_text}" - ) + if isinstance(response, dict): + response_text = response[inference.inference_response_text_key_name] + assert response_text == expected_response_text, ( + f"Expected: {expected_response_text} does not match response: {response_text}" + ) + + else: + raise InferenceResponseError( + "Inference response output does not match expected output format." + f"Expected: {expected_response_text}.\nResponse: {res}" + ) else: raise InferenceResponseError(f"Inference response output not found in response. Response: {res}") diff --git a/utilities/infra.py b/utilities/infra.py index cd3ed778b..dcac20180 100644 --- a/utilities/infra.py +++ b/utilities/infra.py @@ -9,7 +9,7 @@ import pytest from _pytest.fixtures import FixtureRequest from kubernetes.dynamic import DynamicClient -from kubernetes.dynamic.exceptions import ResourceNotFoundError, ResourceNotUniqueError +from kubernetes.dynamic.exceptions import NotFoundError, ResourceNotFoundError, ResourceNotUniqueError from ocp_resources.catalog_source import CatalogSource from ocp_resources.cluster_service_version import ClusterServiceVersion from ocp_resources.config_map import ConfigMap @@ -645,9 +645,7 @@ def verify_no_failed_pods( is_waiting_pull_back_off = ( wait_state := container_status.state.waiting ) and wait_state.reason in ( - pod.Status.IMAGE_PULL_BACK_OFF, pod.Status.CRASH_LOOPBACK_OFF, - pod.Status.ERR_IMAGE_PULL, "InvalidImageName", ) @@ -664,8 +662,6 @@ def verify_no_failed_pods( elif pod_status.phase in ( pod.Status.CRASH_LOOPBACK_OFF, pod.Status.FAILED, - pod.Status.IMAGE_PULL_BACK_OFF, - pod.Status.ERR_IMAGE_PULL, ): failed_pods[pod.name] = pod_status @@ -800,13 +796,17 @@ def wait_for_serverless_pods_deletion(resource: Project | Namespace, admin_clien """ client = admin_client or get_client() for pod in Pod.get(dyn_client=client, namespace=resource.name): - if ( - pod.exists - and pod.instance.metadata.annotations.get(Annotations.KserveIo.DEPLOYMENT_MODE) - == KServeDeploymentType.SERVERLESS - ): - LOGGER.info(f"Waiting for {KServeDeploymentType.SERVERLESS} pod {pod.name} to be deleted") - pod.wait_deleted(timeout=Timeout.TIMEOUT_1MIN) + try: + if ( + pod.exists + and pod.instance.metadata.annotations.get(Annotations.KserveIo.DEPLOYMENT_MODE) + == KServeDeploymentType.SERVERLESS + ): + LOGGER.info(f"Waiting for {KServeDeploymentType.SERVERLESS} pod {pod.name} to be deleted") + pod.wait_deleted(timeout=Timeout.TIMEOUT_1MIN) + + except (ResourceNotFoundError, NotFoundError): + LOGGER.info(f"Pod {pod.name} is deleted") @retry(