From 1a2d2c9700caa280d2dcc13d0c55878e9c150b3d Mon Sep 17 00:00:00 2001 From: Milind Waykole Date: Thu, 27 Nov 2025 18:17:46 +0530 Subject: [PATCH 1/4] remove kueue test from smoke 2.22 Signed-off-by: Milind Waykole --- tests/model_serving/model_server/kueue/test_kueue_isvc_raw.py | 2 -- .../model_server/kueue/test_kueue_isvc_serverless.py | 2 -- 2 files changed, 4 deletions(-) diff --git a/tests/model_serving/model_server/kueue/test_kueue_isvc_raw.py b/tests/model_serving/model_server/kueue/test_kueue_isvc_raw.py index d25abfc17..31b8d9734 100644 --- a/tests/model_serving/model_server/kueue/test_kueue_isvc_raw.py +++ b/tests/model_serving/model_server/kueue/test_kueue_isvc_raw.py @@ -12,10 +12,8 @@ pytestmark = [ pytest.mark.rawdeployment, - pytest.mark.sanity, pytest.mark.usefixtures("valid_aws_config"), pytest.mark.kueue, - pytest.mark.smoke, ] NAMESPACE_NAME = "kueue-isvc-raw-test" diff --git a/tests/model_serving/model_server/kueue/test_kueue_isvc_serverless.py b/tests/model_serving/model_server/kueue/test_kueue_isvc_serverless.py index 316762634..689e7f950 100644 --- a/tests/model_serving/model_server/kueue/test_kueue_isvc_serverless.py +++ b/tests/model_serving/model_server/kueue/test_kueue_isvc_serverless.py @@ -13,10 +13,8 @@ pytestmark = [ pytest.mark.serverless, - pytest.mark.sanity, pytest.mark.usefixtures("valid_aws_config"), pytest.mark.kueue, - pytest.mark.smoke, ] NAMESPACE_NAME = "kueue-isvc-serverless-test" From e524a1ae9d8b9039cd10fe624b460b907b09e4d6 Mon Sep 17 00:00:00 2001 From: Milind Waykole Date: Wed, 3 Dec 2025 13:56:52 +0530 Subject: [PATCH 2/4] Fix teardown deletion of project and migrate to ovms test from caikit Signed-off-by: Milind Waykole --- .../model_server/authentication/conftest.py | 216 ++++++++++++++++++ .../test_kserve_token_authentication_raw.py | 67 +++--- ..._kserve_token_authentication_serverless.py | 95 +++----- .../authentication/test_non_admin_users.py | 28 +-- tests/model_serving/model_server/conftest.py | 66 ++++++ 
.../metrics/test_non_admin_users.py | 30 ++- utilities/infra.py | 35 +-- 7 files changed, 388 insertions(+), 149 deletions(-) diff --git a/tests/model_serving/model_server/authentication/conftest.py b/tests/model_serving/model_server/authentication/conftest.py index 23a303cc2..857034dea 100644 --- a/tests/model_serving/model_server/authentication/conftest.py +++ b/tests/model_serving/model_server/authentication/conftest.py @@ -25,6 +25,10 @@ Protocols, ModelInferenceRuntime, RuntimeTemplates, + ModelStoragePath, + RunTimeConfigs, + ModelAndFormat, + ModelVersion, ) from utilities.jira import is_jira_open from utilities.logger import RedactedString @@ -400,3 +404,215 @@ def http_model_mesh_inference_token( ci_service_account: ServiceAccount, http_model_mesh_role_binding: RoleBinding ) -> str: return RedactedString(value=create_inference_token(model_service_account=ci_service_account)) + + +@pytest.fixture(scope="class") +def ovms_kserve_serving_runtime_auth( + unprivileged_client: DynamicClient, + unprivileged_model_namespace: Namespace, +) -> Generator[ServingRuntime, Any, Any]: + with ServingRuntimeFromTemplate( + client=unprivileged_client, + name=f"{Protocols.HTTP}-ovms-runtime", + namespace=unprivileged_model_namespace.name, + template_name=RuntimeTemplates.OVMS_KSERVE, + multi_model=False, + model_format_name=RunTimeConfigs.ONNX_OPSET13_RUNTIME_CONFIG["model-format"], + resources={ + ModelFormat.OVMS: { + "requests": {"cpu": "1", "memory": "4Gi"}, + "limits": {"cpu": "2", "memory": "8Gi"}, + } + }, + ) as model_runtime: + yield model_runtime + + +@pytest.fixture(scope="class") +def http_ovms_serverless_inference_service( + unprivileged_client: DynamicClient, + unprivileged_model_namespace: Namespace, + ovms_kserve_serving_runtime_auth: ServingRuntime, + ci_endpoint_s3_secret: Secret, +) -> Generator[InferenceService, Any, Any]: + with create_isvc( + client=unprivileged_client, + name=f"{Protocols.HTTP}-{ModelFormat.ONNX}", + 
namespace=unprivileged_model_namespace.name, + runtime=ovms_kserve_serving_runtime_auth.name, + model_format=ModelAndFormat.OPENVINO_IR, + deployment_mode=KServeDeploymentType.SERVERLESS, + enable_auth=True, + storage_key=ci_endpoint_s3_secret.name, + storage_path="test-dir", + model_version=ModelVersion.OPSET13, + ) as isvc: + yield isvc + + +@pytest.fixture(scope="class") +def http_ovms_raw_inference_service( + unprivileged_client: DynamicClient, + unprivileged_model_namespace: Namespace, + ovms_kserve_serving_runtime_auth: ServingRuntime, + ci_endpoint_s3_secret: Secret, + model_service_account: ServiceAccount, +) -> Generator[InferenceService, Any, Any]: + with create_isvc( + client=unprivileged_client, + name=f"{Protocols.HTTP}-{ModelFormat.ONNX}", + namespace=unprivileged_model_namespace.name, + runtime=ovms_kserve_serving_runtime_auth.name, + storage_key=ci_endpoint_s3_secret.name, + storage_path="test-dir", + model_format=ModelAndFormat.OPENVINO_IR, + deployment_mode=KServeDeploymentType.RAW_DEPLOYMENT, + model_service_account=model_service_account.name, + enable_auth=True, + external_route=True, + model_version=ModelVersion.OPSET13, + ) as isvc: + yield isvc + + +@pytest.fixture(scope="class") +def http_ovms_raw_inference_service_2( + unprivileged_client: DynamicClient, + unprivileged_model_namespace: Namespace, + ovms_kserve_serving_runtime_auth: ServingRuntime, + ci_endpoint_s3_secret: Secret, + model_service_account_2: ServiceAccount, +) -> Generator[InferenceService, Any, Any]: + with create_isvc( + client=unprivileged_client, + name=f"{Protocols.HTTP}-{ModelFormat.ONNX}-2", + namespace=unprivileged_model_namespace.name, + runtime=ovms_kserve_serving_runtime_auth.name, + storage_key=ci_endpoint_s3_secret.name, + storage_path="test-dir", + model_format=ModelAndFormat.OPENVINO_IR, + deployment_mode=KServeDeploymentType.RAW_DEPLOYMENT, + model_service_account=model_service_account_2.name, + enable_auth=True, + external_route=True, + 
model_version=ModelVersion.OPSET13, + ) as isvc: + yield isvc + + +@pytest.fixture(scope="class") +def http_ovms_view_role( + unprivileged_client: DynamicClient, + http_ovms_serverless_inference_service: InferenceService, +) -> Generator[Role, Any, Any]: + with create_isvc_view_role( + client=unprivileged_client, + isvc=http_ovms_serverless_inference_service, + name=f"{http_ovms_serverless_inference_service.name}-view", + resource_names=[http_ovms_serverless_inference_service.name], + ) as role: + yield role + + +@pytest.fixture(scope="class") +def http_ovms_raw_view_role( + unprivileged_client: DynamicClient, + http_ovms_raw_inference_service: InferenceService, +) -> Generator[Role, Any, Any]: + with create_isvc_view_role( + client=unprivileged_client, + isvc=http_ovms_raw_inference_service, + name=f"{http_ovms_raw_inference_service.name}-view", + resource_names=[http_ovms_raw_inference_service.name], + ) as role: + yield role + + +@pytest.fixture(scope="class") +def http_ovms_role_binding( + unprivileged_client: DynamicClient, + http_ovms_view_role: Role, + model_service_account: ServiceAccount, + http_ovms_serverless_inference_service: InferenceService, +) -> Generator[RoleBinding, Any, Any]: + with RoleBinding( + client=unprivileged_client, + namespace=model_service_account.namespace, + name=f"{Protocols.HTTP}-{model_service_account.name}-ovms-view", + role_ref_name=http_ovms_view_role.name, + role_ref_kind=http_ovms_view_role.kind, + subjects_kind=model_service_account.kind, + subjects_name=model_service_account.name, + ) as rb: + yield rb + + +@pytest.fixture(scope="class") +def http_ovms_raw_role_binding( + unprivileged_client: DynamicClient, + http_ovms_raw_view_role: Role, + model_service_account: ServiceAccount, + http_ovms_raw_inference_service: InferenceService, +) -> Generator[RoleBinding, Any, Any]: + with RoleBinding( + client=unprivileged_client, + namespace=model_service_account.namespace, + 
name=f"{Protocols.HTTP}-{model_service_account.name}-ovms-view", + role_ref_name=http_ovms_raw_view_role.name, + role_ref_kind=http_ovms_raw_view_role.kind, + subjects_kind=model_service_account.kind, + subjects_name=model_service_account.name, + ) as rb: + yield rb + + +@pytest.fixture(scope="class") +def http_ovms_inference_token(model_service_account: ServiceAccount, http_ovms_role_binding: RoleBinding) -> str: + return RedactedString(value=create_inference_token(model_service_account=model_service_account)) + + +@pytest.fixture(scope="class") +def http_ovms_raw_inference_token(model_service_account: ServiceAccount, http_ovms_raw_role_binding: RoleBinding) -> str: + return RedactedString(value=create_inference_token(model_service_account=model_service_account)) + + +@pytest.fixture() +def patched_remove_ovms_authentication_isvc( + http_ovms_serverless_inference_service: InferenceService, +) -> Generator[InferenceService, Any, Any]: + with ResourceEditor( + patches={ + http_ovms_serverless_inference_service: { + "metadata": { + "annotations": {Annotations.KserveAuth.SECURITY: "false"}, + } + } + } + ): + yield http_ovms_serverless_inference_service + + +@pytest.fixture() +def patched_remove_ovms_raw_authentication_isvc( + admin_client: DynamicClient, + unprivileged_client: DynamicClient, + http_ovms_raw_inference_service: InferenceService, +) -> Generator[InferenceService, Any, Any]: + predictor_pod = get_pods_by_isvc_label( + client=unprivileged_client, + isvc=http_ovms_raw_inference_service, + )[0] + + with ResourceEditor( + patches={ + http_ovms_raw_inference_service: { + "metadata": { + "annotations": {Annotations.KserveAuth.SECURITY: "false"}, + } + } + } + ): + if is_jira_open(jira_id="RHOAIENG-19275", admin_client=admin_client): + predictor_pod.wait_deleted() + + yield http_ovms_raw_inference_service diff --git a/tests/model_serving/model_server/authentication/test_kserve_token_authentication_raw.py 
b/tests/model_serving/model_server/authentication/test_kserve_token_authentication_raw.py index f276fbd39..42cc0a45f 100644 --- a/tests/model_serving/model_server/authentication/test_kserve_token_authentication_raw.py +++ b/tests/model_serving/model_server/authentication/test_kserve_token_authentication_raw.py @@ -2,23 +2,22 @@ from ocp_resources.resource import ResourceEditor from tests.model_serving.model_server.utils import verify_inference_response -from utilities.constants import ModelFormat, ModelStoragePath, Protocols +from utilities.constants import Protocols from utilities.constants import Annotations from utilities.inference_utils import Inference, UserInference from utilities.infra import check_pod_status_in_time, get_pods_by_isvc_label from utilities.jira import is_jira_open -from utilities.manifests.caikit_tgis import CAIKIT_TGIS_INFERENCE_CONFIG +from utilities.manifests.onnx import ONNX_INFERENCE_CONFIG pytestmark = pytest.mark.usefixtures("valid_aws_config") @pytest.mark.rawdeployment @pytest.mark.parametrize( - "unprivileged_model_namespace, s3_models_storage_uri", + "unprivileged_model_namespace", [ pytest.param( {"name": "kserve-raw-token-authentication"}, - {"model-dir": ModelStoragePath.FLAN_T5_SMALL_CAIKIT}, ) ], indirect=True, @@ -27,42 +26,40 @@ class TestKserveTokenAuthenticationRawForRest: @pytest.mark.smoke @pytest.mark.ocp_interop @pytest.mark.dependency(name="test_model_authentication_using_rest_raw") - def test_model_authentication_using_rest_raw(self, http_s3_caikit_raw_inference_service, http_raw_inference_token): + def test_model_authentication_using_rest_raw(self, http_ovms_raw_inference_service, http_ovms_raw_inference_token): """Verify RAW Kserve model query with token using REST""" verify_inference_response( - inference_service=http_s3_caikit_raw_inference_service, - inference_config=CAIKIT_TGIS_INFERENCE_CONFIG, - inference_type=Inference.ALL_TOKENS, + inference_service=http_ovms_raw_inference_service, + 
inference_config=ONNX_INFERENCE_CONFIG, + inference_type=Inference.INFER, protocol=Protocols.HTTPS, - model_name=ModelFormat.CAIKIT, use_default_query=True, - token=http_raw_inference_token, + token=http_ovms_raw_inference_token, ) @pytest.mark.dependency(name="test_disabled_raw_model_authentication") - def test_disabled_raw_model_authentication(self, patched_remove_raw_authentication_isvc): + def test_disabled_raw_model_authentication(self, patched_remove_ovms_raw_authentication_isvc): """Verify model query after authentication is disabled""" verify_inference_response( - inference_service=patched_remove_raw_authentication_isvc, - inference_config=CAIKIT_TGIS_INFERENCE_CONFIG, - inference_type=Inference.ALL_TOKENS, + inference_service=patched_remove_ovms_raw_authentication_isvc, + inference_config=ONNX_INFERENCE_CONFIG, + inference_type=Inference.INFER, protocol=Protocols.HTTPS, - model_name=ModelFormat.CAIKIT, use_default_query=True, ) @pytest.mark.sanity @pytest.mark.jira("RHOAIENG-19275", run=False) - def test_raw_disable_enable_authentication_no_pod_rollout(self, http_s3_caikit_raw_inference_service): + def test_raw_disable_enable_authentication_no_pod_rollout(self, http_ovms_raw_inference_service): """Verify no pod rollout when disabling and enabling authentication""" pod = get_pods_by_isvc_label( - client=http_s3_caikit_raw_inference_service.client, - isvc=http_s3_caikit_raw_inference_service, + client=http_ovms_raw_inference_service.client, + isvc=http_ovms_raw_inference_service, )[0] ResourceEditor( patches={ - http_s3_caikit_raw_inference_service: { + http_ovms_raw_inference_service: { "metadata": { "annotations": {Annotations.KserveAuth.SECURITY: "false"}, } @@ -74,7 +71,7 @@ def test_raw_disable_enable_authentication_no_pod_rollout(self, http_s3_caikit_r ResourceEditor( patches={ - http_s3_caikit_raw_inference_service: { + http_ovms_raw_inference_service: { "metadata": { "annotations": {Annotations.KserveAuth.SECURITY: "true"}, } @@ -85,44 +82,42 @@ def 
test_raw_disable_enable_authentication_no_pod_rollout(self, http_s3_caikit_r check_pod_status_in_time(pod=pod, status={pod.Status.RUNNING}) @pytest.mark.dependency(depends=["test_disabled_raw_model_authentication"]) - def test_re_enabled_raw_model_authentication(self, http_s3_caikit_raw_inference_service, http_raw_inference_token): + def test_re_enabled_raw_model_authentication(self, http_ovms_raw_inference_service, http_ovms_raw_inference_token): """Verify model query after authentication is re-enabled""" verify_inference_response( - inference_service=http_s3_caikit_raw_inference_service, - inference_config=CAIKIT_TGIS_INFERENCE_CONFIG, - inference_type=Inference.ALL_TOKENS, + inference_service=http_ovms_raw_inference_service, + inference_config=ONNX_INFERENCE_CONFIG, + inference_type=Inference.INFER, protocol=Protocols.HTTPS, - model_name=ModelFormat.CAIKIT, use_default_query=True, - token=http_raw_inference_token, + token=http_ovms_raw_inference_token, ) @pytest.mark.dependency(name="test_cross_model_authentication_raw") def test_cross_model_authentication_raw( - self, http_s3_caikit_raw_inference_service_2, http_raw_inference_token, admin_client + self, http_ovms_raw_inference_service_2, http_ovms_raw_inference_token, admin_client ): """Verify model with another model token""" if is_jira_open(jira_id="RHOAIENG-19645", admin_client=admin_client): inference = UserInference( - inference_service=http_s3_caikit_raw_inference_service_2, - inference_config=CAIKIT_TGIS_INFERENCE_CONFIG, - inference_type=Inference.ALL_TOKENS, + inference_service=http_ovms_raw_inference_service_2, + inference_config=ONNX_INFERENCE_CONFIG, + inference_type=Inference.INFER, protocol=Protocols.HTTPS, ) res = inference.run_inference_flow( - model_name=ModelFormat.CAIKIT, use_default_query=True, token=http_raw_inference_token, insecure=False + use_default_query=True, token=http_ovms_raw_inference_token, insecure=False ) status_line = res["output"].splitlines()[0] assert "302 Found" in 
status_line, f"Expected '302 Found' in status line, got: {status_line}" else: verify_inference_response( - inference_service=http_s3_caikit_raw_inference_service_2, - inference_config=CAIKIT_TGIS_INFERENCE_CONFIG, - inference_type=Inference.ALL_TOKENS, + inference_service=http_ovms_raw_inference_service_2, + inference_config=ONNX_INFERENCE_CONFIG, + inference_type=Inference.INFER, protocol=Protocols.HTTPS, - model_name=ModelFormat.CAIKIT, use_default_query=True, - token=http_raw_inference_token, + token=http_ovms_raw_inference_token, authorized_user=False, ) diff --git a/tests/model_serving/model_server/authentication/test_kserve_token_authentication_serverless.py b/tests/model_serving/model_server/authentication/test_kserve_token_authentication_serverless.py index 7a5534594..24b12deab 100644 --- a/tests/model_serving/model_server/authentication/test_kserve_token_authentication_serverless.py +++ b/tests/model_serving/model_server/authentication/test_kserve_token_authentication_serverless.py @@ -2,20 +2,19 @@ from ocp_resources.resource import ResourceEditor from tests.model_serving.model_server.utils import verify_inference_response -from utilities.constants import Annotations, ModelFormat, ModelStoragePath, Protocols +from utilities.constants import Annotations, Protocols from utilities.inference_utils import Inference from utilities.infra import check_pod_status_in_time, get_pods_by_isvc_label -from utilities.manifests.caikit_tgis import CAIKIT_TGIS_INFERENCE_CONFIG +from utilities.manifests.onnx import ONNX_INFERENCE_CONFIG pytestmark = [pytest.mark.serverless, pytest.mark.usefixtures("valid_aws_config")] @pytest.mark.parametrize( - "unprivileged_model_namespace, s3_models_storage_uri", + "unprivileged_model_namespace", [ pytest.param( {"name": "kserve-token-authentication"}, - {"model-dir": ModelStoragePath.FLAN_T5_SMALL_CAIKIT}, ) ], indirect=True, @@ -24,107 +23,74 @@ class TestKserveServerlessTokenAuthentication: @pytest.mark.smoke @pytest.mark.ocp_interop 
@pytest.mark.dependency(name="test_model_authentication_using_rest") - def test_model_authentication_using_rest(self, http_s3_caikit_serverless_inference_service, http_inference_token): + def test_model_authentication_using_rest(self, http_ovms_serverless_inference_service, http_ovms_inference_token): """Verify model query with token using REST""" verify_inference_response( - inference_service=http_s3_caikit_serverless_inference_service, - inference_config=CAIKIT_TGIS_INFERENCE_CONFIG, - inference_type=Inference.ALL_TOKENS, + inference_service=http_ovms_serverless_inference_service, + inference_config=ONNX_INFERENCE_CONFIG, + inference_type=Inference.INFER, protocol=Protocols.HTTPS, - model_name=ModelFormat.CAIKIT, use_default_query=True, - token=http_inference_token, - ) - - @pytest.mark.smoke - @pytest.mark.ocp_interop - def test_model_authentication_using_grpc(self, grpc_s3_inference_service, grpc_inference_token): - """Verify model query with token using GRPC""" - verify_inference_response( - inference_service=grpc_s3_inference_service, - inference_config=CAIKIT_TGIS_INFERENCE_CONFIG, - inference_type=Inference.STREAMING, - protocol=Protocols.GRPC, - model_name=ModelFormat.CAIKIT, - use_default_query=True, - token=grpc_inference_token, + token=http_ovms_inference_token, ) @pytest.mark.dependency(name="test_disabled_model_authentication") - def test_disabled_model_authentication(self, patched_remove_authentication_isvc): + def test_disabled_model_authentication(self, patched_remove_ovms_authentication_isvc): """Verify model query after authentication is disabled""" verify_inference_response( - inference_service=patched_remove_authentication_isvc, - inference_config=CAIKIT_TGIS_INFERENCE_CONFIG, - inference_type=Inference.ALL_TOKENS, + inference_service=patched_remove_ovms_authentication_isvc, + inference_config=ONNX_INFERENCE_CONFIG, + inference_type=Inference.INFER, protocol=Protocols.HTTPS, - model_name=ModelFormat.CAIKIT, use_default_query=True, ) 
@pytest.mark.dependency(depends=["test_disabled_model_authentication"]) - def test_re_enabled_model_authentication(self, http_s3_caikit_serverless_inference_service, http_inference_token): + def test_re_enabled_model_authentication(self, http_ovms_serverless_inference_service, http_ovms_inference_token): """Verify model query after authentication is re-enabled""" verify_inference_response( - inference_service=http_s3_caikit_serverless_inference_service, - inference_config=CAIKIT_TGIS_INFERENCE_CONFIG, - inference_type=Inference.ALL_TOKENS, + inference_service=http_ovms_serverless_inference_service, + inference_config=ONNX_INFERENCE_CONFIG, + inference_type=Inference.INFER, protocol=Protocols.HTTPS, - model_name=ModelFormat.CAIKIT, use_default_query=True, - token=http_inference_token, + token=http_ovms_inference_token, ) - def test_model_authentication_using_invalid_token(self, http_s3_caikit_serverless_inference_service): + def test_model_authentication_using_invalid_token(self, http_ovms_serverless_inference_service): """Verify model query with an invalid token""" verify_inference_response( - inference_service=http_s3_caikit_serverless_inference_service, - inference_config=CAIKIT_TGIS_INFERENCE_CONFIG, - inference_type=Inference.ALL_TOKENS, + inference_service=http_ovms_serverless_inference_service, + inference_config=ONNX_INFERENCE_CONFIG, + inference_type=Inference.INFER, protocol=Protocols.HTTPS, - model_name=ModelFormat.CAIKIT, use_default_query=True, token="dummy", authorized_user=False, ) - def test_model_authentication_without_token(self, http_s3_caikit_serverless_inference_service): + def test_model_authentication_without_token(self, http_ovms_serverless_inference_service): """Verify model query without providing a token""" verify_inference_response( - inference_service=http_s3_caikit_serverless_inference_service, - inference_config=CAIKIT_TGIS_INFERENCE_CONFIG, - inference_type=Inference.ALL_TOKENS, - protocol=Protocols.HTTPS, - 
model_name=ModelFormat.CAIKIT, - use_default_query=True, - authorized_user=False, - ) - - @pytest.mark.sanity - def test_block_cross_model_authentication(self, http_s3_caikit_serverless_inference_service, grpc_inference_token): - """Verify model query with a second model's token is blocked""" - verify_inference_response( - inference_service=http_s3_caikit_serverless_inference_service, - inference_config=CAIKIT_TGIS_INFERENCE_CONFIG, - inference_type=Inference.ALL_TOKENS, + inference_service=http_ovms_serverless_inference_service, + inference_config=ONNX_INFERENCE_CONFIG, + inference_type=Inference.INFER, protocol=Protocols.HTTPS, - model_name=ModelFormat.CAIKIT, use_default_query=True, - token=grpc_inference_token, authorized_user=False, ) @pytest.mark.sanity - def test_serverless_disable_enable_authentication_no_pod_rollout(self, http_s3_caikit_serverless_inference_service): + def test_serverless_disable_enable_authentication_no_pod_rollout(self, http_ovms_serverless_inference_service): """Verify no pod rollout when disabling and enabling authentication""" pod = get_pods_by_isvc_label( - client=http_s3_caikit_serverless_inference_service.client, - isvc=http_s3_caikit_serverless_inference_service, + client=http_ovms_serverless_inference_service.client, + isvc=http_ovms_serverless_inference_service, )[0] ResourceEditor( patches={ - http_s3_caikit_serverless_inference_service: { + http_ovms_serverless_inference_service: { "metadata": { "annotations": {Annotations.KserveAuth.SECURITY: "false"}, } @@ -136,7 +102,7 @@ def test_serverless_disable_enable_authentication_no_pod_rollout(self, http_s3_c ResourceEditor( patches={ - http_s3_caikit_serverless_inference_service: { + http_ovms_serverless_inference_service: { "metadata": { "annotations": {Annotations.KserveAuth.SECURITY: "true"}, } @@ -145,3 +111,4 @@ def test_serverless_disable_enable_authentication_no_pod_rollout(self, http_s3_c ).update() check_pod_status_in_time(pod=pod, status={pod.Status.RUNNING}) + diff 
--git a/tests/model_serving/model_server/authentication/test_non_admin_users.py b/tests/model_serving/model_server/authentication/test_non_admin_users.py index bcbddba85..7380ee30b 100644 --- a/tests/model_serving/model_server/authentication/test_non_admin_users.py +++ b/tests/model_serving/model_server/authentication/test_non_admin_users.py @@ -3,17 +3,16 @@ from tests.model_serving.model_server.utils import ( verify_inference_response, ) -from utilities.constants import ModelFormat, ModelStoragePath, Protocols +from utilities.constants import Protocols from utilities.inference_utils import Inference -from utilities.manifests.caikit_tgis import CAIKIT_TGIS_INFERENCE_CONFIG +from utilities.manifests.onnx import ONNX_INFERENCE_CONFIG @pytest.mark.parametrize( - "unprivileged_model_namespace, unprivileged_s3_caikit_serverless_inference_service", + "unprivileged_model_namespace", [ pytest.param( {"name": "non-admin-serverless"}, - {"model-dir": ModelStoragePath.FLAN_T5_SMALL_CAIKIT}, ) ], indirect=True, @@ -22,24 +21,22 @@ @pytest.mark.serverless class TestServerlessUnprivilegedUser: @pytest.mark.polarion("ODS-2552") - def test_non_admin_deploy_serverless_and_query_model(self, unprivileged_s3_caikit_serverless_inference_service): + def test_non_admin_deploy_serverless_and_query_model(self, unprivileged_ovms_serverless_inference_service): """Verify non admin can deploy a model and query using REST""" verify_inference_response( - inference_service=unprivileged_s3_caikit_serverless_inference_service, - inference_config=CAIKIT_TGIS_INFERENCE_CONFIG, - inference_type=Inference.ALL_TOKENS, + inference_service=unprivileged_ovms_serverless_inference_service, + inference_config=ONNX_INFERENCE_CONFIG, + inference_type=Inference.INFER, protocol=Protocols.HTTPS, - model_name=ModelFormat.CAIKIT, use_default_query=True, ) @pytest.mark.parametrize( - "unprivileged_model_namespace, unprivileged_s3_caikit_raw_inference_service", + "unprivileged_model_namespace", [ pytest.param( 
{"name": "non-admin-raw"}, - {"model-dir": ModelStoragePath.FLAN_T5_SMALL_HF}, ) ], indirect=True, @@ -50,14 +47,13 @@ class TestRawUnprivilegedUser: @pytest.mark.polarion("ODS-2611") def test_non_admin_deploy_raw_and_query_model( self, - unprivileged_s3_caikit_raw_inference_service, + unprivileged_ovms_raw_inference_service, ): """Verify non admin can deploy a Raw model and query using REST""" verify_inference_response( - inference_service=unprivileged_s3_caikit_raw_inference_service, - inference_config=CAIKIT_TGIS_INFERENCE_CONFIG, - inference_type=Inference.ALL_TOKENS, + inference_service=unprivileged_ovms_raw_inference_service, + inference_config=ONNX_INFERENCE_CONFIG, + inference_type=Inference.INFER, protocol=Protocols.HTTP, - model_name=ModelFormat.CAIKIT, use_default_query=True, ) diff --git a/tests/model_serving/model_server/conftest.py b/tests/model_serving/model_server/conftest.py index 831048656..19cd5eaab 100644 --- a/tests/model_serving/model_server/conftest.py +++ b/tests/model_serving/model_server/conftest.py @@ -577,6 +577,72 @@ def unprivileged_s3_caikit_serverless_inference_service( yield isvc +@pytest.fixture(scope="class") +def unprivileged_ovms_kserve_serving_runtime( + admin_client: DynamicClient, + unprivileged_client: DynamicClient, + unprivileged_model_namespace: Namespace, +) -> Generator[ServingRuntime, Any, Any]: + with ServingRuntimeFromTemplate( + client=admin_client, + unprivileged_client=unprivileged_client, + name=f"{Protocols.HTTP}-ovms-runtime", + namespace=unprivileged_model_namespace.name, + template_name=RuntimeTemplates.OVMS_KSERVE, + multi_model=False, + model_format_name={ModelFormat.ONNX: ModelVersion.OPSET13}, + resources={ + ModelFormat.OVMS: { + "requests": {"cpu": "1", "memory": "4Gi"}, + "limits": {"cpu": "2", "memory": "8Gi"}, + } + }, + ) as model_runtime: + yield model_runtime + + +@pytest.fixture(scope="class") +def unprivileged_ovms_serverless_inference_service( + unprivileged_client: DynamicClient, + 
unprivileged_model_namespace: Namespace, + unprivileged_ovms_kserve_serving_runtime: ServingRuntime, + ci_endpoint_s3_secret: Secret, +) -> Generator[InferenceService, Any, Any]: + with create_isvc( + client=unprivileged_client, + name=f"{Protocols.HTTP}-{ModelFormat.ONNX}", + namespace=unprivileged_model_namespace.name, + runtime=unprivileged_ovms_kserve_serving_runtime.name, + model_format=ModelAndFormat.OPENVINO_IR, + deployment_mode=KServeDeploymentType.SERVERLESS, + storage_key=ci_endpoint_s3_secret.name, + storage_path="test-dir", + model_version=ModelVersion.OPSET13, + ) as isvc: + yield isvc + + +@pytest.fixture(scope="class") +def unprivileged_ovms_raw_inference_service( + unprivileged_client: DynamicClient, + unprivileged_model_namespace: Namespace, + unprivileged_ovms_kserve_serving_runtime: ServingRuntime, + ci_endpoint_s3_secret: Secret, +) -> Generator[InferenceService, Any, Any]: + with create_isvc( + client=unprivileged_client, + name=f"{Protocols.HTTP}-{ModelFormat.ONNX}", + namespace=unprivileged_model_namespace.name, + runtime=unprivileged_ovms_kserve_serving_runtime.name, + model_format=ModelAndFormat.OPENVINO_IR, + deployment_mode=KServeDeploymentType.RAW_DEPLOYMENT, + storage_key=ci_endpoint_s3_secret.name, + storage_path="test-dir", + model_version=ModelVersion.OPSET13, + ) as isvc: + yield isvc + + @pytest.fixture(scope="package") def fail_if_missing_dependent_operators(admin_client: DynamicClient) -> None: if dependent_operators := py_config.get("dependent_operators"): diff --git a/tests/model_serving/model_server/metrics/test_non_admin_users.py b/tests/model_serving/model_server/metrics/test_non_admin_users.py index 135e3d978..3b7d3a14d 100644 --- a/tests/model_serving/model_server/metrics/test_non_admin_users.py +++ b/tests/model_serving/model_server/metrics/test_non_admin_users.py @@ -4,18 +4,17 @@ run_inference_multiple_times, verify_inference_response, ) -from utilities.constants import ModelFormat, ModelStoragePath, Protocols +from 
utilities.constants import Protocols from utilities.inference_utils import Inference -from utilities.manifests.caikit_tgis import CAIKIT_TGIS_INFERENCE_CONFIG +from utilities.manifests.onnx import ONNX_INFERENCE_CONFIG from utilities.monitoring import validate_metrics_field @pytest.mark.parametrize( - "unprivileged_model_namespace, unprivileged_s3_caikit_serverless_inference_service", + "unprivileged_model_namespace", [ pytest.param( {"name": "non-admin-serverless"}, - {"model-dir": ModelStoragePath.FLAN_T5_SMALL_CAIKIT}, ) ], indirect=True, @@ -24,24 +23,22 @@ @pytest.mark.serverless class TestServerlessUnprivilegedUser: @pytest.mark.polarion("ODS-2552") - def test_non_admin_deploy_serverless_and_query_metrics(self, unprivileged_s3_caikit_serverless_inference_service): + def test_non_admin_deploy_serverless_and_query_metrics(self, unprivileged_ovms_serverless_inference_service): """Verify non admin can deploy a model and query using REST""" verify_inference_response( - inference_service=unprivileged_s3_caikit_serverless_inference_service, - inference_config=CAIKIT_TGIS_INFERENCE_CONFIG, - inference_type=Inference.ALL_TOKENS, + inference_service=unprivileged_ovms_serverless_inference_service, + inference_config=ONNX_INFERENCE_CONFIG, + inference_type=Inference.INFER, protocol=Protocols.HTTPS, - model_name=ModelFormat.CAIKIT, use_default_query=True, ) @pytest.mark.parametrize( - "unprivileged_model_namespace, unprivileged_s3_caikit_raw_inference_service", + "unprivileged_model_namespace", [ pytest.param( {"name": "non-admin-metrics"}, - {"model-dir": ModelStoragePath.FLAN_T5_SMALL_HF}, ) ], indirect=True, @@ -52,7 +49,7 @@ class TestRawUnprivilegedUserMetrics: @pytest.mark.metrics def test_non_admin_raw_metrics( self, - unprivileged_s3_caikit_raw_inference_service, + unprivileged_ovms_raw_inference_service, prometheus, user_workload_monitoring_config_map, ): @@ -60,15 +57,14 @@ def test_non_admin_raw_metrics( total_runs = 5 run_inference_multiple_times( - 
isvc=unprivileged_s3_caikit_raw_inference_service, - inference_config=CAIKIT_TGIS_INFERENCE_CONFIG, - inference_type=Inference.ALL_TOKENS, + isvc=unprivileged_ovms_raw_inference_service, + inference_config=ONNX_INFERENCE_CONFIG, + inference_type=Inference.INFER, protocol=Protocols.HTTP, - model_name=ModelFormat.CAIKIT, iterations=total_runs, ) validate_metrics_field( prometheus=prometheus, - metrics_query="tgi_request_count", + metrics_query="ovms_requests_success", expected_value=str(total_runs), ) diff --git a/utilities/infra.py b/utilities/infra.py index 4cb94f778..a7d83c3c5 100644 --- a/utilities/infra.py +++ b/utilities/infra.py @@ -140,23 +140,26 @@ def create_ns( namespace_kwargs["label"][Labels.Kueue.MANAGED] = "true" # type: ignore if unprivileged_client: - with ProjectRequest(name=name, client=unprivileged_client, teardown=teardown): - project = Project(**namespace_kwargs) - project.wait_for_status(status=project.Status.ACTIVE, timeout=Timeout.TIMEOUT_2MIN) - if _labels := namespace_kwargs.get("label", {}): - # To patch the namespace, admin client is required - ns = Namespace(client=get_client(), name=name) - ResourceEditor({ - ns: { - "metadata": { - "labels": _labels, - } + namespace_kwargs["client"] = unprivileged_client + project = ProjectRequest(**namespace_kwargs).deploy() + project.wait_for_status(status=project.Status.ACTIVE, timeout=Timeout.TIMEOUT_2MIN) + if _labels := namespace_kwargs.get("label", {}): + # To patch the namespace, admin client is required + ns = Namespace(client=get_client(), name=name) + ResourceEditor({ + ns: { + "metadata": { + "labels": _labels, } - }).update() - yield project - - if teardown: - wait_for_serverless_pods_deletion(resource=project, admin_client=client) + } + }).update() + yield project + + if teardown: + wait_for_serverless_pods_deletion(resource=project, admin_client=get_client()) + # cleanup must be done with admin client + project.client = get_client() + project.clean_up() else: with 
Namespace(**namespace_kwargs) as ns: From b0c2bf4f1ac2404da79b3077e97edfecf02be48f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 3 Dec 2025 08:28:46 +0000 Subject: [PATCH 3/4] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/model_serving/model_server/authentication/conftest.py | 5 +++-- .../test_kserve_token_authentication_serverless.py | 1 - 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/model_serving/model_server/authentication/conftest.py b/tests/model_serving/model_server/authentication/conftest.py index 857034dea..5cbf9a715 100644 --- a/tests/model_serving/model_server/authentication/conftest.py +++ b/tests/model_serving/model_server/authentication/conftest.py @@ -25,7 +25,6 @@ Protocols, ModelInferenceRuntime, RuntimeTemplates, - ModelStoragePath, RunTimeConfigs, ModelAndFormat, ModelVersion, @@ -572,7 +571,9 @@ def http_ovms_inference_token(model_service_account: ServiceAccount, http_ovms_r @pytest.fixture(scope="class") -def http_ovms_raw_inference_token(model_service_account: ServiceAccount, http_ovms_raw_role_binding: RoleBinding) -> str: +def http_ovms_raw_inference_token( + model_service_account: ServiceAccount, http_ovms_raw_role_binding: RoleBinding +) -> str: return RedactedString(value=create_inference_token(model_service_account=model_service_account)) diff --git a/tests/model_serving/model_server/authentication/test_kserve_token_authentication_serverless.py b/tests/model_serving/model_server/authentication/test_kserve_token_authentication_serverless.py index 24b12deab..222079df5 100644 --- a/tests/model_serving/model_server/authentication/test_kserve_token_authentication_serverless.py +++ b/tests/model_serving/model_server/authentication/test_kserve_token_authentication_serverless.py @@ -111,4 +111,3 @@ def test_serverless_disable_enable_authentication_no_pod_rollout(self, http_ovms ).update() 
check_pod_status_in_time(pod=pod, status={pod.Status.RUNNING}) - From 86a0a7578e69c8fcd89f87fa02e3e56d93ca4df6 Mon Sep 17 00:00:00 2001 From: Milind Waykole Date: Wed, 3 Dec 2025 14:09:04 +0530 Subject: [PATCH 4/4] fix: remove unused Caikit fixtures after OVMS migration --- .../model_server/authentication/conftest.py | 308 +----------------- tests/model_serving/model_server/conftest.py | 85 ----- 2 files changed, 3 insertions(+), 390 deletions(-) diff --git a/tests/model_serving/model_server/authentication/conftest.py b/tests/model_serving/model_server/authentication/conftest.py index 5cbf9a715..6f7494034 100644 --- a/tests/model_serving/model_server/authentication/conftest.py +++ b/tests/model_serving/model_server/authentication/conftest.py @@ -1,8 +1,6 @@ from typing import Any, Generator -from urllib.parse import urlparse import pytest -from _pytest.fixtures import FixtureRequest from kubernetes.dynamic import DynamicClient from ocp_resources.inference_service import InferenceService from ocp_resources.namespace import Namespace @@ -23,7 +21,6 @@ KServeDeploymentType, ModelFormat, Protocols, - ModelInferenceRuntime, RuntimeTemplates, RunTimeConfigs, ModelAndFormat, @@ -35,318 +32,19 @@ from utilities.constants import Annotations -# GRPC model serving -@pytest.fixture(scope="class") -def grpc_model_service_account( - unprivileged_client: DynamicClient, models_endpoint_s3_secret: Secret -) -> Generator[ServiceAccount, Any, Any]: - with ServiceAccount( - client=unprivileged_client, - namespace=models_endpoint_s3_secret.namespace, - name=f"{Protocols.GRPC}-models-bucket-sa", - secrets=[{"name": models_endpoint_s3_secret.name}], - ) as sa: - yield sa - - -@pytest.fixture(scope="class") -def grpc_s3_caikit_serving_runtime( - unprivileged_client: DynamicClient, - unprivileged_model_namespace: Namespace, -) -> Generator[ServingRuntime, Any, Any]: - with ServingRuntimeFromTemplate( - client=unprivileged_client, - 
name=f"{Protocols.GRPC}-{ModelInferenceRuntime.CAIKIT_TGIS_RUNTIME}", - namespace=unprivileged_model_namespace.name, - template_name=RuntimeTemplates.CAIKIT_TGIS_SERVING, - multi_model=False, - enable_http=False, - enable_grpc=True, - ) as model_runtime: - yield model_runtime - - -@pytest.fixture(scope="class") -def grpc_s3_inference_service( - unprivileged_client: DynamicClient, - unprivileged_model_namespace: Namespace, - grpc_s3_caikit_serving_runtime: ServingRuntime, - s3_models_storage_uri: str, - models_endpoint_s3_secret: Secret, -) -> Generator[InferenceService, Any, Any]: - with create_isvc( - client=unprivileged_client, - name=f"{Protocols.GRPC}-{ModelFormat.CAIKIT}", - namespace=unprivileged_model_namespace.name, - runtime=grpc_s3_caikit_serving_runtime.name, - model_format=grpc_s3_caikit_serving_runtime.instance.spec.supportedModelFormats[0].name, - storage_key=models_endpoint_s3_secret.name, - storage_path=urlparse(s3_models_storage_uri).path, - deployment_mode=KServeDeploymentType.SERVERLESS, - enable_auth=True, - ) as isvc: - yield isvc - - -@pytest.fixture(scope="class") -def http_view_role( - unprivileged_client: DynamicClient, - http_s3_caikit_serverless_inference_service: InferenceService, -) -> Generator[Role, Any, Any]: - with create_isvc_view_role( - client=unprivileged_client, - isvc=http_s3_caikit_serverless_inference_service, - name=f"{http_s3_caikit_serverless_inference_service.name}-view", - resource_names=[http_s3_caikit_serverless_inference_service.name], - ) as role: - yield role - - -@pytest.fixture(scope="class") -def http_raw_view_role( - unprivileged_client: DynamicClient, - http_s3_caikit_raw_inference_service: InferenceService, -) -> Generator[Role, Any, Any]: - with create_isvc_view_role( - client=unprivileged_client, - isvc=http_s3_caikit_raw_inference_service, - name=f"{http_s3_caikit_raw_inference_service.name}-view", - resource_names=[http_s3_caikit_raw_inference_service.name], - ) as role: - yield role - - 
-@pytest.fixture(scope="class") -def http_role_binding( - unprivileged_client: DynamicClient, - http_view_role: Role, - model_service_account: ServiceAccount, - http_s3_caikit_serverless_inference_service: InferenceService, -) -> Generator[RoleBinding, Any, Any]: - with RoleBinding( - client=unprivileged_client, - namespace=model_service_account.namespace, - name=f"{Protocols.HTTP}-{model_service_account.name}-view", - role_ref_name=http_view_role.name, - role_ref_kind=http_view_role.kind, - subjects_kind=model_service_account.kind, - subjects_name=model_service_account.name, - ) as rb: - yield rb - - -@pytest.fixture(scope="class") -def http_raw_role_binding( - unprivileged_client: DynamicClient, - http_raw_view_role: Role, - model_service_account: ServiceAccount, - http_s3_caikit_raw_inference_service: InferenceService, -) -> Generator[RoleBinding, Any, Any]: - with RoleBinding( - client=unprivileged_client, - namespace=model_service_account.namespace, - name=f"{Protocols.HTTP}-{model_service_account.name}-view", - role_ref_name=http_raw_view_role.name, - role_ref_kind=http_raw_view_role.kind, - subjects_kind=model_service_account.kind, - subjects_name=model_service_account.name, - ) as rb: - yield rb - - -@pytest.fixture(scope="class") -def http_inference_token(model_service_account: ServiceAccount, http_role_binding: RoleBinding) -> str: - return RedactedString(value=create_inference_token(model_service_account=model_service_account)) - - -@pytest.fixture(scope="class") -def http_raw_inference_token(model_service_account: ServiceAccount, http_raw_role_binding: RoleBinding) -> str: - return RedactedString(value=create_inference_token(model_service_account=model_service_account)) - - -@pytest.fixture() -def patched_remove_authentication_isvc( - http_s3_caikit_serverless_inference_service: InferenceService, -) -> Generator[InferenceService, Any, Any]: - with ResourceEditor( - patches={ - http_s3_caikit_serverless_inference_service: { - "metadata": { - 
"annotations": {Annotations.KserveAuth.SECURITY: "false"}, - } - } - } - ): - yield http_s3_caikit_serverless_inference_service - - -@pytest.fixture() -def patched_remove_raw_authentication_isvc( - admin_client: DynamicClient, - unprivileged_client: DynamicClient, - http_s3_caikit_raw_inference_service: InferenceService, -) -> Generator[InferenceService, Any, Any]: - predictor_pod = get_pods_by_isvc_label( - client=unprivileged_client, - isvc=http_s3_caikit_raw_inference_service, - )[0] - - with ResourceEditor( - patches={ - http_s3_caikit_raw_inference_service: { - "metadata": { - "annotations": {Annotations.KserveAuth.SECURITY: "false"}, - } - } - } - ): - if is_jira_open(jira_id="RHOAIENG-19275", admin_client=admin_client): - predictor_pod.wait_deleted() - - yield http_s3_caikit_raw_inference_service - - @pytest.fixture(scope="class") def model_service_account_2( - unprivileged_client: DynamicClient, models_endpoint_s3_secret: Secret + unprivileged_client: DynamicClient, ci_endpoint_s3_secret: Secret ) -> Generator[ServiceAccount, Any, Any]: with ServiceAccount( client=unprivileged_client, - namespace=models_endpoint_s3_secret.namespace, + namespace=ci_endpoint_s3_secret.namespace, name="models-bucket-sa-2", - secrets=[{"name": models_endpoint_s3_secret.name}], + secrets=[{"name": ci_endpoint_s3_secret.name}], ) as sa: yield sa -@pytest.fixture(scope="class") -def grpc_view_role( - unprivileged_client: DynamicClient, grpc_s3_inference_service: InferenceService -) -> Generator[Role, Any, Any]: - with create_isvc_view_role( - client=unprivileged_client, - isvc=grpc_s3_inference_service, - name=f"{grpc_s3_inference_service.name}-view", - resource_names=[grpc_s3_inference_service.name], - ) as role: - yield role - - -@pytest.fixture(scope="class") -def grpc_role_binding( - unprivileged_client: DynamicClient, - grpc_view_role: Role, - grpc_model_service_account: ServiceAccount, - grpc_s3_inference_service: InferenceService, -) -> Generator[RoleBinding, Any, Any]: - 
with RoleBinding( - client=unprivileged_client, - namespace=grpc_model_service_account.namespace, - name=f"{Protocols.GRPC}-{grpc_model_service_account.name}-view", - role_ref_name=grpc_view_role.name, - role_ref_kind=grpc_view_role.kind, - subjects_kind=grpc_model_service_account.kind, - subjects_name=grpc_model_service_account.name, - ) as rb: - yield rb - - -@pytest.fixture(scope="class") -def grpc_inference_token(grpc_model_service_account: ServiceAccount, grpc_role_binding: RoleBinding) -> str: - return RedactedString(value=create_inference_token(model_service_account=grpc_model_service_account)) - - -@pytest.fixture(scope="class") -def http_s3_caikit_serverless_inference_service( - request: FixtureRequest, - unprivileged_client: DynamicClient, - unprivileged_model_namespace: Namespace, - http_s3_caikit_tgis_serving_runtime: ServingRuntime, - s3_models_storage_uri: str, - models_endpoint_s3_secret: Secret, -) -> Generator[InferenceService, Any, Any]: - with create_isvc( - client=unprivileged_client, - name=f"{Protocols.HTTP}-{ModelFormat.CAIKIT}", - namespace=unprivileged_model_namespace.name, - runtime=http_s3_caikit_tgis_serving_runtime.name, - model_format=http_s3_caikit_tgis_serving_runtime.instance.spec.supportedModelFormats[0].name, - deployment_mode=KServeDeploymentType.SERVERLESS, - enable_auth=True, - storage_key=models_endpoint_s3_secret.name, - storage_path=urlparse(s3_models_storage_uri).path, - ) as isvc: - yield isvc - - -@pytest.fixture(scope="class") -def http_s3_caikit_raw_inference_service( - request: FixtureRequest, - unprivileged_client: DynamicClient, - unprivileged_model_namespace: Namespace, - http_s3_caikit_tgis_serving_runtime: ServingRuntime, - s3_models_storage_uri: str, - models_endpoint_s3_secret: Secret, - model_service_account: ServiceAccount, -) -> Generator[InferenceService, Any, Any]: - with create_isvc( - client=unprivileged_client, - name=f"{Protocols.HTTP}-{ModelFormat.CAIKIT}", - 
namespace=unprivileged_model_namespace.name, - runtime=http_s3_caikit_tgis_serving_runtime.name, - storage_key=models_endpoint_s3_secret.name, - storage_path=urlparse(s3_models_storage_uri).path, - model_format=http_s3_caikit_tgis_serving_runtime.instance.spec.supportedModelFormats[0].name, - deployment_mode=KServeDeploymentType.RAW_DEPLOYMENT, - model_service_account=model_service_account.name, - enable_auth=True, - external_route=True, - ) as isvc: - yield isvc - - -@pytest.fixture(scope="class") -def http_s3_caikit_raw_inference_service_2( - request: FixtureRequest, - unprivileged_client: DynamicClient, - unprivileged_model_namespace: Namespace, - http_s3_caikit_tgis_serving_runtime: ServingRuntime, - s3_models_storage_uri: str, - model_service_account_2: ServiceAccount, -) -> Generator[InferenceService, Any, Any]: - with create_isvc( - client=unprivileged_client, - name=f"{Protocols.HTTP}-{ModelFormat.CAIKIT}-2", - namespace=unprivileged_model_namespace.name, - runtime=http_s3_caikit_tgis_serving_runtime.name, - storage_uri=s3_models_storage_uri, - model_format=http_s3_caikit_tgis_serving_runtime.instance.spec.supportedModelFormats[0].name, - deployment_mode=KServeDeploymentType.RAW_DEPLOYMENT, - model_service_account=model_service_account_2.name, - enable_auth=True, - external_route=True, - ) as isvc: - yield isvc - - -@pytest.fixture(scope="class") -def http_s3_caikit_tgis_serving_runtime( - request: FixtureRequest, - unprivileged_client: DynamicClient, - unprivileged_model_namespace: Namespace, -) -> Generator[ServingRuntime, Any, Any]: - with ServingRuntimeFromTemplate( - client=unprivileged_client, - name=f"{Protocols.HTTP}-{ModelInferenceRuntime.CAIKIT_TGIS_RUNTIME}", - namespace=unprivileged_model_namespace.name, - template_name=RuntimeTemplates.CAIKIT_TGIS_SERVING, - multi_model=False, - enable_http=True, - enable_grpc=False, - ) as model_runtime: - yield model_runtime - - @pytest.fixture() def patched_remove_authentication_model_mesh_runtime( 
http_s3_ovms_model_mesh_serving_runtime: ServingRuntime, diff --git a/tests/model_serving/model_server/conftest.py b/tests/model_serving/model_server/conftest.py index 19cd5eaab..f954d5a21 100644 --- a/tests/model_serving/model_server/conftest.py +++ b/tests/model_serving/model_server/conftest.py @@ -21,7 +21,6 @@ KServeDeploymentType, ModelFormat, ModelInferenceRuntime, - ModelStoragePath, Protocols, RuntimeTemplates, StorageClassName, @@ -493,90 +492,6 @@ def http_s3_openvino_second_model_mesh_inference_service( yield isvc -@pytest.fixture(scope="class") -def unprivileged_s3_caikit_raw_inference_service( - request: FixtureRequest, - unprivileged_client: DynamicClient, - unprivileged_model_namespace: Namespace, - unprivileged_s3_caikit_serving_runtime: ServingRuntime, - unprivileged_models_endpoint_s3_secret: Secret, -) -> Generator[InferenceService, Any, Any]: - with create_isvc( - client=unprivileged_client, - name=f"{Protocols.HTTP}-{ModelFormat.CAIKIT}-raw", - namespace=unprivileged_model_namespace.name, - runtime=unprivileged_s3_caikit_serving_runtime.name, - model_format=unprivileged_s3_caikit_serving_runtime.instance.spec.supportedModelFormats[0].name, - deployment_mode=KServeDeploymentType.RAW_DEPLOYMENT, - storage_key=unprivileged_models_endpoint_s3_secret.name, - storage_path=ModelStoragePath.FLAN_T5_SMALL_CAIKIT, - ) as isvc: - yield isvc - - -@pytest.fixture(scope="class") -def unprivileged_s3_caikit_serving_runtime( - admin_client: DynamicClient, - unprivileged_client: DynamicClient, - unprivileged_model_namespace: Namespace, -) -> Generator[ServingRuntime, Any, Any]: - with ServingRuntimeFromTemplate( - client=admin_client, - unprivileged_client=unprivileged_client, - name=f"{Protocols.HTTP}-{ModelInferenceRuntime.CAIKIT_TGIS_RUNTIME}", - namespace=unprivileged_model_namespace.name, - template_name=RuntimeTemplates.CAIKIT_TGIS_SERVING, - multi_model=False, - enable_http=True, - enable_grpc=False, - ) as model_runtime: - yield model_runtime - - 
-@pytest.fixture(scope="class") -def unprivileged_models_endpoint_s3_secret( - unprivileged_client: DynamicClient, - unprivileged_model_namespace: Namespace, - aws_access_key_id: str, - aws_secret_access_key: str, - models_s3_bucket_name: str, - models_s3_bucket_region: str, - models_s3_bucket_endpoint: str, -) -> Generator[Secret, Any, Any]: - with s3_endpoint_secret( - client=unprivileged_client, - name="models-bucket-secret", - namespace=unprivileged_model_namespace.name, - aws_access_key=aws_access_key_id, - aws_secret_access_key=aws_secret_access_key, - aws_s3_region=models_s3_bucket_region, - aws_s3_bucket=models_s3_bucket_name, - aws_s3_endpoint=models_s3_bucket_endpoint, - ) as secret: - yield secret - - -@pytest.fixture(scope="class") -def unprivileged_s3_caikit_serverless_inference_service( - request: FixtureRequest, - unprivileged_client: DynamicClient, - unprivileged_model_namespace: Namespace, - unprivileged_s3_caikit_serving_runtime: ServingRuntime, - unprivileged_models_endpoint_s3_secret: Secret, -) -> Generator[InferenceService, Any, Any]: - with create_isvc( - client=unprivileged_client, - name=f"{Protocols.HTTP}-{ModelFormat.CAIKIT}", - namespace=unprivileged_model_namespace.name, - runtime=unprivileged_s3_caikit_serving_runtime.name, - model_format=unprivileged_s3_caikit_serving_runtime.instance.spec.supportedModelFormats[0].name, - deployment_mode=KServeDeploymentType.SERVERLESS, - storage_key=unprivileged_models_endpoint_s3_secret.name, - storage_path=request.param["model-dir"], - ) as isvc: - yield isvc - - @pytest.fixture(scope="class") def unprivileged_ovms_kserve_serving_runtime( admin_client: DynamicClient,