diff --git a/tests/model_serving/model_server/authentication/conftest.py b/tests/model_serving/model_server/authentication/conftest.py index 23a303cc2..6f7494034 100644 --- a/tests/model_serving/model_server/authentication/conftest.py +++ b/tests/model_serving/model_server/authentication/conftest.py @@ -1,8 +1,6 @@ from typing import Any, Generator -from urllib.parse import urlparse import pytest -from _pytest.fixtures import FixtureRequest from kubernetes.dynamic import DynamicClient from ocp_resources.inference_service import InferenceService from ocp_resources.namespace import Namespace @@ -23,8 +21,10 @@ KServeDeploymentType, ModelFormat, Protocols, - ModelInferenceRuntime, RuntimeTemplates, + RunTimeConfigs, + ModelAndFormat, + ModelVersion, ) from utilities.jira import is_jira_open from utilities.logger import RedactedString @@ -32,100 +32,212 @@ from utilities.constants import Annotations -# GRPC model serving @pytest.fixture(scope="class") -def grpc_model_service_account( - unprivileged_client: DynamicClient, models_endpoint_s3_secret: Secret +def model_service_account_2( + unprivileged_client: DynamicClient, ci_endpoint_s3_secret: Secret ) -> Generator[ServiceAccount, Any, Any]: with ServiceAccount( client=unprivileged_client, - namespace=models_endpoint_s3_secret.namespace, - name=f"{Protocols.GRPC}-models-bucket-sa", - secrets=[{"name": models_endpoint_s3_secret.name}], + namespace=ci_endpoint_s3_secret.namespace, + name="models-bucket-sa-2", + secrets=[{"name": ci_endpoint_s3_secret.name}], ) as sa: yield sa +@pytest.fixture() +def patched_remove_authentication_model_mesh_runtime( + http_s3_ovms_model_mesh_serving_runtime: ServingRuntime, +) -> Generator[ServingRuntime, Any, Any]: + with ResourceEditor( + patches={ + http_s3_ovms_model_mesh_serving_runtime: { + "metadata": { + "annotations": {"enable-auth": "false"}, + } + } + } + ): + yield http_s3_ovms_model_mesh_serving_runtime + + +@pytest.fixture(scope="class") +def http_model_mesh_view_role( + 
unprivileged_client: DynamicClient, + http_s3_openvino_model_mesh_inference_service: InferenceService, + http_s3_ovms_model_mesh_serving_runtime: ServingRuntime, +) -> Generator[Role, Any, Any]: + with Role( + client=unprivileged_client, + name=f"{http_s3_openvino_model_mesh_inference_service.name}-view", + namespace=http_s3_openvino_model_mesh_inference_service.namespace, + rules=[ + {"apiGroups": [""], "resources": ["services"], "verbs": ["get"]}, + ], + ) as role: + yield role + + +@pytest.fixture(scope="class") +def http_model_mesh_role_binding( + unprivileged_client: DynamicClient, + http_model_mesh_view_role: Role, + ci_service_account: ServiceAccount, +) -> Generator[RoleBinding, Any, Any]: + with RoleBinding( + client=unprivileged_client, + namespace=ci_service_account.namespace, + name=f"{Protocols.HTTP}-{ci_service_account.name}-view", + role_ref_name=http_model_mesh_view_role.name, + role_ref_kind=http_model_mesh_view_role.kind, + subjects_kind=ci_service_account.kind, + subjects_name=ci_service_account.name, + ) as rb: + yield rb + + +@pytest.fixture(scope="class") +def http_model_mesh_inference_token( + ci_service_account: ServiceAccount, http_model_mesh_role_binding: RoleBinding +) -> str: + return RedactedString(value=create_inference_token(model_service_account=ci_service_account)) + + @pytest.fixture(scope="class") -def grpc_s3_caikit_serving_runtime( +def ovms_kserve_serving_runtime_auth( unprivileged_client: DynamicClient, unprivileged_model_namespace: Namespace, ) -> Generator[ServingRuntime, Any, Any]: with ServingRuntimeFromTemplate( client=unprivileged_client, - name=f"{Protocols.GRPC}-{ModelInferenceRuntime.CAIKIT_TGIS_RUNTIME}", + name=f"{Protocols.HTTP}-ovms-runtime", namespace=unprivileged_model_namespace.name, - template_name=RuntimeTemplates.CAIKIT_TGIS_SERVING, + template_name=RuntimeTemplates.OVMS_KSERVE, multi_model=False, - enable_http=False, - enable_grpc=True, + 
model_format_name=RunTimeConfigs.ONNX_OPSET13_RUNTIME_CONFIG["model-format"], + resources={ + ModelFormat.OVMS: { + "requests": {"cpu": "1", "memory": "4Gi"}, + "limits": {"cpu": "2", "memory": "8Gi"}, + } + }, ) as model_runtime: yield model_runtime @pytest.fixture(scope="class") -def grpc_s3_inference_service( +def http_ovms_serverless_inference_service( unprivileged_client: DynamicClient, unprivileged_model_namespace: Namespace, - grpc_s3_caikit_serving_runtime: ServingRuntime, - s3_models_storage_uri: str, - models_endpoint_s3_secret: Secret, + ovms_kserve_serving_runtime_auth: ServingRuntime, + ci_endpoint_s3_secret: Secret, ) -> Generator[InferenceService, Any, Any]: with create_isvc( client=unprivileged_client, - name=f"{Protocols.GRPC}-{ModelFormat.CAIKIT}", + name=f"{Protocols.HTTP}-{ModelFormat.ONNX}", namespace=unprivileged_model_namespace.name, - runtime=grpc_s3_caikit_serving_runtime.name, - model_format=grpc_s3_caikit_serving_runtime.instance.spec.supportedModelFormats[0].name, - storage_key=models_endpoint_s3_secret.name, - storage_path=urlparse(s3_models_storage_uri).path, + runtime=ovms_kserve_serving_runtime_auth.name, + model_format=ModelAndFormat.OPENVINO_IR, deployment_mode=KServeDeploymentType.SERVERLESS, enable_auth=True, + storage_key=ci_endpoint_s3_secret.name, + storage_path="test-dir", + model_version=ModelVersion.OPSET13, + ) as isvc: + yield isvc + + +@pytest.fixture(scope="class") +def http_ovms_raw_inference_service( + unprivileged_client: DynamicClient, + unprivileged_model_namespace: Namespace, + ovms_kserve_serving_runtime_auth: ServingRuntime, + ci_endpoint_s3_secret: Secret, + model_service_account: ServiceAccount, +) -> Generator[InferenceService, Any, Any]: + with create_isvc( + client=unprivileged_client, + name=f"{Protocols.HTTP}-{ModelFormat.ONNX}", + namespace=unprivileged_model_namespace.name, + runtime=ovms_kserve_serving_runtime_auth.name, + storage_key=ci_endpoint_s3_secret.name, + storage_path="test-dir", + 
model_format=ModelAndFormat.OPENVINO_IR, + deployment_mode=KServeDeploymentType.RAW_DEPLOYMENT, + model_service_account=model_service_account.name, + enable_auth=True, + external_route=True, + model_version=ModelVersion.OPSET13, + ) as isvc: + yield isvc + + +@pytest.fixture(scope="class") +def http_ovms_raw_inference_service_2( + unprivileged_client: DynamicClient, + unprivileged_model_namespace: Namespace, + ovms_kserve_serving_runtime_auth: ServingRuntime, + ci_endpoint_s3_secret: Secret, + model_service_account_2: ServiceAccount, +) -> Generator[InferenceService, Any, Any]: + with create_isvc( + client=unprivileged_client, + name=f"{Protocols.HTTP}-{ModelFormat.ONNX}-2", + namespace=unprivileged_model_namespace.name, + runtime=ovms_kserve_serving_runtime_auth.name, + storage_key=ci_endpoint_s3_secret.name, + storage_path="test-dir", + model_format=ModelAndFormat.OPENVINO_IR, + deployment_mode=KServeDeploymentType.RAW_DEPLOYMENT, + model_service_account=model_service_account_2.name, + enable_auth=True, + external_route=True, + model_version=ModelVersion.OPSET13, ) as isvc: yield isvc @pytest.fixture(scope="class") -def http_view_role( +def http_ovms_view_role( unprivileged_client: DynamicClient, - http_s3_caikit_serverless_inference_service: InferenceService, + http_ovms_serverless_inference_service: InferenceService, ) -> Generator[Role, Any, Any]: with create_isvc_view_role( client=unprivileged_client, - isvc=http_s3_caikit_serverless_inference_service, - name=f"{http_s3_caikit_serverless_inference_service.name}-view", - resource_names=[http_s3_caikit_serverless_inference_service.name], + isvc=http_ovms_serverless_inference_service, + name=f"{http_ovms_serverless_inference_service.name}-view", + resource_names=[http_ovms_serverless_inference_service.name], ) as role: yield role @pytest.fixture(scope="class") -def http_raw_view_role( +def http_ovms_raw_view_role( unprivileged_client: DynamicClient, - http_s3_caikit_raw_inference_service: InferenceService, + 
http_ovms_raw_inference_service: InferenceService, ) -> Generator[Role, Any, Any]: with create_isvc_view_role( client=unprivileged_client, - isvc=http_s3_caikit_raw_inference_service, - name=f"{http_s3_caikit_raw_inference_service.name}-view", - resource_names=[http_s3_caikit_raw_inference_service.name], + isvc=http_ovms_raw_inference_service, + name=f"{http_ovms_raw_inference_service.name}-view", + resource_names=[http_ovms_raw_inference_service.name], ) as role: yield role @pytest.fixture(scope="class") -def http_role_binding( +def http_ovms_role_binding( unprivileged_client: DynamicClient, - http_view_role: Role, + http_ovms_view_role: Role, model_service_account: ServiceAccount, - http_s3_caikit_serverless_inference_service: InferenceService, + http_ovms_serverless_inference_service: InferenceService, ) -> Generator[RoleBinding, Any, Any]: with RoleBinding( client=unprivileged_client, namespace=model_service_account.namespace, - name=f"{Protocols.HTTP}-{model_service_account.name}-view", - role_ref_name=http_view_role.name, - role_ref_kind=http_view_role.kind, + name=f"{Protocols.HTTP}-{model_service_account.name}-ovms-view", + role_ref_name=http_ovms_view_role.name, + role_ref_kind=http_ovms_view_role.kind, subjects_kind=model_service_account.kind, subjects_name=model_service_account.name, ) as rb: @@ -133,18 +245,18 @@ def http_role_binding( @pytest.fixture(scope="class") -def http_raw_role_binding( +def http_ovms_raw_role_binding( unprivileged_client: DynamicClient, - http_raw_view_role: Role, + http_ovms_raw_view_role: Role, model_service_account: ServiceAccount, - http_s3_caikit_raw_inference_service: InferenceService, + http_ovms_raw_inference_service: InferenceService, ) -> Generator[RoleBinding, Any, Any]: with RoleBinding( client=unprivileged_client, namespace=model_service_account.namespace, - name=f"{Protocols.HTTP}-{model_service_account.name}-view", - role_ref_name=http_raw_view_role.name, - role_ref_kind=http_raw_view_role.kind, + 
name=f"{Protocols.HTTP}-{model_service_account.name}-ovms-view", + role_ref_name=http_ovms_raw_view_role.name, + role_ref_kind=http_ovms_raw_view_role.kind, subjects_kind=model_service_account.kind, subjects_name=model_service_account.name, ) as rb: @@ -152,45 +264,47 @@ def http_raw_role_binding( @pytest.fixture(scope="class") -def http_inference_token(model_service_account: ServiceAccount, http_role_binding: RoleBinding) -> str: +def http_ovms_inference_token(model_service_account: ServiceAccount, http_ovms_role_binding: RoleBinding) -> str: return RedactedString(value=create_inference_token(model_service_account=model_service_account)) @pytest.fixture(scope="class") -def http_raw_inference_token(model_service_account: ServiceAccount, http_raw_role_binding: RoleBinding) -> str: +def http_ovms_raw_inference_token( + model_service_account: ServiceAccount, http_ovms_raw_role_binding: RoleBinding +) -> str: return RedactedString(value=create_inference_token(model_service_account=model_service_account)) @pytest.fixture() -def patched_remove_authentication_isvc( - http_s3_caikit_serverless_inference_service: InferenceService, +def patched_remove_ovms_authentication_isvc( + http_ovms_serverless_inference_service: InferenceService, ) -> Generator[InferenceService, Any, Any]: with ResourceEditor( patches={ - http_s3_caikit_serverless_inference_service: { + http_ovms_serverless_inference_service: { "metadata": { "annotations": {Annotations.KserveAuth.SECURITY: "false"}, } } } ): - yield http_s3_caikit_serverless_inference_service + yield http_ovms_serverless_inference_service @pytest.fixture() -def patched_remove_raw_authentication_isvc( +def patched_remove_ovms_raw_authentication_isvc( admin_client: DynamicClient, unprivileged_client: DynamicClient, - http_s3_caikit_raw_inference_service: InferenceService, + http_ovms_raw_inference_service: InferenceService, ) -> Generator[InferenceService, Any, Any]: predictor_pod = get_pods_by_isvc_label( client=unprivileged_client, - 
isvc=http_s3_caikit_raw_inference_service, + isvc=http_ovms_raw_inference_service, )[0] with ResourceEditor( patches={ - http_s3_caikit_raw_inference_service: { + http_ovms_raw_inference_service: { "metadata": { "annotations": {Annotations.KserveAuth.SECURITY: "false"}, } @@ -200,203 +314,4 @@ def patched_remove_raw_authentication_isvc( if is_jira_open(jira_id="RHOAIENG-19275", admin_client=admin_client): predictor_pod.wait_deleted() - yield http_s3_caikit_raw_inference_service - - -@pytest.fixture(scope="class") -def model_service_account_2( - unprivileged_client: DynamicClient, models_endpoint_s3_secret: Secret -) -> Generator[ServiceAccount, Any, Any]: - with ServiceAccount( - client=unprivileged_client, - namespace=models_endpoint_s3_secret.namespace, - name="models-bucket-sa-2", - secrets=[{"name": models_endpoint_s3_secret.name}], - ) as sa: - yield sa - - -@pytest.fixture(scope="class") -def grpc_view_role( - unprivileged_client: DynamicClient, grpc_s3_inference_service: InferenceService -) -> Generator[Role, Any, Any]: - with create_isvc_view_role( - client=unprivileged_client, - isvc=grpc_s3_inference_service, - name=f"{grpc_s3_inference_service.name}-view", - resource_names=[grpc_s3_inference_service.name], - ) as role: - yield role - - -@pytest.fixture(scope="class") -def grpc_role_binding( - unprivileged_client: DynamicClient, - grpc_view_role: Role, - grpc_model_service_account: ServiceAccount, - grpc_s3_inference_service: InferenceService, -) -> Generator[RoleBinding, Any, Any]: - with RoleBinding( - client=unprivileged_client, - namespace=grpc_model_service_account.namespace, - name=f"{Protocols.GRPC}-{grpc_model_service_account.name}-view", - role_ref_name=grpc_view_role.name, - role_ref_kind=grpc_view_role.kind, - subjects_kind=grpc_model_service_account.kind, - subjects_name=grpc_model_service_account.name, - ) as rb: - yield rb - - -@pytest.fixture(scope="class") -def grpc_inference_token(grpc_model_service_account: ServiceAccount, 
grpc_role_binding: RoleBinding) -> str: - return RedactedString(value=create_inference_token(model_service_account=grpc_model_service_account)) - - -@pytest.fixture(scope="class") -def http_s3_caikit_serverless_inference_service( - request: FixtureRequest, - unprivileged_client: DynamicClient, - unprivileged_model_namespace: Namespace, - http_s3_caikit_tgis_serving_runtime: ServingRuntime, - s3_models_storage_uri: str, - models_endpoint_s3_secret: Secret, -) -> Generator[InferenceService, Any, Any]: - with create_isvc( - client=unprivileged_client, - name=f"{Protocols.HTTP}-{ModelFormat.CAIKIT}", - namespace=unprivileged_model_namespace.name, - runtime=http_s3_caikit_tgis_serving_runtime.name, - model_format=http_s3_caikit_tgis_serving_runtime.instance.spec.supportedModelFormats[0].name, - deployment_mode=KServeDeploymentType.SERVERLESS, - enable_auth=True, - storage_key=models_endpoint_s3_secret.name, - storage_path=urlparse(s3_models_storage_uri).path, - ) as isvc: - yield isvc - - -@pytest.fixture(scope="class") -def http_s3_caikit_raw_inference_service( - request: FixtureRequest, - unprivileged_client: DynamicClient, - unprivileged_model_namespace: Namespace, - http_s3_caikit_tgis_serving_runtime: ServingRuntime, - s3_models_storage_uri: str, - models_endpoint_s3_secret: Secret, - model_service_account: ServiceAccount, -) -> Generator[InferenceService, Any, Any]: - with create_isvc( - client=unprivileged_client, - name=f"{Protocols.HTTP}-{ModelFormat.CAIKIT}", - namespace=unprivileged_model_namespace.name, - runtime=http_s3_caikit_tgis_serving_runtime.name, - storage_key=models_endpoint_s3_secret.name, - storage_path=urlparse(s3_models_storage_uri).path, - model_format=http_s3_caikit_tgis_serving_runtime.instance.spec.supportedModelFormats[0].name, - deployment_mode=KServeDeploymentType.RAW_DEPLOYMENT, - model_service_account=model_service_account.name, - enable_auth=True, - external_route=True, - ) as isvc: - yield isvc - - -@pytest.fixture(scope="class") -def 
http_s3_caikit_raw_inference_service_2( - request: FixtureRequest, - unprivileged_client: DynamicClient, - unprivileged_model_namespace: Namespace, - http_s3_caikit_tgis_serving_runtime: ServingRuntime, - s3_models_storage_uri: str, - model_service_account_2: ServiceAccount, -) -> Generator[InferenceService, Any, Any]: - with create_isvc( - client=unprivileged_client, - name=f"{Protocols.HTTP}-{ModelFormat.CAIKIT}-2", - namespace=unprivileged_model_namespace.name, - runtime=http_s3_caikit_tgis_serving_runtime.name, - storage_uri=s3_models_storage_uri, - model_format=http_s3_caikit_tgis_serving_runtime.instance.spec.supportedModelFormats[0].name, - deployment_mode=KServeDeploymentType.RAW_DEPLOYMENT, - model_service_account=model_service_account_2.name, - enable_auth=True, - external_route=True, - ) as isvc: - yield isvc - - -@pytest.fixture(scope="class") -def http_s3_caikit_tgis_serving_runtime( - request: FixtureRequest, - unprivileged_client: DynamicClient, - unprivileged_model_namespace: Namespace, -) -> Generator[ServingRuntime, Any, Any]: - with ServingRuntimeFromTemplate( - client=unprivileged_client, - name=f"{Protocols.HTTP}-{ModelInferenceRuntime.CAIKIT_TGIS_RUNTIME}", - namespace=unprivileged_model_namespace.name, - template_name=RuntimeTemplates.CAIKIT_TGIS_SERVING, - multi_model=False, - enable_http=True, - enable_grpc=False, - ) as model_runtime: - yield model_runtime - - -@pytest.fixture() -def patched_remove_authentication_model_mesh_runtime( - http_s3_ovms_model_mesh_serving_runtime: ServingRuntime, -) -> Generator[ServingRuntime, Any, Any]: - with ResourceEditor( - patches={ - http_s3_ovms_model_mesh_serving_runtime: { - "metadata": { - "annotations": {"enable-auth": "false"}, - } - } - } - ): - yield http_s3_ovms_model_mesh_serving_runtime - - -@pytest.fixture(scope="class") -def http_model_mesh_view_role( - unprivileged_client: DynamicClient, - http_s3_openvino_model_mesh_inference_service: InferenceService, - 
http_s3_ovms_model_mesh_serving_runtime: ServingRuntime, -) -> Generator[Role, Any, Any]: - with Role( - client=unprivileged_client, - name=f"{http_s3_openvino_model_mesh_inference_service.name}-view", - namespace=http_s3_openvino_model_mesh_inference_service.namespace, - rules=[ - {"apiGroups": [""], "resources": ["services"], "verbs": ["get"]}, - ], - ) as role: - yield role - - -@pytest.fixture(scope="class") -def http_model_mesh_role_binding( - unprivileged_client: DynamicClient, - http_model_mesh_view_role: Role, - ci_service_account: ServiceAccount, -) -> Generator[RoleBinding, Any, Any]: - with RoleBinding( - client=unprivileged_client, - namespace=ci_service_account.namespace, - name=f"{Protocols.HTTP}-{ci_service_account.name}-view", - role_ref_name=http_model_mesh_view_role.name, - role_ref_kind=http_model_mesh_view_role.kind, - subjects_kind=ci_service_account.kind, - subjects_name=ci_service_account.name, - ) as rb: - yield rb - - -@pytest.fixture(scope="class") -def http_model_mesh_inference_token( - ci_service_account: ServiceAccount, http_model_mesh_role_binding: RoleBinding -) -> str: - return RedactedString(value=create_inference_token(model_service_account=ci_service_account)) + yield http_ovms_raw_inference_service diff --git a/tests/model_serving/model_server/authentication/test_kserve_token_authentication_raw.py b/tests/model_serving/model_server/authentication/test_kserve_token_authentication_raw.py index f276fbd39..42cc0a45f 100644 --- a/tests/model_serving/model_server/authentication/test_kserve_token_authentication_raw.py +++ b/tests/model_serving/model_server/authentication/test_kserve_token_authentication_raw.py @@ -2,23 +2,22 @@ from ocp_resources.resource import ResourceEditor from tests.model_serving.model_server.utils import verify_inference_response -from utilities.constants import ModelFormat, ModelStoragePath, Protocols +from utilities.constants import Protocols from utilities.constants import Annotations from 
utilities.inference_utils import Inference, UserInference from utilities.infra import check_pod_status_in_time, get_pods_by_isvc_label from utilities.jira import is_jira_open -from utilities.manifests.caikit_tgis import CAIKIT_TGIS_INFERENCE_CONFIG +from utilities.manifests.onnx import ONNX_INFERENCE_CONFIG pytestmark = pytest.mark.usefixtures("valid_aws_config") @pytest.mark.rawdeployment @pytest.mark.parametrize( - "unprivileged_model_namespace, s3_models_storage_uri", + "unprivileged_model_namespace", [ pytest.param( {"name": "kserve-raw-token-authentication"}, - {"model-dir": ModelStoragePath.FLAN_T5_SMALL_CAIKIT}, ) ], indirect=True, @@ -27,42 +26,40 @@ class TestKserveTokenAuthenticationRawForRest: @pytest.mark.smoke @pytest.mark.ocp_interop @pytest.mark.dependency(name="test_model_authentication_using_rest_raw") - def test_model_authentication_using_rest_raw(self, http_s3_caikit_raw_inference_service, http_raw_inference_token): + def test_model_authentication_using_rest_raw(self, http_ovms_raw_inference_service, http_ovms_raw_inference_token): """Verify RAW Kserve model query with token using REST""" verify_inference_response( - inference_service=http_s3_caikit_raw_inference_service, - inference_config=CAIKIT_TGIS_INFERENCE_CONFIG, - inference_type=Inference.ALL_TOKENS, + inference_service=http_ovms_raw_inference_service, + inference_config=ONNX_INFERENCE_CONFIG, + inference_type=Inference.INFER, protocol=Protocols.HTTPS, - model_name=ModelFormat.CAIKIT, use_default_query=True, - token=http_raw_inference_token, + token=http_ovms_raw_inference_token, ) @pytest.mark.dependency(name="test_disabled_raw_model_authentication") - def test_disabled_raw_model_authentication(self, patched_remove_raw_authentication_isvc): + def test_disabled_raw_model_authentication(self, patched_remove_ovms_raw_authentication_isvc): """Verify model query after authentication is disabled""" verify_inference_response( - inference_service=patched_remove_raw_authentication_isvc, - 
inference_config=CAIKIT_TGIS_INFERENCE_CONFIG, - inference_type=Inference.ALL_TOKENS, + inference_service=patched_remove_ovms_raw_authentication_isvc, + inference_config=ONNX_INFERENCE_CONFIG, + inference_type=Inference.INFER, protocol=Protocols.HTTPS, - model_name=ModelFormat.CAIKIT, use_default_query=True, ) @pytest.mark.sanity @pytest.mark.jira("RHOAIENG-19275", run=False) - def test_raw_disable_enable_authentication_no_pod_rollout(self, http_s3_caikit_raw_inference_service): + def test_raw_disable_enable_authentication_no_pod_rollout(self, http_ovms_raw_inference_service): """Verify no pod rollout when disabling and enabling authentication""" pod = get_pods_by_isvc_label( - client=http_s3_caikit_raw_inference_service.client, - isvc=http_s3_caikit_raw_inference_service, + client=http_ovms_raw_inference_service.client, + isvc=http_ovms_raw_inference_service, )[0] ResourceEditor( patches={ - http_s3_caikit_raw_inference_service: { + http_ovms_raw_inference_service: { "metadata": { "annotations": {Annotations.KserveAuth.SECURITY: "false"}, } @@ -74,7 +71,7 @@ def test_raw_disable_enable_authentication_no_pod_rollout(self, http_s3_caikit_r ResourceEditor( patches={ - http_s3_caikit_raw_inference_service: { + http_ovms_raw_inference_service: { "metadata": { "annotations": {Annotations.KserveAuth.SECURITY: "true"}, } @@ -85,44 +82,42 @@ def test_raw_disable_enable_authentication_no_pod_rollout(self, http_s3_caikit_r check_pod_status_in_time(pod=pod, status={pod.Status.RUNNING}) @pytest.mark.dependency(depends=["test_disabled_raw_model_authentication"]) - def test_re_enabled_raw_model_authentication(self, http_s3_caikit_raw_inference_service, http_raw_inference_token): + def test_re_enabled_raw_model_authentication(self, http_ovms_raw_inference_service, http_ovms_raw_inference_token): """Verify model query after authentication is re-enabled""" verify_inference_response( - inference_service=http_s3_caikit_raw_inference_service, - 
inference_config=CAIKIT_TGIS_INFERENCE_CONFIG,
-            inference_type=Inference.ALL_TOKENS,
+            inference_service=http_ovms_raw_inference_service,
+            inference_config=ONNX_INFERENCE_CONFIG,
+            inference_type=Inference.INFER,
             protocol=Protocols.HTTPS,
-            model_name=ModelFormat.CAIKIT,
             use_default_query=True,
-            token=http_raw_inference_token,
+            token=http_ovms_raw_inference_token,
         )
 
     @pytest.mark.dependency(name="test_cross_model_authentication_raw")
     def test_cross_model_authentication_raw(
-        self, http_s3_caikit_raw_inference_service_2, http_raw_inference_token, admin_client
+        self, http_ovms_raw_inference_service_2, http_ovms_raw_inference_token, admin_client
     ):
         """Verify model with another model token"""
         if is_jira_open(jira_id="RHOAIENG-19645", admin_client=admin_client):
             inference = UserInference(
-                inference_service=http_s3_caikit_raw_inference_service_2,
-                inference_config=CAIKIT_TGIS_INFERENCE_CONFIG,
-                inference_type=Inference.ALL_TOKENS,
+                inference_service=http_ovms_raw_inference_service_2,
+                inference_config=ONNX_INFERENCE_CONFIG,
+                inference_type=Inference.INFER,
                 protocol=Protocols.HTTPS,
             )
 
             res = inference.run_inference_flow(
-                model_name=ModelFormat.CAIKIT, use_default_query=True, token=http_raw_inference_token, insecure=False
+                use_default_query=True, token=http_ovms_raw_inference_token, insecure=False
             )
             status_line = res["output"].splitlines()[0]
             assert "302 Found" in status_line, f"Expected '302 Found' in status line, got: {status_line}"
         else:
             verify_inference_response(
-                inference_service=http_s3_caikit_raw_inference_service_2,
-                inference_config=CAIKIT_TGIS_INFERENCE_CONFIG,
-                inference_type=Inference.ALL_TOKENS,
+                inference_service=http_ovms_raw_inference_service_2,
+                inference_config=ONNX_INFERENCE_CONFIG,
+                inference_type=Inference.INFER,
                 protocol=Protocols.HTTPS,
-                model_name=ModelFormat.CAIKIT,
                 use_default_query=True,
-                token=http_raw_inference_token,
+                token=http_ovms_raw_inference_token,
                 authorized_user=False,
             )
diff --git 
a/tests/model_serving/model_server/authentication/test_kserve_token_authentication_serverless.py b/tests/model_serving/model_server/authentication/test_kserve_token_authentication_serverless.py index 7a5534594..222079df5 100644 --- a/tests/model_serving/model_server/authentication/test_kserve_token_authentication_serverless.py +++ b/tests/model_serving/model_server/authentication/test_kserve_token_authentication_serverless.py @@ -2,20 +2,19 @@ from ocp_resources.resource import ResourceEditor from tests.model_serving.model_server.utils import verify_inference_response -from utilities.constants import Annotations, ModelFormat, ModelStoragePath, Protocols +from utilities.constants import Annotations, Protocols from utilities.inference_utils import Inference from utilities.infra import check_pod_status_in_time, get_pods_by_isvc_label -from utilities.manifests.caikit_tgis import CAIKIT_TGIS_INFERENCE_CONFIG +from utilities.manifests.onnx import ONNX_INFERENCE_CONFIG pytestmark = [pytest.mark.serverless, pytest.mark.usefixtures("valid_aws_config")] @pytest.mark.parametrize( - "unprivileged_model_namespace, s3_models_storage_uri", + "unprivileged_model_namespace", [ pytest.param( {"name": "kserve-token-authentication"}, - {"model-dir": ModelStoragePath.FLAN_T5_SMALL_CAIKIT}, ) ], indirect=True, @@ -24,107 +23,74 @@ class TestKserveServerlessTokenAuthentication: @pytest.mark.smoke @pytest.mark.ocp_interop @pytest.mark.dependency(name="test_model_authentication_using_rest") - def test_model_authentication_using_rest(self, http_s3_caikit_serverless_inference_service, http_inference_token): + def test_model_authentication_using_rest(self, http_ovms_serverless_inference_service, http_ovms_inference_token): """Verify model query with token using REST""" verify_inference_response( - inference_service=http_s3_caikit_serverless_inference_service, - inference_config=CAIKIT_TGIS_INFERENCE_CONFIG, - inference_type=Inference.ALL_TOKENS, + 
inference_service=http_ovms_serverless_inference_service, + inference_config=ONNX_INFERENCE_CONFIG, + inference_type=Inference.INFER, protocol=Protocols.HTTPS, - model_name=ModelFormat.CAIKIT, use_default_query=True, - token=http_inference_token, - ) - - @pytest.mark.smoke - @pytest.mark.ocp_interop - def test_model_authentication_using_grpc(self, grpc_s3_inference_service, grpc_inference_token): - """Verify model query with token using GRPC""" - verify_inference_response( - inference_service=grpc_s3_inference_service, - inference_config=CAIKIT_TGIS_INFERENCE_CONFIG, - inference_type=Inference.STREAMING, - protocol=Protocols.GRPC, - model_name=ModelFormat.CAIKIT, - use_default_query=True, - token=grpc_inference_token, + token=http_ovms_inference_token, ) @pytest.mark.dependency(name="test_disabled_model_authentication") - def test_disabled_model_authentication(self, patched_remove_authentication_isvc): + def test_disabled_model_authentication(self, patched_remove_ovms_authentication_isvc): """Verify model query after authentication is disabled""" verify_inference_response( - inference_service=patched_remove_authentication_isvc, - inference_config=CAIKIT_TGIS_INFERENCE_CONFIG, - inference_type=Inference.ALL_TOKENS, + inference_service=patched_remove_ovms_authentication_isvc, + inference_config=ONNX_INFERENCE_CONFIG, + inference_type=Inference.INFER, protocol=Protocols.HTTPS, - model_name=ModelFormat.CAIKIT, use_default_query=True, ) @pytest.mark.dependency(depends=["test_disabled_model_authentication"]) - def test_re_enabled_model_authentication(self, http_s3_caikit_serverless_inference_service, http_inference_token): + def test_re_enabled_model_authentication(self, http_ovms_serverless_inference_service, http_ovms_inference_token): """Verify model query after authentication is re-enabled""" verify_inference_response( - inference_service=http_s3_caikit_serverless_inference_service, - inference_config=CAIKIT_TGIS_INFERENCE_CONFIG, - 
inference_type=Inference.ALL_TOKENS, + inference_service=http_ovms_serverless_inference_service, + inference_config=ONNX_INFERENCE_CONFIG, + inference_type=Inference.INFER, protocol=Protocols.HTTPS, - model_name=ModelFormat.CAIKIT, use_default_query=True, - token=http_inference_token, + token=http_ovms_inference_token, ) - def test_model_authentication_using_invalid_token(self, http_s3_caikit_serverless_inference_service): + def test_model_authentication_using_invalid_token(self, http_ovms_serverless_inference_service): """Verify model query with an invalid token""" verify_inference_response( - inference_service=http_s3_caikit_serverless_inference_service, - inference_config=CAIKIT_TGIS_INFERENCE_CONFIG, - inference_type=Inference.ALL_TOKENS, + inference_service=http_ovms_serverless_inference_service, + inference_config=ONNX_INFERENCE_CONFIG, + inference_type=Inference.INFER, protocol=Protocols.HTTPS, - model_name=ModelFormat.CAIKIT, use_default_query=True, token="dummy", authorized_user=False, ) - def test_model_authentication_without_token(self, http_s3_caikit_serverless_inference_service): + def test_model_authentication_without_token(self, http_ovms_serverless_inference_service): """Verify model query without providing a token""" verify_inference_response( - inference_service=http_s3_caikit_serverless_inference_service, - inference_config=CAIKIT_TGIS_INFERENCE_CONFIG, - inference_type=Inference.ALL_TOKENS, - protocol=Protocols.HTTPS, - model_name=ModelFormat.CAIKIT, - use_default_query=True, - authorized_user=False, - ) - - @pytest.mark.sanity - def test_block_cross_model_authentication(self, http_s3_caikit_serverless_inference_service, grpc_inference_token): - """Verify model query with a second model's token is blocked""" - verify_inference_response( - inference_service=http_s3_caikit_serverless_inference_service, - inference_config=CAIKIT_TGIS_INFERENCE_CONFIG, - inference_type=Inference.ALL_TOKENS, + inference_service=http_ovms_serverless_inference_service, 
+ inference_config=ONNX_INFERENCE_CONFIG, + inference_type=Inference.INFER, protocol=Protocols.HTTPS, - model_name=ModelFormat.CAIKIT, use_default_query=True, - token=grpc_inference_token, authorized_user=False, ) @pytest.mark.sanity - def test_serverless_disable_enable_authentication_no_pod_rollout(self, http_s3_caikit_serverless_inference_service): + def test_serverless_disable_enable_authentication_no_pod_rollout(self, http_ovms_serverless_inference_service): """Verify no pod rollout when disabling and enabling authentication""" pod = get_pods_by_isvc_label( - client=http_s3_caikit_serverless_inference_service.client, - isvc=http_s3_caikit_serverless_inference_service, + client=http_ovms_serverless_inference_service.client, + isvc=http_ovms_serverless_inference_service, )[0] ResourceEditor( patches={ - http_s3_caikit_serverless_inference_service: { + http_ovms_serverless_inference_service: { "metadata": { "annotations": {Annotations.KserveAuth.SECURITY: "false"}, } @@ -136,7 +102,7 @@ def test_serverless_disable_enable_authentication_no_pod_rollout(self, http_s3_c ResourceEditor( patches={ - http_s3_caikit_serverless_inference_service: { + http_ovms_serverless_inference_service: { "metadata": { "annotations": {Annotations.KserveAuth.SECURITY: "true"}, } diff --git a/tests/model_serving/model_server/authentication/test_non_admin_users.py b/tests/model_serving/model_server/authentication/test_non_admin_users.py index bcbddba85..7380ee30b 100644 --- a/tests/model_serving/model_server/authentication/test_non_admin_users.py +++ b/tests/model_serving/model_server/authentication/test_non_admin_users.py @@ -3,17 +3,16 @@ from tests.model_serving.model_server.utils import ( verify_inference_response, ) -from utilities.constants import ModelFormat, ModelStoragePath, Protocols +from utilities.constants import Protocols from utilities.inference_utils import Inference -from utilities.manifests.caikit_tgis import CAIKIT_TGIS_INFERENCE_CONFIG +from utilities.manifests.onnx 
import ONNX_INFERENCE_CONFIG @pytest.mark.parametrize( - "unprivileged_model_namespace, unprivileged_s3_caikit_serverless_inference_service", + "unprivileged_model_namespace", [ pytest.param( {"name": "non-admin-serverless"}, - {"model-dir": ModelStoragePath.FLAN_T5_SMALL_CAIKIT}, ) ], indirect=True, @@ -22,24 +21,22 @@ @pytest.mark.serverless class TestServerlessUnprivilegedUser: @pytest.mark.polarion("ODS-2552") - def test_non_admin_deploy_serverless_and_query_model(self, unprivileged_s3_caikit_serverless_inference_service): + def test_non_admin_deploy_serverless_and_query_model(self, unprivileged_ovms_serverless_inference_service): """Verify non admin can deploy a model and query using REST""" verify_inference_response( - inference_service=unprivileged_s3_caikit_serverless_inference_service, - inference_config=CAIKIT_TGIS_INFERENCE_CONFIG, - inference_type=Inference.ALL_TOKENS, + inference_service=unprivileged_ovms_serverless_inference_service, + inference_config=ONNX_INFERENCE_CONFIG, + inference_type=Inference.INFER, protocol=Protocols.HTTPS, - model_name=ModelFormat.CAIKIT, use_default_query=True, ) @pytest.mark.parametrize( - "unprivileged_model_namespace, unprivileged_s3_caikit_raw_inference_service", + "unprivileged_model_namespace", [ pytest.param( {"name": "non-admin-raw"}, - {"model-dir": ModelStoragePath.FLAN_T5_SMALL_HF}, ) ], indirect=True, @@ -50,14 +47,13 @@ class TestRawUnprivilegedUser: @pytest.mark.polarion("ODS-2611") def test_non_admin_deploy_raw_and_query_model( self, - unprivileged_s3_caikit_raw_inference_service, + unprivileged_ovms_raw_inference_service, ): """Verify non admin can deploy a Raw model and query using REST""" verify_inference_response( - inference_service=unprivileged_s3_caikit_raw_inference_service, - inference_config=CAIKIT_TGIS_INFERENCE_CONFIG, - inference_type=Inference.ALL_TOKENS, + inference_service=unprivileged_ovms_raw_inference_service, + inference_config=ONNX_INFERENCE_CONFIG, + inference_type=Inference.INFER, 
protocol=Protocols.HTTP, - model_name=ModelFormat.CAIKIT, use_default_query=True, ) diff --git a/tests/model_serving/model_server/conftest.py b/tests/model_serving/model_server/conftest.py index 831048656..f954d5a21 100644 --- a/tests/model_serving/model_server/conftest.py +++ b/tests/model_serving/model_server/conftest.py @@ -21,7 +21,6 @@ KServeDeploymentType, ModelFormat, ModelInferenceRuntime, - ModelStoragePath, Protocols, RuntimeTemplates, StorageClassName, @@ -494,28 +493,7 @@ def http_s3_openvino_second_model_mesh_inference_service( @pytest.fixture(scope="class") -def unprivileged_s3_caikit_raw_inference_service( - request: FixtureRequest, - unprivileged_client: DynamicClient, - unprivileged_model_namespace: Namespace, - unprivileged_s3_caikit_serving_runtime: ServingRuntime, - unprivileged_models_endpoint_s3_secret: Secret, -) -> Generator[InferenceService, Any, Any]: - with create_isvc( - client=unprivileged_client, - name=f"{Protocols.HTTP}-{ModelFormat.CAIKIT}-raw", - namespace=unprivileged_model_namespace.name, - runtime=unprivileged_s3_caikit_serving_runtime.name, - model_format=unprivileged_s3_caikit_serving_runtime.instance.spec.supportedModelFormats[0].name, - deployment_mode=KServeDeploymentType.RAW_DEPLOYMENT, - storage_key=unprivileged_models_endpoint_s3_secret.name, - storage_path=ModelStoragePath.FLAN_T5_SMALL_CAIKIT, - ) as isvc: - yield isvc - - -@pytest.fixture(scope="class") -def unprivileged_s3_caikit_serving_runtime( +def unprivileged_ovms_kserve_serving_runtime( admin_client: DynamicClient, unprivileged_client: DynamicClient, unprivileged_model_namespace: Namespace, @@ -523,56 +501,59 @@ def unprivileged_s3_caikit_serving_runtime( with ServingRuntimeFromTemplate( client=admin_client, unprivileged_client=unprivileged_client, - name=f"{Protocols.HTTP}-{ModelInferenceRuntime.CAIKIT_TGIS_RUNTIME}", + name=f"{Protocols.HTTP}-ovms-runtime", namespace=unprivileged_model_namespace.name, - template_name=RuntimeTemplates.CAIKIT_TGIS_SERVING, + 
template_name=RuntimeTemplates.OVMS_KSERVE, multi_model=False, - enable_http=True, - enable_grpc=False, + model_format_name={ModelFormat.ONNX: ModelVersion.OPSET13}, + resources={ + ModelFormat.OVMS: { + "requests": {"cpu": "1", "memory": "4Gi"}, + "limits": {"cpu": "2", "memory": "8Gi"}, + } + }, ) as model_runtime: yield model_runtime @pytest.fixture(scope="class") -def unprivileged_models_endpoint_s3_secret( +def unprivileged_ovms_serverless_inference_service( unprivileged_client: DynamicClient, unprivileged_model_namespace: Namespace, - aws_access_key_id: str, - aws_secret_access_key: str, - models_s3_bucket_name: str, - models_s3_bucket_region: str, - models_s3_bucket_endpoint: str, -) -> Generator[Secret, Any, Any]: - with s3_endpoint_secret( + unprivileged_ovms_kserve_serving_runtime: ServingRuntime, + ci_endpoint_s3_secret: Secret, +) -> Generator[InferenceService, Any, Any]: + with create_isvc( client=unprivileged_client, - name="models-bucket-secret", + name=f"{Protocols.HTTP}-{ModelFormat.ONNX}", namespace=unprivileged_model_namespace.name, - aws_access_key=aws_access_key_id, - aws_secret_access_key=aws_secret_access_key, - aws_s3_region=models_s3_bucket_region, - aws_s3_bucket=models_s3_bucket_name, - aws_s3_endpoint=models_s3_bucket_endpoint, - ) as secret: - yield secret + runtime=unprivileged_ovms_kserve_serving_runtime.name, + model_format=ModelAndFormat.OPENVINO_IR, + deployment_mode=KServeDeploymentType.SERVERLESS, + storage_key=ci_endpoint_s3_secret.name, + storage_path="test-dir", + model_version=ModelVersion.OPSET13, + ) as isvc: + yield isvc @pytest.fixture(scope="class") -def unprivileged_s3_caikit_serverless_inference_service( - request: FixtureRequest, +def unprivileged_ovms_raw_inference_service( unprivileged_client: DynamicClient, unprivileged_model_namespace: Namespace, - unprivileged_s3_caikit_serving_runtime: ServingRuntime, - unprivileged_models_endpoint_s3_secret: Secret, + unprivileged_ovms_kserve_serving_runtime: ServingRuntime, + 
ci_endpoint_s3_secret: Secret, ) -> Generator[InferenceService, Any, Any]: with create_isvc( client=unprivileged_client, - name=f"{Protocols.HTTP}-{ModelFormat.CAIKIT}", + name=f"{Protocols.HTTP}-{ModelFormat.ONNX}", namespace=unprivileged_model_namespace.name, - runtime=unprivileged_s3_caikit_serving_runtime.name, - model_format=unprivileged_s3_caikit_serving_runtime.instance.spec.supportedModelFormats[0].name, - deployment_mode=KServeDeploymentType.SERVERLESS, - storage_key=unprivileged_models_endpoint_s3_secret.name, - storage_path=request.param["model-dir"], + runtime=unprivileged_ovms_kserve_serving_runtime.name, + model_format=ModelAndFormat.OPENVINO_IR, + deployment_mode=KServeDeploymentType.RAW_DEPLOYMENT, + storage_key=ci_endpoint_s3_secret.name, + storage_path="test-dir", + model_version=ModelVersion.OPSET13, ) as isvc: yield isvc diff --git a/tests/model_serving/model_server/metrics/test_non_admin_users.py b/tests/model_serving/model_server/metrics/test_non_admin_users.py index 135e3d978..3b7d3a14d 100644 --- a/tests/model_serving/model_server/metrics/test_non_admin_users.py +++ b/tests/model_serving/model_server/metrics/test_non_admin_users.py @@ -4,18 +4,17 @@ run_inference_multiple_times, verify_inference_response, ) -from utilities.constants import ModelFormat, ModelStoragePath, Protocols +from utilities.constants import Protocols from utilities.inference_utils import Inference -from utilities.manifests.caikit_tgis import CAIKIT_TGIS_INFERENCE_CONFIG +from utilities.manifests.onnx import ONNX_INFERENCE_CONFIG from utilities.monitoring import validate_metrics_field @pytest.mark.parametrize( - "unprivileged_model_namespace, unprivileged_s3_caikit_serverless_inference_service", + "unprivileged_model_namespace", [ pytest.param( {"name": "non-admin-serverless"}, - {"model-dir": ModelStoragePath.FLAN_T5_SMALL_CAIKIT}, ) ], indirect=True, @@ -24,24 +23,22 @@ @pytest.mark.serverless class TestServerlessUnprivilegedUser: @pytest.mark.polarion("ODS-2552") - 
def test_non_admin_deploy_serverless_and_query_metrics(self, unprivileged_s3_caikit_serverless_inference_service): + def test_non_admin_deploy_serverless_and_query_metrics(self, unprivileged_ovms_serverless_inference_service): """Verify non admin can deploy a model and query using REST""" verify_inference_response( - inference_service=unprivileged_s3_caikit_serverless_inference_service, - inference_config=CAIKIT_TGIS_INFERENCE_CONFIG, - inference_type=Inference.ALL_TOKENS, + inference_service=unprivileged_ovms_serverless_inference_service, + inference_config=ONNX_INFERENCE_CONFIG, + inference_type=Inference.INFER, protocol=Protocols.HTTPS, - model_name=ModelFormat.CAIKIT, use_default_query=True, ) @pytest.mark.parametrize( - "unprivileged_model_namespace, unprivileged_s3_caikit_raw_inference_service", + "unprivileged_model_namespace", [ pytest.param( {"name": "non-admin-metrics"}, - {"model-dir": ModelStoragePath.FLAN_T5_SMALL_HF}, ) ], indirect=True, @@ -52,7 +49,7 @@ class TestRawUnprivilegedUserMetrics: @pytest.mark.metrics def test_non_admin_raw_metrics( self, - unprivileged_s3_caikit_raw_inference_service, + unprivileged_ovms_raw_inference_service, prometheus, user_workload_monitoring_config_map, ): @@ -60,15 +57,14 @@ def test_non_admin_raw_metrics( total_runs = 5 run_inference_multiple_times( - isvc=unprivileged_s3_caikit_raw_inference_service, - inference_config=CAIKIT_TGIS_INFERENCE_CONFIG, - inference_type=Inference.ALL_TOKENS, + isvc=unprivileged_ovms_raw_inference_service, + inference_config=ONNX_INFERENCE_CONFIG, + inference_type=Inference.INFER, protocol=Protocols.HTTP, - model_name=ModelFormat.CAIKIT, iterations=total_runs, ) validate_metrics_field( prometheus=prometheus, - metrics_query="tgi_request_count", + metrics_query="ovms_requests_success", expected_value=str(total_runs), ) diff --git a/utilities/infra.py b/utilities/infra.py index 4cb94f778..a7d83c3c5 100644 --- a/utilities/infra.py +++ b/utilities/infra.py @@ -140,23 +140,26 @@ def 
create_ns( namespace_kwargs["label"][Labels.Kueue.MANAGED] = "true" # type: ignore if unprivileged_client: - with ProjectRequest(name=name, client=unprivileged_client, teardown=teardown): - project = Project(**namespace_kwargs) - project.wait_for_status(status=project.Status.ACTIVE, timeout=Timeout.TIMEOUT_2MIN) - if _labels := namespace_kwargs.get("label", {}): - # To patch the namespace, admin client is required - ns = Namespace(client=get_client(), name=name) - ResourceEditor({ - ns: { - "metadata": { - "labels": _labels, - } + namespace_kwargs["client"] = unprivileged_client + project = ProjectRequest(**namespace_kwargs).deploy() + project.wait_for_status(status=project.Status.ACTIVE, timeout=Timeout.TIMEOUT_2MIN) + if _labels := namespace_kwargs.get("label", {}): + # To patch the namespace, admin client is required + ns = Namespace(client=get_client(), name=name) + ResourceEditor({ + ns: { + "metadata": { + "labels": _labels, } - }).update() - yield project - - if teardown: - wait_for_serverless_pods_deletion(resource=project, admin_client=client) + } + }).update() + yield project + + if teardown: + wait_for_serverless_pods_deletion(resource=project, admin_client=get_client()) + # cleanup must be done with admin client + project.client = get_client() + project.clean_up() else: with Namespace(**namespace_kwargs) as ns: