From b3623827502f62be70602f8baca5b9401baf4af1 Mon Sep 17 00:00:00 2001 From: Ruth Netser Date: Wed, 18 Dec 2024 16:31:18 +0200 Subject: [PATCH 1/7] Create size-labeler.yml --- .github/workflows/size-labeler.yml | 31 ++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 .github/workflows/size-labeler.yml diff --git a/.github/workflows/size-labeler.yml b/.github/workflows/size-labeler.yml new file mode 100644 index 000000000..8d0fcbd94 --- /dev/null +++ b/.github/workflows/size-labeler.yml @@ -0,0 +1,31 @@ +name: PR Size Labeler +on: + pull_request: + types: [opened, synchronize] + +permissions: + pull-requests: write + contents: write + +jobs: + label-size: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.13' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install tox uv + + - name: Run size labeler + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_PR_NUMBER: ${{github.event.pull_request.number}} + run: uv run python .github/workflows/scripts/size_labeler.py From 3c6a875ef85e8e5dee1e7e4125f756b6874f842e Mon Sep 17 00:00:00 2001 From: Ruth Netser Date: Wed, 18 Dec 2024 16:32:36 +0200 Subject: [PATCH 2/7] Delete .github/workflows/size-labeler.yml --- .github/workflows/size-labeler.yml | 31 ------------------------------ 1 file changed, 31 deletions(-) delete mode 100644 .github/workflows/size-labeler.yml diff --git a/.github/workflows/size-labeler.yml b/.github/workflows/size-labeler.yml deleted file mode 100644 index 8d0fcbd94..000000000 --- a/.github/workflows/size-labeler.yml +++ /dev/null @@ -1,31 +0,0 @@ -name: PR Size Labeler -on: - pull_request: - types: [opened, synchronize] - -permissions: - pull-requests: write - contents: write - -jobs: - label-size: - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v4 - - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: '3.13' - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install tox uv - - - name: Run size labeler - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - GITHUB_PR_NUMBER: ${{github.event.pull_request.number}} - run: uv run python .github/workflows/scripts/size_labeler.py From 17938d6d99533a1882ccc07e00bfd352a594cd8f Mon Sep 17 00:00:00 2001 From: rnetser Date: Sun, 2 Feb 2025 19:22:27 +0200 Subject: [PATCH 3/7] model mesh - add auth tests --- .../model_server/authentication/conftest.py | 78 +++++++++-- .../test_model_mesh_authentication.py | 123 ++++++++++++++++++ tests/model_serving/model_server/conftest.py | 59 ++++++++- .../model_server/model_mesh/conftest.py | 67 ---------- .../model_server/ovms/model_mesh/conftest.py | 15 +-- utilities/infra.py | 11 +- 6 files changed, 253 insertions(+), 100 deletions(-) create mode 100644 tests/model_serving/model_server/authentication/test_model_mesh_authentication.py delete mode 100644 tests/model_serving/model_server/model_mesh/conftest.py diff --git a/tests/model_serving/model_server/authentication/conftest.py b/tests/model_serving/model_server/authentication/conftest.py index 0e3aa5d97..f0f8d28fa 100644 --- a/tests/model_serving/model_server/authentication/conftest.py +++ b/tests/model_serving/model_server/authentication/conftest.py @@ -1,4 +1,3 @@ -import shlex from typing import Any, Generator from urllib.parse import urlparse @@ -13,9 +12,8 @@ from ocp_resources.secret import Secret from ocp_resources.service_account import ServiceAccount from ocp_resources.serving_runtime import ServingRuntime -from pyhelper_utils.shell import run_command -from utilities.infra import create_isvc_view_role, create_ns, s3_endpoint_secret, create_inference_token +from utilities.infra import create_resource_view_role, create_ns, s3_endpoint_secret, create_inference_token from tests.model_serving.model_server.utils import create_isvc from utilities.constants import ( KServeDeploymentType, @@ -85,9 +83,9 @@ def http_view_role( admin_client: DynamicClient, http_s3_caikit_serverless_inference_service: InferenceService, ) -> Role: - with create_isvc_view_role( + with create_resource_view_role( client=admin_client, - isvc=http_s3_caikit_serverless_inference_service, + resource=http_s3_caikit_serverless_inference_service, name=f"{http_s3_caikit_serverless_inference_service.name}-view", resource_names=[http_s3_caikit_serverless_inference_service.name], ) as role: @@ -99,9 +97,9 @@ def http_raw_view_role( admin_client: DynamicClient, http_s3_caikit_raw_inference_service: InferenceService, ) -> Role: - with create_isvc_view_role( + with create_resource_view_role( client=admin_client, - isvc=http_s3_caikit_raw_inference_service, + resource=http_s3_caikit_raw_inference_service, name=f"{http_s3_caikit_raw_inference_service.name}-view", resource_names=[http_s3_caikit_raw_inference_service.name], ) as role: @@ -192,9 +190,9 @@ def patched_remove_raw_authentication_isvc( @pytest.fixture(scope="class") def grpc_view_role(admin_client: DynamicClient, grpc_s3_inference_service: InferenceService) -> Role: - with create_isvc_view_role( + with create_resource_view_role( client=admin_client, - isvc=grpc_s3_inference_service, + resource=grpc_s3_inference_service, name=f"{grpc_s3_inference_service.name}-view", resource_names=[grpc_s3_inference_service.name], ) as role: @@ -222,11 +220,7 @@ def grpc_role_binding( @pytest.fixture(scope="class") def grpc_inference_token(grpc_model_service_account: ServiceAccount, grpc_role_binding: RoleBinding) -> str: - return run_command( - command=shlex.split( - f"oc create token -n {grpc_model_service_account.namespace} {grpc_model_service_account.name}" - ) - )[1].strip() + return create_inference_token(model_service_account=grpc_model_service_account) @pytest.fixture(scope="class") @@ -364,3 +358,59 @@ def http_s3_caikit_tgis_serving_runtime( enable_grpc=False, ) as model_runtime: yield model_runtime + + +@pytest.fixture() +def patched_remove_authentication_model_mesh_isvc( + admin_client: DynamicClient, + http_s3_openvino_model_mesh_inference_service: InferenceService, +) -> InferenceService: + with ResourceEditor( + patches={ + http_s3_openvino_model_mesh_inference_service: { + "metadata": { + "annotations": {Annotations.KserveAuth.SECURITY: "false"}, + } + } + } + ): + yield http_s3_openvino_model_mesh_inference_service + + +@pytest.fixture(scope="class") +def http_model_mesh_view_role( + admin_client: DynamicClient, + http_s3_ovms_model_mesh_serving_runtime: ServingRuntime, +) -> Role: + with create_resource_view_role( + client=admin_client, + resource=http_s3_ovms_model_mesh_serving_runtime, + name=f"{http_s3_ovms_model_mesh_serving_runtime.name}-view", + resource_names=[http_s3_ovms_model_mesh_serving_runtime.name], + ) as role: + yield role + + +@pytest.fixture(scope="class") +def http_model_mesh_role_binding( + admin_client: DynamicClient, + http_model_mesh_view_role: Role, + model_service_account: ServiceAccount, +) -> RoleBinding: + with RoleBinding( + client=admin_client, + namespace=model_service_account.namespace, + name=f"{Protocols.HTTP}-{model_service_account.name}-view", + role_ref_name=http_model_mesh_view_role.name, + role_ref_kind=http_model_mesh_view_role.kind, + subjects_kind=model_service_account.kind, + subjects_name=model_service_account.name, + ) as rb: + yield rb + + +@pytest.fixture(scope="class") +def http_model_mesh_inference_token( + model_service_account: ServiceAccount, http_model_mesh_role_binding: RoleBinding +) -> str: + return create_inference_token(model_service_account=model_service_account) diff --git a/tests/model_serving/model_server/authentication/test_model_mesh_authentication.py b/tests/model_serving/model_server/authentication/test_model_mesh_authentication.py new file mode 100644 index 000000000..b08a2b3ee --- /dev/null +++ b/tests/model_serving/model_server/authentication/test_model_mesh_authentication.py @@ -0,0 +1,123 @@ +import pytest + +from tests.model_serving.model_server.utils import verify_inference_response +from utilities.constants import ( + ModelFormat, + ModelStoragePath, + Protocols, +) +from utilities.inference_utils import Inference +from utilities.manifests.openvino import OPENVINO_INFERENCE_CONFIG +from utilities.manifests.tensorflow import TENSORFLOW_INFERENCE_CONFIG + +pytestmark = [pytest.mark.modelmesh, pytest.mark.sanity] + + +@pytest.mark.parametrize( + "model_namespace, http_s3_ovms_model_mesh_serving_runtime, http_s3_openvino_model_mesh_inference_service", + [ + pytest.param( + {"name": "model-mesh-multi-authentication", "modelmesh-enabled": True}, + {"enable-auth": True}, + {"model-path": ModelStoragePath.OPENVINO_EXAMPLE_MODEL}, + ) + ], + indirect=True, +) +class TestModelMeshAuthentication: + @pytest.mark.dependency(name="test_model_mesh_model_authentication_openvino_inference_with_tensorflow") + def test_model_mesh_model_authentication_openvino_inference_with_tensorflow( + self, http_s3_openvino_model_mesh_inference_service, http_model_mesh_inference_token + ): + """Verify model query with token using REST""" + verify_inference_response( + inference_service=http_s3_openvino_model_mesh_inference_service, + inference_config=OPENVINO_INFERENCE_CONFIG, + inference_type=Inference.INFER, + protocol=Protocols.HTTPS, + use_default_query=True, + token=http_model_mesh_inference_token, + ) + + @pytest.mark.dependency(name="test_model_mesh_disabled_model_authentication") + def test_model_mesh_disabled_model_authentication(self, patched_remove_authentication_model_mesh_isvc): + """Verify model query after authentication is disabled""" + verify_inference_response( + inference_service=patched_remove_authentication_model_mesh_isvc, + inference_config=OPENVINO_INFERENCE_CONFIG, + inference_type=Inference.INFER, + protocol=Protocols.HTTPS, + use_default_query=True, + ) + + @pytest.mark.dependency(depends=["test_model_mesh_disabled_model_authentication"]) + def test_model_mesh_re_enabled_model_authentication( + self, http_s3_openvino_model_mesh_inference_service, http_model_mesh_inference_token + ): + """Verify model query after authentication is re-enabled""" + verify_inference_response( + inference_service=http_s3_openvino_model_mesh_inference_service, + inference_config=OPENVINO_INFERENCE_CONFIG, + inference_type=Inference.INFER, + protocol=Protocols.HTTPS, + use_default_query=True, + token=http_model_mesh_inference_token, + ) + + @pytest.mark.dependency(depends=["test_model_mesh_model_authentication_openvino_inference_with_tensorflow"]) + def test_model_mesh_model_authentication_using_invalid_token(self, http_s3_openvino_model_mesh_inference_service): + """Verify model query with an invalid token""" + verify_inference_response( + inference_service=http_s3_openvino_model_mesh_inference_service, + inference_config=OPENVINO_INFERENCE_CONFIG, + inference_type=Inference.INFER, + protocol=Protocols.HTTPS, + use_default_query=True, + token="dummy", + authorized_user=False, + ) + + @pytest.mark.dependency(depends=["test_model_mesh_model_authentication_openvino_inference_with_tensorflow"]) + def test_model_mesh_model_authentication_without_token(self, http_s3_openvino_model_mesh_inference_service): + """Verify model query without providing a token""" + verify_inference_response( + inference_service=http_s3_openvino_model_mesh_inference_service, + inference_config=OPENVINO_INFERENCE_CONFIG, + inference_type=Inference.INFER, + protocol=Protocols.HTTPS, + use_default_query=True, + authorized_user=False, + ) + + @pytest.mark.parametrize( + "http_s3_ovms_external_route_model_mesh_serving_runtime, http_s3_openvino_second_model_mesh_inference_service", + [ + pytest.param( + {"enable-auth": True}, + { + "model-path": ModelStoragePath.TENSORFLOW_MODEL, + "model-format": ModelFormat.TENSORFLOW, + "runtime-fixture-name": "http_s3_ovms_external_route_model_mesh_serving_runtime", + "model-version": "2", + }, + ) + ], + indirect=True, + ) + def test_model_mesh_block_cross_model_authentication( + self, + http_s3_ovms_external_route_model_mesh_serving_runtime, + http_s3_openvino_model_mesh_inference_service, + http_s3_openvino_second_model_mesh_inference_service, + http_model_mesh_inference_token, + ): + """Verify model query with a second model's token is blocked""" + verify_inference_response( + inference_service=http_s3_openvino_second_model_mesh_inference_service, + inference_config=TENSORFLOW_INFERENCE_CONFIG, + inference_type=Inference.INFER, + protocol=Protocols.HTTPS, + use_default_query=True, + token=http_model_mesh_inference_token, + authorized_user=False, + ) diff --git a/tests/model_serving/model_server/conftest.py b/tests/model_serving/model_server/conftest.py index 51a1beb0a..2f008b71c 100644 --- a/tests/model_serving/model_server/conftest.py +++ b/tests/model_serving/model_server/conftest.py @@ -17,15 +17,18 @@ from pytest_testconfig import config as py_config from tests.model_serving.model_server.utils import create_isvc -from utilities.constants import DscComponents, StorageClassName from utilities.constants import ( + DscComponents, KServeDeploymentType, - ModelAndFormat, ModelFormat, ModelInferenceRuntime, - ModelVersion, Protocols, RuntimeTemplates, + StorageClassName, +) +from utilities.constants import ( + ModelAndFormat, + ModelVersion, ) from utilities.infra import s3_endpoint_secret from utilities.data_science_cluster_utils import update_components_in_dsc @@ -396,3 +399,53 @@ def http_s3_tensorflow_model_mesh_inference_service( model_version="2", ) as isvc: yield isvc + + +@pytest.fixture(scope="class") +def http_s3_ovms_external_route_model_mesh_serving_runtime( + request: FixtureRequest, + admin_client: DynamicClient, + model_namespace: Namespace, +) -> ServingRuntime: + with ServingRuntimeFromTemplate( + client=admin_client, + namespace=model_namespace.name, + name=f"{Protocols.HTTP}-{ModelInferenceRuntime.OPENVINO_RUNTIME}-exposed", + template_name=RuntimeTemplates.OVMS_MODEL_MESH, + multi_model=True, + protocol="REST", + resources={ + "ovms": { + "requests": {"cpu": "1", "memory": "4Gi"}, + "limits": {"cpu": "2", "memory": "8Gi"}, + }, + }, + enable_external_route=True, + enable_auth=request.param.get("enable-auth"), + ) as model_runtime: + yield model_runtime + + +@pytest.fixture(scope="class") +def http_s3_openvino_second_model_mesh_inference_service( + request: FixtureRequest, + admin_client: DynamicClient, + model_namespace: Namespace, + ci_model_mesh_endpoint_s3_secret: Secret, + model_mesh_model_service_account: ServiceAccount, +) -> InferenceService: + # Dynamically select the used ServingRuntime by passing "runtime-fixture-name" request.param + runtime = request.getfixturevalue(argname=request.param["runtime-fixture-name"]) + with create_isvc( + client=admin_client, + name=f"{Protocols.HTTP}-{ModelFormat.OPENVINO}-2", + namespace=model_namespace.name, + runtime=runtime.name, + model_service_account=model_mesh_model_service_account.name, + storage_key=ci_model_mesh_endpoint_s3_secret.name, + storage_path=request.param["model-path"], + model_format=request.param["model-format"], + deployment_mode=KServeDeploymentType.MODEL_MESH, + model_version=request.param["model-version"], + ) as isvc: + yield isvc diff --git a/tests/model_serving/model_server/model_mesh/conftest.py b/tests/model_serving/model_server/model_mesh/conftest.py deleted file mode 100644 index b5bbc92bc..000000000 --- a/tests/model_serving/model_server/model_mesh/conftest.py +++ /dev/null @@ -1,67 +0,0 @@ -import pytest -from _pytest.fixtures import FixtureRequest -from kubernetes.dynamic import DynamicClient -from ocp_resources.inference_service import InferenceService -from ocp_resources.namespace import Namespace -from ocp_resources.secret import Secret -from ocp_resources.service_account import ServiceAccount -from ocp_resources.serving_runtime import ServingRuntime - -from tests.model_serving.model_server.utils import create_isvc -from utilities.constants import ( - KServeDeploymentType, - ModelFormat, - ModelInferenceRuntime, - Protocols, - RuntimeTemplates, -) -from utilities.serving_runtime import ServingRuntimeFromTemplate - - -@pytest.fixture(scope="class") -def http_s3_openvino_second_model_mesh_inference_service( - request: FixtureRequest, - admin_client: DynamicClient, - model_namespace: Namespace, - ci_model_mesh_endpoint_s3_secret: Secret, - model_mesh_model_service_account: ServiceAccount, -) -> InferenceService: - # Dynamically select the used ServingRuntime by passing "runtime-fixture-name" request.param - runtime = request.getfixturevalue(argname=request.param["runtime-fixture-name"]) - with create_isvc( - client=admin_client, - name=f"{Protocols.HTTP}-{ModelFormat.OPENVINO}-2", - namespace=model_namespace.name, - runtime=runtime.name, - model_service_account=model_mesh_model_service_account.name, - storage_key=ci_model_mesh_endpoint_s3_secret.name, - storage_path=request.param["model-path"], - model_format=request.param["model-format"], - deployment_mode=KServeDeploymentType.MODEL_MESH, - model_version=request.param["model-version"], - ) as isvc: - yield isvc - - -@pytest.fixture(scope="class") -def http_s3_ovms_external_route_model_mesh_serving_runtime( - request: FixtureRequest, - admin_client: DynamicClient, - model_namespace: Namespace, -) -> ServingRuntime: - with ServingRuntimeFromTemplate( - client=admin_client, - namespace=model_namespace.name, - name=f"{Protocols.HTTP}-{ModelInferenceRuntime.OPENVINO_RUNTIME}-exposed", - template_name=RuntimeTemplates.OVMS_MODEL_MESH, - multi_model=True, - protocol="REST", - resources={ - "ovms": { - "requests": {"cpu": "1", "memory": "4Gi"}, - "limits": {"cpu": "2", "memory": "8Gi"}, - }, - }, - enable_external_route=True, - ) as model_runtime: - yield model_runtime diff --git a/tests/model_serving/model_server/ovms/model_mesh/conftest.py b/tests/model_serving/model_server/ovms/model_mesh/conftest.py index 095c05dbb..77fbe6fa0 100644 --- a/tests/model_serving/model_server/ovms/model_mesh/conftest.py +++ b/tests/model_serving/model_server/ovms/model_mesh/conftest.py @@ -1,5 +1,3 @@ -import shlex - import pytest from kubernetes.dynamic import DynamicClient from ocp_resources.resource import ResourceEditor @@ -7,12 +5,11 @@ from ocp_resources.role_binding import RoleBinding from ocp_resources.service_account import ServiceAccount from ocp_resources.serving_runtime import ServingRuntime -from pyhelper_utils.shell import run_command from utilities.constants import ( Protocols, ) -from utilities.infra import create_isvc_view_role +from utilities.infra import create_inference_token, create_resource_view_role @pytest.fixture(scope="class") @@ -20,9 +17,9 @@ def model_mesh_view_role( admin_client: DynamicClient, http_s3_openvino_model_mesh_inference_service: ServingRuntime, ) -> Role: - with create_isvc_view_role( + with create_resource_view_role( client=admin_client, - isvc=http_s3_openvino_model_mesh_inference_service, + resource=http_s3_openvino_model_mesh_inference_service, name=f"{http_s3_openvino_model_mesh_inference_service.name}-view", resource_names=[http_s3_openvino_model_mesh_inference_service.name], ) as role: @@ -52,11 +49,7 @@ def model_mesh_inference_token( model_mesh_model_service_account: ServiceAccount, model_mesh_role_binding: RoleBinding, ) -> str: - return run_command( - command=shlex.split( - f"oc create token -n {model_mesh_model_service_account.namespace} {model_mesh_model_service_account.name}" - ) - )[1].strip() + return create_inference_token(model_service_account=model_mesh_model_service_account) @pytest.fixture() diff --git a/utilities/infra.py b/utilities/infra.py index d1ac5f625..9c9bc41b0 100644 --- a/utilities/infra.py +++ b/utilities/infra.py @@ -134,16 +134,17 @@ def s3_endpoint_secret( @contextmanager -def create_isvc_view_role( +def create_resource_view_role( client: DynamicClient, - isvc: InferenceService, + resource: InferenceService | ServingRuntime, name: str, resource_names: Optional[List[str]] = None, ) -> Role: + resources_type = "inferenceservices" if isinstance(resource, InferenceService) else "servingruntimes" rules = [ { - "apiGroups": [isvc.api_group], - "resources": ["inferenceservices"], + "apiGroups": [resource.api_group], + "resources": [resources_type], "verbs": ["get"], }, ] @@ -154,7 +155,7 @@ def create_isvc_view_role( with Role( client=client, name=name, - namespace=isvc.namespace, + namespace=resource.namespace, rules=rules, ) as role: yield role From 44a31208d3d1c9bb64aefd29ff35d6910344a36a Mon Sep 17 00:00:00 2001 From: rnetser Date: Sun, 2 Feb 2025 19:43:30 +0200 Subject: [PATCH 4/7] xx --- .../model_server/authentication/conftest.py | 78 ++--------- .../test_model_mesh_authentication.py | 123 ------------------ tests/model_serving/model_server/conftest.py | 59 +-------- .../model_server/model_mesh/conftest.py | 67 ++++++++++ .../model_server/ovms/model_mesh/conftest.py | 15 ++- utilities/infra.py | 11 +- 6 files changed, 100 insertions(+), 253 deletions(-) delete mode 100644 tests/model_serving/model_server/authentication/test_model_mesh_authentication.py create mode 100644 tests/model_serving/model_server/model_mesh/conftest.py diff --git a/tests/model_serving/model_server/authentication/conftest.py b/tests/model_serving/model_server/authentication/conftest.py index f0f8d28fa..0e3aa5d97 100644 --- a/tests/model_serving/model_server/authentication/conftest.py +++ b/tests/model_serving/model_server/authentication/conftest.py @@ -1,3 +1,4 @@ +import shlex from typing import Any, Generator from urllib.parse import urlparse @@ -12,8 +13,9 @@ from ocp_resources.secret import Secret from ocp_resources.service_account import ServiceAccount from ocp_resources.serving_runtime import ServingRuntime +from pyhelper_utils.shell import run_command -from utilities.infra import create_resource_view_role, create_ns, s3_endpoint_secret, create_inference_token +from utilities.infra import create_isvc_view_role, create_ns, s3_endpoint_secret, create_inference_token from tests.model_serving.model_server.utils import create_isvc from utilities.constants import ( KServeDeploymentType, @@ -83,9 +85,9 @@ def http_view_role( admin_client: DynamicClient, http_s3_caikit_serverless_inference_service: InferenceService, ) -> Role: - with create_resource_view_role( + with create_isvc_view_role( client=admin_client, - resource=http_s3_caikit_serverless_inference_service, + isvc=http_s3_caikit_serverless_inference_service, name=f"{http_s3_caikit_serverless_inference_service.name}-view", resource_names=[http_s3_caikit_serverless_inference_service.name], ) as role: @@ -97,9 +99,9 @@ def http_raw_view_role( admin_client: DynamicClient, http_s3_caikit_raw_inference_service: InferenceService, ) -> Role: - with create_resource_view_role( + with create_isvc_view_role( client=admin_client, - resource=http_s3_caikit_raw_inference_service, + isvc=http_s3_caikit_raw_inference_service, name=f"{http_s3_caikit_raw_inference_service.name}-view", resource_names=[http_s3_caikit_raw_inference_service.name], ) as role: @@ -190,9 +192,9 @@ def patched_remove_raw_authentication_isvc( @pytest.fixture(scope="class") def grpc_view_role(admin_client: DynamicClient, grpc_s3_inference_service: InferenceService) -> Role: - with create_resource_view_role( + with create_isvc_view_role( client=admin_client, - resource=grpc_s3_inference_service, + isvc=grpc_s3_inference_service, name=f"{grpc_s3_inference_service.name}-view", resource_names=[grpc_s3_inference_service.name], ) as role: @@ -220,7 +222,11 @@ def grpc_role_binding( @pytest.fixture(scope="class") def grpc_inference_token(grpc_model_service_account: ServiceAccount, grpc_role_binding: RoleBinding) -> str: - return create_inference_token(model_service_account=grpc_model_service_account) + return run_command( + command=shlex.split( + f"oc create token -n {grpc_model_service_account.namespace} {grpc_model_service_account.name}" + ) + )[1].strip() @pytest.fixture(scope="class") @@ -358,59 +364,3 @@ def http_s3_caikit_tgis_serving_runtime( enable_grpc=False, ) as model_runtime: yield model_runtime - - -@pytest.fixture() -def patched_remove_authentication_model_mesh_isvc( - admin_client: DynamicClient, - http_s3_openvino_model_mesh_inference_service: InferenceService, -) -> InferenceService: - with ResourceEditor( - patches={ - http_s3_openvino_model_mesh_inference_service: { - "metadata": { - "annotations": {Annotations.KserveAuth.SECURITY: "false"}, - } - } - } - ): - yield http_s3_openvino_model_mesh_inference_service - - -@pytest.fixture(scope="class") -def http_model_mesh_view_role( - admin_client: DynamicClient, - http_s3_ovms_model_mesh_serving_runtime: ServingRuntime, -) -> Role: - with create_resource_view_role( - client=admin_client, - resource=http_s3_ovms_model_mesh_serving_runtime, - name=f"{http_s3_ovms_model_mesh_serving_runtime.name}-view", - resource_names=[http_s3_ovms_model_mesh_serving_runtime.name], - ) as role: - yield role - - -@pytest.fixture(scope="class") -def http_model_mesh_role_binding( - admin_client: DynamicClient, - http_model_mesh_view_role: Role, - model_service_account: ServiceAccount, -) -> RoleBinding: - with RoleBinding( - client=admin_client, - namespace=model_service_account.namespace, - name=f"{Protocols.HTTP}-{model_service_account.name}-view", - role_ref_name=http_model_mesh_view_role.name, - role_ref_kind=http_model_mesh_view_role.kind, - subjects_kind=model_service_account.kind, - subjects_name=model_service_account.name, - ) as rb: - yield rb - - -@pytest.fixture(scope="class") -def http_model_mesh_inference_token( - model_service_account: ServiceAccount, http_model_mesh_role_binding: RoleBinding -) -> str: - return create_inference_token(model_service_account=model_service_account) diff --git a/tests/model_serving/model_server/authentication/test_model_mesh_authentication.py b/tests/model_serving/model_server/authentication/test_model_mesh_authentication.py deleted file mode 100644 index b08a2b3ee..000000000 --- a/tests/model_serving/model_server/authentication/test_model_mesh_authentication.py +++ /dev/null @@ -1,123 +0,0 @@ -import pytest - -from tests.model_serving.model_server.utils import verify_inference_response -from utilities.constants import ( - ModelFormat, - ModelStoragePath, - Protocols, -) -from utilities.inference_utils import Inference -from utilities.manifests.openvino import OPENVINO_INFERENCE_CONFIG -from utilities.manifests.tensorflow import TENSORFLOW_INFERENCE_CONFIG - -pytestmark = [pytest.mark.modelmesh, pytest.mark.sanity] - - -@pytest.mark.parametrize( - "model_namespace, http_s3_ovms_model_mesh_serving_runtime, http_s3_openvino_model_mesh_inference_service", - [ - pytest.param( - {"name": "model-mesh-multi-authentication", "modelmesh-enabled": True}, - {"enable-auth": True}, - {"model-path": ModelStoragePath.OPENVINO_EXAMPLE_MODEL}, - ) - ], - indirect=True, -) -class TestModelMeshAuthentication: - @pytest.mark.dependency(name="test_model_mesh_model_authentication_openvino_inference_with_tensorflow") - def test_model_mesh_model_authentication_openvino_inference_with_tensorflow( - self, http_s3_openvino_model_mesh_inference_service, http_model_mesh_inference_token - ): - """Verify model query with token using REST""" - verify_inference_response( - inference_service=http_s3_openvino_model_mesh_inference_service, - inference_config=OPENVINO_INFERENCE_CONFIG, - inference_type=Inference.INFER, - protocol=Protocols.HTTPS, - use_default_query=True, - token=http_model_mesh_inference_token, - ) - - @pytest.mark.dependency(name="test_model_mesh_disabled_model_authentication") - def test_model_mesh_disabled_model_authentication(self, patched_remove_authentication_model_mesh_isvc): - """Verify model query after authentication is disabled""" - verify_inference_response( - inference_service=patched_remove_authentication_model_mesh_isvc, - inference_config=OPENVINO_INFERENCE_CONFIG, - inference_type=Inference.INFER, - protocol=Protocols.HTTPS, - use_default_query=True, - ) - - @pytest.mark.dependency(depends=["test_model_mesh_disabled_model_authentication"]) - def test_model_mesh_re_enabled_model_authentication( - self, http_s3_openvino_model_mesh_inference_service, http_model_mesh_inference_token - ): - """Verify model query after authentication is re-enabled""" - verify_inference_response( - inference_service=http_s3_openvino_model_mesh_inference_service, - inference_config=OPENVINO_INFERENCE_CONFIG, - inference_type=Inference.INFER, - protocol=Protocols.HTTPS, - use_default_query=True, - token=http_model_mesh_inference_token, - ) - - @pytest.mark.dependency(depends=["test_model_mesh_model_authentication_openvino_inference_with_tensorflow"]) - def test_model_mesh_model_authentication_using_invalid_token(self, http_s3_openvino_model_mesh_inference_service): - """Verify model query with an invalid token""" - verify_inference_response( - inference_service=http_s3_openvino_model_mesh_inference_service, - inference_config=OPENVINO_INFERENCE_CONFIG, - inference_type=Inference.INFER, - protocol=Protocols.HTTPS, - use_default_query=True, - token="dummy", - authorized_user=False, - ) - - @pytest.mark.dependency(depends=["test_model_mesh_model_authentication_openvino_inference_with_tensorflow"]) - def test_model_mesh_model_authentication_without_token(self, http_s3_openvino_model_mesh_inference_service): - """Verify model query without providing a token""" - verify_inference_response( - inference_service=http_s3_openvino_model_mesh_inference_service, - inference_config=OPENVINO_INFERENCE_CONFIG, - inference_type=Inference.INFER, - protocol=Protocols.HTTPS, - use_default_query=True, - authorized_user=False, - ) - - @pytest.mark.parametrize( - "http_s3_ovms_external_route_model_mesh_serving_runtime, http_s3_openvino_second_model_mesh_inference_service", - [ - pytest.param( - {"enable-auth": True}, - { - "model-path": ModelStoragePath.TENSORFLOW_MODEL, - "model-format": ModelFormat.TENSORFLOW, - "runtime-fixture-name": "http_s3_ovms_external_route_model_mesh_serving_runtime", - "model-version": "2", - }, - ) - ], - indirect=True, - ) - def test_model_mesh_block_cross_model_authentication( - self, - http_s3_ovms_external_route_model_mesh_serving_runtime, - http_s3_openvino_model_mesh_inference_service, - http_s3_openvino_second_model_mesh_inference_service, - http_model_mesh_inference_token, - ): - """Verify model query with a second model's token is blocked""" - verify_inference_response( - inference_service=http_s3_openvino_second_model_mesh_inference_service, - inference_config=TENSORFLOW_INFERENCE_CONFIG, - inference_type=Inference.INFER, - protocol=Protocols.HTTPS, - use_default_query=True, - token=http_model_mesh_inference_token, - authorized_user=False, - ) diff --git a/tests/model_serving/model_server/conftest.py b/tests/model_serving/model_server/conftest.py index 2f008b71c..51a1beb0a 100644 --- a/tests/model_serving/model_server/conftest.py +++ b/tests/model_serving/model_server/conftest.py @@ -17,18 +17,15 @@ from pytest_testconfig import config as py_config from tests.model_serving.model_server.utils import create_isvc +from utilities.constants import DscComponents, StorageClassName from utilities.constants import ( - DscComponents, KServeDeploymentType, + ModelAndFormat, ModelFormat, ModelInferenceRuntime, + ModelVersion, Protocols, RuntimeTemplates, - StorageClassName, -) -from utilities.constants import ( - ModelAndFormat, - ModelVersion, ) from utilities.infra import s3_endpoint_secret from utilities.data_science_cluster_utils import update_components_in_dsc @@ -399,53 +396,3 @@ def http_s3_tensorflow_model_mesh_inference_service( model_version="2", ) as isvc: yield isvc - - -@pytest.fixture(scope="class") -def http_s3_ovms_external_route_model_mesh_serving_runtime( - request: FixtureRequest, - admin_client: DynamicClient, - model_namespace: Namespace, -) -> ServingRuntime: - with ServingRuntimeFromTemplate( - client=admin_client, - namespace=model_namespace.name, - name=f"{Protocols.HTTP}-{ModelInferenceRuntime.OPENVINO_RUNTIME}-exposed", - template_name=RuntimeTemplates.OVMS_MODEL_MESH, - multi_model=True, - protocol="REST", - resources={ - "ovms": { - "requests": {"cpu": "1", "memory": "4Gi"}, - "limits": {"cpu": "2", "memory": "8Gi"}, - }, - }, - enable_external_route=True, - enable_auth=request.param.get("enable-auth"), - ) as model_runtime: - yield model_runtime - - -@pytest.fixture(scope="class") -def http_s3_openvino_second_model_mesh_inference_service( - request: FixtureRequest, - admin_client: DynamicClient, - model_namespace: Namespace, - ci_model_mesh_endpoint_s3_secret: Secret, - model_mesh_model_service_account: ServiceAccount, -) -> InferenceService: - # Dynamically select the used ServingRuntime by passing "runtime-fixture-name" request.param - runtime = request.getfixturevalue(argname=request.param["runtime-fixture-name"]) - with create_isvc( - client=admin_client, - name=f"{Protocols.HTTP}-{ModelFormat.OPENVINO}-2", - namespace=model_namespace.name, - runtime=runtime.name, - model_service_account=model_mesh_model_service_account.name, - storage_key=ci_model_mesh_endpoint_s3_secret.name, - storage_path=request.param["model-path"], - model_format=request.param["model-format"], - deployment_mode=KServeDeploymentType.MODEL_MESH, - model_version=request.param["model-version"], - ) as isvc: - yield isvc diff --git a/tests/model_serving/model_server/model_mesh/conftest.py b/tests/model_serving/model_server/model_mesh/conftest.py new file mode 100644 index 000000000..b5bbc92bc --- /dev/null +++ b/tests/model_serving/model_server/model_mesh/conftest.py @@ -0,0 +1,67 @@ +import pytest +from _pytest.fixtures import FixtureRequest +from kubernetes.dynamic import DynamicClient +from ocp_resources.inference_service import InferenceService +from ocp_resources.namespace import Namespace +from ocp_resources.secret import Secret +from ocp_resources.service_account import ServiceAccount +from ocp_resources.serving_runtime import ServingRuntime + +from tests.model_serving.model_server.utils import create_isvc +from utilities.constants import ( + KServeDeploymentType, + ModelFormat, + ModelInferenceRuntime, + Protocols, + RuntimeTemplates, +) +from utilities.serving_runtime import ServingRuntimeFromTemplate + + +@pytest.fixture(scope="class") +def http_s3_openvino_second_model_mesh_inference_service( + request: FixtureRequest, + admin_client: DynamicClient, + model_namespace: Namespace, + ci_model_mesh_endpoint_s3_secret: Secret, + model_mesh_model_service_account: ServiceAccount, +) -> InferenceService: + # Dynamically select the used ServingRuntime by passing "runtime-fixture-name" request.param + runtime = request.getfixturevalue(argname=request.param["runtime-fixture-name"]) + with create_isvc( + client=admin_client, + name=f"{Protocols.HTTP}-{ModelFormat.OPENVINO}-2", + namespace=model_namespace.name, + runtime=runtime.name, + model_service_account=model_mesh_model_service_account.name, + storage_key=ci_model_mesh_endpoint_s3_secret.name, + storage_path=request.param["model-path"], + model_format=request.param["model-format"], + deployment_mode=KServeDeploymentType.MODEL_MESH, + model_version=request.param["model-version"], + ) as isvc: + yield isvc + + +@pytest.fixture(scope="class") +def http_s3_ovms_external_route_model_mesh_serving_runtime( + request: FixtureRequest, + admin_client: DynamicClient, + model_namespace: Namespace, +) -> ServingRuntime: + with ServingRuntimeFromTemplate( + client=admin_client, + namespace=model_namespace.name, + name=f"{Protocols.HTTP}-{ModelInferenceRuntime.OPENVINO_RUNTIME}-exposed", + template_name=RuntimeTemplates.OVMS_MODEL_MESH, + multi_model=True, + protocol="REST", + resources={ + "ovms": { + "requests": {"cpu": "1", "memory": "4Gi"}, + "limits": {"cpu": "2", "memory": "8Gi"}, + }, + }, + enable_external_route=True, + ) as model_runtime: + yield model_runtime diff --git a/tests/model_serving/model_server/ovms/model_mesh/conftest.py b/tests/model_serving/model_server/ovms/model_mesh/conftest.py index 77fbe6fa0..095c05dbb 100644 --- a/tests/model_serving/model_server/ovms/model_mesh/conftest.py +++ b/tests/model_serving/model_server/ovms/model_mesh/conftest.py @@ -1,3 +1,5 @@ +import shlex + import pytest from kubernetes.dynamic import DynamicClient from ocp_resources.resource import ResourceEditor @@ -5,11 +7,12 @@ from ocp_resources.role_binding import RoleBinding from ocp_resources.service_account import ServiceAccount from ocp_resources.serving_runtime import ServingRuntime +from pyhelper_utils.shell import run_command from utilities.constants import ( Protocols, ) -from utilities.infra import create_inference_token, create_resource_view_role +from utilities.infra import create_isvc_view_role @pytest.fixture(scope="class") @@ -17,9 +20,9 @@ def model_mesh_view_role( admin_client: DynamicClient, http_s3_openvino_model_mesh_inference_service: ServingRuntime, ) -> Role: - with create_resource_view_role( + with create_isvc_view_role( client=admin_client, - resource=http_s3_openvino_model_mesh_inference_service, + isvc=http_s3_openvino_model_mesh_inference_service, name=f"{http_s3_openvino_model_mesh_inference_service.name}-view", resource_names=[http_s3_openvino_model_mesh_inference_service.name], ) as role: @@ -49,7 +52,11 @@ def model_mesh_inference_token( model_mesh_model_service_account: ServiceAccount, model_mesh_role_binding: RoleBinding, ) -> str: - return create_inference_token(model_service_account=model_mesh_model_service_account) + return run_command( + command=shlex.split( + f"oc create token -n {model_mesh_model_service_account.namespace} {model_mesh_model_service_account.name}" + ) + )[1].strip() @pytest.fixture() diff --git a/utilities/infra.py b/utilities/infra.py index 9c9bc41b0..d1ac5f625 100644 --- a/utilities/infra.py +++ b/utilities/infra.py @@ -134,17 +134,16 @@ def s3_endpoint_secret( @contextmanager -def create_resource_view_role( +def create_isvc_view_role( client: DynamicClient, - resource: InferenceService | ServingRuntime, + isvc: InferenceService, name: str, resource_names: Optional[List[str]] = None, ) -> Role: - resources_type = "inferenceservices" if isinstance(resource, InferenceService) else "servingruntimes" rules = [ { - "apiGroups": [resource.api_group], - "resources": [resources_type], + "apiGroups": [isvc.api_group], + "resources": ["inferenceservices"], "verbs": ["get"], }, ] @@ -155,7 +154,7 @@ def create_resource_view_role( with Role( client=client, name=name, - namespace=resource.namespace, + namespace=isvc.namespace, rules=rules, ) as role: yield role From 21049562fb6ca64095d4e1247be3b7d0b6438ad2 Mon Sep 17 00:00:00 2001 From: rnetser Date: Thu, 6 Mar 2025 19:29:33 +0100 Subject: [PATCH 5/7] feat: add multi server serverless --- .../model_server/model_mesh/conftest.py | 0 .../utils.py => serverless/__init__.py} | 0 .../model_server/serverless/conftest.py | 35 ++++++++++ .../test_multiple_projects_in_ns.py | 70 +++++++++++++++++++ utilities/constants.py | 3 + 5 files changed, 108 insertions(+) delete mode 100644 tests/model_serving/model_server/model_mesh/conftest.py rename tests/model_serving/model_server/{metrics/utils.py => serverless/__init__.py} (100%) create mode 100644 tests/model_serving/model_server/serverless/conftest.py create mode 100644 tests/model_serving/model_server/serverless/test_multiple_projects_in_ns.py diff --git a/tests/model_serving/model_server/model_mesh/conftest.py b/tests/model_serving/model_server/model_mesh/conftest.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/model_serving/model_server/metrics/utils.py b/tests/model_serving/model_server/serverless/__init__.py similarity index 100% rename from tests/model_serving/model_server/metrics/utils.py rename to tests/model_serving/model_server/serverless/__init__.py diff --git a/tests/model_serving/model_server/serverless/conftest.py b/tests/model_serving/model_server/serverless/conftest.py new file mode 100644 index 000000000..3e5167c97 --- /dev/null +++ b/tests/model_serving/model_server/serverless/conftest.py @@ -0,0 +1,35 @@ +from typing import Any, Generator + +import pytest +from _pytest.fixtures import FixtureRequest +from kubernetes.dynamic import DynamicClient +from ocp_resources.inference_service import InferenceService +from ocp_resources.namespace import Namespace +from ocp_resources.secret import Secret +from ocp_resources.serving_runtime import ServingRuntime + +from utilities.constants import KServeDeploymentType, ModelName, ModelStoragePath +from utilities.inference_utils import create_isvc + + +@pytest.fixture(scope="class") +def s3_flan_small_caikit_serverless_inference_service( + request: FixtureRequest, + admin_client: DynamicClient, + model_namespace: Namespace, + serving_runtime_from_template: ServingRuntime, + s3_models_storage_uri: str, + models_endpoint_s3_secret: Secret, +) -> Generator[InferenceService, Any, Any]: + with create_isvc( + client=admin_client, + name=f"{ModelName.FLAN_T5_SMALL}", + namespace=model_namespace.name, + runtime=serving_runtime_from_template.name, + storage_key=models_endpoint_s3_secret.name, + storage_path=ModelStoragePath.FLAN_T5_SMALL_CAIKIT, + model_format=serving_runtime_from_template.instance.spec.supportedModelFormats[0].name, + deployment_mode=KServeDeploymentType.SERVERLESS, + external_route=True, + ) as isvc: + yield isvc diff --git a/tests/model_serving/model_server/serverless/test_multiple_projects_in_ns.py b/tests/model_serving/model_server/serverless/test_multiple_projects_in_ns.py new file mode 100644 index 000000000..5f2eec0ad --- /dev/null +++ b/tests/model_serving/model_server/serverless/test_multiple_projects_in_ns.py @@ -0,0 +1,70 @@ +import pytest + +from tests.model_serving.model_server.utils import run_inference_multiple_times +from utilities.constants import ( + KServeDeploymentType, + ModelAndFormat, + ModelFormat, + ModelInferenceRuntime, + ModelStoragePath, + Protocols, + RuntimeTemplates, +) +from utilities.inference_utils import Inference +from utilities.manifests.tgis_grpc import TGIS_INFERENCE_CONFIG + +pytestmark = [pytest.mark.serverless, pytest.mark.sanity] + + +@pytest.mark.polarion("ODS-2371") +@pytest.mark.parametrize( + "model_namespace, serving_runtime_from_template, s3_models_inference_service", + [ + pytest.param( + {"name": "serverless-multi-tgis-models"}, + { + "name": f"{ModelInferenceRuntime.CAIKIT_TGIS_RUNTIME}", + "template-name": RuntimeTemplates.CAIKIT_TGIS_SERVING, + "multi-model": False, + "enable-http": False, + "enable-grpc": True, + }, + { + "name": f"{ModelFormat.CAIKIT}-bloom", + "deployment-mode": KServeDeploymentType.SERVERLESS, + "model-dir": ModelStoragePath.BLOOM_560M_CAIKIT, + "external-route": True, + }, + ) + ], + indirect=True, +) +class TestServerlessMultipleProjectsInNamespace: + def test_serverless_multi_tgis_models_inference_bloom( + self, + s3_models_inference_service, + ): + """Test inference with Bloom Caikit model when multiple models in the same namespace""" + run_inference_multiple_times( + isvc=s3_models_inference_service, + inference_config=TGIS_INFERENCE_CONFIG, + model_name=ModelAndFormat.BLOOM_560M_CAIKIT, + inference_type=Inference.ALL_TOKENS, + protocol=Protocols.GRPC, + run_in_parallel=True, + iterations=5, + ) + + def test_serverless_multi_tgis_models_inference_flan( + self, s3_flan_small_caikit_serverless_inference_service, s3_models_inference_service + ): + """Test inference with Flan Caikit model when multiple models in the same namespace""" + run_inference_multiple_times( + isvc=s3_flan_small_caikit_serverless_inference_service, + inference_config=TGIS_INFERENCE_CONFIG, + model_name=ModelAndFormat.FLAN_T5_SMALL_CAIKIT, + inference_type=Inference.ALL_TOKENS, + protocol=Protocols.GRPC, + run_in_parallel=True, + iterations=5, + ) diff --git a/utilities/constants.py b/utilities/constants.py index f9b6ca567..e8af79d9c 100644 --- a/utilities/constants.py +++ b/utilities/constants.py @@ -20,6 +20,7 @@ class ModelName: FLAN_T5_SMALL: str = "flan-t5-small" FLAN_T5_SMALL_HF: str = f"{FLAN_T5_SMALL}-hf" CAIKIT_BGE_LARGE_EN: str = f"bge-large-en-v1.5-{ModelFormat.CAIKIT}" + BLOOM_560M: str = "bloom-560m" class ModelAndFormat: @@ -27,6 +28,7 @@ class ModelAndFormat: OPENVINO_IR: str = f"{ModelFormat.OPENVINO}_ir" KSERVE_OPENVINO_IR: str = f"{OPENVINO_IR}_kserve" ONNX_1: str = f"{ModelFormat.ONNX}-1" + BLOOM_560M_CAIKIT: str = f"bloom-560m-{ModelFormat.CAIKIT}" class ModelStoragePath: @@ -37,6 +39,7 @@ class ModelStoragePath: TENSORFLOW_MODEL: str = "inception_resnet_v2.pb" OPENVINO_VEHICLE_DETECTION: str = "vehicle-detection" FLAN_T5_SMALL_HF: str = f"{ModelName.FLAN_T5_SMALL}/{ModelName.FLAN_T5_SMALL_HF}" + BLOOM_560M_CAIKIT: str = "bloom-560m/bloom-560m-caikit" class CurlOutput: From ccafeeee3b5b06a256fe20e1a6e8d8f0cba1011c Mon Sep 17 00:00:00 2001 From: rnetser Date: Thu, 6 Mar 2025 19:49:31 +0100 Subject: [PATCH 6/7] feat: add multi server serverless --- tests/model_serving/model_server/serverless/conftest.py | 5 ++--- tests/model_serving/model_server/utils.py | 9 ++++++--- utilities/constants.py | 2 +- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/tests/model_serving/model_server/serverless/conftest.py b/tests/model_serving/model_server/serverless/conftest.py index 3e5167c97..3fa050598 100644 --- a/tests/model_serving/model_server/serverless/conftest.py +++ b/tests/model_serving/model_server/serverless/conftest.py @@ -13,12 +13,11 @@ @pytest.fixture(scope="class") -def s3_flan_small_caikit_serverless_inference_service( +def s3_flan_small_hf_caikit_serverless_inference_service( request: FixtureRequest, admin_client: DynamicClient, model_namespace: Namespace, serving_runtime_from_template: ServingRuntime, - s3_models_storage_uri: str, models_endpoint_s3_secret: Secret, ) -> Generator[InferenceService, Any, Any]: with create_isvc( @@ -27,7 +26,7 @@ def s3_flan_small_caikit_serverless_inference_service( namespace=model_namespace.name, runtime=serving_runtime_from_template.name, storage_key=models_endpoint_s3_secret.name, - storage_path=ModelStoragePath.FLAN_T5_SMALL_CAIKIT, + storage_path=ModelStoragePath.FLAN_T5_SMALL_HF, model_format=serving_runtime_from_template.instance.spec.supportedModelFormats[0].name, deployment_mode=KServeDeploymentType.SERVERLESS, external_route=True, diff --git a/tests/model_serving/model_server/utils.py b/tests/model_serving/model_server/utils.py index 9bc8390db..1a9dc2595 100644 --- a/tests/model_serving/model_server/utils.py +++ b/tests/model_serving/model_server/utils.py @@ -197,7 +197,10 @@ def run_inference_multiple_times( verify_inference_response(**infer_kwargs) if futures: + exceptions = [] for result in as_completed(futures): - _exception = result.exception() - if _exception: - LOGGER.error(f"Failed to run inference. Error: {_exception}") + if _exception := result.exception(): + exceptions.append(_exception) + + if exceptions: + raise InferenceResponseError(f"Failed to run inference. Error: {exceptions}") diff --git a/utilities/constants.py b/utilities/constants.py index e8af79d9c..ee6c48910 100644 --- a/utilities/constants.py +++ b/utilities/constants.py @@ -39,7 +39,7 @@ class ModelStoragePath: TENSORFLOW_MODEL: str = "inception_resnet_v2.pb" OPENVINO_VEHICLE_DETECTION: str = "vehicle-detection" FLAN_T5_SMALL_HF: str = f"{ModelName.FLAN_T5_SMALL}/{ModelName.FLAN_T5_SMALL_HF}" - BLOOM_560M_CAIKIT: str = "bloom-560m/bloom-560m-caikit" + BLOOM_560M_CAIKIT: str = f"{ModelName.BLOOM_560M}/{ModelAndFormat.BLOOM_560M_CAIKIT}/artifacts" class CurlOutput: From 6f82b168eac520ac94dcb9ab535b0871169a7611 Mon Sep 17 00:00:00 2001 From: rnetser Date: Mon, 10 Mar 2025 13:59:32 +0100 Subject: [PATCH 7/7] feat: add multi server serverless --- .../model_server/serverless/conftest.py | 2 +- .../test_multiple_projects_in_ns.py | 19 ++++++++++--------- utilities/constants.py | 4 +++- utilities/manifests/pytorch.py | 18 ++++++++++++++++++ 4 files changed, 32 insertions(+), 11 deletions(-) create mode 100644 utilities/manifests/pytorch.py diff --git a/tests/model_serving/model_server/serverless/conftest.py b/tests/model_serving/model_server/serverless/conftest.py index 3fa050598..6390e95dc 100644 --- a/tests/model_serving/model_server/serverless/conftest.py +++ b/tests/model_serving/model_server/serverless/conftest.py @@ -22,7 +22,7 @@ def s3_flan_small_hf_caikit_serverless_inference_service( ) -> Generator[InferenceService, Any, Any]: with create_isvc( client=admin_client, - name=f"{ModelName.FLAN_T5_SMALL}", + name=f"{ModelName.FLAN_T5_SMALL}-model", namespace=model_namespace.name, runtime=serving_runtime_from_template.name, storage_key=models_endpoint_s3_secret.name, diff --git a/tests/model_serving/model_server/serverless/test_multiple_projects_in_ns.py b/tests/model_serving/model_server/serverless/test_multiple_projects_in_ns.py index 5f2eec0ad..459f5a132 100644 --- a/tests/model_serving/model_server/serverless/test_multiple_projects_in_ns.py +++ b/tests/model_serving/model_server/serverless/test_multiple_projects_in_ns.py @@ -4,13 +4,13 @@ from utilities.constants import ( KServeDeploymentType, ModelAndFormat, - ModelFormat, - ModelInferenceRuntime, + ModelName, ModelStoragePath, Protocols, RuntimeTemplates, ) from utilities.inference_utils import Inference +from utilities.manifests.pytorch import PYTORCH_TGIS_INFERENCE_CONFIG from utilities.manifests.tgis_grpc import TGIS_INFERENCE_CONFIG pytestmark = [pytest.mark.serverless, pytest.mark.sanity] @@ -23,22 +23,23 @@ pytest.param( {"name": "serverless-multi-tgis-models"}, { - "name": f"{ModelInferenceRuntime.CAIKIT_TGIS_RUNTIME}", - "template-name": RuntimeTemplates.CAIKIT_TGIS_SERVING, + "name": "tgis-runtime", + "template-name": RuntimeTemplates.TGIS_GRPC_SERVING, "multi-model": False, "enable-http": False, "enable-grpc": True, }, { - "name": f"{ModelFormat.CAIKIT}-bloom", + "name": f"{ModelName.BLOOM_560M}-model", "deployment-mode": KServeDeploymentType.SERVERLESS, - "model-dir": ModelStoragePath.BLOOM_560M_CAIKIT, + "model-dir": f"{ModelStoragePath.BLOOM_560M_CAIKIT}/artifacts", "external-route": True, }, ) ], indirect=True, ) +@pytest.mark.usefixtures("s3_flan_small_hf_caikit_serverless_inference_service") class TestServerlessMultipleProjectsInNamespace: def test_serverless_multi_tgis_models_inference_bloom( self, @@ -47,7 +48,7 @@ def test_serverless_multi_tgis_models_inference_bloom( """Test inference with Bloom Caikit model when multiple models in the same namespace""" run_inference_multiple_times( isvc=s3_models_inference_service, - inference_config=TGIS_INFERENCE_CONFIG, + inference_config=PYTORCH_TGIS_INFERENCE_CONFIG, model_name=ModelAndFormat.BLOOM_560M_CAIKIT, inference_type=Inference.ALL_TOKENS, protocol=Protocols.GRPC, @@ -56,11 +57,11 @@ def test_serverless_multi_tgis_models_inference_bloom( ) def test_serverless_multi_tgis_models_inference_flan( - self, s3_flan_small_caikit_serverless_inference_service, s3_models_inference_service + self, s3_flan_small_hf_caikit_serverless_inference_service, s3_models_inference_service ): """Test inference with Flan Caikit model when multiple models in the same namespace""" run_inference_multiple_times( - isvc=s3_flan_small_caikit_serverless_inference_service, + isvc=s3_flan_small_hf_caikit_serverless_inference_service, inference_config=TGIS_INFERENCE_CONFIG, model_name=ModelAndFormat.FLAN_T5_SMALL_CAIKIT, inference_type=Inference.ALL_TOKENS, diff --git a/utilities/constants.py b/utilities/constants.py index ee6c48910..60c66e010 100644 --- a/utilities/constants.py +++ b/utilities/constants.py @@ -14,6 +14,7 @@ class ModelFormat: OVMS: str = "ovms" VLLM: str = "vllm" TENSORFLOW: str = "tensorflow" + PYTORCH: str = "pytorch" class ModelName: @@ -39,7 +40,7 @@ class ModelStoragePath: TENSORFLOW_MODEL: str = "inception_resnet_v2.pb" OPENVINO_VEHICLE_DETECTION: str = "vehicle-detection" FLAN_T5_SMALL_HF: str = f"{ModelName.FLAN_T5_SMALL}/{ModelName.FLAN_T5_SMALL_HF}" - BLOOM_560M_CAIKIT: str = f"{ModelName.BLOOM_560M}/{ModelAndFormat.BLOOM_560M_CAIKIT}/artifacts" + BLOOM_560M_CAIKIT: str = f"{ModelName.BLOOM_560M}/{ModelAndFormat.BLOOM_560M_CAIKIT}" class CurlOutput: @@ -60,6 +61,7 @@ class RuntimeTemplates: OVMS_MODEL_MESH: str = ModelFormat.OVMS OVMS_KSERVE: str = f"kserve-{ModelFormat.OVMS}" CAIKIT_STANDALONE_SERVING: str = "caikit-standalone-serving-template" + TGIS_GRPC_SERVING: str = "tgis-grpc-serving-template" class ModelInferenceRuntime: diff --git a/utilities/manifests/pytorch.py b/utilities/manifests/pytorch.py new file mode 100644 index 000000000..e773b2afd --- /dev/null +++ b/utilities/manifests/pytorch.py @@ -0,0 +1,18 @@ +GENERATION_PROTO_FILEPATH: str = "utilities/manifests/text-generation-inference/generation.proto" + +PYTORCH_TGIS_INFERENCE_CONFIG = { + "default_query_model": { + "query_input": "At what temperature does water boil?", + "query_output": r'\[{"generatedTokenCount":\d+,"text":".*","inputTokenCount":\d+,"stopReason":"MAX_TOKENS"}\]', + "use_regex": True + }, + "all-tokens": { + "grpc": { + "endpoint": "fmaas.GenerationService/Generate", + "header": "mm-model-id: $model_name", + "body": '{"requests": [{"text":"$query_input"}]}', + "args": f"-proto {GENERATION_PROTO_FILEPATH}", + "response_fields_map": {"response_output": "responses"}, + } + } +}