Skip to content

Commit 21cfe43

Browse files
feat: add llmd auth tests (#709)
* feat: add llmd auth tests * change: use tinyllama oci for llmisvc auth tests * change: address pr feedback * change: split llmisvc auth tests into individual tests
1 parent aba0370 commit 21cfe43

File tree

5 files changed

+274
-2
lines changed

5 files changed

+274
-2
lines changed

tests/model_serving/model_server/llmd/conftest.py

Lines changed: 145 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,19 @@
1+
from contextlib import ExitStack
12
from typing import Generator
23

34
import pytest
45
from _pytest.fixtures import FixtureRequest
56
from kubernetes.dynamic import DynamicClient
67
from ocp_resources.llm_inference_service import LLMInferenceService
78
from ocp_resources.namespace import Namespace
9+
from ocp_resources.role import Role
10+
from ocp_resources.role_binding import RoleBinding
811
from ocp_resources.secret import Secret
912
from ocp_resources.service_account import ServiceAccount
1013

1114
from utilities.constants import Timeout, ResourceLimits
12-
from utilities.infra import s3_endpoint_secret
15+
from utilities.infra import s3_endpoint_secret, create_inference_token
16+
from utilities.logger import RedactedString
1317
from utilities.llmd_utils import create_llmisvc
1418
from utilities.llmd_constants import (
1519
ModelStorage,
@@ -186,3 +190,143 @@ def llmd_inference_service_gpu(
186190

187191
with create_llmisvc(**create_kwargs) as llm_service:
188192
yield llm_service
193+
194+
195+
@pytest.fixture(scope="class")
def llmisvc_auth_service_account(
    admin_client: DynamicClient,
    unprivileged_model_namespace: Namespace,
) -> Generator:
    """Yield a factory that creates ServiceAccounts for auth testing.

    Every ServiceAccount is created in the unprivileged test namespace and is
    torn down automatically when the class-scoped ExitStack unwinds.
    """
    with ExitStack() as cleanup_stack:

        def _create_service_account(name: str) -> ServiceAccount:
            """Create one ServiceAccount and register it for cleanup."""
            account = ServiceAccount(
                client=admin_client,
                namespace=unprivileged_model_namespace.name,
                name=name,
            )
            return cleanup_stack.enter_context(account)

        yield _create_service_account
216+
@pytest.fixture(scope="class")
def llmisvc_auth_view_role(
    admin_client: DynamicClient,
) -> Generator:
    """Yield a factory that creates a per-service view Role.

    Each Role grants only ``get`` on a single named LLMInferenceService
    (scoped via ``resourceNames``), so a token bound to it can read that
    service and nothing else. Roles are cleaned up when the stack unwinds.
    """
    with ExitStack() as cleanup_stack:

        def _create_view_role(llm_service: LLMInferenceService) -> Role:
            """Build the minimal get-only Role for *llm_service*."""
            view_rule = {
                "apiGroups": [llm_service.api_group],
                "resources": ["llminferenceservices"],
                "verbs": ["get"],
                "resourceNames": [llm_service.name],
            }
            role = Role(
                client=admin_client,
                name=f"{llm_service.name}-view",
                namespace=llm_service.namespace,
                rules=[view_rule],
            )
            return cleanup_stack.enter_context(role)

        yield _create_view_role
244+
@pytest.fixture(scope="class")
def llmisvc_auth_role_binding(
    admin_client: DynamicClient,
) -> Generator:
    """Yield a factory that binds a ServiceAccount to a Role.

    Bindings are created in the ServiceAccount's namespace and removed when
    the class-scoped ExitStack unwinds.
    """
    with ExitStack() as cleanup_stack:

        def _create_role_binding(
            service_account: ServiceAccount,
            role: Role,
        ) -> RoleBinding:
            """Bind *service_account* to *role* and register cleanup."""
            binding = RoleBinding(
                client=admin_client,
                namespace=service_account.namespace,
                name=f"{service_account.name}-view",
                role_ref_name=role.name,
                role_ref_kind=role.kind,
                subjects_kind="ServiceAccount",
                subjects_name=service_account.name,
            )
            return cleanup_stack.enter_context(binding)

        yield _create_role_binding
271+
@pytest.fixture(scope="class")
def llmisvc_auth_token() -> Generator:
    """Yield a factory that wires up RBAC and mints an inference token.

    The factory delegates Role and RoleBinding creation to the supplied
    factory fixtures (which own their cleanup) and then returns a redacted
    bearer token for the given ServiceAccount.
    """

    def _create_token(
        service_account: ServiceAccount,
        llmisvc: LLMInferenceService,
        view_role_factory,
        role_binding_factory,
    ) -> str:
        """Grant *service_account* view access to *llmisvc*; return its token."""
        # RBAC objects are cleaned up by the factories' own ExitStacks.
        view_role = view_role_factory(llm_service=llmisvc)
        role_binding_factory(service_account=service_account, role=view_role)
        token = create_inference_token(model_service_account=service_account)
        # Wrap so the secret never appears verbatim in logs or reprs.
        return RedactedString(value=token)

    yield _create_token
290+
@pytest.fixture(scope="class")
def llmisvc_auth(
    admin_client: DynamicClient,
    unprivileged_model_namespace: Namespace,
    llmisvc_auth_service_account,
) -> Generator:
    """Yield a factory producing auth-enabled LLMInferenceServices.

    Each call creates a ServiceAccount (through the SA factory fixture) and
    then an LLMInferenceService bound to it with auth enabled. Both resources
    are removed when the class scope ends.
    """
    with ExitStack() as cleanup_stack:

        def _create_llmd_auth_service(
            service_name: str,
            service_account_name: str,
            storage_uri: str = ModelStorage.TINYLLAMA_OCI,
            container_image: str = ContainerImages.VLLM_CPU,
            container_resources: dict | None = None,
        ) -> tuple[LLMInferenceService, ServiceAccount]:
            """Create one auth-enabled service plus its ServiceAccount."""
            # `is None` (not falsiness) so an explicit empty dict is honored.
            if container_resources is None:
                container_resources = {
                    "limits": {"cpu": "1", "memory": "10Gi"},
                    "requests": {"cpu": "100m", "memory": "8Gi"},
                }

            # The ServiceAccount must exist before the service references it.
            service_account = llmisvc_auth_service_account(name=service_account_name)

            llm_service = cleanup_stack.enter_context(
                create_llmisvc(
                    client=admin_client,
                    name=service_name,
                    namespace=unprivileged_model_namespace.name,
                    storage_uri=storage_uri,
                    container_image=container_image,
                    container_resources=container_resources,
                    service_account=service_account_name,
                    wait=True,
                    timeout=Timeout.TIMEOUT_15MIN,
                    enable_auth=True,
                )
            )
            return llm_service, service_account

        yield _create_llmd_auth_service
Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
import pytest
2+
3+
from tests.model_serving.model_server.llmd.utils import (
4+
verify_llm_service_status,
5+
verify_gateway_status,
6+
)
7+
from utilities.constants import Protocols
8+
from utilities.llmd_utils import verify_inference_response_llmd
9+
from utilities.manifests.tinyllama import TINYLLAMA_INFERENCE_CONFIG
10+
11+
pytestmark = [
12+
pytest.mark.llmd_cpu,
13+
]
14+
15+
16+
@pytest.mark.parametrize(
    "unprivileged_model_namespace",
    [({"name": "llmd-auth-test"})],
    indirect=True,
)
class TestLLMISVCAuth:
    """Authentication testing for LLMD."""

    @pytest.fixture(scope="class", autouse=True)
    def setup_auth_resources(
        self,
        llmd_gateway,
        llmisvc_auth,
        llmisvc_auth_token,
        llmisvc_auth_view_role,
        llmisvc_auth_role_binding,
    ):
        """Provision the gateway, two auth-enabled services, and their tokens.

        Runs once per class (autouse, class scope); results are published as
        class attributes so individual tests can read them via ``self``.
        """
        services = {}
        tokens = {}
        for user in ("a", "b"):
            llm_service, service_account = llmisvc_auth(
                service_name=f"llmisvc-auth-user-{user}",
                service_account_name=f"llmisvc-auth-sa-{user}",
            )
            services[user] = llm_service
            tokens[user] = llmisvc_auth_token(
                service_account=service_account,
                llmisvc=llm_service,
                view_role_factory=llmisvc_auth_view_role,
                role_binding_factory=llmisvc_auth_role_binding,
            )

        # Fail fast if the infrastructure never became ready.
        assert verify_gateway_status(llmd_gateway), "Gateway should be ready"
        assert verify_llm_service_status(services["a"]), "LLMInferenceService user A should be ready"
        assert verify_llm_service_status(services["b"]), "LLMInferenceService user B should be ready"

        # Assign on the class (not the instance) so every test method in the
        # class sees the same resources.
        TestLLMISVCAuth.llmisvc_user_a = services["a"]
        TestLLMISVCAuth.llmisvc_user_b = services["b"]
        TestLLMISVCAuth.token_user_a = tokens["a"]
        TestLLMISVCAuth.token_user_b = tokens["b"]

    def test_llmisvc_authorized(self):
        """Each user's token grants access to that user's own service."""
        for llm_service, token in (
            (self.llmisvc_user_a, self.token_user_a),
            (self.llmisvc_user_b, self.token_user_b),
        ):
            verify_inference_response_llmd(
                llm_service=llm_service,
                inference_config=TINYLLAMA_INFERENCE_CONFIG,
                inference_type="chat_completions",
                protocol=Protocols.HTTP,
                use_default_query=True,
                insecure=False,
                model_name=llm_service.name,
                token=token,
                authorized_user=True,
            )

    def test_llmisvc_unauthorized(self):
        """A foreign token or a missing token must be rejected."""
        # User B's token must not unlock user A's service.
        verify_inference_response_llmd(
            llm_service=self.llmisvc_user_a,
            inference_config=TINYLLAMA_INFERENCE_CONFIG,
            inference_type="chat_completions",
            protocol=Protocols.HTTP,
            use_default_query=True,
            insecure=False,
            model_name=self.llmisvc_user_a.name,
            token=self.token_user_b,
            authorized_user=False,
        )

        # A request carrying no token at all must also be rejected.
        verify_inference_response_llmd(
            llm_service=self.llmisvc_user_a,
            inference_config=TINYLLAMA_INFERENCE_CONFIG,
            inference_type="chat_completions",
            protocol=Protocols.HTTP,
            use_default_query=True,
            insecure=False,
            authorized_user=False,
        )

utilities/constants.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -296,6 +296,7 @@ class S3:
296296

297297
class HuggingFace:
298298
TINYLLAMA: str = "hf://TinyLlama/TinyLlama-1.1B-Chat-v1.0"
299+
OPT125M: str = "hf://facebook/opt-125m"
299300

300301

301302
class OCIRegistry:

utilities/llmd_constants.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ class ModelStorage:
3535
TINYLLAMA_S3: str = SharedModelStorage.S3.TINYLLAMA
3636
S3_QWEN: str = SharedModelStorage.S3.QWEN_7B_INSTRUCT
3737
HF_TINYLLAMA: str = SharedModelStorage.HuggingFace.TINYLLAMA
38+
HF_OPT125M: str = SharedModelStorage.HuggingFace.OPT125M
3839

3940

4041
class ContainerImages:

utilities/llmd_utils.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -284,7 +284,9 @@ def create_llmisvc(
284284
template_config["imagePullSecrets"] = [{"name": secret} for secret in image_pull_secrets]
285285

286286
if enable_auth:
287-
annotations["serving.kserve.io/auth"] = "true"
287+
annotations["security.opendatahub.io/enable-auth"] = "true"
288+
else:
289+
annotations["security.opendatahub.io/enable-auth"] = "false"
288290

289291
LOGGER.info(f"Creating LLMInferenceService {name} in namespace {namespace}")
290292

0 commit comments

Comments
 (0)