opendatahub-io
diff --git a/‎tests/model_serving/model_server/llmd_v2/__init__.py‎
Lines changed: 1 addition & 0 deletions b/‎tests/model_serving/model_server/llmd_v2/__init__.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎tests/model_serving/model_server/llmd_v2/conftest.py‎
Lines changed: 301 additions & 0 deletions b/‎tests/model_serving/model_server/llmd_v2/conftest.py‎
Lines changed: 301 additions & 0 deletions
diff --git a/‎tests/model_serving/model_server/llmd_v2/llmd_configs/README.md‎
Lines changed: 35 additions & 0 deletions b/‎tests/model_serving/model_server/llmd_v2/llmd_configs/README.md‎
Lines changed: 35 additions & 0 deletions
diff --git a/‎tests/model_serving/model_server/llmd_v2/llmd_configs/__init__.py‎
Lines changed: 17 additions & 0 deletions b/‎tests/model_serving/model_server/llmd_v2/llmd_configs/__init__.py‎
Lines changed: 17 additions & 0 deletions
@@ -0,0 +1 @@
+"""llm-d test module for OpenDataHub and OpenShift AI."""
@@ -0,0 +1,301 @@
+import logging
+from collections.abc import Generator
+from contextlib import ExitStack, contextmanager
+from typing import Any
+
+import pytest
+import yaml
+
+logging.getLogger("timeout_sampler").setLevel(logging.WARNING)
+from _pytest.fixtures import FixtureRequest
+from kubernetes.dynamic import DynamicClient
+from ocp_resources.config_map import ConfigMap
+from ocp_resources.gateway import Gateway
+from ocp_resources.llm_inference_service import LLMInferenceService
+from ocp_resources.namespace import Namespace
+from ocp_resources.role import Role
+from ocp_resources.role_binding import RoleBinding
+from ocp_resources.service_account import ServiceAccount
+from simple_logger.logger import get_logger
+
+from tests.model_serving.model_server.llmd_v2.llmd_configs import TinyLlamaOciConfig
+from utilities.constants import Timeout
+from utilities.infra import create_inference_token, s3_endpoint_secret, update_configmap_data
+from utilities.llmd_constants import LLMDGateway
+from utilities.llmd_utils import create_llmd_gateway
+from utilities.logger import RedactedString
+
+LOGGER = get_logger(name=__name__)
+
+
+# ===========================================
+#  Gateway
+# ===========================================
+@pytest.fixture(scope="session", autouse=True)
+def shared_llmd_gateway(admin_client: DynamicClient) -> Generator[Gateway]:
+    """Shared LLMD gateway for all tests."""
+    with create_llmd_gateway(
+        client=admin_client,
+        namespace=LLMDGateway.DEFAULT_NAMESPACE,
+        gateway_class_name=LLMDGateway.DEFAULT_CLASS,
+        wait_for_condition=True,
+        timeout=Timeout.TIMEOUT_1MIN,
+        teardown=True,
+    ) as gateway:
+        yield gateway
+
+
+# ===========================================
+#  Storage — S3 secret + service account
+# ===========================================
+@pytest.fixture(scope="class")
+def s3_service_account(
+    request: FixtureRequest,
+    admin_client: DynamicClient,
+    unprivileged_model_namespace: Namespace,
+) -> Generator[str]:
+    """Create S3 secret + service account. Resolved automatically for S3 configs."""
+    with ExitStack() as stack:
+        secret = stack.enter_context(
+            cm=s3_endpoint_secret(
+                client=admin_client,
+                name="llmd-s3-secret",
+                namespace=unprivileged_model_namespace.name,
+                aws_access_key=request.getfixturevalue(argname="aws_access_key_id"),
+                aws_secret_access_key=request.getfixturevalue(argname="aws_secret_access_key"),
+                aws_s3_region=request.getfixturevalue(argname="models_s3_bucket_region"),
+                aws_s3_bucket=request.getfixturevalue(argname="models_s3_bucket_name"),
+                aws_s3_endpoint=request.getfixturevalue(argname="models_s3_bucket_endpoint"),
+            )
+        )
+        sa = stack.enter_context(
+            cm=ServiceAccount(
+                client=admin_client,
+                namespace=unprivileged_model_namespace.name,
+                name="llmd-s3-service-account",
+                secrets=[{"name": secret.name}],
+            )
+        )
+        yield sa.name
+
+
+# ===========================================
+#  LLMInferenceService creation
+# ===========================================
+@pytest.fixture(scope="class")
+def llmisvc(
+    request: FixtureRequest,
+    admin_client: DynamicClient,
+    unprivileged_model_namespace: Namespace,
+) -> Generator[LLMInferenceService]:
+    """LLMInferenceService fixture driven by a config class.
+
+    Usage:
+        NAMESPACE = ns_from_file(__file__)
+
+        @pytest.mark.parametrize(
+            "unprivileged_model_namespace, llmisvc",
+            [({"name": NAMESPACE}, SomeConfig)],
+            indirect=True,
+        )
+    """
+    config_cls = request.param
+    namespace = unprivileged_model_namespace.name
+
+    service_account = None
+    if config_cls.storage_uri.startswith("s3://"):
+        service_account = request.getfixturevalue(argname="s3_service_account")
+
+    with _create_llmisvc_from_config(
+        config_cls=config_cls, namespace=namespace, client=admin_client, service_account=service_account
+    ) as svc:
+        yield svc
+
+
+@pytest.fixture(scope="class")
+def llmisvc_with_auth(
+    admin_client: DynamicClient,
+    unprivileged_model_namespace: Namespace,
+) -> Generator[list[dict]]:
+    """Create 2 auth-enabled LLMISVCs with separate tokens for cross-auth testing."""
+    namespace = unprivileged_model_namespace.name
+    with ExitStack() as stack:
+        pairs = []
+        for i in range(2):
+            sa = stack.enter_context(
+                cm=ServiceAccount(
+                    client=admin_client,
+                    namespace=namespace,
+                    name=f"auth-sa-{i}",
+                )
+            )
+            cfg = TinyLlamaOciConfig.with_overrides(
+                name=f"llmisvc-auth-{i}",
+            )
+            svc = stack.enter_context(
+                cm=_create_llmisvc_from_config(
+                    config_cls=cfg,
+                    namespace=namespace,
+                    client=admin_client,
+                    service_account=sa.name,
+                    extra_annotations={"security.opendatahub.io/enable-auth": "true"},
+                )
+            )
+            role = stack.enter_context(
+                cm=Role(
+                    client=admin_client,
+                    name=f"llmisvc-auth-{i}-view",
+                    namespace=unprivileged_model_namespace.name,
+                    rules=[
+                        {
+                            "apiGroups": [svc.api_group],
+                            "resources": ["llminferenceservices"],
+                            "verbs": ["get"],
+                            "resourceNames": [svc.name],
+                        }
+                    ],
+                )
+            )
+            stack.enter_context(
+                cm=RoleBinding(
+                    client=admin_client,
+                    namespace=unprivileged_model_namespace.name,
+                    name=f"auth-sa-{i}-view",
+                    role_ref_name=role.name,
+                    role_ref_kind=role.kind,
+                    subjects_kind="ServiceAccount",
+                    subjects_name=sa.name,
+                )
+            )
+            token = RedactedString(value=create_inference_token(model_service_account=sa))
+            pairs.append({"service": svc, "token": token})
+        yield pairs
+
+
+# ===========================================
+#  Auth — SA + RBAC + token
+# ===========================================
+@pytest.fixture(scope="class")
+def llmisvc_token(
+    admin_client: DynamicClient,
+    llmisvc: LLMInferenceService,
+) -> Generator[str]:
+    """Create a dedicated SA with RBAC and return an auth token for the llmisvc."""
+    sa_name = f"{llmisvc.name}-auth-sa"
+    with (
+        ServiceAccount(client=admin_client, name=sa_name, namespace=llmisvc.namespace) as sa,
+        Role(
+            client=admin_client,
+            name=f"{llmisvc.name}-view",
+            namespace=llmisvc.namespace,
+            rules=[
+                {
+                    "apiGroups": [llmisvc.api_group],
+                    "resources": ["llminferenceservices"],
+                    "verbs": ["get"],
+                    "resourceNames": [llmisvc.name],
+                },
+            ],
+        ) as role,
+        RoleBinding(
+            client=admin_client,
+            namespace=llmisvc.namespace,
+            name=f"{sa_name}-view",
+            role_ref_name=role.name,
+            role_ref_kind=role.kind,
+            subjects_kind="ServiceAccount",
+            subjects_name=sa_name,
+        ),
+    ):
+        yield RedactedString(value=create_inference_token(model_service_account=sa))
+
+
+# ===========================================
+#  Monitoring
+# ===========================================
+@pytest.fixture(scope="session", autouse=True)
+def llmd_user_workload_monitoring_config_map(
+    admin_client: DynamicClient, cluster_monitoring_config: ConfigMap
+) -> Generator[ConfigMap]:
+    """Ephemeral user workload monitoring for LLMD tests."""
+    data = {
+        "config.yaml": yaml.dump({
+            "prometheus": {
+                "logLevel": "debug",
+                "retention": "15d",
+            }
+        })
+    }
+
+    with update_configmap_data(
+        client=admin_client,
+        name="user-workload-monitoring-config",
+        namespace="openshift-user-workload-monitoring",
+        data=data,
+    ) as cm:
+        yield cm
+
+
+# ===========================================
+#  Helpers (not fixtures)
+# ===========================================
+@contextmanager
+def _create_llmisvc_from_config(
+    config_cls: type,
+    namespace: str,
+    client: DynamicClient,
+    service_account: str | None = None,
+    extra_annotations: dict[str, str] | None = None,
+) -> Generator[LLMInferenceService, Any]:
+    """Create an LLMInferenceService from a config class."""
+    LOGGER.info(f"\n{config_cls.describe(namespace=namespace)}")
+
+    model: dict[str, Any] = {"uri": config_cls.storage_uri}
+    if config_cls.model_name:
+        model["name"] = config_cls.model_name
+
+    main_container: dict[str, Any] = {"name": "main"}
+    main_container.update({
+        k: v
+        for k, v in {
+            "image": config_cls.container_image,
+            "resources": config_cls.container_resources(),
+            "env": config_cls.container_env(),
+            "livenessProbe": config_cls.liveness_probe(),
+            "readinessProbe": config_cls.readiness_probe(),
+        }.items()
+        if v
+    })
+
+    template: dict[str, Any] = {
+        "configRef": config_cls.template_config_ref,
+        "containers": [main_container],
+    }
+    if service_account:
+        template["serviceAccountName"] = service_account
+
+    annotations = config_cls.annotations()
+    if extra_annotations:
+        annotations.update(extra_annotations)
+
+    prefill = config_cls.prefill_config()
+
+    svc_kwargs: dict[str, Any] = {
+        "client": client,
+        "name": config_cls.name,
+        "namespace": namespace,
+        "annotations": annotations,
+        "label": config_cls.labels(),
+        "teardown": True,
+        "model": model,
+        "replicas": config_cls.replicas,
+        "router": config_cls.router_config(),
+        "template": template,
+    }
+    if prefill is not None:
+        if service_account and "template" in prefill:
+            prefill["template"]["serviceAccountName"] = service_account
+        svc_kwargs["prefill"] = prefill
+
+    with LLMInferenceService(**svc_kwargs) as llm_service:
+        yield llm_service
@@ -0,0 +1,35 @@
+# llmd_configs
+
+One config class per LLMInferenceService test scenario. Each class is the single source of truth for its deployment.
+
+## Hierarchy
+
+```
+LLMISvcConfig (config_base.py)              # Base — defaults, helpers
+├── CpuConfig (config_base.py)              # CPU image, env, resources
+│   ├── TinyLlamaOciConfig                  # OCI storage
+│   ├── TinyLlamaS3Config                   # S3 storage
+│   └── Opt125mHfConfig                     # HuggingFace storage
+└── GpuConfig (config_base.py)              # GPU resources
+    ├── QwenS3Config                        # S3 storage
+    │   ├── PrefillDecodeConfig             # prefill-decode disaggregation
+    │   └── EstimatedPrefixCacheConfig      # estimated prefix cache
+    └── QwenHfConfig                        # HuggingFace storage
+        └── PrecisePrefixCacheConfig        # precise prefix cache
+```
+
+Model and storage classes live in `config_models.py`. Each feature config (prefill-decode, estimated prefix cache, precise prefix cache) lives in its own `config_*.py` file.
+
+## Usage
+
+```python
+@pytest.mark.parametrize("llmisvc", [TinyLlamaOciConfig], indirect=True)
+def test_something(self, llmisvc):
+    ...
+```
+
+Override inline with `with_overrides()`:
+
+```python
+@pytest.mark.parametrize("llmisvc", [TinyLlamaOciConfig.with_overrides(replicas=2)], indirect=True)
+```
@@ -0,0 +1,17 @@
+from .config_base import LLMISvcConfig
+from .config_estimated_prefix_cache import EstimatedPrefixCacheConfig
+from .config_models import Opt125mHfConfig, QwenHfConfig, QwenS3Config, TinyLlamaOciConfig, TinyLlamaS3Config
+from .config_precise_prefix_cache import PrecisePrefixCacheConfig
+from .config_prefill_decode import PrefillDecodeConfig
+
+# Public names of this package: the config classes imported above from the config_* submodules.
+__all__ = [
+    "EstimatedPrefixCacheConfig",
+    "LLMISvcConfig",
+    "Opt125mHfConfig",
+    "PrecisePrefixCacheConfig",
+    "PrefillDecodeConfig",
+    "QwenHfConfig",
+    "QwenS3Config",
+    "TinyLlamaOciConfig",
+    "TinyLlamaS3Config",
+]
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+"""llm-d test module for OpenDataHub and OpenShift AI."""`