Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pytest.ini
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ markers =
gpu: Mark tests which require GPU resources
multinode: Mark tests which require multiple nodes
keda: Mark tests which are testing KEDA scaling
llmd_cpu: Mark tests which are testing LLMD (LLM Deployment) with CPU resources

# Model Registry:
custom_namespace: mark tests that are to be run with custom namespace
Expand Down
1 change: 1 addition & 0 deletions tests/model_serving/model_server/llmd/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""LLMD (LLM Deployment) test module for OpenDataHub and OpenShift AI."""
185 changes: 185 additions & 0 deletions tests/model_serving/model_server/llmd/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
from typing import Generator

import pytest
from _pytest.fixtures import FixtureRequest
from kubernetes.dynamic import DynamicClient
from ocp_resources.gateway import Gateway
from ocp_resources.llm_inference_service import LLMInferenceService
from ocp_resources.namespace import Namespace
from ocp_resources.secret import Secret
from ocp_resources.service_account import ServiceAccount

from utilities.constants import Timeout
from utilities.infra import s3_endpoint_secret
from utilities.llmd_utils import create_llmd_gateway, create_llmisvc
from utilities.llmd_constants import (
DEFAULT_GATEWAY_NAMESPACE,
VLLM_STORAGE_OCI,
VLLM_CPU_IMAGE,
DEFAULT_S3_STORAGE_PATH,
)


@pytest.fixture(scope="class")
def gateway_namespace(admin_client: DynamicClient) -> str:
return DEFAULT_GATEWAY_NAMESPACE


@pytest.fixture(scope="class")
def llmd_s3_secret(
admin_client: DynamicClient,
unprivileged_model_namespace: Namespace,
aws_access_key_id: str,
aws_secret_access_key: str,
models_s3_bucket_name: str,
models_s3_bucket_region: str,
models_s3_bucket_endpoint: str,
) -> Generator[Secret, None, None]:
with s3_endpoint_secret(
client=admin_client,
name="llmd-s3-secret",
namespace=unprivileged_model_namespace.name,
aws_access_key=aws_access_key_id,
aws_secret_access_key=aws_secret_access_key,
aws_s3_region=models_s3_bucket_region,
aws_s3_bucket=models_s3_bucket_name,
aws_s3_endpoint=models_s3_bucket_endpoint,
) as secret:
yield secret


@pytest.fixture(scope="class")
def llmd_s3_service_account(
admin_client: DynamicClient, llmd_s3_secret: Secret
) -> Generator[ServiceAccount, None, None]:
with ServiceAccount(
client=admin_client,
namespace=llmd_s3_secret.namespace,
name="llmd-s3-service-account",
secrets=[{"name": llmd_s3_secret.name}],
) as sa:
yield sa


@pytest.fixture(scope="class")
def llmd_gateway(
request: FixtureRequest,
admin_client: DynamicClient,
gateway_namespace: str,
) -> Generator[Gateway, None, None]:
"""
Pytest fixture for LLMD Gateway management using create_llmd_gateway.

Implements persistent LLMD gateway strategy:
- Reuses existing gateways if available
- Creates new gateway only if needed
- Does not delete gateway in teardown
- Uses LLMD-specific gateway configuration
"""
if isinstance(request.param, str):
gateway_class_name = request.param
kwargs = {}
else:
gateway_class_name = request.param.get("gateway_class_name", "openshift-default")
kwargs = {k: v for k, v in request.param.items() if k != "gateway_class_name"}

with create_llmd_gateway(
client=admin_client,
namespace=gateway_namespace,
gateway_class_name=gateway_class_name,
wait_for_condition=True,
timeout=Timeout.TIMEOUT_5MIN,
teardown=False, # Don't delete gateway in teardown
**kwargs,
) as gateway:
yield gateway


@pytest.fixture(scope="class")
def llmd_inference_service(
request: FixtureRequest,
admin_client: DynamicClient,
unprivileged_model_namespace: Namespace,
) -> Generator[LLMInferenceService, None, None]:
if isinstance(request.param, str):
name_suffix = request.param
kwargs = {}
else:
name_suffix = request.param.get("name_suffix", "basic")
kwargs = {k: v for k, v in request.param.items() if k != "name_suffix"}

service_name = kwargs.get("name", f"llm-{name_suffix}")

if "llmd_gateway" in request.fixturenames:
request.getfixturevalue(argname="llmd_gateway")
container_resources = kwargs.get(
"container_resources",
{
"limits": {"cpu": "1", "memory": "10Gi"},
"requests": {"cpu": "100m", "memory": "8Gi"},
},
)

with create_llmisvc(
client=admin_client,
name=service_name,
namespace=unprivileged_model_namespace.name,
storage_uri=kwargs.get("storage_uri", VLLM_STORAGE_OCI),
container_image=kwargs.get("container_image", VLLM_CPU_IMAGE),
container_resources=container_resources,
wait=True,
timeout=Timeout.TIMEOUT_15MIN,
**{k: v for k, v in kwargs.items() if k != "name"},
) as llm_service:
yield llm_service


@pytest.fixture(scope="class")
def llmd_inference_service_s3(
request: FixtureRequest,
admin_client: DynamicClient,
unprivileged_model_namespace: Namespace,
llmd_s3_secret: Secret,
llmd_s3_service_account: ServiceAccount,
) -> Generator[LLMInferenceService, None, None]:
if isinstance(request.param, str):
name_suffix = request.param
kwargs = {"storage_path": DEFAULT_S3_STORAGE_PATH}
else:
name_suffix = request.param.get("name_suffix", "s3")
kwargs = {k: v for k, v in request.param.items() if k != "name_suffix"}

service_name = kwargs.get("name", f"llm-{name_suffix}")

if "storage_key" not in kwargs:
kwargs["storage_key"] = llmd_s3_secret.name

if "storage_path" not in kwargs:
kwargs["storage_path"] = DEFAULT_S3_STORAGE_PATH

container_resources = kwargs.get(
"container_resources",
{
"limits": {"cpu": "1", "memory": "10Gi"},
"requests": {"cpu": "100m", "memory": "8Gi"},
},
)

with create_llmisvc(
client=admin_client,
name=service_name,
namespace=unprivileged_model_namespace.name,
storage_key=kwargs.get("storage_key"),
storage_path=kwargs.get("storage_path"),
container_image=kwargs.get("container_image", VLLM_CPU_IMAGE),
container_resources=container_resources,
service_account=llmd_s3_service_account.name,
wait=True,
timeout=Timeout.TIMEOUT_15MIN,
**{
k: v
for k, v in kwargs.items()
if k not in ["name", "storage_key", "storage_path", "container_image", "container_resources"]
},
) as llm_service:
yield llm_service
40 changes: 40 additions & 0 deletions tests/model_serving/model_server/llmd/test_llmd_oci_cpu.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import pytest

from tests.model_serving.model_server.llmd.utils import (
verify_llm_service_status,
verify_gateway_status,
verify_llmd_pods_not_restarted,
)
from utilities.constants import Protocols
from utilities.llmd_utils import verify_inference_response_llmd
from utilities.llmd_constants import BASIC_LLMD_PARAMS
from utilities.manifests.opt125m_cpu import OPT125M_CPU_INFERENCE_CONFIG

pytestmark = [
pytest.mark.llmd_cpu,
]


@pytest.mark.parametrize(
    "unprivileged_model_namespace, llmd_gateway, llmd_inference_service",
    BASIC_LLMD_PARAMS,
    indirect=True,
)
class TestLLMDOCICPUInference:
    """LLMD inference testing with OCI storage and CPU runtime using vLLM."""

    def test_llmd_oci(self, admin_client, llmd_gateway, llmd_inference_service):
        """Test LLMD inference with OCI storage and CPU runtime."""
        # Preconditions: gateway and service must both report ready.
        gateway_ready = verify_gateway_status(llmd_gateway)
        assert gateway_ready, "Gateway should be ready"
        service_ready = verify_llm_service_status(llmd_inference_service)
        assert service_ready, "LLMInferenceService should be ready"

        # Exercise the chat-completions endpoint over plain HTTP.
        verify_inference_response_llmd(
            llm_service=llmd_inference_service,
            inference_config=OPT125M_CPU_INFERENCE_CONFIG,
            inference_type="chat_completions",
            protocol=Protocols.HTTP,
            use_default_query=True,
            insecure=True,
        )

        # Workload pods must have survived the request without restarting.
        verify_llmd_pods_not_restarted(client=admin_client, llm_service=llmd_inference_service)
40 changes: 40 additions & 0 deletions tests/model_serving/model_server/llmd/test_llmd_s3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import pytest

from tests.model_serving.model_server.llmd.utils import (
verify_llm_service_status,
verify_gateway_status,
verify_llmd_pods_not_restarted,
)
from utilities.constants import Protocols
from utilities.llmd_utils import verify_inference_response_llmd
from utilities.manifests.opt125m_cpu import OPT125M_CPU_INFERENCE_CONFIG

pytestmark = [
pytest.mark.llmd_cpu,
]


@pytest.mark.parametrize(
    "unprivileged_model_namespace, llmd_gateway, llmd_inference_service_s3",
    [({"name": "llmd-s3-test"}, "openshift-default", {"storage_path": "opt-125m/"})],
    indirect=True,
)
@pytest.mark.usefixtures("valid_aws_config")
class TestLLMDS3Inference:
    """LLMD inference testing with S3 storage."""

    def test_llmd_s3(self, admin_client, llmd_gateway, llmd_inference_service_s3):
        """Test LLMD inference with S3 storage."""
        # Preconditions: gateway and service must both report ready.
        gateway_ready = verify_gateway_status(llmd_gateway)
        assert gateway_ready, "Gateway should be ready"
        service_ready = verify_llm_service_status(llmd_inference_service_s3)
        assert service_ready, "LLMInferenceService should be ready"

        # Exercise the chat-completions endpoint over plain HTTP.
        verify_inference_response_llmd(
            llm_service=llmd_inference_service_s3,
            inference_config=OPT125M_CPU_INFERENCE_CONFIG,
            inference_type="chat_completions",
            protocol=Protocols.HTTP,
            use_default_query=True,
            insecure=True,
        )

        # Workload pods must have survived the request without restarting.
        verify_llmd_pods_not_restarted(client=admin_client, llm_service=llmd_inference_service_s3)
109 changes: 109 additions & 0 deletions tests/model_serving/model_server/llmd/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
"""
Utility functions for LLM Deployment (LLMD) tests.

This module provides helper functions for LLMD test operations using ocp_resources.
Follows the established model server utils pattern for consistency.
"""

from kubernetes.dynamic import DynamicClient
from ocp_resources.gateway import Gateway
from ocp_resources.llm_inference_service import LLMInferenceService
from ocp_resources.pod import Pod
from simple_logger.logger import get_logger

from utilities.exceptions import PodContainersRestartError


LOGGER = get_logger(name=__name__)


def verify_gateway_status(gateway: Gateway) -> bool:
    """
    Verify that a Gateway is properly configured and programmed.

    Args:
        gateway (Gateway): The Gateway resource to verify

    Returns:
        bool: True if the gateway exists and reports a Programmed=True condition,
            False otherwise
    """
    if not gateway.exists:
        LOGGER.warning(f"Gateway {gateway.name} does not exist")
        return False

    # A freshly-created resource may not carry a .status stanza yet; treat that
    # as "not ready" instead of raising AttributeError on None.get(...).
    status = gateway.instance.status
    conditions = status.get("conditions", []) if status else []
    for condition in conditions:
        if condition["type"] == "Programmed" and condition["status"] == "True":
            LOGGER.info(f"Gateway {gateway.name} is programmed and ready")
            return True

    LOGGER.warning(f"Gateway {gateway.name} is not in Programmed state")
    return False


def verify_llm_service_status(llm_service: LLMInferenceService) -> bool:
    """
    Verify that an LLMInferenceService is properly configured and ready.

    Args:
        llm_service (LLMInferenceService): The LLMInferenceService resource to verify

    Returns:
        bool: True if the service exists and reports a Ready=True condition,
            False otherwise
    """
    if not llm_service.exists:
        LOGGER.warning(f"LLMInferenceService {llm_service.name} does not exist")
        return False

    # A freshly-created resource may not carry a .status stanza yet; treat that
    # as "not ready" instead of raising AttributeError on None.get(...).
    status = llm_service.instance.status
    conditions = status.get("conditions", []) if status else []
    for condition in conditions:
        if condition["type"] == "Ready" and condition["status"] == "True":
            LOGGER.info(f"LLMInferenceService {llm_service.name} is ready")
            return True

    LOGGER.warning(f"LLMInferenceService {llm_service.name} is not in Ready state")
    return False


def verify_llmd_pods_not_restarted(client: DynamicClient, llm_service: LLMInferenceService) -> None:
    """
    Verify that LLMD inference pods containers have not restarted.

    This function checks for container restarts in pods related to the specific LLMInferenceService.

    Args:
        client (DynamicClient): DynamicClient instance
        llm_service (LLMInferenceService): The LLMInferenceService to check pods for

    Raises:
        PodContainersRestartError: If any containers in LLMD pods have restarted
    """
    LOGGER.info(f"Verifying that pods for LLMInferenceService {llm_service.name} have not restarted")

    restarted_containers = {}
    label_selector = (
        f"{Pod.ApiGroup.APP_KUBERNETES_IO}/part-of=llminferenceservice,"
        f"{Pod.ApiGroup.APP_KUBERNETES_IO}/name={llm_service.name}"
    )

    for pod in Pod.get(dyn_client=client, namespace=llm_service.namespace, label_selector=label_selector):
        pod_labels = pod.instance.metadata.get("labels", {})
        # Only workload pods are relevant; skip routers/controllers.
        if pod_labels.get("kserve.io/component") != "workload":
            continue

        LOGGER.debug(f"Checking pod {pod.name} for container restarts")
        container_statuses = pod.instance.status.containerStatuses
        if not container_statuses:
            continue

        pod_restarts = [container.name for container in container_statuses if container.restartCount > 0]
        if pod_restarts:
            restarted_containers[pod.name] = pod_restarts
            LOGGER.warning(f"Pod {pod.name} has restarted containers: {pod_restarts}")

    if restarted_containers:
        error_msg = f"LLMD inference containers restarted for {llm_service.name}: {restarted_containers}"
        LOGGER.error(error_msg)
        raise PodContainersRestartError(error_msg)

    LOGGER.info(f"All pods for LLMInferenceService {llm_service.name} have no container restarts")
Loading