Skip to content
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pytest.ini
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ markers =
gpu: Mark tests which require GPU resources
multinode: Mark tests which require multiple nodes
keda: Mark tests which are testing KEDA scaling
llmd_cpu: Mark tests which are testing LLMD (LLM Deployment) with CPU resources

# Model Registry:
custom_namespace: mark tests that are to be run with custom namespace
Expand Down
Empty file.
174 changes: 174 additions & 0 deletions tests/model_serving/model_server/llmd/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
from typing import Generator

import pytest
from _pytest.fixtures import FixtureRequest
from kubernetes.dynamic import DynamicClient
from ocp_resources.gateway import Gateway
from ocp_resources.llm_inference_service import LLMInferenceService
from ocp_resources.namespace import Namespace
from ocp_resources.secret import Secret
from ocp_resources.service_account import ServiceAccount

from utilities.constants import Timeout
from utilities.infra import s3_endpoint_secret
from utilities.llmd_utils import create_gateway, create_llmisvc
from utilities.llmd_constants import (
DEFAULT_GATEWAY_NAMESPACE,
VLLM_STORAGE_OCI,
VLLM_CPU_IMAGE,
DEFAULT_S3_STORAGE_PATH,
)


@pytest.fixture(scope="class")
def gateway_namespace(admin_client: DynamicClient) -> str:
return DEFAULT_GATEWAY_NAMESPACE


@pytest.fixture(scope="class")
def llmd_s3_secret(
admin_client: DynamicClient,
unprivileged_model_namespace: Namespace,
aws_access_key_id: str,
aws_secret_access_key: str,
models_s3_bucket_name: str,
models_s3_bucket_region: str,
models_s3_bucket_endpoint: str,
) -> Generator[Secret, None, None]:
with s3_endpoint_secret(
client=admin_client,
name="llmd-s3-secret",
namespace=unprivileged_model_namespace.name,
aws_access_key=aws_access_key_id,
aws_secret_access_key=aws_secret_access_key,
aws_s3_region=models_s3_bucket_region,
aws_s3_bucket=models_s3_bucket_name,
aws_s3_endpoint=models_s3_bucket_endpoint,
) as secret:
yield secret


@pytest.fixture(scope="class")
def llmd_s3_service_account(
admin_client: DynamicClient, llmd_s3_secret: Secret
) -> Generator[ServiceAccount, None, None]:
with ServiceAccount(
client=admin_client,
namespace=llmd_s3_secret.namespace,
name="llmd-s3-service-account",
secrets=[{"name": llmd_s3_secret.name}],
) as sa:
yield sa


@pytest.fixture(scope="class")
def llmd_gateway(
request: FixtureRequest,
admin_client: DynamicClient,
gateway_namespace: str,
) -> Generator[Gateway, None, None]:
if isinstance(request.param, str):
gateway_class_name = request.param
kwargs = {}
else:
gateway_class_name = request.param.get("gateway_class_name", "openshift-default")
kwargs = {k: v for k, v in request.param.items() if k != "gateway_class_name"}
with create_gateway(
client=admin_client,
namespace=gateway_namespace,
gateway_class_name=gateway_class_name,
wait_for_condition=True,
timeout=Timeout.TIMEOUT_5MIN,
**kwargs,
) as gateway:
yield gateway


@pytest.fixture(scope="class")
def llmd_inference_service(
request: FixtureRequest,
admin_client: DynamicClient,
unprivileged_model_namespace: Namespace,
) -> Generator[LLMInferenceService, None, None]:
if isinstance(request.param, str):
name_suffix = request.param
kwargs = {}
else:
name_suffix = request.param.get("name_suffix", "basic")
kwargs = {k: v for k, v in request.param.items() if k != "name_suffix"}

service_name = kwargs.get("name", f"llm-{name_suffix}")

if "llmd_gateway" in request.fixturenames:
request.getfixturevalue(argname="llmd_gateway")
container_resources = kwargs.get(
"container_resources",
{
"limits": {"cpu": "1", "memory": "10Gi"},
"requests": {"cpu": "100m", "memory": "8Gi"},
},
)

with create_llmisvc(
client=admin_client,
name=service_name,
namespace=unprivileged_model_namespace.name,
storage_uri=kwargs.get("storage_uri", VLLM_STORAGE_OCI),
container_image=kwargs.get("container_image", VLLM_CPU_IMAGE),
container_resources=container_resources,
wait=True,
timeout=Timeout.TIMEOUT_15MIN,
**{k: v for k, v in kwargs.items() if k != "name"},
) as llm_service:
yield llm_service


@pytest.fixture(scope="class")
def llmd_inference_service_s3(
request: FixtureRequest,
admin_client: DynamicClient,
unprivileged_model_namespace: Namespace,
llmd_s3_secret: Secret,
llmd_s3_service_account: ServiceAccount,
) -> Generator[LLMInferenceService, None, None]:
if isinstance(request.param, str):
name_suffix = request.param
kwargs = {"storage_path": DEFAULT_S3_STORAGE_PATH}
else:
name_suffix = request.param.get("name_suffix", "s3")
kwargs = {k: v for k, v in request.param.items() if k != "name_suffix"}

service_name = kwargs.get("name", f"llm-{name_suffix}")

if "storage_key" not in kwargs:
kwargs["storage_key"] = llmd_s3_secret.name

if "storage_path" not in kwargs:
kwargs["storage_path"] = DEFAULT_S3_STORAGE_PATH

container_resources = kwargs.get(
"container_resources",
{
"limits": {"cpu": "1", "memory": "10Gi"},
"requests": {"cpu": "100m", "memory": "8Gi"},
},
)

with create_llmisvc(
client=admin_client,
name=service_name,
namespace=unprivileged_model_namespace.name,
storage_key=kwargs.get("storage_key"),
storage_path=kwargs.get("storage_path"),
container_image=kwargs.get("container_image", VLLM_CPU_IMAGE),
container_resources=container_resources,
service_account=llmd_s3_service_account.name,
wait=True,
timeout=Timeout.TIMEOUT_15MIN,
**{
k: v
for k, v in kwargs.items()
if k not in ["name", "storage_key", "storage_path", "container_image", "container_resources"]
},
) as llm_service:
yield llm_service
34 changes: 34 additions & 0 deletions tests/model_serving/model_server/llmd/test_llmd_oci_cpu.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import pytest

from tests.model_serving.model_server.llmd.utils import verify_llm_service_status, verify_gateway_status
from utilities.constants import Protocols
from utilities.llmd_utils import verify_inference_response_llmd

from utilities.llmd_constants import BASIC_LLMD_PARAMS
from utilities.manifests.opt125m_cpu import OPT125M_CPU_INFERENCE_CONFIG

pytestmark = [
pytest.mark.llmd_cpu,
]


@pytest.mark.parametrize(
    "unprivileged_model_namespace, llmd_gateway, llmd_inference_service",
    BASIC_LLMD_PARAMS,
    indirect=True,
)
class TestLLMDOCICPUInference:
    """Validate LLMD inference from OCI storage on a CPU-only vLLM runtime."""

    def test_llmd_oci(self, llmd_gateway, llmd_inference_service):
        # Both resources must be fully ready before any inference is attempted.
        gateway_ready = verify_gateway_status(llmd_gateway)
        assert gateway_ready, "Gateway should be ready"
        service_ready = verify_llm_service_status(llmd_inference_service)
        assert service_ready, "LLMInferenceService should be ready"

        verify_inference_response_llmd(
            llm_service=llmd_inference_service,
            inference_config=OPT125M_CPU_INFERENCE_CONFIG,
            inference_type="chat_completions",
            protocol=Protocols.HTTP,
            use_default_query=True,
            insecure=True,
        )
34 changes: 34 additions & 0 deletions tests/model_serving/model_server/llmd/test_llmd_s3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import pytest

from tests.model_serving.model_server.llmd.utils import verify_llm_service_status, verify_gateway_status
from utilities.constants import Protocols
from utilities.llmd_utils import verify_inference_response_llmd

from utilities.manifests.opt125m_cpu import OPT125M_CPU_INFERENCE_CONFIG

pytestmark = [
pytest.mark.llmd_cpu,
]


@pytest.mark.parametrize(
    "unprivileged_model_namespace, llmd_gateway, llmd_inference_service_s3",
    [({"name": "llmd-s3-test"}, "openshift-default", {"storage_path": "opt-125m/"})],
    indirect=True,
)
@pytest.mark.usefixtures("valid_aws_config")
class TestLLMDS3Inference:
    """Validate LLMD inference when the model is pulled from S3 storage."""

    def test_llmd_s3(self, llmd_gateway, llmd_inference_service_s3):
        # Both resources must be fully ready before any inference is attempted.
        gateway_ready = verify_gateway_status(llmd_gateway)
        assert gateway_ready, "Gateway should be ready"
        service_ready = verify_llm_service_status(llmd_inference_service_s3)
        assert service_ready, "LLMInferenceService should be ready"

        verify_inference_response_llmd(
            llm_service=llmd_inference_service_s3,
            inference_config=OPT125M_CPU_INFERENCE_CONFIG,
            inference_type="chat_completions",
            protocol=Protocols.HTTP,
            use_default_query=True,
            insecure=True,
        )
61 changes: 61 additions & 0 deletions tests/model_serving/model_server/llmd/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
"""
Utility functions for LLM Deployment (LLMD) tests.

This module provides helper functions for LLMD test operations using ocp_resources.
Follows the established model server utils pattern for consistency.
"""

from ocp_resources.gateway import Gateway
from ocp_resources.llm_inference_service import LLMInferenceService
from simple_logger.logger import get_logger


LOGGER = get_logger(name=__name__)


def verify_gateway_status(gateway: Gateway) -> bool:
    """
    Verify that a Gateway is properly configured and programmed.

    Args:
        gateway (Gateway): The Gateway resource to verify

    Returns:
        bool: True if gateway is properly configured, False otherwise
    """
    if not gateway.exists:
        LOGGER.warning(f"Gateway {gateway.name} does not exist")
        return False

    # A freshly created resource may have no populated status yet; calling
    # .get on it directly would raise AttributeError in that case.
    status = gateway.instance.status
    conditions = status.get("conditions", []) if status else []
    for condition in conditions:
        if condition["type"] == "Programmed" and condition["status"] == "True":
            LOGGER.info(f"Gateway {gateway.name} is programmed and ready")
            return True

    LOGGER.warning(f"Gateway {gateway.name} is not in Programmed state")
    return False


def verify_llm_service_status(llm_service: LLMInferenceService) -> bool:
    """
    Verify that an LLMInferenceService is properly configured and ready.

    Args:
        llm_service (LLMInferenceService): The LLMInferenceService resource to verify

    Returns:
        bool: True if service is properly configured, False otherwise
    """
    if not llm_service.exists:
        LOGGER.warning(f"LLMInferenceService {llm_service.name} does not exist")
        return False

    # A freshly created resource may have no populated status yet; calling
    # .get on it directly would raise AttributeError in that case.
    status = llm_service.instance.status
    conditions = status.get("conditions", []) if status else []
    for condition in conditions:
        if condition["type"] == "Ready" and condition["status"] == "True":
            LOGGER.info(f"LLMInferenceService {llm_service.name} is ready")
            return True

    LOGGER.warning(f"LLMInferenceService {llm_service.name} is not in Ready state")
    return False
30 changes: 30 additions & 0 deletions utilities/llmd_constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
"""Centralized constants for LLMD (LLM Deployment) utilities and tests."""

from utilities.constants import Timeout

DEFAULT_GATEWAY_NAME = "openshift-ai-inference"
DEFAULT_GATEWAY_NAMESPACE = "openshift-ingress"
OPENSHIFT_DEFAULT_GATEWAY_CLASS = "openshift-default"

KSERVE_GATEWAY_LABEL = "serving.kserve.io/gateway"
KSERVE_INGRESS_GATEWAY = "kserve-ingress-gateway"

DEFAULT_LLM_ENDPOINT = "/v1/chat/completions"
DEFAULT_MAX_TOKENS = 50
DEFAULT_TEMPERATURE = 0.0
DEFAULT_TIMEOUT = Timeout.TIMEOUT_30SEC

VLLM_STORAGE_OCI = "oci://quay.io/mwaykole/test:opt-125m"
VLLM_CPU_IMAGE = "quay.io/pierdipi/vllm-cpu:latest"
DEFAULT_LLMD_REPLICAS = 1
DEFAULT_S3_STORAGE_PATH = "opt-125m"

DEFAULT_STORAGE_URI = VLLM_STORAGE_OCI
DEFAULT_CONTAINER_IMAGE = VLLM_CPU_IMAGE

DEFAULT_CPU_LIMIT = "1"
DEFAULT_MEMORY_LIMIT = "10Gi"
DEFAULT_CPU_REQUEST = "100m"
DEFAULT_MEMORY_REQUEST = "8Gi"

BASIC_LLMD_PARAMS = [({"name": "llmd-comprehensive-test"}, "openshift-default", "basic")]
Loading