Skip to content
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pytest.ini
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ markers =
gpu: Mark tests which require GPU resources
multinode: Mark tests which require multiple nodes
keda: Mark tests which are testing KEDA scaling
llmd_cpu: Mark tests which are testing LLMD (LLM Deployment) with CPU resources

# Model Registry:
custom_namespace: mark tests that are to be run with custom namespace
Expand Down
Empty file.
174 changes: 174 additions & 0 deletions tests/model_serving/model_server/llmd/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
from typing import Generator

import pytest
from _pytest.fixtures import FixtureRequest
from kubernetes.dynamic import DynamicClient
from ocp_resources.gateway import Gateway
from ocp_resources.llm_inference_service import LLMInferenceService
from ocp_resources.namespace import Namespace
from ocp_resources.secret import Secret
from ocp_resources.service_account import ServiceAccount

from utilities.constants import Timeout
from utilities.infra import s3_endpoint_secret
from utilities.llmd_utils import create_gateway, create_llmisvc
from utilities.llmd_constants import (
DEFAULT_GATEWAY_NAMESPACE,
VLLM_STORAGE_OCI,
VLLM_CPU_IMAGE,
DEFAULT_S3_STORAGE_PATH,
)


@pytest.fixture(scope="class")
def gateway_namespace(admin_client: DynamicClient) -> str:
return DEFAULT_GATEWAY_NAMESPACE


@pytest.fixture(scope="class")
def llmd_s3_secret(
admin_client: DynamicClient,
unprivileged_model_namespace: Namespace,
aws_access_key_id: str,
aws_secret_access_key: str,
models_s3_bucket_name: str,
models_s3_bucket_region: str,
models_s3_bucket_endpoint: str,
) -> Generator[Secret, None, None]:
with s3_endpoint_secret(
client=admin_client,
name="llmd-s3-secret",
namespace=unprivileged_model_namespace.name,
aws_access_key=aws_access_key_id,
aws_secret_access_key=aws_secret_access_key,
aws_s3_region=models_s3_bucket_region,
aws_s3_bucket=models_s3_bucket_name,
aws_s3_endpoint=models_s3_bucket_endpoint,
) as secret:
yield secret


@pytest.fixture(scope="class")
def llmd_s3_service_account(
admin_client: DynamicClient, llmd_s3_secret: Secret
) -> Generator[ServiceAccount, None, None]:
with ServiceAccount(
client=admin_client,
namespace=llmd_s3_secret.namespace,
name="llmd-s3-service-account",
secrets=[{"name": llmd_s3_secret.name}],
) as sa:
yield sa


@pytest.fixture(scope="class")
def llmd_gateway(
request: FixtureRequest,
admin_client: DynamicClient,
gateway_namespace: str,
) -> Generator[Gateway, None, None]:
if isinstance(request.param, str):
gateway_class_name = request.param
kwargs = {}
else:
gateway_class_name = request.param.get("gateway_class_name", "openshift-default")
kwargs = {k: v for k, v in request.param.items() if k != "gateway_class_name"}
with create_gateway(
client=admin_client,
namespace=gateway_namespace,
gateway_class_name=gateway_class_name,
wait_for_condition=True,
timeout=Timeout.TIMEOUT_5MIN,
**kwargs,
) as gateway:
yield gateway


@pytest.fixture(scope="class")
def llmd_inference_service(
request: FixtureRequest,
admin_client: DynamicClient,
unprivileged_model_namespace: Namespace,
) -> Generator[LLMInferenceService, None, None]:
if isinstance(request.param, str):
name_suffix = request.param
kwargs = {}
else:
name_suffix = request.param.get("name_suffix", "basic")
kwargs = {k: v for k, v in request.param.items() if k != "name_suffix"}

service_name = kwargs.get("name", f"llm-{name_suffix}")

if "llmd_gateway" in request.fixturenames:
request.getfixturevalue(argname="llmd_gateway")
container_resources = kwargs.get(
"container_resources",
{
"limits": {"cpu": "1", "memory": "10Gi"},
"requests": {"cpu": "100m", "memory": "8Gi"},
},
)

with create_llmisvc(
client=admin_client,
name=service_name,
namespace=unprivileged_model_namespace.name,
storage_uri=kwargs.get("storage_uri", VLLM_STORAGE_OCI),
container_image=kwargs.get("container_image", VLLM_CPU_IMAGE),
container_resources=container_resources,
wait=True,
timeout=Timeout.TIMEOUT_15MIN,
**{k: v for k, v in kwargs.items() if k != "name"},
) as llm_service:
yield llm_service


@pytest.fixture(scope="class")
def llmd_inference_service_s3(
request: FixtureRequest,
admin_client: DynamicClient,
unprivileged_model_namespace: Namespace,
llmd_s3_secret: Secret,
llmd_s3_service_account: ServiceAccount,
) -> Generator[LLMInferenceService, None, None]:
if isinstance(request.param, str):
name_suffix = request.param
kwargs = {"storage_path": DEFAULT_S3_STORAGE_PATH}
else:
name_suffix = request.param.get("name_suffix", "s3")
kwargs = {k: v for k, v in request.param.items() if k != "name_suffix"}

service_name = kwargs.get("name", f"llm-{name_suffix}")

if "storage_key" not in kwargs:
kwargs["storage_key"] = llmd_s3_secret.name

if "storage_path" not in kwargs:
kwargs["storage_path"] = DEFAULT_S3_STORAGE_PATH

container_resources = kwargs.get(
"container_resources",
{
"limits": {"cpu": "1", "memory": "10Gi"},
"requests": {"cpu": "100m", "memory": "8Gi"},
},
)

with create_llmisvc(
client=admin_client,
name=service_name,
namespace=unprivileged_model_namespace.name,
storage_key=kwargs.get("storage_key"),
storage_path=kwargs.get("storage_path"),
container_image=kwargs.get("container_image", VLLM_CPU_IMAGE),
container_resources=container_resources,
service_account=llmd_s3_service_account.name,
wait=True,
timeout=Timeout.TIMEOUT_15MIN,
**{
k: v
for k, v in kwargs.items()
if k not in ["name", "storage_key", "storage_path", "container_image", "container_resources"]
},
) as llm_service:
yield llm_service
34 changes: 34 additions & 0 deletions tests/model_serving/model_server/llmd/test_llmd_oci_cpu.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import pytest

from tests.model_serving.model_server.llmd.utils import verify_llm_service_status, verify_gateway_status
from utilities.constants import Protocols
from utilities.llmd_utils import verify_inference_response_llmd

from utilities.llmd_constants import BASIC_LLMD_PARAMS
from utilities.manifests.opt125m_cpu import OPT125M_CPU_INFERENCE_CONFIG

pytestmark = [
pytest.mark.llmd_cpu,
]


@pytest.mark.parametrize(
    "unprivileged_model_namespace, llmd_gateway, llmd_inference_service",
    BASIC_LLMD_PARAMS,
    indirect=True,
)
class TestLLMDOCICPUInference:
    """Validate LLMD inference from OCI storage on a CPU-only vLLM runtime."""

    def test_llmd_oci(self, llmd_gateway, llmd_inference_service):
        # Both resources must be fully ready before any inference is attempted.
        gateway_ready = verify_gateway_status(llmd_gateway)
        assert gateway_ready, "Gateway should be ready"
        service_ready = verify_llm_service_status(llmd_inference_service)
        assert service_ready, "LLMInferenceService should be ready"

        verify_inference_response_llmd(
            llm_service=llmd_inference_service,
            inference_config=OPT125M_CPU_INFERENCE_CONFIG,
            inference_type="chat_completions",
            protocol=Protocols.HTTP,
            use_default_query=True,
            insecure=True,
        )
34 changes: 34 additions & 0 deletions tests/model_serving/model_server/llmd/test_llmd_s3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import pytest

from tests.model_serving.model_server.llmd.utils import verify_llm_service_status, verify_gateway_status
from utilities.constants import Protocols
from utilities.llmd_utils import verify_inference_response_llmd

from utilities.manifests.opt125m_cpu import OPT125M_CPU_INFERENCE_CONFIG

pytestmark = [
pytest.mark.llmd_cpu,
]


@pytest.mark.parametrize(
    "unprivileged_model_namespace, llmd_gateway, llmd_inference_service_s3",
    [({"name": "llmd-s3-test"}, "openshift-default", {"storage_path": "opt-125m/"})],
    indirect=True,
)
@pytest.mark.usefixtures("valid_aws_config")
class TestLLMDS3Inference:
    """Validate LLMD inference when the model is pulled from S3 storage."""

    def test_llmd_s3(self, llmd_gateway, llmd_inference_service_s3):
        # Both resources must be fully ready before any inference is attempted.
        gateway_ready = verify_gateway_status(llmd_gateway)
        assert gateway_ready, "Gateway should be ready"
        service_ready = verify_llm_service_status(llmd_inference_service_s3)
        assert service_ready, "LLMInferenceService should be ready"

        verify_inference_response_llmd(
            llm_service=llmd_inference_service_s3,
            inference_config=OPT125M_CPU_INFERENCE_CONFIG,
            inference_type="chat_completions",
            protocol=Protocols.HTTP,
            use_default_query=True,
            insecure=True,
        )
61 changes: 61 additions & 0 deletions tests/model_serving/model_server/llmd/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
"""
Utility functions for LLM Deployment (LLMD) tests.

This module provides helper functions for LLMD test operations using ocp_resources.
Follows the established model server utils pattern for consistency.
"""

from ocp_resources.gateway import Gateway
from ocp_resources.llm_inference_service import LLMInferenceService
from simple_logger.logger import get_logger


LOGGER = get_logger(name=__name__)


def verify_gateway_status(gateway: Gateway) -> bool:
    """
    Verify that a Gateway is properly configured and programmed.

    Args:
        gateway (Gateway): The Gateway resource to verify

    Returns:
        bool: True if gateway is properly configured, False otherwise
    """
    if not gateway.exists:
        LOGGER.warning(f"Gateway {gateway.name} does not exist")
        return False

    # A freshly created resource may have no populated status yet; calling
    # .get on it directly would raise AttributeError in that case.
    status = gateway.instance.status
    conditions = status.get("conditions", []) if status else []
    for condition in conditions:
        if condition["type"] == "Programmed" and condition["status"] == "True":
            LOGGER.info(f"Gateway {gateway.name} is programmed and ready")
            return True

    LOGGER.warning(f"Gateway {gateway.name} is not in Programmed state")
    return False


def verify_llm_service_status(llm_service: LLMInferenceService) -> bool:
    """
    Verify that an LLMInferenceService is properly configured and ready.

    Args:
        llm_service (LLMInferenceService): The LLMInferenceService resource to verify

    Returns:
        bool: True if service is properly configured, False otherwise
    """
    if not llm_service.exists:
        LOGGER.warning(f"LLMInferenceService {llm_service.name} does not exist")
        return False

    # A freshly created resource may have no populated status yet; calling
    # .get on it directly would raise AttributeError in that case.
    status = llm_service.instance.status
    conditions = status.get("conditions", []) if status else []
    for condition in conditions:
        if condition["type"] == "Ready" and condition["status"] == "True":
            LOGGER.info(f"LLMInferenceService {llm_service.name} is ready")
            return True

    LOGGER.warning(f"LLMInferenceService {llm_service.name} is not in Ready state")
    return False
30 changes: 30 additions & 0 deletions utilities/llmd_constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
"""Centralized constants for LLMD (LLM Deployment) utilities and tests."""

from utilities.constants import Timeout

DEFAULT_GATEWAY_NAME = "openshift-ai-inference"
DEFAULT_GATEWAY_NAMESPACE = "openshift-ingress"
OPENSHIFT_DEFAULT_GATEWAY_CLASS = "openshift-default"

KSERVE_GATEWAY_LABEL = "serving.kserve.io/gateway"
KSERVE_INGRESS_GATEWAY = "kserve-ingress-gateway"

DEFAULT_LLM_ENDPOINT = "/v1/chat/completions"
DEFAULT_MAX_TOKENS = 50
DEFAULT_TEMPERATURE = 0.0
DEFAULT_TIMEOUT = Timeout.TIMEOUT_30SEC

VLLM_STORAGE_OCI = "oci://quay.io/mwaykole/test:opt-125m"
VLLM_CPU_IMAGE = "quay.io/pierdipi/vllm-cpu:latest"
DEFAULT_LLMD_REPLICAS = 1
DEFAULT_S3_STORAGE_PATH = "opt-125m"

DEFAULT_STORAGE_URI = VLLM_STORAGE_OCI
DEFAULT_CONTAINER_IMAGE = VLLM_CPU_IMAGE

DEFAULT_CPU_LIMIT = "1"
DEFAULT_MEMORY_LIMIT = "10Gi"
DEFAULT_CPU_REQUEST = "100m"
DEFAULT_MEMORY_REQUEST = "8Gi"

BASIC_LLMD_PARAMS = [({"name": "llmd-comprehensive-test"}, "openshift-default", "basic")]
Loading