Skip to content

Commit f032480

Browse files
committed
Add LLMD model server tests and utilities
1 parent 91fc08c commit f032480

File tree

8 files changed

+1042
-0
lines changed

8 files changed

+1042
-0
lines changed
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
"""
2+
LLM Deployment (LLMD) tests module.
3+
4+
This module contains tests for LLM deployment functionality including:
5+
- Gateway resource creation and management
6+
- LLMInferenceService deployment and configuration
7+
- Integration testing between gateway and inference services
8+
"""
Lines changed: 173 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,173 @@
1+
from typing import Generator
2+
3+
import pytest
4+
from _pytest.fixtures import FixtureRequest
5+
from kubernetes.dynamic import DynamicClient
6+
from ocp_resources.gateway import Gateway
7+
from ocp_resources.llm_inference_service import LLMInferenceService
8+
from ocp_resources.namespace import Namespace
9+
from ocp_resources.secret import Secret
10+
from ocp_resources.service_account import ServiceAccount
11+
12+
from utilities.constants import Timeout
13+
from utilities.infra import s3_endpoint_secret
14+
from utilities.llmd_utils import create_gateway, create_llmisvc
15+
from utilities.llmd_constants import (
16+
DEFAULT_GATEWAY_NAMESPACE,
17+
VLLM_STORAGE_OCI,
18+
VLLM_CPU_IMAGE,
19+
DEFAULT_S3_STORAGE_PATH,
20+
)
21+
22+
23+
@pytest.fixture(scope="class")
def gateway_namespace(admin_client: DynamicClient) -> str:
    """Return the namespace LLMD gateways are created in.

    ``admin_client`` is not used directly; it is declared so the cluster
    client fixture is resolved before any gateway work begins.
    """
    return DEFAULT_GATEWAY_NAMESPACE
26+
27+
28+
@pytest.fixture(scope="class")
def llmd_s3_secret(
    admin_client: DynamicClient,
    unprivileged_model_namespace: Namespace,
    aws_access_key_id: str,
    aws_secret_access_key: str,
    models_s3_bucket_name: str,
    models_s3_bucket_region: str,
    models_s3_bucket_endpoint: str,
) -> Generator[Secret, None, None]:
    """Yield a class-scoped S3 credentials Secret in the model namespace.

    The secret is created via ``s3_endpoint_secret`` and torn down when the
    test class finishes.
    """
    secret_kwargs = {
        "client": admin_client,
        "name": "llmd-s3-secret",
        "namespace": unprivileged_model_namespace.name,
        "aws_access_key": aws_access_key_id,
        "aws_secret_access_key": aws_secret_access_key,
        "aws_s3_region": models_s3_bucket_region,
        "aws_s3_bucket": models_s3_bucket_name,
        "aws_s3_endpoint": models_s3_bucket_endpoint,
    }
    with s3_endpoint_secret(**secret_kwargs) as secret:
        yield secret
49+
50+
51+
@pytest.fixture(scope="class")
def llmd_s3_service_account(
    admin_client: DynamicClient,
    llmd_s3_secret: Secret,
) -> Generator[ServiceAccount, None, None]:
    """Yield a ServiceAccount bound to the LLMD S3 secret.

    Created in the same namespace as ``llmd_s3_secret`` so inference
    services can reference the credentials through the service account.
    """
    service_account = ServiceAccount(
        client=admin_client,
        namespace=llmd_s3_secret.namespace,
        name="llmd-s3-service-account",
        secrets=[{"name": llmd_s3_secret.name}],
    )
    with service_account as sa:
        yield sa
63+
64+
65+
@pytest.fixture(scope="class")
def llmd_gateway(
    request: FixtureRequest,
    admin_client: DynamicClient,
    gateway_namespace: str,
) -> Generator[Gateway, None, None]:
    """Yield a Gateway for LLMD tests.

    Indirect parametrization is accepted as either:
      * str  -- used directly as the gateway class name, or
      * dict -- ``gateway_class_name`` plus extra kwargs forwarded to
        ``create_gateway``.

    Previously an unparametrized use of this fixture raised AttributeError
    on ``request.param``; it now falls back to the OpenShift default
    gateway class.
    """
    # request.param only exists when the fixture is parametrized indirectly.
    param = getattr(request, "param", "openshift-default")
    if isinstance(param, str):
        gateway_class_name = param
        extra_kwargs = {}
    else:
        gateway_class_name = param.get("gateway_class_name", "openshift-default")
        extra_kwargs = {k: v for k, v in param.items() if k != "gateway_class_name"}

    with create_gateway(
        client=admin_client,
        namespace=gateway_namespace,
        gateway_class_name=gateway_class_name,
        wait_for_condition=True,
        timeout=Timeout.TIMEOUT_5MIN,
        **extra_kwargs,
    ) as gateway:
        yield gateway
86+
87+
88+
@pytest.fixture(scope="class")
def llmd_inference_service(
    request: FixtureRequest,
    admin_client: DynamicClient,
    unprivileged_model_namespace: Namespace,
) -> Generator[LLMInferenceService, None, None]:
    """Yield an LLMInferenceService backed by OCI storage.

    Indirect parametrization is accepted as either a str (name suffix) or a
    dict (``name_suffix`` plus overrides such as ``storage_uri``,
    ``container_image``, ``container_resources``, and any extra kwargs
    forwarded to ``create_llmisvc``).
    """
    if isinstance(request.param, str):
        name_suffix = request.param
        kwargs = {}
    else:
        name_suffix = request.param.get("name_suffix", "basic")
        kwargs = {k: v for k, v in request.param.items() if k != "name_suffix"}

    service_name = kwargs.get("name", f"llm-{name_suffix}")

    # If the test also requested a gateway, resolve it first so the gateway
    # exists before the inference service is created.
    if "llmd_gateway" in request.fixturenames:
        request.getfixturevalue("llmd_gateway")

    container_resources = kwargs.get(
        "container_resources",
        {
            "limits": {"cpu": "1", "memory": "10Gi"},
            "requests": {"cpu": "100m", "memory": "8Gi"},
        },
    )

    # BUGFIX: keys passed explicitly below must be filtered out of the
    # **kwargs spread; previously only "name" was excluded, so a parametrized
    # "storage_uri"/"container_image"/"container_resources" was passed twice
    # and create_llmisvc raised TypeError (multiple values for argument).
    # This also matches the filtering done by llmd_inference_service_s3.
    explicit_keys = {"name", "storage_uri", "container_image", "container_resources"}
    with create_llmisvc(
        client=admin_client,
        name=service_name,
        namespace=unprivileged_model_namespace.name,
        storage_uri=kwargs.get("storage_uri", VLLM_STORAGE_OCI),
        container_image=kwargs.get("container_image", VLLM_CPU_IMAGE),
        container_resources=container_resources,
        wait=True,
        timeout=Timeout.TIMEOUT_15MIN,
        **{k: v for k, v in kwargs.items() if k not in explicit_keys},
    ) as llm_service:
        yield llm_service
126+
127+
128+
@pytest.fixture(scope="class")
def llmd_inference_service_s3(
    request: FixtureRequest,
    admin_client: DynamicClient,
    unprivileged_model_namespace: Namespace,
    llmd_s3_secret: Secret,
    llmd_s3_service_account: ServiceAccount,
) -> Generator[LLMInferenceService, None, None]:
    """Yield an LLMInferenceService backed by S3 storage.

    Indirect parametrization is accepted as either a str (name suffix) or a
    dict (``name_suffix`` plus overrides such as ``storage_key``,
    ``storage_path``, ``container_image``, ``container_resources``).
    Storage credentials default to the class-scoped S3 secret and service
    account fixtures.
    """
    if isinstance(request.param, str):
        name_suffix = request.param
        kwargs = {}
    else:
        name_suffix = request.param.get("name_suffix", "s3")
        kwargs = {k: v for k, v in request.param.items() if k != "name_suffix"}

    service_name = kwargs.get("name", f"llm-{name_suffix}")

    # Default storage settings in one place; the str branch previously
    # seeded storage_path AND re-checked it below, which was redundant.
    kwargs.setdefault("storage_key", llmd_s3_secret.name)
    kwargs.setdefault("storage_path", DEFAULT_S3_STORAGE_PATH)

    container_resources = kwargs.get(
        "container_resources",
        {
            "limits": {"cpu": "1", "memory": "10Gi"},
            "requests": {"cpu": "100m", "memory": "8Gi"},
        },
    )

    # Keys passed explicitly below are filtered out of the **kwargs spread to
    # avoid duplicate keyword arguments to create_llmisvc.
    explicit_keys = {"name", "storage_key", "storage_path", "container_image", "container_resources"}
    with create_llmisvc(
        client=admin_client,
        name=service_name,
        namespace=unprivileged_model_namespace.name,
        storage_key=kwargs.get("storage_key"),
        storage_path=kwargs.get("storage_path"),
        container_image=kwargs.get("container_image", VLLM_CPU_IMAGE),
        container_resources=container_resources,
        service_account=llmd_s3_service_account.name,
        wait=True,
        timeout=Timeout.TIMEOUT_15MIN,
        **{k: v for k, v in kwargs.items() if k not in explicit_keys},
    ) as llm_service:
        yield llm_service
173+
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
import pytest
2+
3+
from tests.model_serving.model_server.llmd.utils import verify_llm_service_status, verify_gateway_status
4+
from utilities.constants import Protocols
5+
from utilities.llmd_utils import verify_inference_response_llmd
6+
7+
from utilities.llmd_constants import BASIC_LLMD_PARAMS
8+
from utilities.manifests.opt125m_cpu import OPT125M_CPU_INFERENCE_CONFIG
9+
10+
pytestmark = [
11+
pytest.mark.llmd_cpu,
12+
]
13+
14+
15+
@pytest.mark.parametrize(
    "unprivileged_model_namespace, llmd_gateway, llmd_inference_service",
    BASIC_LLMD_PARAMS,
    indirect=True,
)
class TestLLMDOCICPUInference:
    """LLMD inference tests against OCI model storage on a CPU vLLM runtime."""

    def test_llmd_oci(self, llmd_gateway, llmd_inference_service):
        # Both resources must report ready before sending any inference traffic.
        gateway_ready = verify_gateway_status(llmd_gateway)
        assert gateway_ready, "Gateway should be ready"

        service_ready = verify_llm_service_status(llmd_inference_service)
        assert service_ready, "LLMInferenceService should be ready"

        verify_inference_response_llmd(
            llm_service=llmd_inference_service,
            inference_config=OPT125M_CPU_INFERENCE_CONFIG,
            inference_type="chat_completions",
            protocol=Protocols.HTTP,
            use_default_query=True,
            insecure=True,
        )
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
import pytest
2+
3+
from tests.model_serving.model_server.llmd.utils import verify_llm_service_status, verify_gateway_status
4+
from utilities.constants import Protocols
5+
from utilities.llmd_utils import verify_inference_response_llmd
6+
7+
from utilities.manifests.opt125m_cpu import OPT125M_CPU_INFERENCE_CONFIG
8+
9+
pytestmark = [
10+
pytest.mark.llmd_cpu,
11+
]
12+
13+
14+
@pytest.mark.parametrize(
    "unprivileged_model_namespace, llmd_gateway, llmd_inference_service_s3",
    [
        ({"name": "llmd-s3-test"}, "openshift-default", {"storage_path": "opt-125m/"})
    ],
    indirect=True,
)
@pytest.mark.usefixtures("valid_aws_config")
class TestLLMDS3Inference:
    """LLMD inference tests against S3-backed model storage."""

    def test_llmd_s3(self, llmd_gateway, llmd_inference_service_s3):
        # Both resources must report ready before sending any inference traffic.
        gateway_ready = verify_gateway_status(llmd_gateway)
        assert gateway_ready, "Gateway should be ready"

        service_ready = verify_llm_service_status(llmd_inference_service_s3)
        assert service_ready, "LLMInferenceService should be ready"

        verify_inference_response_llmd(
            llm_service=llmd_inference_service_s3,
            inference_config=OPT125M_CPU_INFERENCE_CONFIG,
            inference_type="chat_completions",
            protocol=Protocols.HTTP,
            use_default_query=True,
            insecure=True,
        )
37+
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
"""
2+
Utility functions for LLM Deployment (LLMD) tests.
3+
4+
This module provides helper functions for LLMD test operations using ocp_resources.
5+
Follows the established model server utils pattern for consistency.
6+
"""
7+
8+
from ocp_resources.gateway import Gateway
9+
from ocp_resources.llm_inference_service import LLMInferenceService
10+
from simple_logger.logger import get_logger
11+
12+
13+
LOGGER = get_logger(name=__name__)
14+
15+
16+
def verify_gateway_status(gateway: Gateway) -> bool:
    """
    Verify that a Gateway is properly configured and programmed.

    Args:
        gateway (Gateway): The Gateway resource to verify

    Returns:
        bool: True if the gateway reports a Programmed=True condition,
        False otherwise (including when it does not exist or has no status)
    """
    if not gateway.exists:
        LOGGER.warning(f"Gateway {gateway.name} does not exist")
        return False

    # A freshly created Gateway may have no status yet; previously
    # `.get(...)` on a missing status raised instead of returning False.
    status = gateway.instance.status or {}
    for condition in status.get("conditions", []):
        if condition["type"] == "Programmed" and condition["status"] == "True":
            LOGGER.info(f"Gateway {gateway.name} is programmed and ready")
            return True

    LOGGER.warning(f"Gateway {gateway.name} is not in Programmed state")
    return False
38+
39+
40+
def verify_llm_service_status(llm_service: LLMInferenceService) -> bool:
    """
    Verify that an LLMInferenceService is properly configured and ready.

    Args:
        llm_service (LLMInferenceService): The LLMInferenceService resource to verify

    Returns:
        bool: True if the service reports a Ready=True condition,
        False otherwise (including when it does not exist or has no status)
    """
    if not llm_service.exists:
        LOGGER.warning(f"LLMInferenceService {llm_service.name} does not exist")
        return False

    # A freshly created service may have no status yet; previously
    # `.get(...)` on a missing status raised instead of returning False.
    status = llm_service.instance.status or {}
    for condition in status.get("conditions", []):
        if condition["type"] == "Ready" and condition["status"] == "True":
            LOGGER.info(f"LLMInferenceService {llm_service.name} is ready")
            return True

    LOGGER.warning(f"LLMInferenceService {llm_service.name} is not in Ready state")
    return False
62+

utilities/llmd_constants.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
"""Centralized constants for LLMD (LLM Deployment) utilities and tests."""

from utilities.constants import Timeout

# Gateway defaults used when tests do not override them via parametrization.
DEFAULT_GATEWAY_NAME = "openshift-ai-inference"
DEFAULT_GATEWAY_NAMESPACE = "openshift-ingress"
OPENSHIFT_DEFAULT_GATEWAY_CLASS = "openshift-default"

# KServe gateway label key and the default ingress gateway name.
KSERVE_GATEWAY_LABEL = "serving.kserve.io/gateway"
KSERVE_INGRESS_GATEWAY = "kserve-ingress-gateway"

# Inference request defaults (OpenAI-style chat completions endpoint).
DEFAULT_LLM_ENDPOINT = "/v1/chat/completions"
DEFAULT_MAX_TOKENS = 50
DEFAULT_TEMPERATURE = 0.0
DEFAULT_TIMEOUT = Timeout.TIMEOUT_30SEC

# Model/runtime defaults: an opt-125m model served by a CPU-only vLLM image.
VLLM_STORAGE_OCI = "oci://quay.io/mwaykole/test:opt-125m"
VLLM_CPU_IMAGE = "quay.io/pierdipi/vllm-cpu:latest"
DEFAULT_LLMD_REPLICAS = 1
DEFAULT_S3_STORAGE_PATH = "opt-125m"

# Aliases for readability at call sites.
DEFAULT_STORAGE_URI = VLLM_STORAGE_OCI
DEFAULT_CONTAINER_IMAGE = VLLM_CPU_IMAGE

# Container resource defaults for LLMD pods.
DEFAULT_CPU_LIMIT = "1"
DEFAULT_MEMORY_LIMIT = "10Gi"
DEFAULT_CPU_REQUEST = "100m"
DEFAULT_MEMORY_REQUEST = "8Gi"

# Shared indirect-parametrization tuple for basic LLMD tests; the elements
# appear to map to (namespace kwargs, gateway class name, service name
# suffix) — confirm against the consuming parametrize declarations.
BASIC_LLMD_PARAMS = [
    ({"name": "llmd-comprehensive-test"}, "openshift-default", "basic")
]

0 commit comments

Comments
 (0)