Skip to content

Commit 61eaaa7

Browse files
committed
refactoring llmd tests using config-based approach
1 parent 6d3f00a commit 61eaaa7

24 files changed

+1916
-0
lines changed
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
"""llm-d test module for OpenDataHub and OpenShift AI."""
Lines changed: 301 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,301 @@
1+
import logging
from collections.abc import Generator
from contextlib import ExitStack, contextmanager
from typing import Any

import pytest
import yaml

# Quiet the noisy timeout_sampler polling logs.
# NOTE(review): placed before the ocp_resources/utilities imports below — presumably so
# any import-time sampling stays quiet — at the cost of an E402 import-order violation.
# Confirm intent before reordering into the import block.
logging.getLogger("timeout_sampler").setLevel(logging.WARNING)
from _pytest.fixtures import FixtureRequest
from kubernetes.dynamic import DynamicClient
from ocp_resources.config_map import ConfigMap
from ocp_resources.gateway import Gateway
from ocp_resources.llm_inference_service import LLMInferenceService
from ocp_resources.namespace import Namespace
from ocp_resources.role import Role
from ocp_resources.role_binding import RoleBinding
from ocp_resources.service_account import ServiceAccount
from simple_logger.logger import get_logger

from tests.model_serving.model_server.llmd_v2.llmd_configs import TinyLlamaOciConfig
from utilities.constants import Timeout
from utilities.infra import create_inference_token, s3_endpoint_secret, update_configmap_data
from utilities.llmd_constants import LLMDGateway
from utilities.llmd_utils import create_llmd_gateway
from utilities.logger import RedactedString

# Module-level logger used by the fixtures and helpers below.
LOGGER = get_logger(name=__name__)
29+
30+
31+
# ===========================================
# Gateway
# ===========================================
@pytest.fixture(scope="session", autouse=True)
def shared_llmd_gateway(admin_client: DynamicClient) -> Generator[Gateway]:
    """Session-wide LLMD gateway shared by every test in the run."""
    gateway_kwargs = {
        "client": admin_client,
        "namespace": LLMDGateway.DEFAULT_NAMESPACE,
        "gateway_class_name": LLMDGateway.DEFAULT_CLASS,
        "wait_for_condition": True,
        "timeout": Timeout.TIMEOUT_1MIN,
        "teardown": True,
    }
    with create_llmd_gateway(**gateway_kwargs) as llmd_gateway:
        yield llmd_gateway
46+
47+
48+
# ===========================================
# Storage — S3 secret + service account
# ===========================================
@pytest.fixture(scope="class")
def s3_service_account(
    request: FixtureRequest,
    admin_client: DynamicClient,
    unprivileged_model_namespace: Namespace,
) -> Generator[str]:
    """Create an S3 credentials secret plus a service account that references it.

    Resolved automatically for S3-backed configs; yields the service-account name.
    """
    ns_name = unprivileged_model_namespace.name
    with ExitStack() as stack:
        s3_secret = stack.enter_context(
            s3_endpoint_secret(
                client=admin_client,
                name="llmd-s3-secret",
                namespace=ns_name,
                aws_access_key=request.getfixturevalue(argname="aws_access_key_id"),
                aws_secret_access_key=request.getfixturevalue(argname="aws_secret_access_key"),
                aws_s3_region=request.getfixturevalue(argname="models_s3_bucket_region"),
                aws_s3_bucket=request.getfixturevalue(argname="models_s3_bucket_name"),
                aws_s3_endpoint=request.getfixturevalue(argname="models_s3_bucket_endpoint"),
            )
        )
        service_account = stack.enter_context(
            ServiceAccount(
                client=admin_client,
                namespace=ns_name,
                name="llmd-s3-service-account",
                secrets=[{"name": s3_secret.name}],
            )
        )
        yield service_account.name
80+
81+
82+
# ===========================================
# LLMInferenceService creation
# ===========================================
@pytest.fixture(scope="class")
def llmisvc(
    request: FixtureRequest,
    admin_client: DynamicClient,
    unprivileged_model_namespace: Namespace,
) -> Generator[LLMInferenceService]:
    """LLMInferenceService fixture driven by a config class.

    Usage:
        NAMESPACE = ns_from_file(__file__)

        @pytest.mark.parametrize(
            "unprivileged_model_namespace, llmisvc",
            [({"name": NAMESPACE}, SomeConfig)],
            indirect=True,
        )
    """
    config_cls = request.param

    # S3-backed configs need the secret-bearing service account; all others run without one.
    service_account = (
        request.getfixturevalue(argname="s3_service_account")
        if config_cls.storage_uri.startswith("s3://")
        else None
    )

    with _create_llmisvc_from_config(
        config_cls=config_cls,
        namespace=unprivileged_model_namespace.name,
        client=admin_client,
        service_account=service_account,
    ) as llm_service:
        yield llm_service
113+
114+
115+
@pytest.fixture(scope="class")
def llmisvc_with_auth(
    admin_client: DynamicClient,
    unprivileged_model_namespace: Namespace,
) -> Generator[list[dict]]:
    """Create two auth-enabled LLMISVCs with separate tokens for cross-auth testing.

    Yields a list of ``{"service": ..., "token": ...}`` dicts, one per deployed service.
    """
    ns_name = unprivileged_model_namespace.name
    service_token_pairs: list[dict] = []

    with ExitStack() as stack:
        for idx in range(2):
            sa = stack.enter_context(
                ServiceAccount(
                    client=admin_client,
                    namespace=ns_name,
                    name=f"auth-sa-{idx}",
                )
            )
            auth_config = TinyLlamaOciConfig.with_overrides(
                name=f"llmisvc-auth-{idx}",
            )
            svc = stack.enter_context(
                _create_llmisvc_from_config(
                    config_cls=auth_config,
                    namespace=ns_name,
                    client=admin_client,
                    service_account=sa.name,
                    extra_annotations={"security.opendatahub.io/enable-auth": "true"},
                )
            )
            # RBAC: each SA may only "get" its own LLMInferenceService.
            view_role = stack.enter_context(
                Role(
                    client=admin_client,
                    name=f"llmisvc-auth-{idx}-view",
                    namespace=ns_name,
                    rules=[
                        {
                            "apiGroups": [svc.api_group],
                            "resources": ["llminferenceservices"],
                            "verbs": ["get"],
                            "resourceNames": [svc.name],
                        }
                    ],
                )
            )
            stack.enter_context(
                RoleBinding(
                    client=admin_client,
                    namespace=ns_name,
                    name=f"auth-sa-{idx}-view",
                    role_ref_name=view_role.name,
                    role_ref_kind=view_role.kind,
                    subjects_kind="ServiceAccount",
                    subjects_name=sa.name,
                )
            )
            token = RedactedString(value=create_inference_token(model_service_account=sa))
            service_token_pairs.append({"service": svc, "token": token})
        yield service_token_pairs
173+
174+
175+
# ===========================================
# Auth — SA + RBAC + token
# ===========================================
@pytest.fixture(scope="class")
def llmisvc_token(
    admin_client: DynamicClient,
    llmisvc: LLMInferenceService,
) -> Generator[str]:
    """Create a dedicated SA with "get" RBAC on *llmisvc* and yield its auth token."""
    sa_name = f"{llmisvc.name}-auth-sa"
    with ExitStack() as stack:
        sa = stack.enter_context(
            ServiceAccount(client=admin_client, name=sa_name, namespace=llmisvc.namespace)
        )
        view_role = stack.enter_context(
            Role(
                client=admin_client,
                name=f"{llmisvc.name}-view",
                namespace=llmisvc.namespace,
                rules=[
                    {
                        "apiGroups": [llmisvc.api_group],
                        "resources": ["llminferenceservices"],
                        "verbs": ["get"],
                        "resourceNames": [llmisvc.name],
                    },
                ],
            )
        )
        stack.enter_context(
            RoleBinding(
                client=admin_client,
                namespace=llmisvc.namespace,
                name=f"{sa_name}-view",
                role_ref_name=view_role.name,
                role_ref_kind=view_role.kind,
                subjects_kind="ServiceAccount",
                subjects_name=sa_name,
            )
        )
        yield RedactedString(value=create_inference_token(model_service_account=sa))
211+
212+
213+
# ===========================================
# Monitoring
# ===========================================
@pytest.fixture(scope="session", autouse=True)
def llmd_user_workload_monitoring_config_map(
    admin_client: DynamicClient, cluster_monitoring_config: ConfigMap
) -> Generator[ConfigMap]:
    """Ephemeral user workload monitoring for LLMD tests.

    ``cluster_monitoring_config`` is requested (though unused directly) so the
    cluster-level monitoring config is applied before this one.
    """
    prometheus_settings = {
        "logLevel": "debug",
        "retention": "15d",
    }
    monitoring_data = {"config.yaml": yaml.dump({"prometheus": prometheus_settings})}

    with update_configmap_data(
        client=admin_client,
        name="user-workload-monitoring-config",
        namespace="openshift-user-workload-monitoring",
        data=monitoring_data,
    ) as config_map:
        yield config_map
237+
238+
239+
# ===========================================
# Helpers (not fixtures)
# ===========================================
@contextmanager
def _create_llmisvc_from_config(
    config_cls: type,
    namespace: str,
    client: DynamicClient,
    service_account: str | None = None,
    extra_annotations: dict[str, str] | None = None,
) -> Generator[LLMInferenceService, Any]:
    """Create an LLMInferenceService from a config class.

    Args:
        config_cls: Config class (or ``with_overrides`` result) describing the deployment.
        namespace: Target namespace for the service.
        client: Cluster client used to create the resource.
        service_account: Optional SA name wired into the main (and prefill) template.
        extra_annotations: Annotations merged on top of the config's own.
    """
    LOGGER.info(f"\n{config_cls.describe(namespace=namespace)}")

    model: dict[str, Any] = {"uri": config_cls.storage_uri}
    if config_cls.model_name:
        model["name"] = config_cls.model_name

    # Only truthy container settings make it into the spec.
    main_container: dict[str, Any] = {"name": "main"}
    optional_container_fields = {
        "image": config_cls.container_image,
        "resources": config_cls.container_resources(),
        "env": config_cls.container_env(),
        "livenessProbe": config_cls.liveness_probe(),
        "readinessProbe": config_cls.readiness_probe(),
    }
    for field_name, field_value in optional_container_fields.items():
        if field_value:
            main_container[field_name] = field_value

    template: dict[str, Any] = {
        "configRef": config_cls.template_config_ref,
        "containers": [main_container],
    }
    if service_account:
        template["serviceAccountName"] = service_account

    annotations = config_cls.annotations()
    annotations.update(extra_annotations or {})

    svc_kwargs: dict[str, Any] = {
        "client": client,
        "name": config_cls.name,
        "namespace": namespace,
        "annotations": annotations,
        "label": config_cls.labels(),
        "teardown": True,
        "model": model,
        "replicas": config_cls.replicas,
        "router": config_cls.router_config(),
        "template": template,
    }

    prefill = config_cls.prefill_config()
    if prefill is not None:
        # The prefill pods need the same SA when one is in play.
        if service_account and "template" in prefill:
            prefill["template"]["serviceAccountName"] = service_account
        svc_kwargs["prefill"] = prefill

    with LLMInferenceService(**svc_kwargs) as llm_service:
        yield llm_service
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
# llmd_configs
2+
3+
One config class per LLMInferenceService test scenario. Each class is the single source of truth for its deployment.
4+
5+
## Hierarchy
6+
7+
```
8+
LLMISvcConfig (config_base.py) # Base — defaults, helpers
9+
├── CpuConfig (config_base.py) # CPU image, env, resources
10+
│ ├── TinyLlamaOciConfig # OCI storage
11+
│ ├── TinyLlamaS3Config # S3 storage
12+
│ └── Opt125mHfConfig # HuggingFace storage
13+
└── GpuConfig (config_base.py) # GPU resources
14+
├── QwenS3Config # S3 storage
15+
│ ├── PrefillDecodeConfig # prefill-decode disaggregation
16+
│ └── EstimatedPrefixCacheConfig # estimated prefix cache
17+
└── QwenHfConfig # HuggingFace storage
18+
└── PrecisePrefixCacheConfig # precise prefix cache
19+
```
20+
21+
Model+storage classes are in `config_models.py`. Feature configs are in their own files.
22+
23+
## Usage
24+
25+
```python
26+
@pytest.mark.parametrize("llmisvc", [TinyLlamaOciConfig], indirect=True)
27+
def test_something(self, llmisvc):
28+
...
29+
```
30+
31+
Override inline with `with_overrides()`:
32+
33+
```python
34+
@pytest.mark.parametrize("llmisvc", [TinyLlamaOciConfig.with_overrides(replicas=2)], indirect=True)
35+
```
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
"""Public API of the ``llmd_configs`` package: one config class per LLMISVC test scenario."""

from .config_base import LLMISvcConfig
from .config_estimated_prefix_cache import EstimatedPrefixCacheConfig
from .config_models import Opt125mHfConfig, QwenHfConfig, QwenS3Config, TinyLlamaOciConfig, TinyLlamaS3Config
from .config_precise_prefix_cache import PrecisePrefixCacheConfig
from .config_prefill_decode import PrefillDecodeConfig

# Alphabetical; keep in sync with the imports above.
__all__ = [
    "EstimatedPrefixCacheConfig",
    "LLMISvcConfig",
    "Opt125mHfConfig",
    "PrecisePrefixCacheConfig",
    "PrefillDecodeConfig",
    "QwenHfConfig",
    "QwenS3Config",
    "TinyLlamaOciConfig",
    "TinyLlamaS3Config",
]

0 commit comments

Comments
 (0)