Skip to content

Commit c7c43d2

Browse files
committed
refactoring llmd tests using config-based approach
1 parent 6d3f00a commit c7c43d2

21 files changed

+1705
-0
lines changed
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
"""llm-d test module for OpenDataHub and OpenShift AI."""
Lines changed: 290 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,290 @@
1+
import logging
2+
from collections import namedtuple
3+
from collections.abc import Generator
4+
from contextlib import ExitStack, contextmanager
5+
from typing import Any
6+
7+
import pytest
8+
import yaml
9+
10+
logging.getLogger("timeout_sampler").setLevel(logging.WARNING)
11+
from _pytest.fixtures import FixtureRequest
12+
from kubernetes.dynamic import DynamicClient
13+
from ocp_resources.config_map import ConfigMap
14+
from ocp_resources.gateway import Gateway
15+
from ocp_resources.llm_inference_service import LLMInferenceService
16+
from ocp_resources.namespace import Namespace
17+
from ocp_resources.role import Role
18+
from ocp_resources.role_binding import RoleBinding
19+
from ocp_resources.service_account import ServiceAccount
20+
from simple_logger.logger import get_logger
21+
22+
from tests.model_serving.model_server.llmd_v2.llmd_configs import TinyLlamaOciConfig
23+
from tests.model_serving.model_server.llmd_v2.utils import wait_for_llmisvc
24+
from utilities.constants import Timeout
25+
from utilities.infra import create_inference_token, s3_endpoint_secret, update_configmap_data
26+
from utilities.llmd_constants import LLMDGateway
27+
from utilities.llmd_utils import create_llmd_gateway
28+
from utilities.logger import RedactedString
29+
30+
LOGGER = get_logger(name=__name__)
31+
32+
# Pairs an auth-enabled LLMInferenceService with the token minted for it.
AuthEntry = namedtuple("AuthEntry", "service token")
33+
34+
35+
# ===========================================
36+
# Gateway
37+
# ===========================================
38+
@pytest.fixture(scope="session", autouse=True)
def shared_llmd_gateway(admin_client: DynamicClient) -> Generator[Gateway]:
    """Session-wide LLMD gateway shared by every test in this package.

    Created once in the default namespace with the default gateway class,
    waited on until its condition is met, and torn down at session end.
    """
    gateway_kwargs = {
        "client": admin_client,
        "namespace": LLMDGateway.DEFAULT_NAMESPACE,
        "gateway_class_name": LLMDGateway.DEFAULT_CLASS,
        "wait_for_condition": True,
        "timeout": Timeout.TIMEOUT_1MIN,
        "teardown": True,
    }
    with create_llmd_gateway(**gateway_kwargs) as shared_gateway:
        yield shared_gateway
50+
51+
52+
# ===========================================
53+
# Storage — S3 secret + service account
54+
# ===========================================
55+
@pytest.fixture(scope="class")
def s3_service_account(
    request: FixtureRequest,
    admin_client: DynamicClient,
    unprivileged_model_namespace: Namespace,
) -> Generator[str]:
    """Provision an S3 credentials secret plus a service account bound to it.

    Yields the service account name; both resources are cleaned up on exit.
    Resolved automatically by the ``llmisvc`` fixture for ``s3://`` configs.
    """
    ns_name = unprivileged_model_namespace.name
    with s3_endpoint_secret(
        client=admin_client,
        name="llmd-s3-secret",
        namespace=ns_name,
        aws_access_key=request.getfixturevalue(argname="aws_access_key_id"),
        aws_secret_access_key=request.getfixturevalue(argname="aws_secret_access_key"),
        aws_s3_region=request.getfixturevalue(argname="models_s3_bucket_region"),
        aws_s3_bucket=request.getfixturevalue(argname="models_s3_bucket_name"),
        aws_s3_endpoint=request.getfixturevalue(argname="models_s3_bucket_endpoint"),
    ) as secret:
        with ServiceAccount(
            client=admin_client,
            namespace=ns_name,
            name="llmd-s3-service-account",
            secrets=[{"name": secret.name}],
        ) as sa:
            yield sa.name
84+
85+
86+
# ===========================================
87+
# LLMInferenceService creation
88+
# ===========================================
89+
@pytest.fixture(scope="class")
def llmisvc(
    request: FixtureRequest,
    admin_client: DynamicClient,
    unprivileged_model_namespace: Namespace,
) -> Generator[LLMInferenceService]:
    """LLMInferenceService fixture driven by a config class.

    Usage:
        NAMESPACE = ns_from_file(__file__)

        @pytest.mark.parametrize(
            "unprivileged_model_namespace, llmisvc",
            [({"name": NAMESPACE}, SomeConfig)],
            indirect=True,
        )
    """
    config_cls = request.param

    # S3-backed models additionally need the secret-bound service account.
    sa_name = (
        request.getfixturevalue(argname="s3_service_account")
        if config_cls.storage_uri.startswith("s3://")
        else None
    )

    with _create_llmisvc_from_config(
        config_cls=config_cls,
        namespace=unprivileged_model_namespace.name,
        client=admin_client,
        service_account=sa_name,
    ) as service:
        yield service
117+
118+
119+
@pytest.fixture(scope="class")
def llmisvc_auth_pair(
    admin_client: DynamicClient,
    unprivileged_model_namespace: Namespace,
) -> Generator[tuple[AuthEntry, AuthEntry]]:
    """Two auth-enabled LLMISVCs with independent tokens for cross-auth testing."""
    ns_name = unprivileged_model_namespace.name
    with ExitStack() as stack:
        pair: list[AuthEntry] = []
        for idx in range(2):
            auth_cfg = TinyLlamaOciConfig.with_overrides(
                name=f"llmisvc-auth-{idx}",
                enable_auth=True,
            )
            service = stack.enter_context(
                _create_llmisvc_from_config(
                    config_cls=auth_cfg,
                    namespace=ns_name,
                    client=admin_client,
                )
            )
            auth_token = stack.enter_context(
                _create_auth_resources(
                    client=admin_client,
                    namespace=ns_name,
                    svc=service,
                    sa_name=f"auth-sa-{idx}",
                )
            )
            pair.append(AuthEntry(service=service, token=auth_token))
        yield (pair[0], pair[1])
150+
151+
152+
# ===========================================
153+
# Auth — SA + RBAC + token
154+
# ===========================================
155+
@pytest.fixture(scope="class")
def llmisvc_token(
    admin_client: DynamicClient,
    llmisvc: LLMInferenceService,
) -> Generator[str]:
    """Auth token for ``llmisvc``, backed by a dedicated SA with view RBAC."""
    auth_resources = _create_auth_resources(
        client=admin_client,
        namespace=llmisvc.namespace,
        svc=llmisvc,
        sa_name=f"{llmisvc.name}-auth-sa",
    )
    with auth_resources as inference_token:
        yield inference_token
168+
169+
170+
# ===========================================
171+
# Monitoring
172+
# ===========================================
173+
@pytest.fixture(scope="session", autouse=True)
def llmd_user_workload_monitoring_config_map(
    admin_client: DynamicClient, cluster_monitoring_config: ConfigMap
) -> Generator[ConfigMap]:
    """Ephemeral user workload monitoring for LLMD tests.

    Depends on ``cluster_monitoring_config`` so cluster monitoring is set up
    first; restores the config map on teardown via ``update_configmap_data``.
    """
    prometheus_settings = {
        "prometheus": {
            "logLevel": "debug",
            "retention": "15d",
        }
    }
    monitoring_data = {"config.yaml": yaml.dump(prometheus_settings)}

    with update_configmap_data(
        client=admin_client,
        name="user-workload-monitoring-config",
        namespace="openshift-user-workload-monitoring",
        data=monitoring_data,
    ) as monitoring_cm:
        yield monitoring_cm
194+
195+
196+
# ===========================================
197+
# Helpers (not fixtures)
198+
# ===========================================
199+
@contextmanager
def _create_auth_resources(
    client: DynamicClient,
    namespace: str,
    svc: LLMInferenceService,
    sa_name: str,
) -> Generator[RedactedString, Any]:
    """Create SA + Role + RoleBinding and yield an auth token.

    The Role grants ``get`` on the single named LLMInferenceService only;
    all three resources are removed when the context exits.
    """
    view_rule = {
        "apiGroups": [svc.api_group],
        "resources": ["llminferenceservices"],
        "verbs": ["get"],
        "resourceNames": [svc.name],
    }
    with ServiceAccount(client=client, namespace=namespace, name=sa_name) as sa:
        with Role(
            client=client,
            name=f"{svc.name}-view",
            namespace=namespace,
            rules=[view_rule],
        ) as role:
            with RoleBinding(
                client=client,
                namespace=namespace,
                name=f"{sa_name}-view",
                role_ref_name=role.name,
                role_ref_kind=role.kind,
                subjects_kind="ServiceAccount",
                subjects_name=sa_name,
            ):
                yield RedactedString(value=create_inference_token(model_service_account=sa))
233+
234+
235+
@contextmanager
def _create_llmisvc_from_config(
    config_cls: type,
    namespace: str,
    client: DynamicClient,
    service_account: str | None = None,
) -> Generator[LLMInferenceService, Any]:
    """Create an LLMInferenceService from a config class and wait for readiness.

    Args:
        config_cls: Config class (or ``with_overrides`` product) describing the
            deployment: storage URI, image, resources, probes, router/prefill
            settings, replicas, labels and annotations.
        namespace: Namespace to deploy into.
        client: Dynamic client used to create the resource.
        service_account: Optional SA name wired into the main (and, when
            present, prefill) pod template — used for S3-backed models.

    Yields:
        The ready LLMInferenceService; it is torn down when the context exits.
    """
    # Lazy %-style logging args: describe() output is only formatted when the
    # INFO level is actually enabled (avoids eager f-string evaluation).
    LOGGER.info("\n%s", config_cls.describe(namespace=namespace))

    model: dict[str, Any] = {"uri": config_cls.storage_uri}
    if config_cls.model_name:
        model["name"] = config_cls.model_name

    # Only include container fields the config actually sets (falsy -> omitted).
    main_container: dict[str, Any] = {"name": "main"}
    main_container.update({
        k: v
        for k, v in {
            "image": config_cls.container_image,
            "resources": config_cls.container_resources(),
            "env": config_cls.container_env(),
            "livenessProbe": config_cls.liveness_probe(),
            "readinessProbe": config_cls.readiness_probe(),
        }.items()
        if v
    })

    template: dict[str, Any] = {
        "configRef": config_cls.template_config_ref,
        "containers": [main_container],
    }
    if service_account:
        template["serviceAccountName"] = service_account

    prefill = config_cls.prefill_config()

    svc_kwargs: dict[str, Any] = {
        "client": client,
        "name": config_cls.name,
        "namespace": namespace,
        "annotations": config_cls.annotations(),
        "label": config_cls.labels(),
        "teardown": True,
        "model": model,
        "replicas": config_cls.replicas,
        "router": config_cls.router_config(),
        "template": template,
    }
    if prefill is not None:
        # NOTE(review): mutates the dict returned by prefill_config() in place —
        # assumes the config builds a fresh dict per call; confirm it is not cached.
        if service_account and "template" in prefill:
            prefill["template"]["serviceAccountName"] = service_account
        svc_kwargs["prefill"] = prefill

    with LLMInferenceService(**svc_kwargs) as llm_service:
        wait_for_llmisvc(llmisvc=llm_service)
        yield llm_service
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
# llmd_configs
2+
3+
One config class per LLMInferenceService test scenario. Each class is the single source of truth for its deployment.
4+
5+
## Hierarchy
6+
7+
```
8+
LLMISvcConfig (config_base.py) # Base — defaults, helpers
9+
├── CpuConfig (config_base.py) # CPU image, env, resources
10+
│ ├── TinyLlamaOciConfig # OCI storage
11+
│ ├── TinyLlamaS3Config # S3 storage
12+
│ └── Opt125mHfConfig # HuggingFace storage
13+
└── GpuConfig (config_base.py) # GPU resources
14+
├── QwenS3Config # S3 storage
15+
│ ├── PrefillDecodeConfig # prefill-decode disaggregation
16+
│ └── EstimatedPrefixCacheConfig # estimated prefix cache
17+
└── QwenHfConfig # HuggingFace storage
18+
└── PrecisePrefixCacheConfig # precise prefix cache
19+
```
20+
21+
Model+storage classes are in `config_models.py`. Feature configs are in their own files.
22+
23+
## Usage
24+
25+
```python
26+
@pytest.mark.parametrize("llmisvc", [TinyLlamaOciConfig], indirect=True)
27+
def test_something(self, llmisvc):
28+
...
29+
```
30+
31+
Override inline with `with_overrides()`:
32+
33+
```python
34+
@pytest.mark.parametrize("llmisvc", [TinyLlamaOciConfig.with_overrides(replicas=2)], indirect=True)
35+
```
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
from .config_base import LLMISvcConfig
2+
from .config_estimated_prefix_cache import EstimatedPrefixCacheConfig
3+
from .config_models import Opt125mHfConfig, QwenHfConfig, QwenS3Config, TinyLlamaOciConfig, TinyLlamaS3Config
4+
from .config_precise_prefix_cache import PrecisePrefixCacheConfig
5+
from .config_prefill_decode import PrefillDecodeConfig
6+
7+
# Public surface of the llmd_configs package; kept alphabetically sorted.
__all__ = [
    "EstimatedPrefixCacheConfig",
    "LLMISvcConfig",
    "Opt125mHfConfig",
    "PrecisePrefixCacheConfig",
    "PrefillDecodeConfig",
    "QwenHfConfig",
    "QwenS3Config",
    "TinyLlamaOciConfig",
    "TinyLlamaS3Config",
]

0 commit comments

Comments
 (0)