Skip to content

Commit 32c2fda

Browse files
committed
Upgrade tests for llm-d
1 parent 9d4eb3a commit 32c2fda

File tree

5 files changed

+301
-2
lines changed

5 files changed

+301
-2
lines changed

tests/model_serving/model_server/llmd/conftest.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,7 @@ def _create_llmisvc_from_config(
247247
namespace: str,
248248
client: DynamicClient,
249249
service_account: str | None = None,
250+
teardown: bool = True,
250251
) -> Generator[LLMInferenceService, Any]:
251252
"""Create an LLMInferenceService from a config class."""
252253
LOGGER.info(f"\n{config_cls.describe(namespace=namespace)}")
@@ -283,7 +284,7 @@ def _create_llmisvc_from_config(
283284
"namespace": namespace,
284285
"annotations": config_cls.annotations(),
285286
"label": config_cls.labels(),
286-
"teardown": True,
287+
"teardown": teardown,
287288
"model": model,
288289
"replicas": config_cls.replicas,
289290
"router": config_cls.router_config(),

tests/model_serving/model_server/llmd/llmd_configs/config_base.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ class LLMISvcConfig:
1818
container_image = None
1919
template_config_ref = "kserve-config-llm-template"
2020
enable_auth = False
21-
wait_timeout = 180
21+
wait_timeout = 240
2222

2323
@classmethod
2424
def container_resources(cls):

tests/model_serving/model_server/upgrade/conftest.py

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,9 @@
55
import yaml
66
from kubernetes.dynamic import DynamicClient
77
from ocp_resources.config_map import ConfigMap
8+
from ocp_resources.gateway import Gateway
89
from ocp_resources.inference_service import InferenceService
10+
from ocp_resources.llm_inference_service import LLMInferenceService
911
from ocp_resources.namespace import Namespace
1012
from ocp_resources.role import Role
1113
from ocp_resources.role_binding import RoleBinding
@@ -23,6 +25,7 @@
2325
ModelVersion,
2426
Protocols,
2527
RuntimeTemplates,
28+
Timeout,
2629
)
2730
from utilities.inference_utils import create_isvc
2831
from utilities.infra import (
@@ -32,6 +35,8 @@
3235
s3_endpoint_secret,
3336
update_configmap_data,
3437
)
38+
from utilities.llmd_constants import KServeGateway, LLMDGateway
39+
from utilities.llmd_utils import create_llmd_gateway
3540
from utilities.logger import RedactedString
3641
from utilities.serving_runtime import ServingRuntimeFromTemplate
3742

@@ -42,6 +47,7 @@
4247
MODEL_CAR_UPGRADE_NAMESPACE = "upgrade-model-car"
4348
METRICS_UPGRADE_NAMESPACE = "upgrade-metrics"
4449
PRIVATE_ENDPOINT_UPGRADE_NAMESPACE = "upgrade-private-endpoint"
50+
LLMD_UPGRADE_NAMESPACE = "upgrade-llmd"
4551
S3_CONNECTION = "upgrade-connection"
4652

4753

@@ -765,3 +771,88 @@ def private_endpoint_inference_service_fixture(
765771
**isvc_kwargs,
766772
) as isvc:
767773
yield isvc
774+
775+
776+
# LLMD Upgrade Fixtures
777+
@pytest.fixture(scope="session")
def llmd_namespace_fixture(
    pytestconfig: pytest.Config,
    admin_client: DynamicClient,
    teardown_resources: bool,
) -> Generator[Namespace, Any, Any]:
    """Yield the namespace used by the LLMD upgrade tests.

    Post-upgrade runs reattach to the namespace created during the
    pre-upgrade phase and delete it after the tests complete; pre-upgrade
    runs create it fresh via ``create_ns``.
    """
    ns = Namespace(client=admin_client, name=LLMD_UPGRADE_NAMESPACE)

    if pytestconfig.option.post_upgrade:
        # Post-upgrade phase: the namespace already exists from pre-upgrade.
        yield ns
        # NOTE(review): cleanup here runs regardless of `teardown_resources`,
        # while the pre-upgrade branch honors it — confirm the asymmetry is
        # intended (post-upgrade being the final phase).
        ns.clean_up()
        return

    # Pre-upgrade phase: create the namespace, tearing down per the flag.
    with create_ns(
        admin_client=admin_client,
        name=LLMD_UPGRADE_NAMESPACE,
        model_mesh_enabled=False,
        add_dashboard_label=True,
        teardown=teardown_resources,
    ) as created_ns:
        yield created_ns
798+
799+
800+
@pytest.fixture(scope="session")
def llmd_gateway_fixture(
    pytestconfig: pytest.Config,
    admin_client: DynamicClient,
    teardown_resources: bool,
) -> Generator[Gateway, Any, Any]:
    """Yield the shared LLMD Gateway used across both upgrade test phases."""
    gateway = Gateway(
        client=admin_client,
        name=LLMDGateway.DEFAULT_NAME,
        namespace=LLMDGateway.DEFAULT_NAMESPACE,
        api_group=KServeGateway.API_GROUP,
    )

    if pytestconfig.option.post_upgrade:
        # Post-upgrade phase: reuse the gateway created pre-upgrade, then
        # remove it once the post-upgrade tests finish.
        yield gateway
        gateway.clean_up()
        return

    # Pre-upgrade phase: create the gateway and wait for its readiness
    # condition before handing it to the tests.
    with create_llmd_gateway(
        client=admin_client,
        namespace=LLMDGateway.DEFAULT_NAMESPACE,
        gateway_class_name=LLMDGateway.DEFAULT_CLASS,
        wait_for_condition=True,
        timeout=Timeout.TIMEOUT_1MIN,
        teardown=teardown_resources,
    ) as created_gateway:
        yield created_gateway
827+
828+
829+
@pytest.fixture(scope="session")
def llmd_inference_service_fixture(
    pytestconfig: pytest.Config,
    admin_client: DynamicClient,
    llmd_namespace_fixture: Namespace,
    llmd_gateway_fixture: Gateway,
    teardown_resources: bool,
) -> Generator[LLMInferenceService, Any, Any]:
    """Yield the TinyLlama (OCI) LLMInferenceService used by the upgrade tests."""
    # Imported inside the fixture to avoid an import cycle between the
    # upgrade and llmd conftest modules at collection time.
    from tests.model_serving.model_server.llmd.conftest import _create_llmisvc_from_config
    from tests.model_serving.model_server.llmd.llmd_configs import TinyLlamaOciConfig

    config_cls = TinyLlamaOciConfig
    llm_svc = LLMInferenceService(
        client=admin_client,
        name=config_cls.name,
        namespace=llmd_namespace_fixture.name,
    )

    if pytestconfig.option.post_upgrade:
        # Post-upgrade phase: reattach to the service deployed pre-upgrade.
        yield llm_svc
        llm_svc.clean_up()
        return

    # Pre-upgrade phase: deploy the service from its config class, tearing
    # down per the flag so it can survive into the post-upgrade phase.
    with _create_llmisvc_from_config(
        config_cls=config_cls,
        namespace=llmd_namespace_fixture.name,
        client=admin_client,
        teardown=teardown_resources,
    ) as created_svc:
        yield created_svc
Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
import pytest
2+
from ocp_resources.gateway import Gateway
3+
from ocp_resources.llm_inference_service import LLMInferenceService
4+
5+
from tests.model_serving.model_server.llmd.utils import (
6+
parse_completion_text,
7+
send_chat_completions,
8+
)
9+
from tests.model_serving.model_server.upgrade.utils import (
10+
verify_gateway_accepted,
11+
verify_llmd_pods_not_restarted,
12+
verify_llmd_router_not_restarted,
13+
)
14+
15+
# Apply the llmd_cpu marker to every test collected from this module.
pytestmark = [pytest.mark.llmd_cpu]

# Prompt sent to the model, and the substring expected (case-insensitively)
# in the completion it returns.
PROMPT = "What is the capital of Italy?"
EXPECTED_ANSWER = "rome"
19+
20+
21+
class TestLlmdPreUpgrade:
    """Pre-upgrade: deploy LLMD InferenceService and validate inference."""

    @pytest.mark.pre_upgrade
    def test_llmd_llmisvc_deployed(self, llmd_inference_service_fixture: LLMInferenceService):
        """Test steps:

        1. Verify LLMInferenceService resource exists on the cluster.
        """
        svc = llmd_inference_service_fixture
        assert svc.exists, f"LLMInferenceService {svc.name} does not exist"

    @pytest.mark.pre_upgrade
    def test_llmd_inference_pre_upgrade(self, llmd_inference_service_fixture: LLMInferenceService):
        """Test steps:

        1. Send a chat completion request to /v1/chat/completions.
        2. Assert the response status is 200.
        3. Assert the completion text contains the expected answer.
        """
        status_code, response_body = send_chat_completions(
            llmisvc=llmd_inference_service_fixture, prompt=PROMPT
        )
        assert status_code == 200, f"Expected 200, got {status_code}: {response_body}"
        answer = parse_completion_text(response_body=response_body)
        assert EXPECTED_ANSWER in answer.lower(), (
            f"Expected '{EXPECTED_ANSWER}' in response, got: {answer}"
        )
46+
47+
48+
class TestLlmdPostUpgrade:
    """Post-upgrade: verify LLMD deployment survived the platform upgrade."""

    @pytest.mark.post_upgrade
    @pytest.mark.dependency(name="llmd_llmisvc_exists")
    def test_llmd_llmisvc_exists(self, llmd_inference_service_fixture: LLMInferenceService):
        """Test steps:

        1. Verify LLMInferenceService resource still exists after upgrade.
        """
        svc = llmd_inference_service_fixture
        assert svc.exists, f"LLMInferenceService {svc.name} does not exist after upgrade"

    @pytest.mark.post_upgrade
    def test_llmd_gateway_exists(self, llmd_gateway_fixture: Gateway):
        """Test steps:

        1. Verify the LLMD Gateway resource exists.
        2. Verify the Gateway has an Accepted condition set to True.
        """
        verify_gateway_accepted(gateway=llmd_gateway_fixture)

    @pytest.mark.post_upgrade
    @pytest.mark.dependency(depends=["llmd_llmisvc_exists"])
    def test_llmd_workload_pods_not_restarted(
        self,
        admin_client,
        llmd_inference_service_fixture: LLMInferenceService,
    ):
        """Test steps:

        1. Get all workload pods for the LLMInferenceService.
        2. Verify no container has restarted during the upgrade.
        """
        verify_llmd_pods_not_restarted(
            client=admin_client, llmisvc=llmd_inference_service_fixture
        )

    @pytest.mark.post_upgrade
    @pytest.mark.dependency(depends=["llmd_llmisvc_exists"])
    def test_llmd_router_scheduler_not_restarted(
        self,
        admin_client,
        llmd_inference_service_fixture: LLMInferenceService,
    ):
        """Test steps:

        1. Get the router-scheduler pod for the LLMInferenceService.
        2. Verify no container has restarted during the upgrade.
        """
        verify_llmd_router_not_restarted(
            client=admin_client, llmisvc=llmd_inference_service_fixture
        )

    @pytest.mark.post_upgrade
    @pytest.mark.dependency(depends=["llmd_llmisvc_exists"])
    def test_llmd_inference_post_upgrade(self, llmd_inference_service_fixture: LLMInferenceService):
        """Test steps:

        1. Send a chat completion request to /v1/chat/completions.
        2. Assert the response status is 200.
        3. Assert the completion text contains the expected answer.
        """
        status_code, response_body = send_chat_completions(
            llmisvc=llmd_inference_service_fixture, prompt=PROMPT
        )
        assert status_code == 200, f"Expected 200, got {status_code}: {response_body}"
        answer = parse_completion_text(response_body=response_body)
        assert EXPECTED_ANSWER in answer.lower(), (
            f"Expected '{EXPECTED_ANSWER}' in response, got: {answer}"
        )

tests/model_serving/model_server/upgrade/utils.py

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
from kubernetes.dynamic import DynamicClient
22
from ocp_resources.config_map import ConfigMap
3+
from ocp_resources.gateway import Gateway
34
from ocp_resources.inference_service import InferenceService
5+
from ocp_resources.llm_inference_service import LLMInferenceService
46
from ocp_resources.prometheus import Prometheus
57
from ocp_resources.route import Route
68

@@ -315,3 +317,91 @@ def verify_isvc_internal_access(isvc: InferenceService) -> str:
315317
raise AssertionError(f"InferenceService {isvc.name} has empty URL in status.address")
316318

317319
return url
320+
321+
322+
def verify_llmd_pods_not_restarted(
    client: DynamicClient,
    llmisvc: LLMInferenceService,
    max_restarts: int = 0,
) -> None:
    """
    Verify that workload pods for an LLMInferenceService have not restarted.

    Args:
        client: DynamicClient instance
        llmisvc: LLMInferenceService instance
        max_restarts: Maximum allowed restart count (default 0)

    Raises:
        PodContainersRestartError: If no workload pods are found, or if any
            container has restarted more than max_restarts times
    """
    # Imported lazily to avoid a circular import with the llmd test package.
    from tests.model_serving.model_server.llmd.utils import get_llmd_workload_pods

    pods = get_llmd_workload_pods(client=client, llmisvc=llmisvc)
    # An empty pod list would make the restart check vacuously pass while
    # actually meaning the workload is gone after the upgrade — treat it as a
    # failure, mirroring verify_llmd_router_not_restarted's missing-pod check.
    if not pods:
        raise PodContainersRestartError(f"No workload pods found for {llmisvc.name}")

    restarted_containers: dict[str, list[str]] = {}

    for pod in pods:
        # containerStatuses may be absent while a pod is still initializing.
        if pod.instance.status.containerStatuses:
            for container in pod.instance.status.containerStatuses:
                if container.restartCount > max_restarts:
                    restarted_containers.setdefault(pod.name, []).append(
                        f"{container.name} (restarts: {container.restartCount})"
                    )

    if restarted_containers:
        raise PodContainersRestartError(f"LLMD workload containers restarted: {restarted_containers}")
353+
354+
355+
def verify_llmd_router_not_restarted(
    client: DynamicClient,
    llmisvc: LLMInferenceService,
    max_restarts: int = 0,
) -> None:
    """
    Verify that the router-scheduler pod for an LLMInferenceService has not restarted.

    Args:
        client: DynamicClient instance
        llmisvc: LLMInferenceService instance
        max_restarts: Maximum allowed restart count (default 0)

    Raises:
        PodContainersRestartError: If any container has restarted more than max_restarts times
    """
    # Imported lazily to avoid a circular import with the llmd test package.
    from tests.model_serving.model_server.llmd.utils import get_llmd_router_scheduler_pod

    scheduler_pod = get_llmd_router_scheduler_pod(client=client, llmisvc=llmisvc)
    if not scheduler_pod:
        raise PodContainersRestartError(f"Router-scheduler pod not found for {llmisvc.name}")

    # containerStatuses may be absent while the pod is still initializing.
    statuses = scheduler_pod.instance.status.containerStatuses or []
    offenders = [
        f"{status.name} (restarts: {status.restartCount})"
        for status in statuses
        if status.restartCount > max_restarts
    ]

    if offenders:
        restarted_containers = {scheduler_pod.name: offenders}
        raise PodContainersRestartError(f"LLMD router-scheduler containers restarted: {restarted_containers}")
387+
388+
389+
def verify_gateway_accepted(gateway: Gateway) -> None:
    """
    Verify that a Gateway resource exists and has an Accepted condition.

    Args:
        gateway: Gateway instance

    Raises:
        AssertionError: If gateway does not exist or is not accepted
    """
    if not gateway.exists:
        raise AssertionError(f"Gateway {gateway.name} does not exist in namespace {gateway.namespace}")

    # status can be absent right after creation or mid-reconciliation; guard
    # against None so the failure reads "not Accepted" instead of crashing
    # with an AttributeError on .get().
    status = gateway.instance.status or {}
    conditions = status.get("conditions", [])
    is_accepted = any(
        condition.get("type") == "Accepted" and condition.get("status") == "True" for condition in conditions
    )
    if not is_accepted:
        raise AssertionError(f"Gateway {gateway.name} is not Accepted. Conditions: {conditions}")

0 commit comments

Comments
 (0)