|
20 | 20 | get_llmd_workload_pods, |
21 | 21 | verify_gateway_status, |
22 | 22 | verify_llm_service_status, |
23 | | - verify_singlenode_prefix_cache_routing, |
| 23 | + verify_singlenode_precise_prefix_cache_routing, |
24 | 24 | ) |
25 | 25 | from simple_logger.logger import get_logger |
26 | 26 |
|
|
31 | 31 |
|
32 | 32 | @pytest.mark.parametrize( |
33 | 33 | "unprivileged_model_namespace", |
34 | | - [pytest.param({"name": "singlenode-prefix-cache-test"})], |
| 34 | + [pytest.param({"name": "llmd-singlenode-prefix-cache-test"})], |
35 | 35 | indirect=True, |
36 | 36 | ) |
37 | 37 | @pytest.mark.parametrize( |
@@ -60,23 +60,21 @@ def test_singlenode_precise_prefix_cache( |
60 | 60 | if gpu_count_on_cluster < 2: |
61 | 61 | pytest.skip(f"Test requires at least 2 GPUs (found {gpu_count_on_cluster})") |
62 | 62 |
|
63 | | - # Verify gateway and service are ready |
| 63 | + # Verify infrastructure is ready before testing routing |
64 | 64 | assert verify_gateway_status(llmd_gateway), "Gateway should be ready" |
65 | 65 | assert verify_llm_service_status(singlenode_precise_prefix_cache), "LLMInferenceService should be ready" |
66 | 66 |
|
67 | | - # Verify router-scheduler pod exists and is running |
68 | 67 | router_scheduler_pod = get_llmd_router_scheduler_pod( |
69 | 68 | client=unprivileged_client, llmisvc=singlenode_precise_prefix_cache |
70 | 69 | ) |
71 | 70 | assert router_scheduler_pod is not None, "Router-scheduler pod should exist" |
72 | 71 | assert router_scheduler_pod.instance.status.phase == "Running", "Router-scheduler pod should be running" |
73 | 72 |
|
74 | | - # Verify workload pods |
75 | 73 | workload_pods = get_llmd_workload_pods(client=unprivileged_client, llmisvc=singlenode_precise_prefix_cache) |
76 | 74 | assert len(workload_pods) == 2, f"Expected 2 workload pods, found {len(workload_pods)}" |
77 | 75 |
|
78 | | - # Test prefix cache routing (includes assertions for routing affinity) |
79 | | - verify_singlenode_prefix_cache_routing( |
| 76 | + # Verify prefix cache routing behavior |
| 77 | + verify_singlenode_precise_prefix_cache_routing( |
80 | 78 | llmisvc=singlenode_precise_prefix_cache, |
81 | 79 | token=authenticated_llmisvc_token, |
82 | 80 | workload_pods=workload_pods, |
|
0 commit comments