Skip to content

Commit 159fd07

Browse files
committed
fix precommit errors
1 parent 724334f commit 159fd07

File tree

4 files changed

+43
-30
lines changed

4 files changed

+43
-30
lines changed

tests/model_serving/model_server/llmd/conftest.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -375,10 +375,10 @@ def singlenode_precise_prefix_cache(
375375
"name": "VLLM_ADDITIONAL_ARGS",
376376
"value": (
377377
f"--prefix-caching-hash-algo {PREFIX_CACHE_HASH_ALGO} --block-size {PREFIX_CACHE_BLOCK_SIZE} "
378-
"--kv_transfer_config '{\"kv_connector\":\"NixlConnector\",\"kv_role\":\"kv_both\"}' "
379-
"--kv-events-config '{\"enable_kv_cache_events\":true,\"publisher\":\"zmq\","
380-
"\"endpoint\":\"tcp://{{ ChildName .ObjectMeta.Name `-epp-service` }}:5557\","
381-
"\"topic\":\"kv@${POD_IP}@${MODEL_NAME}\"}'"
378+
'--kv_transfer_config \'{"kv_connector":"NixlConnector","kv_role":"kv_both"}\' '
379+
'--kv-events-config \'{"enable_kv_cache_events":true,"publisher":"zmq",'
380+
'"endpoint":"tcp://{{ ChildName .ObjectMeta.Name `-epp-service` }}:5557",'
381+
'"topic":"kv@${POD_IP}@${MODEL_NAME}"}\''
382382
),
383383
},
384384
{

tests/model_serving/model_server/llmd/constants.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,4 +41,4 @@
4141
],
4242
}
4343
],
44-
}
44+
}

tests/model_serving/model_server/llmd/test_singlenode_precise_prefix_cache.py

Lines changed: 18 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,3 @@
1-
import pytest
2-
from kubernetes.dynamic import DynamicClient
3-
from ocp_resources.llm_inference_service import LLMInferenceService
4-
5-
from tests.model_serving.model_server.llmd.utils import (
6-
get_llmd_router_scheduler_pod,
7-
get_llmd_workload_pods,
8-
verify_gateway_status,
9-
verify_llm_service_status,
10-
verify_singlenode_prefix_cache_routing,
11-
)
12-
from simple_logger.logger import get_logger
13-
141
"""
152
Test Single-Node Precise Prefix Caching.
163
@@ -24,10 +11,24 @@
2411
- Send multiple requests with shared prefixes and size greater than PREFIX_CACHE_BLOCK_SIZE
2512
"""
2613

14+
import pytest
15+
from kubernetes.dynamic import DynamicClient
16+
from ocp_resources.llm_inference_service import LLMInferenceService
17+
18+
from tests.model_serving.model_server.llmd.utils import (
19+
get_llmd_router_scheduler_pod,
20+
get_llmd_workload_pods,
21+
verify_gateway_status,
22+
verify_llm_service_status,
23+
verify_singlenode_prefix_cache_routing,
24+
)
25+
from simple_logger.logger import get_logger
26+
2727
LOGGER = get_logger(name=__name__)
2828

2929
pytestmark = [pytest.mark.llmd_gpu]
3030

31+
3132
@pytest.mark.parametrize(
3233
"unprivileged_model_namespace",
3334
[pytest.param({"name": "singlenode-prefix-cache-test"})],
@@ -75,12 +76,14 @@ def test_singlenode_precise_prefix_cache(
7576
assert verify_llm_service_status(singlenode_precise_prefix_cache), "LLMInferenceService should be ready"
7677

7778
# Verify router-scheduler pod exists and is running
78-
router_scheduler_pod = get_llmd_router_scheduler_pod(unprivileged_client, singlenode_precise_prefix_cache)
79+
router_scheduler_pod = get_llmd_router_scheduler_pod(
80+
client=unprivileged_client, llmisvc=singlenode_precise_prefix_cache
81+
)
7982
assert router_scheduler_pod is not None, "Router-scheduler pod should exist"
8083
assert router_scheduler_pod.instance.status.phase == "Running", "Router-scheduler pod should be running"
8184

8285
# Verify workload pods
83-
workload_pods = get_llmd_workload_pods(unprivileged_client, singlenode_precise_prefix_cache)
86+
workload_pods = get_llmd_workload_pods(client=unprivileged_client, llmisvc=singlenode_precise_prefix_cache)
8487
assert len(workload_pods) == 2, f"Expected 2 workload pods, found {len(workload_pods)}"
8588

8689
# Test prefix cache routing (includes assertions for routing affinity)

tests/model_serving/model_server/llmd/utils.py

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -265,17 +265,17 @@ def count_chat_completions_requests_in_pod(pod: Pod) -> int:
265265
logs = pod.log(container="main", since_seconds=120)
266266

267267
# Match: "POST /v1/chat/completions HTTP/1.1" 200
268-
pattern = r'POST /v1/chat/completions HTTP/1.1.*200'
268+
pattern = r"POST /v1/chat/completions HTTP/1.1.*200"
269269
matches = re.findall(pattern, logs)
270270

271271
LOGGER.info(f"Pod {pod.name}: Found {len(matches)} requests matching pattern")
272272

273273
# Debug: Show sample log lines if no matches found
274274
if len(matches) == 0:
275-
log_lines = logs.split('\n')
275+
log_lines = logs.split("\n")
276276
LOGGER.info(f"Pod {pod.name}: Total log lines: {len(log_lines)}")
277277
# Show lines containing "POST" or "completions"
278-
relevant_lines = [line for line in log_lines if 'POST' in line or 'completion' in line.lower()]
278+
relevant_lines = [line for line in log_lines if "POST" in line or "completion" in line.lower()]
279279
if relevant_lines:
280280
LOGGER.info(f"Pod {pod.name}: Sample relevant lines (first 5):")
281281
for line in relevant_lines[:5]:
@@ -305,11 +305,11 @@ def get_pod_that_handled_request(
305305
Returns:
306306
Pod name that handled the request, or None if not found
307307
"""
308-
time.sleep(5)
308+
time.sleep(5)  # time.sleep() takes its argument positionally; a `seconds=` keyword raises TypeError
309309

310310
current_counts = {}
311311
for pod in workload_pods:
312-
current_counts[pod.name] = count_chat_completions_requests_in_pod(pod)
312+
current_counts[pod.name] = count_chat_completions_requests_in_pod(pod=pod)
313313

314314
for pod in workload_pods:
315315
baseline = baseline_counts.get(pod.name, 0)
@@ -347,7 +347,7 @@ def verify_singlenode_prefix_cache_routing(
347347
baseline_counts = {}
348348

349349
for pod in workload_pods:
350-
baseline_counts[pod.name] = count_chat_completions_requests_in_pod(pod)
350+
baseline_counts[pod.name] = count_chat_completions_requests_in_pod(pod=pod)
351351

352352
# Phase 1: Repeated prompts (full cache hit)
353353
LOGGER.info("Phase 1: Testing repeated prompts")
@@ -363,7 +363,7 @@ def verify_singlenode_prefix_cache_routing(
363363
inference_config = {
364364
"default_query_model": {
365365
"query_input": repeated_prompt,
366-
"query_output": r'.*',
366+
"query_output": r".*",
367367
"use_regex": True,
368368
},
369369
"chat_completions": TINYLLAMA_INFERENCE_CONFIG["chat_completions"],
@@ -382,7 +382,12 @@ def verify_singlenode_prefix_cache_routing(
382382
authorized_user=True,
383383
)
384384

385-
handling_pod = get_pod_that_handled_request(workload_pods, repeated_prompt, timestamp_before, baseline_counts)
385+
handling_pod = get_pod_that_handled_request(
386+
workload_pods=workload_pods,
387+
query=repeated_prompt,
388+
timestamp_before=timestamp_before,
389+
baseline_counts=baseline_counts,
390+
)
386391
phase1_pods.append(handling_pod)
387392
if handling_pod:
388393
baseline_counts[handling_pod] = baseline_counts.get(handling_pod, 0) + 1
@@ -411,7 +416,7 @@ def verify_singlenode_prefix_cache_routing(
411416
inference_config = {
412417
"default_query_model": {
413418
"query_input": prompt,
414-
"query_output": r'.*',
419+
"query_output": r".*",
415420
"use_regex": True,
416421
},
417422
"chat_completions": TINYLLAMA_INFERENCE_CONFIG["chat_completions"],
@@ -430,7 +435,12 @@ def verify_singlenode_prefix_cache_routing(
430435
authorized_user=True,
431436
)
432437

433-
handling_pod = get_pod_that_handled_request(workload_pods, prompt, timestamp_before, baseline_counts)
438+
handling_pod = get_pod_that_handled_request(
439+
workload_pods=workload_pods,
440+
query=prompt,
441+
timestamp_before=timestamp_before,
442+
baseline_counts=baseline_counts,
443+
)
434444
phase2_pods.append(handling_pod)
435445
if handling_pod:
436446
baseline_counts[handling_pod] = baseline_counts.get(handling_pod, 0) + 1

0 commit comments

Comments
 (0)