Skip to content

Commit 6222ea4

Browse files
[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
1 parent: 6ef95f2 · commit: 6222ea4

File tree

2 files changed

+14 -13 lines changed (14 additions, 13 deletions)

2 files changed

+14 -13 lines changed (14 additions, 13 deletions)

tests/model_serving/model_server/llmd/constants.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,4 +5,4 @@
55
"periodSeconds": 30,
66
"timeoutSeconds": 30,
77
"failureThreshold": 5,
8-
}
8+
}

tests/model_serving/model_server/llmd/test_singlenode_precise_prefix_cache.py

Lines changed: 13 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -62,6 +62,7 @@
6262
weight: 5.0
6363
"""
6464

65+
6566
@pytest.mark.parametrize(
6667
"unprivileged_model_namespace",
6768
[pytest.param({"name": "singlenode-prefix-cache-test"})],
@@ -125,7 +126,6 @@ def test_singlenode_precise_prefix_cache(
125126
)
126127

127128

128-
129129
@pytest.fixture(scope="class")
130130
def singlenode_precise_prefix_cache(
131131
admin_client: DynamicClient,
@@ -161,10 +161,10 @@ def singlenode_precise_prefix_cache(
161161
"name": "VLLM_ADDITIONAL_ARGS",
162162
"value": (
163163
f"--prefix-caching-hash-algo {PREFIX_CACHE_HASH_ALGO} --block-size {PREFIX_CACHE_BLOCK_SIZE} "
164-
"--kv_transfer_config '{\"kv_connector\":\"NixlConnector\",\"kv_role\":\"kv_both\"}' "
165-
"--kv-events-config '{\"enable_kv_cache_events\":true,\"publisher\":\"zmq\","
166-
"\"endpoint\":\"tcp://{{ ChildName .ObjectMeta.Name `-epp-service` }}:5557\","
167-
"\"topic\":\"kv@${POD_IP}@${MODEL_NAME}\"}'"
164+
'--kv_transfer_config \'{"kv_connector":"NixlConnector","kv_role":"kv_both"}\' '
165+
'--kv-events-config \'{"enable_kv_cache_events":true,"publisher":"zmq",'
166+
'"endpoint":"tcp://{{ ChildName .ObjectMeta.Name `-epp-service` }}:5557",'
167+
'"topic":"kv@${POD_IP}@${MODEL_NAME}"}\''
168168
),
169169
},
170170
{
@@ -284,6 +284,7 @@ def get_llmd_router_scheduler_pod(
284284
return pod
285285
return None
286286

287+
287288
def count_chat_completions_requests_in_pod(pod: Pod) -> int:
288289
"""
289290
Count POST /v1/chat/completions requests in pod logs.
@@ -300,17 +301,17 @@ def count_chat_completions_requests_in_pod(pod: Pod) -> int:
300301
logs = pod.log(container="main", since_seconds=120)
301302

302303
# Match: "POST /v1/chat/completions HTTP/1.1" 200
303-
pattern = r'POST /v1/chat/completions HTTP/1.1.*200'
304+
pattern = r"POST /v1/chat/completions HTTP/1.1.*200"
304305
matches = re.findall(pattern, logs)
305306

306307
LOGGER.info(f"Pod {pod.name}: Found {len(matches)} requests matching pattern")
307308

308309
# Debug: Show sample log lines if no matches found
309310
if len(matches) == 0:
310-
log_lines = logs.split('\n')
311+
log_lines = logs.split("\n")
311312
LOGGER.info(f"Pod {pod.name}: Total log lines: {len(log_lines)}")
312313
# Show lines containing "POST" or "completions"
313-
relevant_lines = [line for line in log_lines if 'POST' in line or 'completion' in line.lower()]
314+
relevant_lines = [line for line in log_lines if "POST" in line or "completion" in line.lower()]
314315
if relevant_lines:
315316
LOGGER.info(f"Pod {pod.name}: Sample relevant lines (first 5):")
316317
for line in relevant_lines[:5]:
@@ -401,7 +402,7 @@ def verify_singlenode_prefix_cache_routing(
401402
inference_config = {
402403
"default_query_model": {
403404
"query_input": repeated_prompt,
404-
"query_output": r'.*',
405+
"query_output": r".*",
405406
"use_regex": True,
406407
},
407408
"chat_completions": TINYLLAMA_INFERENCE_CONFIG["chat_completions"],
@@ -449,7 +450,7 @@ def verify_singlenode_prefix_cache_routing(
449450
inference_config = {
450451
"default_query_model": {
451452
"query_input": prompt,
452-
"query_output": r'.*',
453+
"query_output": r".*",
453454
"use_regex": True,
454455
},
455456
"chat_completions": TINYLLAMA_INFERENCE_CONFIG["chat_completions"],
@@ -485,7 +486,7 @@ def verify_singlenode_prefix_cache_routing(
485486
inference_config = {
486487
"default_query_model": {
487488
"query_input": different_prompt,
488-
"query_output": r'.*',
489+
"query_output": r".*",
489490
"use_regex": True,
490491
},
491492
"chat_completions": TINYLLAMA_INFERENCE_CONFIG["chat_completions"],
@@ -503,4 +504,4 @@ def verify_singlenode_prefix_cache_routing(
503504
authorized_user=True,
504505
)
505506

506-
LOGGER.info("All cache routing tests completed successfully")
507+
LOGGER.info("All cache routing tests completed successfully")

0 commit comments

Comments (0)