6262 weight: 5.0
6363"""
6464
65+
6566@pytest .mark .parametrize (
6667 "unprivileged_model_namespace" ,
6768 [pytest .param ({"name" : "singlenode-prefix-cache-test" })],
@@ -125,7 +126,6 @@ def test_singlenode_precise_prefix_cache(
125126 )
126127
127128
128-
129129@pytest .fixture (scope = "class" )
130130def singlenode_precise_prefix_cache (
131131 admin_client : DynamicClient ,
@@ -161,10 +161,10 @@ def singlenode_precise_prefix_cache(
161161 "name" : "VLLM_ADDITIONAL_ARGS" ,
162162 "value" : (
163163 f"--prefix-caching-hash-algo { PREFIX_CACHE_HASH_ALGO } --block-size { PREFIX_CACHE_BLOCK_SIZE } "
164- " --kv_transfer_config '{ \ " kv_connector\" : \ " NixlConnector\" , \ " kv_role\" : \ " kv_both\" }' "
165- " --kv-events-config '{ \ " enable_kv_cache_events\ " :true,\ " publisher\" : \ " zmq\" ,"
166- " \" endpoint\" : \ " tcp://{{ ChildName .ObjectMeta.Name `-epp-service` }}:5557\" ,"
167- " \" topic\" : \ " kv@${POD_IP}@${MODEL_NAME}\" }'"
164+ ' --kv_transfer_config \' { "kv_connector": "NixlConnector", "kv_role": "kv_both"} \' '
165+ ' --kv-events-config \' { "enable_kv_cache_events":true,"publisher": "zmq",'
166+ '" endpoint": "tcp://{{ ChildName .ObjectMeta.Name `-epp-service` }}:5557",'
167+ '" topic": "kv@${POD_IP}@${MODEL_NAME}"} \' '
168168 ),
169169 },
170170 {
@@ -284,6 +284,7 @@ def get_llmd_router_scheduler_pod(
284284 return pod
285285 return None
286286
287+
287288def count_chat_completions_requests_in_pod (pod : Pod ) -> int :
288289 """
289290 Count POST /v1/chat/completions requests in pod logs.
@@ -300,17 +301,17 @@ def count_chat_completions_requests_in_pod(pod: Pod) -> int:
300301 logs = pod .log (container = "main" , since_seconds = 120 )
301302
302303 # Match: "POST /v1/chat/completions HTTP/1.1" 200
303- pattern = r' POST /v1/chat/completions HTTP/1.1.*200'
304+ pattern = r" POST /v1/chat/completions HTTP/1.1.*200"
304305 matches = re .findall (pattern , logs )
305306
306307 LOGGER .info (f"Pod { pod .name } : Found { len (matches )} requests matching pattern" )
307308
308309 # Debug: Show sample log lines if no matches found
309310 if len (matches ) == 0 :
310- log_lines = logs .split (' \n ' )
311+ log_lines = logs .split (" \n " )
311312 LOGGER .info (f"Pod { pod .name } : Total log lines: { len (log_lines )} " )
312313 # Show lines containing "POST" or "completions"
313- relevant_lines = [line for line in log_lines if ' POST' in line or ' completion' in line .lower ()]
314+ relevant_lines = [line for line in log_lines if " POST" in line or " completion" in line .lower ()]
314315 if relevant_lines :
315316 LOGGER .info (f"Pod { pod .name } : Sample relevant lines (first 5):" )
316317 for line in relevant_lines [:5 ]:
@@ -401,7 +402,7 @@ def verify_singlenode_prefix_cache_routing(
401402 inference_config = {
402403 "default_query_model" : {
403404 "query_input" : repeated_prompt ,
404- "query_output" : r'.*' ,
405+ "query_output" : r".*" ,
405406 "use_regex" : True ,
406407 },
407408 "chat_completions" : TINYLLAMA_INFERENCE_CONFIG ["chat_completions" ],
@@ -449,7 +450,7 @@ def verify_singlenode_prefix_cache_routing(
449450 inference_config = {
450451 "default_query_model" : {
451452 "query_input" : prompt ,
452- "query_output" : r'.*' ,
453+ "query_output" : r".*" ,
453454 "use_regex" : True ,
454455 },
455456 "chat_completions" : TINYLLAMA_INFERENCE_CONFIG ["chat_completions" ],
@@ -485,7 +486,7 @@ def verify_singlenode_prefix_cache_routing(
485486 inference_config = {
486487 "default_query_model" : {
487488 "query_input" : different_prompt ,
488- "query_output" : r'.*' ,
489+ "query_output" : r".*" ,
489490 "use_regex" : True ,
490491 },
491492 "chat_completions" : TINYLLAMA_INFERENCE_CONFIG ["chat_completions" ],
@@ -503,4 +504,4 @@ def verify_singlenode_prefix_cache_routing(
503504 authorized_user = True ,
504505 )
505506
506- LOGGER .info ("All cache routing tests completed successfully" )
507+ LOGGER .info ("All cache routing tests completed successfully" )
0 commit comments