@@ -265,17 +265,17 @@ def count_chat_completions_requests_in_pod(pod: Pod) -> int:
265265 logs = pod .log (container = "main" , since_seconds = 120 )
266266
267267 # Match: "POST /v1/chat/completions HTTP/1.1" 200
268- pattern = r' POST /v1/chat/completions HTTP/1.1.*200'
268+ pattern = r" POST /v1/chat/completions HTTP/1.1.*200"
269269 matches = re .findall (pattern , logs )
270270
271271 LOGGER .info (f"Pod { pod .name } : Found { len (matches )} requests matching pattern" )
272272
273273 # Debug: Show sample log lines if no matches found
274274 if len (matches ) == 0 :
275- log_lines = logs .split (' \n ' )
275+ log_lines = logs .split (" \n " )
276276 LOGGER .info (f"Pod { pod .name } : Total log lines: { len (log_lines )} " )
277277 # Show lines containing "POST" or "completions"
278- relevant_lines = [line for line in log_lines if ' POST' in line or ' completion' in line .lower ()]
278+ relevant_lines = [line for line in log_lines if " POST" in line or " completion" in line .lower ()]
279279 if relevant_lines :
280280 LOGGER .info (f"Pod { pod .name } : Sample relevant lines (first 5):" )
281281 for line in relevant_lines [:5 ]:
@@ -305,11 +305,11 @@ def get_pod_that_handled_request(
305305 Returns:
306306 Pod name that handled the request, or None if not found
307307 """
308308 time .sleep (5 )
309309
310310 current_counts = {}
311311 for pod in workload_pods :
312- current_counts [pod .name ] = count_chat_completions_requests_in_pod (pod )
312+ current_counts [pod .name ] = count_chat_completions_requests_in_pod (pod = pod )
313313
314314 for pod in workload_pods :
315315 baseline = baseline_counts .get (pod .name , 0 )
@@ -347,7 +347,7 @@ def verify_singlenode_prefix_cache_routing(
347347 baseline_counts = {}
348348
349349 for pod in workload_pods :
350- baseline_counts [pod .name ] = count_chat_completions_requests_in_pod (pod )
350+ baseline_counts [pod .name ] = count_chat_completions_requests_in_pod (pod = pod )
351351
352352 # Phase 1: Repeated prompts (full cache hit)
353353 LOGGER .info ("Phase 1: Testing repeated prompts" )
@@ -363,7 +363,7 @@ def verify_singlenode_prefix_cache_routing(
363363 inference_config = {
364364 "default_query_model" : {
365365 "query_input" : repeated_prompt ,
366- "query_output" : r'.*' ,
366+ "query_output" : r".*" ,
367367 "use_regex" : True ,
368368 },
369369 "chat_completions" : TINYLLAMA_INFERENCE_CONFIG ["chat_completions" ],
@@ -382,7 +382,12 @@ def verify_singlenode_prefix_cache_routing(
382382 authorized_user = True ,
383383 )
384384
385- handling_pod = get_pod_that_handled_request (workload_pods , repeated_prompt , timestamp_before , baseline_counts )
385+ handling_pod = get_pod_that_handled_request (
386+ workload_pods = workload_pods ,
387+ query = repeated_prompt ,
388+ timestamp_before = timestamp_before ,
389+ baseline_counts = baseline_counts ,
390+ )
386391 phase1_pods .append (handling_pod )
387392 if handling_pod :
388393 baseline_counts [handling_pod ] = baseline_counts .get (handling_pod , 0 ) + 1
@@ -411,7 +416,7 @@ def verify_singlenode_prefix_cache_routing(
411416 inference_config = {
412417 "default_query_model" : {
413418 "query_input" : prompt ,
414- "query_output" : r'.*' ,
419+ "query_output" : r".*" ,
415420 "use_regex" : True ,
416421 },
417422 "chat_completions" : TINYLLAMA_INFERENCE_CONFIG ["chat_completions" ],
@@ -430,7 +435,12 @@ def verify_singlenode_prefix_cache_routing(
430435 authorized_user = True ,
431436 )
432437
433- handling_pod = get_pod_that_handled_request (workload_pods , prompt , timestamp_before , baseline_counts )
438+ handling_pod = get_pod_that_handled_request (
439+ workload_pods = workload_pods ,
440+ query = prompt ,
441+ timestamp_before = timestamp_before ,
442+ baseline_counts = baseline_counts ,
443+ )
434444 phase2_pods .append (handling_pod )
435445 if handling_pod :
436446 baseline_counts [handling_pod ] = baseline_counts .get (handling_pod , 0 ) + 1
0 commit comments