@@ -39,7 +39,10 @@ def _collect_llmisvc_diagnostics(llmisvc: LLMInferenceService) -> str:
3939 conditions = llmisvc .instance .status .get ("conditions" , [])
4040 if conditions :
4141 lines .append (" Conditions:" )
42- lines .extend (f" { condition ['type' ]} : { condition ['status' ]} — { condition .get ('message' , '' )} " for condition in conditions )
42+ lines .extend (
43+ f" { condition ['type' ]} : { condition ['status' ]} — { condition .get ('message' , '' )} "
44+ for condition in conditions
45+ )
4346 else :
4447 lines .append (" No conditions reported." )
4548
@@ -66,13 +69,20 @@ def _collect_llmisvc_diagnostics(llmisvc: LLMInferenceService) -> str:
6669 try :
6770 _ , stdout , _ = run_command (
6871 command = [
69- "oc" , "get" , "events" , "-n" , llmisvc .namespace ,
70- "--field-selector" , f"involvedObject.name={ llmisvc .name } " ,
72+ "oc" ,
73+ "get" ,
74+ "events" ,
75+ "-n" ,
76+ llmisvc .namespace ,
77+ "--field-selector" ,
78+ f"involvedObject.name={ llmisvc .name } " ,
7179 "--sort-by=.lastTimestamp" ,
72- "-o" , "custom-columns=TYPE:.type,REASON:.reason,MESSAGE:.message" ,
80+ "-o" ,
81+ "custom-columns=TYPE:.type,REASON:.reason,MESSAGE:.message" ,
7382 "--no-headers" ,
7483 ],
75- verify_stderr = False , check = False ,
84+ verify_stderr = False ,
85+ check = False ,
7686 )
7787 if stdout .strip ():
7888 lines .append (" Recent events:" )
@@ -155,12 +165,20 @@ def _curl_post(
155165) -> tuple [int , str ]:
156166 """POST to URL via curl. Returns (status_code, response_body)."""
157167 cmd = [
158- "curl" , "-s" , "-w" , "\n %{http_code}" ,
159- "-X" , "POST" ,
160- "-H" , "Content-Type: application/json" ,
161- "-H" , "Accept: application/json" ,
162- "-d" , body ,
163- "--max-time" , str (timeout ),
168+ "curl" ,
169+ "-s" ,
170+ "-w" ,
171+ "\n %{http_code}" ,
172+ "-X" ,
173+ "POST" ,
174+ "-H" ,
175+ "Content-Type: application/json" ,
176+ "-H" ,
177+ "Accept: application/json" ,
178+ "-d" ,
179+ body ,
180+ "--max-time" ,
181+ str (timeout ),
164182 ]
165183 if token :
166184 cmd .extend (["-H" , f"Authorization: Bearer { token } " ])
@@ -172,9 +190,7 @@ def _curl_post(
172190
173191 _log_curl_command (url = url , body = body , token = bool (token ), ca_cert = ca_cert )
174192
175- _ , stdout , stderr = run_command (
176- command = cmd , verify_stderr = False , check = False , hide_log_command = True
177- )
193+ _ , stdout , stderr = run_command (command = cmd , verify_stderr = False , check = False , hide_log_command = True )
178194 if not stdout .strip ():
179195 raise ConnectionError (f"curl failed with no output: { stderr } " )
180196
@@ -206,10 +222,7 @@ def send_chat_completions(
206222
207223 border = "=" * 60
208224 LOGGER .info (
209- f"\n { border } \n Sending inference request: { llmisvc .name } "
210- f"\n URL: { url } "
211- f"\n Model: { model_name } "
212- f"\n { border } "
225+ f"\n { border } \n Sending inference request: { llmisvc .name } \n URL: { url } \n Model: { model_name } \n { border } "
213226 )
214227 status_code , response_body = _curl_post (url , body , token = token , ca_cert = ca_cert )
215228 LOGGER .info (f"Inference response — status={ status_code } \n { response_body } " )
@@ -260,7 +273,6 @@ def _check_pod_failure(pod: Pod) -> str | None:
260273 return None
261274
262275
263-
264276def assert_no_restarts (pods : list [Pod ]) -> None :
265277 """Assert no container in any pod has restarted."""
266278 for pod in pods :
@@ -361,7 +373,6 @@ def get_llmd_router_scheduler_pod(
361373 return None
362374
363375
364-
365376def query_metric_by_pod (
366377 prometheus : Prometheus ,
367378 metric_name : str ,
@@ -389,9 +400,7 @@ def assert_prefix_cache_routing(
389400 LOGGER .info (f"Request count by pod: { requests } " )
390401
391402 pods_with_traffic = [p for p , count in requests .items () if count > 0 ]
392- assert len (pods_with_traffic ) == 1 , (
393- f"Expected traffic on exactly 1 pod, got { len (pods_with_traffic )} : { requests } "
394- )
403+ assert len (pods_with_traffic ) == 1 , f"Expected traffic on exactly 1 pod, got { len (pods_with_traffic )} : { requests } "
395404
396405 active_pod = pods_with_traffic [0 ]
397406 assert requests [active_pod ] == expected_requests , (
@@ -412,9 +421,7 @@ def assert_prefix_cache_routing(
412421def assert_scheduler_routing (router_pod : Pod , min_decisions : int ) -> bool :
413422 """Assert scheduler made enough routing decisions. Retries for log propagation."""
414423 logs = get_scheduler_decision_logs (router_scheduler_pod = router_pod )
415- assert len (logs ) >= min_decisions , (
416- f"Expected >= { min_decisions } scheduler decisions, got { len (logs )} "
417- )
424+ assert len (logs ) >= min_decisions , f"Expected >= { min_decisions } scheduler decisions, got { len (logs )} "
418425 return True
419426
420427
@@ -430,17 +437,13 @@ def send_prefix_cache_requests(
430437 successful = 0
431438 for i in range (count ):
432439 try :
433- status , _ = send_chat_completions (
434- llmisvc , prompt = prompt , token = token , insecure = False
435- )
440+ status , _ = send_chat_completions (llmisvc , prompt = prompt , token = token , insecure = False )
436441 if status == 200 :
437442 successful += 1
438443 except Exception as e : # noqa: BLE001
439444 LOGGER .error (f"Request { i + 1 } /{ count } failed: { e } " )
440445 LOGGER .info (f"{ successful } /{ count } requests succeeded" )
441- assert successful >= count * min_ratio , (
442- f"Too many failures: { successful } /{ count } (need { min_ratio * 100 } %)"
443- )
446+ assert successful >= count * min_ratio , f"Too many failures: { successful } /{ count } (need { min_ratio * 100 } %)"
444447 return successful
445448
446449
@@ -475,5 +478,3 @@ def get_scheduler_decision_logs(
475478
476479 LOGGER .info (f"Retrieved { len (json_logs )} logs from router-scheduler pod" )
477480 return json_logs
478-
479-
0 commit comments