Skip to content

Commit 4e078cb

Browse files
[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
1 parent 61eaaa7 commit 4e078cb

File tree

4 files changed

+41
-42
lines changed

4 files changed

+41
-42
lines changed

tests/model_serving/model_server/llmd_v2/llmd_configs/config_base.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,10 @@ def container_env(cls):
109109
# --max-model-len 128
110110
# --enforce-eager
111111
return super().container_env() + [
112-
{"name": "VLLM_ADDITIONAL_ARGS", "value": "--max-num-seqs 20 --max-model-len 128 --enforce-eager --ssl-ciphers ECDHE+AESGCM:DHE+AESGCM"},
112+
{
113+
"name": "VLLM_ADDITIONAL_ARGS",
114+
"value": "--max-num-seqs 20 --max-model-len 128 --enforce-eager --ssl-ciphers ECDHE+AESGCM:DHE+AESGCM",
115+
},
113116
{"name": "VLLM_CPU_KVCACHE_SPACE", "value": "4"},
114117
]
115118

@@ -138,4 +141,3 @@ def container_resources(cls):
138141
"nvidia.com/gpu": ResourceLimits.GPU.REQUEST,
139142
},
140143
}
141-

tests/model_serving/model_server/llmd_v2/llmd_configs/config_precise_prefix_cache.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -105,9 +105,7 @@ def _scheduler_container(cls):
105105
{"name": "zmq", "containerPort": 5557, "protocol": "TCP"},
106106
],
107107
"env": [{"name": "HF_HOME", "value": "/mnt/tokenizers"}],
108-
"volumeMounts": [
109-
{"name": "tokenizers", "mountPath": "/mnt/tokenizers", "readOnly": False}
110-
],
108+
"volumeMounts": [{"name": "tokenizers", "mountPath": "/mnt/tokenizers", "readOnly": False}],
111109
"args": [
112110
"--v=4",
113111
"--pool-name",

tests/model_serving/model_server/llmd_v2/tier_1/test_llmd_auth.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,7 @@ def test_llmisvc_authorized(self, llmisvc_with_auth):
3131
expected = "rome"
3232

3333
for entry in llmisvc_with_auth:
34-
status, body = send_chat_completions(
35-
entry["service"], prompt=prompt, token=entry["token"], insecure=False
36-
)
34+
status, body = send_chat_completions(entry["service"], prompt=prompt, token=entry["token"], insecure=False)
3735
assert status == 200, f"Authorized request failed with {status}: {body}"
3836
completion = parse_completion_text(body)
3937
assert expected in completion.lower(), f"Expected '{expected}' in response, got: {completion}"

tests/model_serving/model_server/llmd_v2/utils.py

Lines changed: 35 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,10 @@ def _collect_llmisvc_diagnostics(llmisvc: LLMInferenceService) -> str:
3939
conditions = llmisvc.instance.status.get("conditions", [])
4040
if conditions:
4141
lines.append(" Conditions:")
42-
lines.extend(f" {condition['type']}: {condition['status']}{condition.get('message', '')}" for condition in conditions)
42+
lines.extend(
43+
f" {condition['type']}: {condition['status']}{condition.get('message', '')}"
44+
for condition in conditions
45+
)
4346
else:
4447
lines.append(" No conditions reported.")
4548

@@ -66,13 +69,20 @@ def _collect_llmisvc_diagnostics(llmisvc: LLMInferenceService) -> str:
6669
try:
6770
_, stdout, _ = run_command(
6871
command=[
69-
"oc", "get", "events", "-n", llmisvc.namespace,
70-
"--field-selector", f"involvedObject.name={llmisvc.name}",
72+
"oc",
73+
"get",
74+
"events",
75+
"-n",
76+
llmisvc.namespace,
77+
"--field-selector",
78+
f"involvedObject.name={llmisvc.name}",
7179
"--sort-by=.lastTimestamp",
72-
"-o", "custom-columns=TYPE:.type,REASON:.reason,MESSAGE:.message",
80+
"-o",
81+
"custom-columns=TYPE:.type,REASON:.reason,MESSAGE:.message",
7382
"--no-headers",
7483
],
75-
verify_stderr=False, check=False,
84+
verify_stderr=False,
85+
check=False,
7686
)
7787
if stdout.strip():
7888
lines.append(" Recent events:")
@@ -155,12 +165,20 @@ def _curl_post(
155165
) -> tuple[int, str]:
156166
"""POST to URL via curl. Returns (status_code, response_body)."""
157167
cmd = [
158-
"curl", "-s", "-w", "\n%{http_code}",
159-
"-X", "POST",
160-
"-H", "Content-Type: application/json",
161-
"-H", "Accept: application/json",
162-
"-d", body,
163-
"--max-time", str(timeout),
168+
"curl",
169+
"-s",
170+
"-w",
171+
"\n%{http_code}",
172+
"-X",
173+
"POST",
174+
"-H",
175+
"Content-Type: application/json",
176+
"-H",
177+
"Accept: application/json",
178+
"-d",
179+
body,
180+
"--max-time",
181+
str(timeout),
164182
]
165183
if token:
166184
cmd.extend(["-H", f"Authorization: Bearer {token}"])
@@ -172,9 +190,7 @@ def _curl_post(
172190

173191
_log_curl_command(url=url, body=body, token=bool(token), ca_cert=ca_cert)
174192

175-
_, stdout, stderr = run_command(
176-
command=cmd, verify_stderr=False, check=False, hide_log_command=True
177-
)
193+
_, stdout, stderr = run_command(command=cmd, verify_stderr=False, check=False, hide_log_command=True)
178194
if not stdout.strip():
179195
raise ConnectionError(f"curl failed with no output: {stderr}")
180196

@@ -206,10 +222,7 @@ def send_chat_completions(
206222

207223
border = "=" * 60
208224
LOGGER.info(
209-
f"\n{border}\n Sending inference request: {llmisvc.name}"
210-
f"\n URL: {url}"
211-
f"\n Model: {model_name}"
212-
f"\n{border}"
225+
f"\n{border}\n Sending inference request: {llmisvc.name}\n URL: {url}\n Model: {model_name}\n{border}"
213226
)
214227
status_code, response_body = _curl_post(url, body, token=token, ca_cert=ca_cert)
215228
LOGGER.info(f"Inference response — status={status_code}\n{response_body}")
@@ -260,7 +273,6 @@ def _check_pod_failure(pod: Pod) -> str | None:
260273
return None
261274

262275

263-
264276
def assert_no_restarts(pods: list[Pod]) -> None:
265277
"""Assert no container in any pod has restarted."""
266278
for pod in pods:
@@ -361,7 +373,6 @@ def get_llmd_router_scheduler_pod(
361373
return None
362374

363375

364-
365376
def query_metric_by_pod(
366377
prometheus: Prometheus,
367378
metric_name: str,
@@ -389,9 +400,7 @@ def assert_prefix_cache_routing(
389400
LOGGER.info(f"Request count by pod: {requests}")
390401

391402
pods_with_traffic = [p for p, count in requests.items() if count > 0]
392-
assert len(pods_with_traffic) == 1, (
393-
f"Expected traffic on exactly 1 pod, got {len(pods_with_traffic)}: {requests}"
394-
)
403+
assert len(pods_with_traffic) == 1, f"Expected traffic on exactly 1 pod, got {len(pods_with_traffic)}: {requests}"
395404

396405
active_pod = pods_with_traffic[0]
397406
assert requests[active_pod] == expected_requests, (
@@ -412,9 +421,7 @@ def assert_prefix_cache_routing(
412421
def assert_scheduler_routing(router_pod: Pod, min_decisions: int) -> bool:
413422
"""Assert scheduler made enough routing decisions. Retries for log propagation."""
414423
logs = get_scheduler_decision_logs(router_scheduler_pod=router_pod)
415-
assert len(logs) >= min_decisions, (
416-
f"Expected >= {min_decisions} scheduler decisions, got {len(logs)}"
417-
)
424+
assert len(logs) >= min_decisions, f"Expected >= {min_decisions} scheduler decisions, got {len(logs)}"
418425
return True
419426

420427

@@ -430,17 +437,13 @@ def send_prefix_cache_requests(
430437
successful = 0
431438
for i in range(count):
432439
try:
433-
status, _ = send_chat_completions(
434-
llmisvc, prompt=prompt, token=token, insecure=False
435-
)
440+
status, _ = send_chat_completions(llmisvc, prompt=prompt, token=token, insecure=False)
436441
if status == 200:
437442
successful += 1
438443
except Exception as e: # noqa: BLE001
439444
LOGGER.error(f"Request {i + 1}/{count} failed: {e}")
440445
LOGGER.info(f"{successful}/{count} requests succeeded")
441-
assert successful >= count * min_ratio, (
442-
f"Too many failures: {successful}/{count} (need {min_ratio * 100}%)"
443-
)
446+
assert successful >= count * min_ratio, f"Too many failures: {successful}/{count} (need {min_ratio * 100}%)"
444447
return successful
445448

446449

@@ -475,5 +478,3 @@ def get_scheduler_decision_logs(
475478

476479
LOGGER.info(f"Retrieved {len(json_logs)} logs from router-scheduler pod")
477480
return json_logs
478-
479-

0 commit comments

Comments (0)