Skip to content

Commit a2618da

Browse files
committed
nitpick comments
1 parent 2aa4b3d commit a2618da

File tree

3 files changed

+8
-12
lines changed

3 files changed

+8
-12
lines changed

tests/model_serving/model_server/llmd/conftest.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,7 @@ def llmisvc_auth_pair(
159159

160160

161161
# ===========================================
162-
# Auth — SA + RBAC + token
162+
# Auth — SA + RBAC + token
163163
# ===========================================
164164
@pytest.fixture(scope="class")
165165
def llmisvc_token(

tests/model_serving/model_server/llmd/llmd_configs/config_prefill_decode.py

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,6 @@ class PrefillDecodeConfig(QwenS3Config):
99
enable_auth = False
1010
name = "llmisvc-prefill-decode-gpu"
1111

12-
@classmethod
13-
def container_resources(cls):
14-
return {
15-
"limits": {"cpu": "4", "memory": "32Gi", "nvidia.com/gpu": "1"},
16-
"requests": {"cpu": "2", "memory": "16Gi", "nvidia.com/gpu": "1"},
17-
}
18-
1912
@classmethod
2013
def prefill_config(cls):
2114
return {

tests/model_serving/model_server/llmd/utils.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -154,8 +154,11 @@ def send_chat_completions(
154154

155155
def parse_completion_text(response_body: str) -> str:
156156
"""Extract completion text from a chat completion response."""
157-
data = json.loads(response_body)
158-
return data["choices"][0]["message"]["content"]
157+
try:
158+
data = json.loads(response_body)
159+
return data["choices"][0]["message"]["content"]
160+
except (json.JSONDecodeError, KeyError, IndexError, TypeError) as e:
161+
raise ValueError(f"Failed to parse completion response: {e}\nBody: {response_body[:500]}") from e
159162

160163

161164
def get_llmd_workload_pods(
@@ -292,8 +295,8 @@ def send_prefix_cache_requests(
292295
status, _ = send_chat_completions(llmisvc=llmisvc, prompt=prompt, token=token, insecure=False)
293296
if status == 200:
294297
successful += 1
295-
except Exception as e: # noqa: BLE001
296-
LOGGER.error(f"Request {i + 1}/{count} failed: {e}")
298+
except Exception:
299+
LOGGER.exception(f"Request {i + 1}/{count} failed")
297300
LOGGER.info(f"{successful}/{count} requests succeeded")
298301
assert successful >= count * min_ratio, f"Too many failures: {successful}/{count} (need {min_ratio * 100}%)"
299302
return successful

0 commit comments

Comments (0)