Skip to content

Commit 4172595

Browse files
committed
run precommit
1 parent 41851dd commit 4172595

12 files changed

+60
-38
lines changed

tests/model_serving/model_server/llmd/conftest.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,6 @@
66

77
import pytest
88
import yaml
9-
10-
logging.getLogger("timeout_sampler").setLevel(logging.WARNING)
119
from _pytest.fixtures import FixtureRequest
1210
from kubernetes.dynamic import DynamicClient
1311
from ocp_resources.config_map import ConfigMap
@@ -19,17 +17,18 @@
1917
from ocp_resources.service_account import ServiceAccount
2018
from simple_logger.logger import get_logger
2119

22-
from tests.model_serving.model_server.llmd_v2.llmd_configs import TinyLlamaOciConfig
23-
from tests.model_serving.model_server.llmd_v2.utils import wait_for_llmisvc
20+
from tests.model_serving.model_server.llmd.llmd_configs import TinyLlamaOciConfig
21+
from tests.model_serving.model_server.llmd.utils import wait_for_llmisvc
2422
from utilities.constants import Timeout
2523
from utilities.infra import create_inference_token, s3_endpoint_secret, update_configmap_data
2624
from utilities.llmd_constants import LLMDGateway
2725
from utilities.llmd_utils import create_llmd_gateway
2826
from utilities.logger import RedactedString
2927

3028
LOGGER = get_logger(name=__name__)
29+
logging.getLogger("timeout_sampler").setLevel(logging.WARNING)
3130

32-
AuthEntry = namedtuple("AuthEntry", ["service", "token"])
31+
AuthEntry = namedtuple(typename="AuthEntry", field_names=["service", "token"])
3332

3433

3534
# ===========================================

tests/model_serving/model_server/llmd/llmd_configs/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,11 @@
77
__all__ = [
88
"EstimatedPrefixCacheConfig",
99
"LLMISvcConfig",
10-
"TinyLlamaHfConfig",
1110
"PrecisePrefixCacheConfig",
1211
"PrefillDecodeConfig",
1312
"QwenHfConfig",
1413
"QwenS3Config",
14+
"TinyLlamaHfConfig",
1515
"TinyLlamaOciConfig",
1616
"TinyLlamaS3Config",
1717
]

tests/model_serving/model_server/llmd/test_llmd_auth.py

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
pytestmark = [pytest.mark.tier1, pytest.mark.cpu]
1010

11-
NAMESPACE = ns_from_file(__file__)
11+
NAMESPACE = ns_from_file(file=__file__)
1212

1313

1414
@pytest.mark.parametrize(
@@ -32,9 +32,11 @@ def test_llmisvc_authorized(self, llmisvc_auth_pair):
3232
expected = "rome"
3333

3434
for entry in [entry_a, entry_b]:
35-
status, body = send_chat_completions(entry.service, prompt=prompt, token=entry.token, insecure=False)
35+
status, body = send_chat_completions(
36+
llmisvc=entry.service, prompt=prompt, token=entry.token, insecure=False
37+
)
3638
assert status == 200, f"Authorized request failed with {status}: {body}"
37-
completion = parse_completion_text(body)
39+
completion = parse_completion_text(response_body=body)
3840
assert expected in completion.lower(), f"Expected '{expected}' in response, got: {completion}"
3941

4042
def test_llmisvc_unauthorized(self, llmisvc_auth_pair):
@@ -49,10 +51,17 @@ def test_llmisvc_unauthorized(self, llmisvc_auth_pair):
4951

5052
# User B's token cannot access user A's service
5153
status, _ = send_chat_completions(
52-
entry_a.service, prompt="What is the capital of Italy?", token=entry_b.token, insecure=False
54+
llmisvc=entry_a.service,
55+
prompt="What is the capital of Italy?",
56+
token=entry_b.token,
57+
insecure=False,
5358
)
5459
assert status in (401, 403), f"Cross-user access should be denied, got {status}"
5560

5661
# No token at all fails
57-
status, _ = send_chat_completions(entry_a.service, prompt="What is the capital of Italy?", insecure=False)
62+
status, _ = send_chat_completions(
63+
llmisvc=entry_a.service,
64+
prompt="What is the capital of Italy?",
65+
insecure=False,
66+
)
5867
assert status in (401, 403), f"No-token access should be denied, got {status}"

tests/model_serving/model_server/llmd/test_llmd_connection_cpu.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
pytestmark = [pytest.mark.tier1, pytest.mark.cpu]
1212

13-
NAMESPACE = ns_from_file(__file__)
13+
NAMESPACE = ns_from_file(file=__file__)
1414

1515

1616
@pytest.mark.parametrize(
@@ -35,7 +35,7 @@ def test_llmd_connection_cpu(self, llmisvc: LLMInferenceService):
3535
prompt = "What is the capital of Italy?"
3636
expected = "rome"
3737

38-
status, body = send_chat_completions(llmisvc, prompt=prompt)
38+
status, body = send_chat_completions(llmisvc=llmisvc, prompt=prompt)
3939
assert status == 200, f"Expected 200, got {status}: {body}"
40-
completion = parse_completion_text(body)
41-
assert expected in completion.lower(), f"Expected '{expected}' in response, got: {completion}"
40+
completion = parse_completion_text(response_body=body)
41+
assert expected in completion.lower(), f"Expected '{expected}' in response, got: {completion}"

tests/model_serving/model_server/llmd/test_llmd_connection_gpu.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
pytestmark = [pytest.mark.tier1, pytest.mark.gpu]
1212

13-
NAMESPACE = ns_from_file(__file__)
13+
NAMESPACE = ns_from_file(file=__file__)
1414

1515

1616
@pytest.mark.parametrize(
@@ -43,7 +43,7 @@ def test_llmd_connection_gpu(
4343
prompt = "What is the capital of Italy?"
4444
expected = "rome"
4545

46-
status, body = send_chat_completions(llmisvc, prompt=prompt)
46+
status, body = send_chat_completions(llmisvc=llmisvc, prompt=prompt)
4747
assert status == 200, f"Expected 200, got {status}: {body}"
48-
completion = parse_completion_text(body)
48+
completion = parse_completion_text(response_body=body)
4949
assert expected in completion.lower(), f"Expected '{expected}' in response, got: {completion}"

tests/model_serving/model_server/llmd/test_llmd_kueue_integration.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515

1616
pytestmark = [pytest.mark.tier2, pytest.mark.cpu]
1717

18-
NAMESPACE = ns_from_file(__file__)
18+
NAMESPACE = ns_from_file(file=__file__)
1919

2020
# --- Test Configuration ---
2121
LOCAL_QUEUE_NAME = "llmd-local-queue-raw"
@@ -157,5 +157,5 @@ def test_kueue_llmd_scaleup(
157157

158158
status, body = send_chat_completions(llmisvc=llmisvc, prompt=prompt)
159159
assert status == 200, f"Expected 200 after scale-up, got {status}: {body}"
160-
completion = parse_completion_text(body)
160+
completion = parse_completion_text(response_body=body)
161161
assert expected in completion.lower(), f"Expected '{expected}' in response, got: {completion}"

tests/model_serving/model_server/llmd/test_llmd_no_scheduler.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
pytestmark = [pytest.mark.tier2, pytest.mark.gpu]
1212

13-
NAMESPACE = ns_from_file(__file__)
13+
NAMESPACE = ns_from_file(file=__file__)
1414

1515

1616
class S3GpuNoSchedulerConfig(QwenS3Config):
@@ -48,7 +48,7 @@ def test_llmd_no_scheduler(
4848
prompt = "What is the capital of Italy?"
4949
expected = "rome"
5050

51-
status, body = send_chat_completions(llmisvc, prompt=prompt)
51+
status, body = send_chat_completions(llmisvc=llmisvc, prompt=prompt)
5252
assert status == 200, f"Expected 200, got {status}: {body}"
53-
completion = parse_completion_text(body)
53+
completion = parse_completion_text(response_body=body)
5454
assert expected in completion.lower(), f"Expected '{expected}' in response, got: {completion}"

tests/model_serving/model_server/llmd/test_llmd_prefill_decode.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
pytestmark = [pytest.mark.tier2, pytest.mark.gpu]
1212

13-
NAMESPACE = ns_from_file(__file__)
13+
NAMESPACE = ns_from_file(file=__file__)
1414

1515

1616
@pytest.mark.parametrize(
@@ -40,7 +40,7 @@ def test_llmd_prefill_decode(
4040
prompt = "What is the capital of Italy?"
4141
expected = "rome"
4242

43-
status, body = send_chat_completions(llmisvc, prompt=prompt)
43+
status, body = send_chat_completions(llmisvc=llmisvc, prompt=prompt)
4444
assert status == 200, f"Expected 200, got {status}: {body}"
45-
completion = parse_completion_text(body)
45+
completion = parse_completion_text(response_body=body)
4646
assert expected in completion.lower(), f"Expected '{expected}' in response, got: {completion}"

tests/model_serving/model_server/llmd/test_llmd_singlenode_estimated_prefix_cache.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
"and why they are important for understanding the nature of reality at the atomic scale."
2121
)
2222

23-
NAMESPACE = ns_from_file(__file__)
23+
NAMESPACE = ns_from_file(file=__file__)
2424

2525
pytestmark = [pytest.mark.tier2, pytest.mark.gpu]
2626

@@ -32,7 +32,9 @@
3232
)
3333
@pytest.mark.usefixtures("valid_aws_config")
3434
class TestSingleNodeEstimatedPrefixCache:
35-
"""Deploy Qwen on GPU with 2 replicas and estimated prefix cache routing, then verify cache hits via Prometheus metrics."""
35+
"""Deploy Qwen on GPU with 2 replicas and estimated prefix cache routing,
36+
then verify cache hits via Prometheus metrics.
37+
"""
3638

3739
def test_singlenode_estimated_prefix_cache(
3840
self,

tests/model_serving/model_server/llmd/test_llmd_singlenode_precise_prefix_cache.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
"and why they are important for understanding the nature of reality at the atomic scale."
2222
)
2323

24-
NAMESPACE = ns_from_file(__file__)
24+
NAMESPACE = ns_from_file(file=__file__)
2525

2626
pytestmark = [pytest.mark.tier2, pytest.mark.gpu]
2727

@@ -33,7 +33,9 @@
3333
)
3434
@pytest.mark.usefixtures("valid_aws_config")
3535
class TestSingleNodePrecisePrefixCache:
36-
"""Deploy Qwen on GPU with 2 replicas and precise prefix cache routing, then verify cache hits via Prometheus metrics."""
36+
"""Deploy Qwen on GPU with 2 replicas and precise prefix cache routing,
37+
then verify cache hits via Prometheus metrics.
38+
"""
3739

3840
def test_singlenode_precise_prefix_cache(
3941
self,

0 commit comments

Comments (0)