Skip to content

Commit 4172595

Browse files
committed
run precommit
1 parent 41851dd commit 4172595

12 files changed

+60
-38
lines changed

tests/model_serving/model_server/llmd/conftest.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,6 @@
66

77
import pytest
88
import yaml
9-
10-
logging.getLogger("timeout_sampler").setLevel(logging.WARNING)
119
from _pytest.fixtures import FixtureRequest
1210
from kubernetes.dynamic import DynamicClient
1311
from ocp_resources.config_map import ConfigMap
@@ -19,17 +17,18 @@
1917
from ocp_resources.service_account import ServiceAccount
2018
from simple_logger.logger import get_logger
2119

22-
from tests.model_serving.model_server.llmd_v2.llmd_configs import TinyLlamaOciConfig
23-
from tests.model_serving.model_server.llmd_v2.utils import wait_for_llmisvc
20+
from tests.model_serving.model_server.llmd.llmd_configs import TinyLlamaOciConfig
21+
from tests.model_serving.model_server.llmd.utils import wait_for_llmisvc
2422
from utilities.constants import Timeout
2523
from utilities.infra import create_inference_token, s3_endpoint_secret, update_configmap_data
2624
from utilities.llmd_constants import LLMDGateway
2725
from utilities.llmd_utils import create_llmd_gateway
2826
from utilities.logger import RedactedString
2927

3028
LOGGER = get_logger(name=__name__)
29+
logging.getLogger("timeout_sampler").setLevel(logging.WARNING)
3130

32-
AuthEntry = namedtuple("AuthEntry", ["service", "token"])
31+
AuthEntry = namedtuple(typename="AuthEntry", field_names=["service", "token"])
3332

3433

3534
# ===========================================

tests/model_serving/model_server/llmd/llmd_configs/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,11 @@
77
__all__ = [
88
"EstimatedPrefixCacheConfig",
99
"LLMISvcConfig",
10-
"TinyLlamaHfConfig",
1110
"PrecisePrefixCacheConfig",
1211
"PrefillDecodeConfig",
1312
"QwenHfConfig",
1413
"QwenS3Config",
14+
"TinyLlamaHfConfig",
1515
"TinyLlamaOciConfig",
1616
"TinyLlamaS3Config",
1717
]

tests/model_serving/model_server/llmd/test_llmd_auth.py

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
pytestmark = [pytest.mark.tier1, pytest.mark.cpu]
1010

11-
NAMESPACE = ns_from_file(__file__)
11+
NAMESPACE = ns_from_file(file=__file__)
1212

1313

1414
@pytest.mark.parametrize(
@@ -32,9 +32,11 @@ def test_llmisvc_authorized(self, llmisvc_auth_pair):
3232
expected = "rome"
3333

3434
for entry in [entry_a, entry_b]:
35-
status, body = send_chat_completions(entry.service, prompt=prompt, token=entry.token, insecure=False)
35+
status, body = send_chat_completions(
36+
llmisvc=entry.service, prompt=prompt, token=entry.token, insecure=False
37+
)
3638
assert status == 200, f"Authorized request failed with {status}: {body}"
37-
completion = parse_completion_text(body)
39+
completion = parse_completion_text(response_body=body)
3840
assert expected in completion.lower(), f"Expected '{expected}' in response, got: {completion}"
3941

4042
def test_llmisvc_unauthorized(self, llmisvc_auth_pair):
@@ -49,10 +51,17 @@ def test_llmisvc_unauthorized(self, llmisvc_auth_pair):
4951

5052
# User B's token cannot access user A's service
5153
status, _ = send_chat_completions(
52-
entry_a.service, prompt="What is the capital of Italy?", token=entry_b.token, insecure=False
54+
llmisvc=entry_a.service,
55+
prompt="What is the capital of Italy?",
56+
token=entry_b.token,
57+
insecure=False,
5358
)
5459
assert status in (401, 403), f"Cross-user access should be denied, got {status}"
5560

5661
# No token at all fails
57-
status, _ = send_chat_completions(entry_a.service, prompt="What is the capital of Italy?", insecure=False)
62+
status, _ = send_chat_completions(
63+
llmisvc=entry_a.service,
64+
prompt="What is the capital of Italy?",
65+
insecure=False,
66+
)
5867
assert status in (401, 403), f"No-token access should be denied, got {status}"

tests/model_serving/model_server/llmd/test_llmd_connection_cpu.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
pytestmark = [pytest.mark.tier1, pytest.mark.cpu]
1212

13-
NAMESPACE = ns_from_file(__file__)
13+
NAMESPACE = ns_from_file(file=__file__)
1414

1515

1616
@pytest.mark.parametrize(
@@ -35,7 +35,7 @@ def test_llmd_connection_cpu(self, llmisvc: LLMInferenceService):
3535
prompt = "What is the capital of Italy?"
3636
expected = "rome"
3737

38-
status, body = send_chat_completions(llmisvc, prompt=prompt)
38+
status, body = send_chat_completions(llmisvc=llmisvc, prompt=prompt)
3939
assert status == 200, f"Expected 200, got {status}: {body}"
40-
completion = parse_completion_text(body)
41-
assert expected in completion.lower(), f"Expected '{expected}' in response, got: {completion}"
40+
completion = parse_completion_text(response_body=body)
41+
assert expected in completion.lower(), f"Expected '{expected}' in response, got: {completion}"

tests/model_serving/model_server/llmd/test_llmd_connection_gpu.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
pytestmark = [pytest.mark.tier1, pytest.mark.gpu]
1212

13-
NAMESPACE = ns_from_file(__file__)
13+
NAMESPACE = ns_from_file(file=__file__)
1414

1515

1616
@pytest.mark.parametrize(
@@ -43,7 +43,7 @@ def test_llmd_connection_gpu(
4343
prompt = "What is the capital of Italy?"
4444
expected = "rome"
4545

46-
status, body = send_chat_completions(llmisvc, prompt=prompt)
46+
status, body = send_chat_completions(llmisvc=llmisvc, prompt=prompt)
4747
assert status == 200, f"Expected 200, got {status}: {body}"
48-
completion = parse_completion_text(body)
48+
completion = parse_completion_text(response_body=body)
4949
assert expected in completion.lower(), f"Expected '{expected}' in response, got: {completion}"

tests/model_serving/model_server/llmd/test_llmd_kueue_integration.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515

1616
pytestmark = [pytest.mark.tier2, pytest.mark.cpu]
1717

18-
NAMESPACE = ns_from_file(__file__)
18+
NAMESPACE = ns_from_file(file=__file__)
1919

2020
# --- Test Configuration ---
2121
LOCAL_QUEUE_NAME = "llmd-local-queue-raw"
@@ -157,5 +157,5 @@ def test_kueue_llmd_scaleup(
157157

158158
status, body = send_chat_completions(llmisvc=llmisvc, prompt=prompt)
159159
assert status == 200, f"Expected 200 after scale-up, got {status}: {body}"
160-
completion = parse_completion_text(body)
160+
completion = parse_completion_text(response_body=body)
161161
assert expected in completion.lower(), f"Expected '{expected}' in response, got: {completion}"

tests/model_serving/model_server/llmd/test_llmd_no_scheduler.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
pytestmark = [pytest.mark.tier2, pytest.mark.gpu]
1212

13-
NAMESPACE = ns_from_file(__file__)
13+
NAMESPACE = ns_from_file(file=__file__)
1414

1515

1616
class S3GpuNoSchedulerConfig(QwenS3Config):
@@ -48,7 +48,7 @@ def test_llmd_no_scheduler(
4848
prompt = "What is the capital of Italy?"
4949
expected = "rome"
5050

51-
status, body = send_chat_completions(llmisvc, prompt=prompt)
51+
status, body = send_chat_completions(llmisvc=llmisvc, prompt=prompt)
5252
assert status == 200, f"Expected 200, got {status}: {body}"
53-
completion = parse_completion_text(body)
53+
completion = parse_completion_text(response_body=body)
5454
assert expected in completion.lower(), f"Expected '{expected}' in response, got: {completion}"

tests/model_serving/model_server/llmd/test_llmd_prefill_decode.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
pytestmark = [pytest.mark.tier2, pytest.mark.gpu]
1212

13-
NAMESPACE = ns_from_file(__file__)
13+
NAMESPACE = ns_from_file(file=__file__)
1414

1515

1616
@pytest.mark.parametrize(
@@ -40,7 +40,7 @@ def test_llmd_prefill_decode(
4040
prompt = "What is the capital of Italy?"
4141
expected = "rome"
4242

43-
status, body = send_chat_completions(llmisvc, prompt=prompt)
43+
status, body = send_chat_completions(llmisvc=llmisvc, prompt=prompt)
4444
assert status == 200, f"Expected 200, got {status}: {body}"
45-
completion = parse_completion_text(body)
45+
completion = parse_completion_text(response_body=body)
4646
assert expected in completion.lower(), f"Expected '{expected}' in response, got: {completion}"

tests/model_serving/model_server/llmd/test_llmd_singlenode_estimated_prefix_cache.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
"and why they are important for understanding the nature of reality at the atomic scale."
2121
)
2222

23-
NAMESPACE = ns_from_file(__file__)
23+
NAMESPACE = ns_from_file(file=__file__)
2424

2525
pytestmark = [pytest.mark.tier2, pytest.mark.gpu]
2626

@@ -32,7 +32,9 @@
3232
)
3333
@pytest.mark.usefixtures("valid_aws_config")
3434
class TestSingleNodeEstimatedPrefixCache:
35-
"""Deploy Qwen on GPU with 2 replicas and estimated prefix cache routing, then verify cache hits via Prometheus metrics."""
35+
"""Deploy Qwen on GPU with 2 replicas and estimated prefix cache routing,
36+
then verify cache hits via Prometheus metrics.
37+
"""
3638

3739
def test_singlenode_estimated_prefix_cache(
3840
self,

tests/model_serving/model_server/llmd/test_llmd_singlenode_precise_prefix_cache.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
"and why they are important for understanding the nature of reality at the atomic scale."
2222
)
2323

24-
NAMESPACE = ns_from_file(__file__)
24+
NAMESPACE = ns_from_file(file=__file__)
2525

2626
pytestmark = [pytest.mark.tier2, pytest.mark.gpu]
2727

@@ -33,7 +33,9 @@
3333
)
3434
@pytest.mark.usefixtures("valid_aws_config")
3535
class TestSingleNodePrecisePrefixCache:
36-
"""Deploy Qwen on GPU with 2 replicas and precise prefix cache routing, then verify cache hits via Prometheus metrics."""
36+
"""Deploy Qwen on GPU with 2 replicas and precise prefix cache routing,
37+
then verify cache hits via Prometheus metrics.
38+
"""
3739

3840
def test_singlenode_precise_prefix_cache(
3941
self,

0 commit comments

Comments (0)