
Commit 1546283

[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
1 parent: c0bcd23 · commit: 1546283

4 files changed: +45 −44 lines


tests/fixtures/guardrails.py

Lines changed: 5 additions & 2 deletions
@@ -13,8 +13,8 @@
 from ocp_resources.resource import ResourceEditor
 from ocp_resources.route import Route

-from tests.model_explainability.guardrails.constants import PROMPT_INJECTION_DETECTOR, HAP_DETECTOR
-from utilities.constants import Annotations, Labels, VLLMGPUConfig, BUILTIN_DETECTOR_CONFIG
+from tests.model_explainability.guardrails.constants import HAP_DETECTOR, PROMPT_INJECTION_DETECTOR
+from utilities.constants import BUILTIN_DETECTOR_CONFIG, Annotations, Labels, VLLMGPUConfig
 from utilities.guardrails import check_guardrails_health_endpoint

 GUARDRAILS_ORCHESTRATOR_NAME: str = "guardrails-orchestrator"
@@ -220,6 +220,7 @@ def guardrails_orchestrator_gateway_route(
         ensure_exists=True,
     )

+
 def get_vllm_chat_config(namespace: str) -> dict[str, Any]:
     return {
         "service": {
@@ -228,6 +229,7 @@ def get_vllm_chat_config(namespace: str) -> dict[str, Any]:
         }
     }

+
 @pytest.fixture(scope="class")
 def orchestrator_config_gpu(
     request: FixtureRequest,
@@ -285,6 +287,7 @@ def orchestrator_config_gpu(
     ) as cm:
         yield cm

+
 @pytest.fixture(scope="class")
 def orchestrator_config_builtin_gpu(
     request: FixtureRequest,

tests/fixtures/inference.py

Lines changed: 30 additions & 34 deletions
@@ -2,10 +2,7 @@
 from typing import Any

 import pytest
-import yaml
-from _pytest.fixtures import FixtureRequest
 from kubernetes.dynamic import DynamicClient
-from ocp_resources.config_map import ConfigMap
 from ocp_resources.data_science_cluster import DataScienceCluster
 from ocp_resources.deployment import Deployment
 from ocp_resources.inference_service import InferenceService
@@ -19,12 +16,12 @@
 from simple_logger.logger import get_logger
 from timeout_sampler import retry

-from tests.model_explainability.guardrails.constants import PROMPT_INJECTION_DETECTOR, HAP_DETECTOR
 from utilities.constants import (
     QWEN_MODEL_NAME,
     KServeDeploymentType,
     LLMdInferenceSimConfig,
-    RuntimeTemplates, VLLMGPUConfig, BUILTIN_DETECTOR_CONFIG,
+    RuntimeTemplates,
+    VLLMGPUConfig,
 )
 from utilities.inference_utils import create_isvc
 from utilities.infra import get_data_science_cluster, wait_for_dsc_status_ready
@@ -250,6 +247,7 @@ def _wait_for_kserve_upgrade(dsc_resource: DataScienceCluster):
         LOGGER.info("DSC already configured for Headed mode")
     yield dsc

+
 @pytest.fixture(scope="class")
 def vllm_gpu_runtime(
     admin_client: DynamicClient,
@@ -263,28 +261,25 @@ def vllm_gpu_runtime(
         template_name=RuntimeTemplates.VLLM_CUDA,
         deployment_type=KServeDeploymentType.RAW_DEPLOYMENT,
         runtime_image="registry.redhat.io/rhaiis/vllm-cuda-rhel9@sha256:ec799bb5eeb7e25b4b25a8917ab5161da6b6f1ab830cbba61bba371cffb0c34d",
-        containers={
-            "kserve-container": {
-                "command": ["python", "-m", "vllm.entrypoints.openai.api_server"],
-                "args": [
-                    "--port=8080",
-                    "--model=/mnt/models",
-                    "--tokenizer=/mnt/models",
-                    "--served-model-name={{.Name}}",
-                    "--dtype=float16",
-                    "--enforce-eager",
-                ],
-                "ports": [{"containerPort": 8080, "protocol": "TCP"}],
-                "resources": {
-                    "limits": {
-                        "nvidia.com/gpu": "1"
-                    }
-                },
-            }
+        containers={
+            "kserve-container": {
+                "command": ["python", "-m", "vllm.entrypoints.openai.api_server"],
+                "args": [
+                    "--port=8080",
+                    "--model=/mnt/models",
+                    "--tokenizer=/mnt/models",
+                    "--served-model-name={{.Name}}",
+                    "--dtype=float16",
+                    "--enforce-eager",
+                ],
+                "ports": [{"containerPort": 8080, "protocol": "TCP"}],
+                "resources": {"limits": {"nvidia.com/gpu": "1"}},
             }
+        },
     ) as runtime:
         yield runtime

+
 @pytest.fixture(scope="class")
 def qwen_gpu_isvc(
     admin_client: DynamicClient,
@@ -302,21 +297,22 @@ def qwen_gpu_isvc(
         storage_uri="oci://quay.io/trustyai_testing/models/qwen2.5-3b-instruct@sha256:6f9d9843599a9959de23c76d6b5adb556505482a7e732b2fcbca695a9c4ce545",
         enable_auth=False,
         wait_for_predictor_pods=True,
-        resources={
-            "requests": {
-                "cpu": "2",
-                "memory": "8Gi",
-                "nvidia.com/gpu": "1",
-            },
-            "limits": {
-                "cpu": "4",
-                "memory": "12Gi",
-                "nvidia.com/gpu": "1",
-            },
+        resources={
+            "requests": {
+                "cpu": "2",
+                "memory": "8Gi",
+                "nvidia.com/gpu": "1",
             },
+            "limits": {
+                "cpu": "4",
+                "memory": "12Gi",
+                "nvidia.com/gpu": "1",
+            },
+        },
     ) as isvc:
         yield isvc

+
 def get_vllm_chat_config(namespace: str) -> dict[str, Any]:
     return {
         "service": {

tests/model_explainability/guardrails/test_guardrails_gpu.py

Lines changed: 5 additions & 4 deletions
@@ -8,9 +8,10 @@
     HARMLESS_PROMPT,
     PII_ENDPOINT,
     PII_INPUT_DETECTION_PROMPT,
+    PII_OUTPUT_DETECTION_PROMPT_QWEN,
     PROMPT_INJECTION_DETECTOR,
     PROMPT_INJECTION_INPUT_DETECTION_PROMPT,
-    STANDALONE_DETECTION_ENDPOINT, PII_OUTPUT_DETECTION_PROMPT_QWEN,
+    STANDALONE_DETECTION_ENDPOINT,
 )
 from tests.model_explainability.guardrails.utils import (
     create_detector_config,
@@ -20,12 +21,12 @@
     send_and_verify_unsuitable_output_detection,
     verify_health_info_response,
 )
-
 from utilities.constants import (
-    VLLMGPUConfig,
+    VLLMGPUConfig,
 )
 from utilities.plugins.constant import OpenAIEnpoints

+
 @pytest.mark.parametrize(
     "model_namespace, orchestrator_config_builtin_gpu, guardrails_gateway_config, guardrails_orchestrator",
     [
@@ -160,6 +161,7 @@ def test_guardrails_builtin_detectors_negative_detection(
             model=VLLMGPUConfig.model_name,
         )

+
 @pytest.mark.gpu
 @pytest.mark.rawdeployment
 @pytest.mark.usefixtures("patched_dsc_kserve_headed", "guardrails_gateway_config")
@@ -201,7 +203,6 @@ def test_guardrails_builtin_detectors_negative_detection(
     ],
     indirect=True,
 )
-
 class TestGuardrailsOrchestratorHuggingFaceGPU:
     """
     These tests verify that the GuardrailsOrchestrator works as expected when using HuggingFace detectors

utilities/constants.py

Lines changed: 5 additions & 4 deletions
@@ -496,6 +496,8 @@ class LLMdInferenceSimConfig:
 LLM_D_CHAT_GENERATION_CONFIG: dict[str, Any] = {
     "service": {"hostname": f"{LLMdInferenceSimConfig.isvc_name}-predictor", "port": 80}
 }
+
+
 class VLLMGPUConfig:
     name: str = "vllm-gpu"
     port: int = 80
@@ -507,12 +509,11 @@ class VLLMGPUConfig:
     def get_hostname(cls, namespace: str) -> str:
         return f"{cls.isvc_name}-predictor.{namespace}.svc.cluster.local"

+
 VLLM_CHAT_GENERATION_CONFIG: dict[str, Any] = {
-    "service": {
-        "hostname": VLLMGPUConfig.get_hostname("test-guardrails-huggingface"),
-        "port": VLLMGPUConfig.port
-    }
+    "service": {"hostname": VLLMGPUConfig.get_hostname("test-guardrails-huggingface"), "port": VLLMGPUConfig.port}
 }

+
 class PodNotFound(Exception):
     """Pod not found"""
