fix: use headed KServe services in guardrails tests (#874)

adolfo-ab · web-flow · commit 012e35330ce5 · 2025-11-20T16:00:39.000Z
diff --git a/tests/fixtures/inference.py b/tests/fixtures/inference.py
@@ -2,12 +2,16 @@
 
 import pytest
 from kubernetes.dynamic import DynamicClient
+from ocp_resources.data_science_cluster import DataScienceCluster
+from ocp_resources.deployment import Deployment
 from ocp_resources.inference_service import InferenceService
 from ocp_resources.namespace import Namespace
 from ocp_resources.pod import Pod
+from ocp_resources.resource import ResourceEditor
 from ocp_resources.secret import Secret
 from ocp_resources.service import Service
 from ocp_resources.serving_runtime import ServingRuntime
+from pytest_testconfig import py_config
 
 from utilities.constants import (
     RuntimeTemplates,
@@ -16,6 +20,7 @@
     LLMdInferenceSimConfig,
 )
 from utilities.inference_utils import create_isvc
+from utilities.infra import get_data_science_cluster
 from utilities.serving_runtime import ServingRuntimeFromTemplate
 
 
@@ -177,3 +182,24 @@ def llm_d_inference_sim_isvc(
         },
     ) as isvc:
         yield isvc
+
+
+@pytest.fixture(scope="class")
+def kserve_controller_manager_deployment(admin_client: DynamicClient) -> Generator[Deployment, Any, Any]:
+    """Yield the "kserve-controller-manager" Deployment handle from the configured applications namespace."""
+    applications_namespace = py_config["applications_namespace"]
+    controller_manager = Deployment(
+        client=admin_client, name="kserve-controller-manager", namespace=applications_namespace, ensure_exists=True
+    )
+    yield controller_manager
+
+
+@pytest.fixture(scope="class")
+def patched_dsc_kserve_headed(
+    admin_client, kserve_controller_manager_deployment: Deployment
+) -> Generator[DataScienceCluster, None, None]:
+    """
+    Run the requesting test class with KServe Services in Headed mode.
+
+    Headed mode means the Service port is used instead of the Pod port. The DataScienceCluster is
+    patched inside a ResourceEditor context that stays open until the fixture is torn down.
+
+    Yields:
+        DataScienceCluster: The cluster resource carrying the Headed patch.
+    """
+    headed_kserve_patch = {"spec": {"components": {"kserve": {"rawDeploymentServiceConfig": "Headed"}}}}
+    data_science_cluster = get_data_science_cluster(client=admin_client)
+    with ResourceEditor(patches={data_science_cluster: headed_kserve_patch}):
+        # Wait on the controller manager Deployment's replicas before any test in the class runs.
+        kserve_controller_manager_deployment.wait_for_replicas()
+        yield data_science_cluster
diff --git a/tests/llama_stack/safety/test_trustyai_fms_provider.py b/tests/llama_stack/safety/test_trustyai_fms_provider.py
@@ -38,7 +38,7 @@
     indirect=True,
 )
 @pytest.mark.rawdeployment
-@pytest.mark.usefixtures("orchestrator_config", "guardrails_orchestrator")
+@pytest.mark.usefixtures("patched_dsc_kserve_headed", "orchestrator_config", "guardrails_orchestrator")
 @pytest.mark.model_explainability
 class TestLlamaStackFMSGuardrailsProvider:
     """
diff --git a/tests/model_explainability/guardrails/test_guardrails.py b/tests/model_explainability/guardrails/test_guardrails.py
@@ -124,7 +124,7 @@ def test_validate_guardrails_orchestrator_images(
 )
 @pytest.mark.smoke
 @pytest.mark.rawdeployment
-@pytest.mark.usefixtures("guardrails_gateway_config")
+@pytest.mark.usefixtures("patched_dsc_kserve_headed", "guardrails_gateway_config")
 class TestGuardrailsOrchestratorWithBuiltInDetectors:
     """
     Tests if basic functions of the GuardrailsOrchestrator are working properly with the built-in (regex) detectors.
@@ -297,6 +297,7 @@ def test_guardrails_builtin_detectors_negative_detection(
 )
 @pytest.mark.rawdeployment
 @pytest.mark.usefixtures(
+    "patched_dsc_kserve_headed",
     "guardrails_gateway_config",
     "minio_pvc_otel",
     "minio_deployment_otel",
@@ -428,6 +429,7 @@ def check_traces():
     ],
     indirect=True,
 )
+@pytest.mark.usefixtures("patched_dsc_kserve_headed")
 @pytest.mark.rawdeployment
 class TestGuardrailsOrchestratorAutoConfig:
     """
@@ -515,6 +517,7 @@ def test_guardrails_autoconfig_negative_detection(
     ],
     indirect=True,
 )
+@pytest.mark.usefixtures("patched_dsc_kserve_headed")
 @pytest.mark.rawdeployment
 class TestGuardrailsOrchestratorAutoConfigWithGateway:
     """
diff --git a/tests/model_explainability/guardrails/utils.py b/tests/model_explainability/guardrails/utils.py
@@ -239,7 +239,7 @@ def create_detector_config(*detector_names: str) -> Dict[str, Dict[str, Any]]:
     }
 
 
-@retry(exceptions_dict={TimeoutError: []}, wait_timeout=10, sleep=1)
+@retry(exceptions_dict={TimeoutError: []}, wait_timeout=120, sleep=10)
 def check_guardrails_health_endpoint(
     host,
     token,
@@ -300,7 +300,7 @@ def send_chat_detections_request(
     )
 
 
-@retry(exceptions_dict={TimeoutError: []}, wait_timeout=10, sleep=1)
+@retry(exceptions_dict={TimeoutError: []}, wait_timeout=120, sleep=1)
 def send_and_verify_unsuitable_input_detection(
     url: str,
     token: str,
@@ -324,7 +324,7 @@ def send_and_verify_unsuitable_input_detection(
     return response
 
 
-@retry(exceptions_dict={TimeoutError: []}, wait_timeout=10, sleep=1)
+@retry(exceptions_dict={TimeoutError: []}, wait_timeout=120, sleep=1)
 def send_and_verify_unsuitable_output_detection(
     url: str,
     token: str,
diff --git a/tests/model_explainability/lm_eval/test_lm_eval.py b/tests/model_explainability/lm_eval/test_lm_eval.py
@@ -114,6 +114,7 @@ def test_lmeval_local_offline_unitxt_tasks_flan_20newsgroups(
     ],
     indirect=True,
 )
+@pytest.mark.usefixtures("patched_dsc_kserve_headed")
 def test_lmeval_vllm_emulator(admin_client, model_namespace, lmevaljob_vllm_emulator_pod):
     """Basic test that verifies LMEval works with vLLM using a vLLM emulator for more efficient evaluation"""
     validate_lmeval_job_pod_and_logs(lmevaljob_pod=lmevaljob_vllm_emulator_pod)
diff --git a/utilities/constants.py b/utilities/constants.py
@@ -460,7 +460,7 @@ class OpenVINO:
 CHAT_GENERATION_CONFIG: Dict[str, Any] = {
     "service": {
         "hostname": f"{QWEN_MODEL_NAME}-predictor",
-        "port": 8032,
+        "port": 80,
         "request_timeout": 600,
     }
 }

Original file line number	Diff line number	Diff line change
`@@ -38,7 +38,7 @@`
`38`	`38`	`indirect=True,`
`39`	`39`	`)`
`40`	`40`	`@pytest.mark.rawdeployment`
`41`		`-@pytest.mark.usefixtures("orchestrator_config", "guardrails_orchestrator")`
	`41`	`+@pytest.mark.usefixtures("patched_dsc_kserve_headed", "orchestrator_config", "guardrails_orchestrator")`
`42`	`42`	`@pytest.mark.model_explainability`
`43`	`43`	`class TestLlamaStackFMSGuardrailsProvider:`
`44`	`44`	`"""`
Original file line number	Diff line number	Diff line change
`@@ -114,6 +114,7 @@ def test_lmeval_local_offline_unitxt_tasks_flan_20newsgroups(`
`114`	`114`	`],`
`115`	`115`	`indirect=True,`
`116`	`116`	`)`
	`117`	`+@pytest.mark.usefixtures("patched_dsc_kserve_headed")`
`117`	`118`	`def test_lmeval_vllm_emulator(admin_client, model_namespace, lmevaljob_vllm_emulator_pod):`
`118`	`119`	`"""Basic test that verifies LMEval works with vLLM using a vLLM emulator for more efficient evaluation"""`
`119`	`120`	`validate_lmeval_job_pod_and_logs(lmevaljob_pod=lmevaljob_vllm_emulator_pod)`
Original file line number	Diff line number	Diff line change
`@@ -460,7 +460,7 @@ class OpenVINO:`
`460`	`460`	`CHAT_GENERATION_CONFIG: Dict[str, Any] = {`
`461`	`461`	`"service": {`
`462`	`462`	`"hostname": f"{QWEN_MODEL_NAME}-predictor",`
`463`		`- "port": 8032,`
	`463`	`+ "port": 80,`
`464`	`464`	`"request_timeout": 600,`
`465`	`465`	`}`
`466`	`466`	`}`