feat: add guardrails healthcheck to each endpoint test (#1014)

sheltoncyril · sheltoncyril · commit 0254d9ce9ec5 · 2026-01-15T09:00:03.000Z
* feat: add guardrails healthcheck to each endpoint test

* feat: remove time.sleep call
diff --git a/tests/fixtures/guardrails.py b/tests/fixtures/guardrails.py
@@ -12,6 +12,7 @@
 from ocp_resources.route import Route
 
 from utilities.constants import Labels, Annotations
+from utilities.guardrails import check_guardrails_health_endpoint
 
 GUARDRAILS_ORCHESTRATOR_NAME: str = "guardrails-orchestrator"
 
@@ -160,6 +161,22 @@ def guardrails_orchestrator_health_route(
         yield guardrails_orchestrator_health_route
 
 
+@pytest.fixture
+def guardrails_healthcheck(
+    current_client_token: str, openshift_ca_bundle_file: str, guardrails_orchestrator_health_route: Route
+) -> None:
+    """Block the requesting test until the GuardrailsOrchestrator ``/health`` endpoint reports healthy.
+
+    No ``scope`` is given, so pytest re-runs this check for every test that requests the fixture.
+    Errors raised by ``check_guardrails_health_endpoint`` propagate and fail the test's setup.
+    """
+    check_guardrails_health_endpoint(
+        token=current_client_token,
+        host=guardrails_orchestrator_health_route.host,
+        ca_bundle_file=openshift_ca_bundle_file,
+    )
+
+
 @pytest.fixture(scope="class")
 def guardrails_orchestrator_gateway_route(
     admin_client: DynamicClient,
diff --git a/tests/fixtures/inference.py b/tests/fixtures/inference.py
@@ -12,17 +12,22 @@
 from ocp_resources.service import Service
 from ocp_resources.serving_runtime import ServingRuntime
 from pytest_testconfig import py_config
+from simple_logger.logger import get_logger
 
 from utilities.constants import (
     RuntimeTemplates,
     KServeDeploymentType,
     QWEN_MODEL_NAME,
     LLMdInferenceSimConfig,
 )
+from timeout_sampler import retry
+
 from utilities.inference_utils import create_isvc
-from utilities.infra import get_data_science_cluster
+from utilities.infra import get_data_science_cluster, wait_for_dsc_status_ready
 from utilities.serving_runtime import ServingRuntimeFromTemplate
 
+LOGGER = get_logger(name=__name__)
+
 
 @pytest.fixture(scope="class")
 def vllm_cpu_runtime(
@@ -130,8 +135,8 @@ def llm_d_inference_sim_serving_runtime(
         containers=[
             {
                 "name": "kserve-container",
-                "image": "quay.io/trustyai_testing/llmd-inference-sim-dataset-builtin"
-                "@sha256:dfaa32cf0878a2fb522133e34369412c90e8ffbfa18b690b92602cf7c019fbbe",
+                "image": "quay.io/trustyai_testing/llm-d-inference-sim-dataset-builtin"
+                "@sha256:79e525cfd57a0d72b7e71d5f1e2dd398eca9315cfbd061d9d3e535b1ae736239",
                 "imagePullPolicy": "Always",
                 "args": ["--model", LLMdInferenceSimConfig.model_name, "--port", str(LLMdInferenceSimConfig.port)],
                 "ports": [{"containerPort": LLMdInferenceSimConfig.port, "protocol": "TCP"}],
@@ -165,6 +170,7 @@ def llm_d_inference_sim_isvc(
     admin_client: DynamicClient,
     model_namespace: Namespace,
     llm_d_inference_sim_serving_runtime: ServingRuntime,
+    patched_dsc_kserve_headed: DataScienceCluster,
 ) -> Generator[InferenceService, Any, Any]:
     with create_isvc(
         client=admin_client,
@@ -199,7 +205,37 @@ def patched_dsc_kserve_headed(
     admin_client, kserve_controller_manager_deployment: Deployment
 ) -> Generator[DataScienceCluster, None, None]:
     """Configure KServe Services to work in Headed mode i.e. using the Service port instead of the Pod port"""
-    dsc = get_data_science_cluster(client=admin_client)
-    with ResourceEditor(patches={dsc: {"spec": {"components": {"kserve": {"rawDeploymentServiceConfig": "Headed"}}}}}):
+
+    def _kserve_last_transition_time(dsc_resource: DataScienceCluster) -> str:
+        """Return the ``lastTransitionTime`` of the DSC's first ``KserveReady`` status condition."""
+        return next(
+            condition["lastTransitionTime"]
+            for condition in dsc_resource.instance.status["conditions"]
+            if condition["type"] == "KserveReady"
+        )
+
+    @retry(wait_timeout=30, sleep=5)
+    def _wait_for_headed_entities_status_ready(kserve_last_transition_time: str, dsc_resource: DataScienceCluster):
+        """Return True once KserveReady has transitioned and the controller replicas and DSC are ready."""
+        # Same timestamp as before the patch: the KserveReady condition has not transitioned yet.
+        # A falsy result makes @retry sample again (timeout_sampler semantics) until its 30s window ends.
+        if kserve_last_transition_time == _kserve_last_transition_time(dsc_resource):
+            return False
         kserve_controller_manager_deployment.wait_for_replicas()
+        wait_for_dsc_status_ready(dsc_resource=dsc_resource)
+        return True
+
+    dsc = get_data_science_cluster(client=admin_client)
+    # Patch only when the DSC is not already in Headed mode.
+    if dsc.instance.spec.components.kserve.rawDeploymentServiceConfig != "Headed":
+        kserve_pre_transition_time = _kserve_last_transition_time(dsc_resource=dsc)
+        with ResourceEditor(
+            patches={dsc: {"spec": {"components": {"kserve": {"rawDeploymentServiceConfig": "Headed"}}}}}
+        ):
+            _wait_for_headed_entities_status_ready(
+                kserve_last_transition_time=kserve_pre_transition_time, dsc_resource=dsc
+            )
+            yield dsc
+    else:
+        LOGGER.info("DSC already configured for Headed mode")
         yield dsc
diff --git a/tests/model_explainability/guardrails/test_guardrails.py b/tests/model_explainability/guardrails/test_guardrails.py
@@ -13,7 +13,6 @@
 )
 from tests.model_explainability.guardrails.utils import (
     create_detector_config,
-    check_guardrails_health_endpoint,
     verify_health_info_response,
     send_and_verify_unsuitable_input_detection,
     send_and_verify_unsuitable_output_detection,
@@ -137,28 +136,14 @@ class TestGuardrailsOrchestratorWithBuiltInDetectors:
          query directly to the model without performing any detection.
     """
 
-    def test_guardrails_health_endpoint(
-        self,
-        current_client_token,
-        openshift_ca_bundle_file,
-        llm_d_inference_sim_isvc,
-        orchestrator_config,
-        guardrails_orchestrator_health_route,
-    ):
-        response = check_guardrails_health_endpoint(
-            host=guardrails_orchestrator_health_route.host,
-            token=current_client_token,
-            ca_bundle_file=openshift_ca_bundle_file,
-        )
-        assert "fms-guardrails-orchestr8" in response.text
-
     def test_guardrails_info_endpoint(
         self,
         current_client_token,
         openshift_ca_bundle_file,
         llm_d_inference_sim_isvc,
         orchestrator_config,
         guardrails_orchestrator_health_route,
+        guardrails_healthcheck,
     ):
         verify_health_info_response(
             host=guardrails_orchestrator_health_route.host,
@@ -173,6 +158,7 @@ def test_guardrails_builtin_detectors_unsuitable_input(
         llm_d_inference_sim_isvc,
         orchestrator_config,
         guardrails_orchestrator_gateway_route,
+        guardrails_healthcheck,
     ):
         send_and_verify_unsuitable_input_detection(
             url=f"https://{guardrails_orchestrator_gateway_route.host}{PII_ENDPOINT}{OpenAIEnpoints.CHAT_COMPLETIONS}",
@@ -189,6 +175,7 @@ def test_guardrails_builtin_detectors_unsuitable_output(
         llm_d_inference_sim_isvc,
         orchestrator_config,
         guardrails_orchestrator_gateway_route,
+        guardrails_healthcheck,
     ):
         send_and_verify_unsuitable_output_detection(
             url=f"https://{guardrails_orchestrator_gateway_route.host}{PII_ENDPOINT}{OpenAIEnpoints.CHAT_COMPLETIONS}",
@@ -218,6 +205,7 @@ def test_guardrails_builtin_detectors_negative_detection(
         guardrails_orchestrator_gateway_route,
         message,
         url_path,
+        guardrails_healthcheck,
     ):
         send_and_verify_negative_detection(
             url=f"https://{guardrails_orchestrator_gateway_route.host}{url_path}{OpenAIEnpoints.CHAT_COMPLETIONS}",
@@ -331,6 +319,7 @@ def test_guardrails_multi_detector_unsuitable_input(
         guardrails_orchestrator,
         otel_collector,
         tempo_stack,
+        guardrails_healthcheck,
     ):
         for prompt in [PROMPT_INJECTION_INPUT_DETECTION_PROMPT, HAP_INPUT_DETECTION_PROMPT]:
             send_and_verify_unsuitable_input_detection(
@@ -353,6 +342,7 @@ def test_guardrails_multi_detector_negative_detection(
         openshift_ca_bundle_file,
         otel_collector,
         tempo_stack,
+        guardrails_healthcheck,
     ):
         send_and_verify_negative_detection(
             url=f"https://{guardrails_orchestrator_route.host}/{CHAT_COMPLETIONS_DETECTION_ENDPOINT}",
@@ -373,6 +363,7 @@ def test_guardrails_standalone_detector_endpoint(
         hap_detector_route,
         otel_collector,
         tempo_stack,
+        guardrails_healthcheck,
     ):
         send_and_verify_standalone_detection(
             url=f"https://{guardrails_orchestrator_route.host}/{STANDALONE_DETECTION_ENDPOINT}",
@@ -393,6 +384,7 @@ def test_guardrails_traces_in_tempo(
         otel_collector,
         tempo_stack,
         tempo_traces_service_portforward,
+        guardrails_healthcheck,
     ):
         """
         Ensure that OpenTelemetry traces from Guardrails Orchestrator are collected in Tempo.
@@ -437,29 +429,13 @@ class TestGuardrailsOrchestratorAutoConfig:
     These tests verify that the GuardrailsOrchestrator works as expected when configured through the AutoConfig feature.
     """
 
-    def test_guardrails_gateway_health_endpoint(
-        self,
-        current_client_token,
-        llm_d_inference_sim_isvc,
-        prompt_injection_detector_route,
-        hap_detector_route,
-        openshift_ca_bundle_file,
-        guardrails_orchestrator,
-        guardrails_orchestrator_health_route,
-    ):
-        response = check_guardrails_health_endpoint(
-            host=guardrails_orchestrator_health_route.host,
-            token=current_client_token,
-            ca_bundle_file=openshift_ca_bundle_file,
-        )
-        assert "fms-guardrails-orchestr8" in response.text
-
     def test_guardrails_gateway_info_endpoint(
         self,
         current_client_token,
         openshift_ca_bundle_file,
         llm_d_inference_sim_isvc,
         guardrails_orchestrator_health_route,
+        guardrails_healthcheck,
     ):
         verify_health_info_response(
             host=guardrails_orchestrator_health_route.host,
@@ -473,6 +449,7 @@ def test_guardrails_autoconfig_unsuitable_input(
         openshift_ca_bundle_file,
         llm_d_inference_sim_isvc,
         guardrails_orchestrator_route,
+        guardrails_healthcheck,
     ):
         for prompt in [HAP_INPUT_DETECTION_PROMPT, PROMPT_INJECTION_INPUT_DETECTION_PROMPT]:
             send_and_verify_unsuitable_input_detection(
@@ -490,6 +467,7 @@ def test_guardrails_autoconfig_negative_detection(
         llm_d_inference_sim_isvc,
         guardrails_orchestrator_route,
         openshift_ca_bundle_file,
+        guardrails_healthcheck,
     ):
         send_and_verify_negative_detection(
             url=f"https://{guardrails_orchestrator_route.host}/{CHAT_COMPLETIONS_DETECTION_ENDPOINT}",
@@ -526,29 +504,15 @@ class TestGuardrailsOrchestratorAutoConfigWithGateway:
     through the AutoConfig feature to use the gateway route.
     """
 
-    def test_guardrails_autoconfig_gateway_health_endpoint(
-        self,
-        current_client_token,
-        llm_d_inference_sim_isvc,
-        prompt_injection_detector_route,
-        hap_detector_route,
-        openshift_ca_bundle_file,
-        guardrails_orchestrator,
-        guardrails_orchestrator_health_route,
-    ):
-        response = check_guardrails_health_endpoint(
-            host=guardrails_orchestrator_health_route.host,
-            token=current_client_token,
-            ca_bundle_file=openshift_ca_bundle_file,
-        )
-        assert "fms-guardrails-orchestr8" in response.text
-
     def test_guardrails_autoconfig_gateway_info_endpoint(
         self,
         current_client_token,
         openshift_ca_bundle_file,
         llm_d_inference_sim_isvc,
+        hap_detector_isvc,
+        prompt_injection_detector_isvc,
         guardrails_orchestrator_health_route,
+        guardrails_healthcheck,
     ):
         verify_health_info_response(
             host=guardrails_orchestrator_health_route.host,
@@ -561,7 +525,10 @@ def test_guardrails_autoconfig_gateway_unsuitable_input(
         current_client_token,
         openshift_ca_bundle_file,
         llm_d_inference_sim_isvc,
+        prompt_injection_detector_isvc,
+        hap_detector_isvc,
         guardrails_orchestrator_gateway_route,
+        guardrails_healthcheck,
     ):
         for prompt in [HAP_INPUT_DETECTION_PROMPT, PROMPT_INJECTION_INPUT_DETECTION_PROMPT]:
             send_and_verify_unsuitable_input_detection(
@@ -588,10 +555,13 @@ def test_guardrails_autoconfig_gateway_negative_detection(
         self,
         current_client_token,
         llm_d_inference_sim_isvc,
+        prompt_injection_detector_isvc,
+        hap_detector_isvc,
         guardrails_orchestrator_gateway_route,
         openshift_ca_bundle_file,
         url_path,
         message,
+        guardrails_healthcheck,
     ):
         send_and_verify_negative_detection(
             url=f"https://{guardrails_orchestrator_gateway_route.host}{url_path}{OpenAIEnpoints.CHAT_COMPLETIONS}",
diff --git a/tests/model_explainability/guardrails/utils.py b/tests/model_explainability/guardrails/utils.py
@@ -8,15 +8,13 @@
 
 from timeout_sampler import retry
 
+from utilities.exceptions import UnexpectedValueError
+from utilities.guardrails import get_auth_headers
 from tests.model_explainability.guardrails.constants import GuardrailsDetectionPrompt
 
 LOGGER = get_logger(name=__name__)
 
 
-def get_auth_headers(token: str) -> Dict[str, str]:
-    return {"Content-Type": "application/json", "Authorization": f"Bearer {token}"}
-
-
 def get_chat_detections_payload(content: str, model: str, detectors: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
     """
     Constructs a chat detections payload for a given content string.
@@ -129,9 +127,15 @@ def verify_builtin_detector_unsuitable_input_response(
     response_data = verify_and_parse_response(response=response)
     errors = []
 
-    warnings = response_data.get("warnings", [])
+    if not response_data:
+        raise UnexpectedValueError("Expected non-empty response data but got an empty response.")
+
+    warnings = response_data.get("warnings")
     unsuitable_input_warning: str = "UNSUITABLE_INPUT"
-    if len(warnings) != 1:
+
+    if warnings is None:
+        raise UnexpectedValueError("Expected warnings in response, got None")
+    elif len(warnings) != 1:
         errors.append(f"Expected 1 warning in response, got {len(warnings)}")
     elif warnings[0]["type"] != unsuitable_input_warning:
         errors.append(f"Expected warning type {unsuitable_input_warning}, got {warnings[0]['type']}")
@@ -239,20 +243,6 @@ def create_detector_config(*detector_names: str) -> Dict[str, Dict[str, Any]]:
     }
 
 
-@retry(exceptions_dict={TimeoutError: []}, wait_timeout=120, sleep=10)
-def check_guardrails_health_endpoint(
-    host,
-    token,
-    ca_bundle_file,
-):
-    response = requests.get(url=f"https://{host}/health", headers=get_auth_headers(token=token), verify=ca_bundle_file)
-    if response.status_code == http.HTTPStatus.OK:
-        return response
-    raise TimeoutError(
-        f"Timeout waiting GuardrailsOrchestrator to be healthy. Response status code: {response.status_code}"
-    )
-
-
 def verify_health_info_response(host, token, ca_bundle_file):
     response = requests.get(url=f"https://{host}/info", headers=get_auth_headers(token=token), verify=ca_bundle_file)
     assert response.status_code == http.HTTPStatus.OK
@@ -300,7 +290,7 @@ def send_chat_detections_request(
     )
 
 
-@retry(exceptions_dict={TimeoutError: []}, wait_timeout=120, sleep=1)
+@retry(exceptions_dict={TimeoutError: []}, wait_timeout=120, sleep=4)
 def send_and_verify_unsuitable_input_detection(
     url: str,
     token: str,
diff --git a/utilities/exceptions.py b/utilities/exceptions.py
@@ -136,3 +136,7 @@ class MissingParameter(Exception):
 
 class ExceptionUserLogin(Exception):
     pass
+
+
+class UnexpectedValueError(Exception):
+    """Raised when a checked value is missing or is not what the caller expected."""
diff --git a/utilities/guardrails.py b/utilities/guardrails.py
@@ -0,0 +1,43 @@
+import requests
+import http
+from typing import Dict
+
+from timeout_sampler import retry
+
+
+def get_auth_headers(token: str) -> Dict[str, str]:
+    """Build the JSON content-type and Bearer authorization headers for ``token``."""
+    return {"Content-Type": "application/json", "Authorization": f"Bearer {token}"}
+
+
+@retry(exceptions_dict={TimeoutError: []}, wait_timeout=10, sleep=2)
+def check_guardrails_health_endpoint(
+    host: str,
+    token: str,
+    ca_bundle_file: str,
+) -> bool:
+    """Probe the GuardrailsOrchestrator ``/health`` endpoint once and report whether it returned HTTP 200.
+
+    The ``retry`` decorator re-invokes this function every 2 seconds while the result is falsy and gives up
+    after 10 seconds (timeout_sampler raises ``TimeoutExpiredError`` at that point).
+
+    Args:
+        host: Hostname of the orchestrator health route.
+        token: Bearer token placed in the Authorization header.
+        ca_bundle_file: Path to the CA bundle passed to ``requests`` as ``verify``.
+
+    Returns:
+        True when the response status is 200 OK; False otherwise, including when the request
+        could not connect or timed out.
+    """
+    try:
+        response = requests.get(
+            url=f"https://{host}/health",
+            headers=get_auth_headers(token=token),
+            verify=ca_bundle_file,
+            timeout=5,  # bound each probe so a hung connection cannot stall past the retry window
+        )
+    except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
+        # Route or pod not reachable yet: report "not healthy" so @retry keeps polling instead of aborting.
+        return False
+    return response.status_code == http.HTTPStatus.OK