Skip to content

Commit 0254d9c

Browse files
committed
feat: add guardrails healthcheck to each endpoint test (#1014)
* feat: add guardrails healthcheck to each endpoint test * feat: remove time.sleep call
1 parent 2834890 commit 0254d9c

File tree

6 files changed

+100
-76
lines changed

6 files changed

+100
-76
lines changed

tests/fixtures/guardrails.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from ocp_resources.route import Route
1313

1414
from utilities.constants import Labels, Annotations
15+
from utilities.guardrails import check_guardrails_health_endpoint
1516

1617
GUARDRAILS_ORCHESTRATOR_NAME: str = "guardrails-orchestrator"
1718

@@ -160,6 +161,17 @@ def guardrails_orchestrator_health_route(
160161
yield guardrails_orchestrator_health_route
161162

162163

164+
@pytest.fixture
def guardrails_healthcheck(
    current_client_token,
    openshift_ca_bundle_file,
    guardrails_orchestrator_health_route: Route,
) -> None:
    """Run the Guardrails Orchestrator health check before the requesting test.

    Calls ``check_guardrails_health_endpoint`` against the host of the health
    route, authenticating with the current client token and verifying TLS with
    the OpenShift CA bundle file. The fixture provides no value to the test;
    the result of the check is not inspected here.
    """
    health_host = guardrails_orchestrator_health_route.host
    check_guardrails_health_endpoint(
        host=health_host,
        ca_bundle_file=openshift_ca_bundle_file,
        token=current_client_token,
    )
173+
174+
163175
@pytest.fixture(scope="class")
164176
def guardrails_orchestrator_gateway_route(
165177
admin_client: DynamicClient,

tests/fixtures/inference.py

Lines changed: 34 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,17 +12,22 @@
1212
from ocp_resources.service import Service
1313
from ocp_resources.serving_runtime import ServingRuntime
1414
from pytest_testconfig import py_config
15+
from simple_logger.logger import get_logger
1516

1617
from utilities.constants import (
1718
RuntimeTemplates,
1819
KServeDeploymentType,
1920
QWEN_MODEL_NAME,
2021
LLMdInferenceSimConfig,
2122
)
23+
from timeout_sampler import retry
24+
2225
from utilities.inference_utils import create_isvc
23-
from utilities.infra import get_data_science_cluster
26+
from utilities.infra import get_data_science_cluster, wait_for_dsc_status_ready
2427
from utilities.serving_runtime import ServingRuntimeFromTemplate
2528

29+
LOGGER = get_logger(name=__name__)
30+
2631

2732
@pytest.fixture(scope="class")
2833
def vllm_cpu_runtime(
@@ -130,8 +135,8 @@ def llm_d_inference_sim_serving_runtime(
130135
containers=[
131136
{
132137
"name": "kserve-container",
133-
"image": "quay.io/trustyai_testing/llmd-inference-sim-dataset-builtin"
134-
"@sha256:dfaa32cf0878a2fb522133e34369412c90e8ffbfa18b690b92602cf7c019fbbe",
138+
"image": "quay.io/trustyai_testing/llm-d-inference-sim-dataset-builtin"
139+
"@sha256:79e525cfd57a0d72b7e71d5f1e2dd398eca9315cfbd061d9d3e535b1ae736239",
135140
"imagePullPolicy": "Always",
136141
"args": ["--model", LLMdInferenceSimConfig.model_name, "--port", str(LLMdInferenceSimConfig.port)],
137142
"ports": [{"containerPort": LLMdInferenceSimConfig.port, "protocol": "TCP"}],
@@ -165,6 +170,7 @@ def llm_d_inference_sim_isvc(
165170
admin_client: DynamicClient,
166171
model_namespace: Namespace,
167172
llm_d_inference_sim_serving_runtime: ServingRuntime,
173+
patched_dsc_kserve_headed: DataScienceCluster,
168174
) -> Generator[InferenceService, Any, Any]:
169175
with create_isvc(
170176
client=admin_client,
@@ -199,7 +205,30 @@ def patched_dsc_kserve_headed(
199205
admin_client, kserve_controller_manager_deployment: Deployment
200206
) -> Generator[DataScienceCluster, None, None]:
201207
"""Configure KServe Services to work in Headed mode i.e. using the Service port instead of the Pod port"""
202-
dsc = get_data_science_cluster(client=admin_client)
203-
with ResourceEditor(patches={dsc: {"spec": {"components": {"kserve": {"rawDeploymentServiceConfig": "Headed"}}}}}):
208+
209+
def _kserve_last_transition_time(dsc_resource: DataScienceCluster) -> str:
210+
return next(
211+
filter(lambda condition: condition["type"] == "KserveReady", dsc_resource.instance.status["conditions"])
212+
)["lastTransitionTime"]
213+
214+
@retry(wait_timeout=30, sleep=5)
def _wait_for_headed_entities_status_ready(kserve_last_transition_time: str, dsc_resource: DataScienceCluster):
    """Report whether the KserveReady condition has transitioned, then wait for readiness.

    Args:
        kserve_last_transition_time: ``lastTransitionTime`` of the KserveReady
            condition captured before the DSC was patched.
        dsc_resource: DataScienceCluster being observed.

    Returns:
        ``False`` while the current KserveReady ``lastTransitionTime`` still
        equals ``kserve_last_transition_time``; ``True`` once it differs and
        both the controller-manager replicas wait and the DSC ready wait have
        returned.

    NOTE(review): presumably the ``retry`` decorator re-invokes this function
    every 5s for up to 30s while it returns a falsy value - confirm against
    timeout_sampler.
    """
    current_transition_time = _kserve_last_transition_time(dsc_resource)
    transition_recorded = current_transition_time != kserve_last_transition_time
    if transition_recorded:
        # The condition changed after the patch: wait for the controller
        # deployment replicas and then for the DSC itself to report ready.
        kserve_controller_manager_deployment.wait_for_replicas()
        wait_for_dsc_status_ready(dsc_resource=dsc_resource)
    return transition_recorded
221+
222+
dsc = get_data_science_cluster(client=admin_client)
223+
if not dsc.instance.spec.components.kserve.rawDeploymentServiceConfig == "Headed":
224+
kserve_pre_transition_time = _kserve_last_transition_time(dsc_resource=dsc)
225+
with ResourceEditor(
226+
patches={dsc: {"spec": {"components": {"kserve": {"rawDeploymentServiceConfig": "Headed"}}}}}
227+
):
228+
_wait_for_headed_entities_status_ready(
229+
kserve_last_transition_time=kserve_pre_transition_time, dsc_resource=dsc
230+
)
231+
yield dsc
232+
else:
233+
LOGGER.info("DSC already configured for Headed mode")
205234
yield dsc

tests/model_explainability/guardrails/test_guardrails.py

Lines changed: 20 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
)
1414
from tests.model_explainability.guardrails.utils import (
1515
create_detector_config,
16-
check_guardrails_health_endpoint,
1716
verify_health_info_response,
1817
send_and_verify_unsuitable_input_detection,
1918
send_and_verify_unsuitable_output_detection,
@@ -137,28 +136,14 @@ class TestGuardrailsOrchestratorWithBuiltInDetectors:
137136
query directly to the model without performing any detection.
138137
"""
139138

140-
def test_guardrails_health_endpoint(
141-
self,
142-
current_client_token,
143-
openshift_ca_bundle_file,
144-
llm_d_inference_sim_isvc,
145-
orchestrator_config,
146-
guardrails_orchestrator_health_route,
147-
):
148-
response = check_guardrails_health_endpoint(
149-
host=guardrails_orchestrator_health_route.host,
150-
token=current_client_token,
151-
ca_bundle_file=openshift_ca_bundle_file,
152-
)
153-
assert "fms-guardrails-orchestr8" in response.text
154-
155139
def test_guardrails_info_endpoint(
156140
self,
157141
current_client_token,
158142
openshift_ca_bundle_file,
159143
llm_d_inference_sim_isvc,
160144
orchestrator_config,
161145
guardrails_orchestrator_health_route,
146+
guardrails_healthcheck,
162147
):
163148
verify_health_info_response(
164149
host=guardrails_orchestrator_health_route.host,
@@ -173,6 +158,7 @@ def test_guardrails_builtin_detectors_unsuitable_input(
173158
llm_d_inference_sim_isvc,
174159
orchestrator_config,
175160
guardrails_orchestrator_gateway_route,
161+
guardrails_healthcheck,
176162
):
177163
send_and_verify_unsuitable_input_detection(
178164
url=f"https://{guardrails_orchestrator_gateway_route.host}{PII_ENDPOINT}{OpenAIEnpoints.CHAT_COMPLETIONS}",
@@ -189,6 +175,7 @@ def test_guardrails_builtin_detectors_unsuitable_output(
189175
llm_d_inference_sim_isvc,
190176
orchestrator_config,
191177
guardrails_orchestrator_gateway_route,
178+
guardrails_healthcheck,
192179
):
193180
send_and_verify_unsuitable_output_detection(
194181
url=f"https://{guardrails_orchestrator_gateway_route.host}{PII_ENDPOINT}{OpenAIEnpoints.CHAT_COMPLETIONS}",
@@ -218,6 +205,7 @@ def test_guardrails_builtin_detectors_negative_detection(
218205
guardrails_orchestrator_gateway_route,
219206
message,
220207
url_path,
208+
guardrails_healthcheck,
221209
):
222210
send_and_verify_negative_detection(
223211
url=f"https://{guardrails_orchestrator_gateway_route.host}{url_path}{OpenAIEnpoints.CHAT_COMPLETIONS}",
@@ -331,6 +319,7 @@ def test_guardrails_multi_detector_unsuitable_input(
331319
guardrails_orchestrator,
332320
otel_collector,
333321
tempo_stack,
322+
guardrails_healthcheck,
334323
):
335324
for prompt in [PROMPT_INJECTION_INPUT_DETECTION_PROMPT, HAP_INPUT_DETECTION_PROMPT]:
336325
send_and_verify_unsuitable_input_detection(
@@ -353,6 +342,7 @@ def test_guardrails_multi_detector_negative_detection(
353342
openshift_ca_bundle_file,
354343
otel_collector,
355344
tempo_stack,
345+
guardrails_healthcheck,
356346
):
357347
send_and_verify_negative_detection(
358348
url=f"https://{guardrails_orchestrator_route.host}/{CHAT_COMPLETIONS_DETECTION_ENDPOINT}",
@@ -373,6 +363,7 @@ def test_guardrails_standalone_detector_endpoint(
373363
hap_detector_route,
374364
otel_collector,
375365
tempo_stack,
366+
guardrails_healthcheck,
376367
):
377368
send_and_verify_standalone_detection(
378369
url=f"https://{guardrails_orchestrator_route.host}/{STANDALONE_DETECTION_ENDPOINT}",
@@ -393,6 +384,7 @@ def test_guardrails_traces_in_tempo(
393384
otel_collector,
394385
tempo_stack,
395386
tempo_traces_service_portforward,
387+
guardrails_healthcheck,
396388
):
397389
"""
398390
Ensure that OpenTelemetry traces from Guardrails Orchestrator are collected in Tempo.
@@ -437,29 +429,13 @@ class TestGuardrailsOrchestratorAutoConfig:
437429
These tests verify that the GuardrailsOrchestrator works as expected when configured through the AutoConfig feature.
438430
"""
439431

440-
def test_guardrails_gateway_health_endpoint(
441-
self,
442-
current_client_token,
443-
llm_d_inference_sim_isvc,
444-
prompt_injection_detector_route,
445-
hap_detector_route,
446-
openshift_ca_bundle_file,
447-
guardrails_orchestrator,
448-
guardrails_orchestrator_health_route,
449-
):
450-
response = check_guardrails_health_endpoint(
451-
host=guardrails_orchestrator_health_route.host,
452-
token=current_client_token,
453-
ca_bundle_file=openshift_ca_bundle_file,
454-
)
455-
assert "fms-guardrails-orchestr8" in response.text
456-
457432
def test_guardrails_gateway_info_endpoint(
458433
self,
459434
current_client_token,
460435
openshift_ca_bundle_file,
461436
llm_d_inference_sim_isvc,
462437
guardrails_orchestrator_health_route,
438+
guardrails_healthcheck,
463439
):
464440
verify_health_info_response(
465441
host=guardrails_orchestrator_health_route.host,
@@ -473,6 +449,7 @@ def test_guardrails_autoconfig_unsuitable_input(
473449
openshift_ca_bundle_file,
474450
llm_d_inference_sim_isvc,
475451
guardrails_orchestrator_route,
452+
guardrails_healthcheck,
476453
):
477454
for prompt in [HAP_INPUT_DETECTION_PROMPT, PROMPT_INJECTION_INPUT_DETECTION_PROMPT]:
478455
send_and_verify_unsuitable_input_detection(
@@ -490,6 +467,7 @@ def test_guardrails_autoconfig_negative_detection(
490467
llm_d_inference_sim_isvc,
491468
guardrails_orchestrator_route,
492469
openshift_ca_bundle_file,
470+
guardrails_healthcheck,
493471
):
494472
send_and_verify_negative_detection(
495473
url=f"https://{guardrails_orchestrator_route.host}/{CHAT_COMPLETIONS_DETECTION_ENDPOINT}",
@@ -526,29 +504,15 @@ class TestGuardrailsOrchestratorAutoConfigWithGateway:
526504
through the AutoConfig feature to use the gateway route.
527505
"""
528506

529-
def test_guardrails_autoconfig_gateway_health_endpoint(
530-
self,
531-
current_client_token,
532-
llm_d_inference_sim_isvc,
533-
prompt_injection_detector_route,
534-
hap_detector_route,
535-
openshift_ca_bundle_file,
536-
guardrails_orchestrator,
537-
guardrails_orchestrator_health_route,
538-
):
539-
response = check_guardrails_health_endpoint(
540-
host=guardrails_orchestrator_health_route.host,
541-
token=current_client_token,
542-
ca_bundle_file=openshift_ca_bundle_file,
543-
)
544-
assert "fms-guardrails-orchestr8" in response.text
545-
546507
def test_guardrails_autoconfig_gateway_info_endpoint(
547508
self,
548509
current_client_token,
549510
openshift_ca_bundle_file,
550511
llm_d_inference_sim_isvc,
512+
hap_detector_isvc,
513+
prompt_injection_detector_isvc,
551514
guardrails_orchestrator_health_route,
515+
guardrails_healthcheck,
552516
):
553517
verify_health_info_response(
554518
host=guardrails_orchestrator_health_route.host,
@@ -561,7 +525,10 @@ def test_guardrails_autoconfig_gateway_unsuitable_input(
561525
current_client_token,
562526
openshift_ca_bundle_file,
563527
llm_d_inference_sim_isvc,
528+
prompt_injection_detector_isvc,
529+
hap_detector_isvc,
564530
guardrails_orchestrator_gateway_route,
531+
guardrails_healthcheck,
565532
):
566533
for prompt in [HAP_INPUT_DETECTION_PROMPT, PROMPT_INJECTION_INPUT_DETECTION_PROMPT]:
567534
send_and_verify_unsuitable_input_detection(
@@ -588,10 +555,13 @@ def test_guardrails_autoconfig_gateway_negative_detection(
588555
self,
589556
current_client_token,
590557
llm_d_inference_sim_isvc,
558+
prompt_injection_detector_isvc,
559+
hap_detector_isvc,
591560
guardrails_orchestrator_gateway_route,
592561
openshift_ca_bundle_file,
593562
url_path,
594563
message,
564+
guardrails_healthcheck,
595565
):
596566
send_and_verify_negative_detection(
597567
url=f"https://{guardrails_orchestrator_gateway_route.host}{url_path}{OpenAIEnpoints.CHAT_COMPLETIONS}",

tests/model_explainability/guardrails/utils.py

Lines changed: 11 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,13 @@
88

99
from timeout_sampler import retry
1010

11+
from utilities.exceptions import UnexpectedValueError
12+
from utilities.guardrails import get_auth_headers
1113
from tests.model_explainability.guardrails.constants import GuardrailsDetectionPrompt
1214

1315
LOGGER = get_logger(name=__name__)
1416

1517

16-
def get_auth_headers(token: str) -> Dict[str, str]:
17-
return {"Content-Type": "application/json", "Authorization": f"Bearer {token}"}
18-
19-
2018
def get_chat_detections_payload(content: str, model: str, detectors: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
2119
"""
2220
Constructs a chat detections payload for a given content string.
@@ -129,9 +127,15 @@ def verify_builtin_detector_unsuitable_input_response(
129127
response_data = verify_and_parse_response(response=response)
130128
errors = []
131129

132-
warnings = response_data.get("warnings", [])
130+
if not response_data:
131+
raise UnexpectedValueError("Expected non-empty response data but got an empty response.")
132+
133+
warnings = response_data.get("warnings")
133134
unsuitable_input_warning: str = "UNSUITABLE_INPUT"
134-
if len(warnings) != 1:
135+
136+
if warnings is None:
137+
raise UnexpectedValueError("Expected warnings in response, got None")
138+
elif len(warnings) != 1:
135139
errors.append(f"Expected 1 warning in response, got {len(warnings)}")
136140
elif warnings[0]["type"] != unsuitable_input_warning:
137141
errors.append(f"Expected warning type {unsuitable_input_warning}, got {warnings[0]['type']}")
@@ -239,20 +243,6 @@ def create_detector_config(*detector_names: str) -> Dict[str, Dict[str, Any]]:
239243
}
240244

241245

242-
@retry(exceptions_dict={TimeoutError: []}, wait_timeout=120, sleep=10)
243-
def check_guardrails_health_endpoint(
244-
host,
245-
token,
246-
ca_bundle_file,
247-
):
248-
response = requests.get(url=f"https://{host}/health", headers=get_auth_headers(token=token), verify=ca_bundle_file)
249-
if response.status_code == http.HTTPStatus.OK:
250-
return response
251-
raise TimeoutError(
252-
f"Timeout waiting GuardrailsOrchestrator to be healthy. Response status code: {response.status_code}"
253-
)
254-
255-
256246
def verify_health_info_response(host, token, ca_bundle_file):
257247
response = requests.get(url=f"https://{host}/info", headers=get_auth_headers(token=token), verify=ca_bundle_file)
258248
assert response.status_code == http.HTTPStatus.OK
@@ -300,7 +290,7 @@ def send_chat_detections_request(
300290
)
301291

302292

303-
@retry(exceptions_dict={TimeoutError: []}, wait_timeout=120, sleep=1)
293+
@retry(exceptions_dict={TimeoutError: []}, wait_timeout=120, sleep=4)
304294
def send_and_verify_unsuitable_input_detection(
305295
url: str,
306296
token: str,

utilities/exceptions.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,3 +136,7 @@ class MissingParameter(Exception):
136136

137137
class ExceptionUserLogin(Exception):
138138
pass
139+
140+
141+
class UnexpectedValueError(Exception):
    """Signal that a value differed from what the caller expected to find."""

utilities/guardrails.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
import requests
2+
import http
3+
from typing import Dict
4+
5+
from timeout_sampler import retry
6+
7+
8+
def get_auth_headers(token: str) -> Dict[str, str]:
    """Build the HTTP headers for a bearer-token authenticated JSON request.

    Args:
        token: Token placed after ``Bearer`` in the ``Authorization`` header.

    Returns:
        A dict with a JSON ``Content-Type`` entry and the ``Authorization`` entry.
    """
    headers: Dict[str, str] = {"Content-Type": "application/json"}
    headers["Authorization"] = f"Bearer {token}"
    return headers
10+
11+
12+
@retry(exceptions_dict={TimeoutError: []}, wait_timeout=10, sleep=2)
def check_guardrails_health_endpoint(
    host: str,
    token: str,
    ca_bundle_file: str,
) -> bool:
    """Query the Guardrails Orchestrator ``/health`` endpoint once.

    Args:
        host: Hostname of the health route; requested as ``https://<host>/health``.
        token: Bearer token used to build the request headers.
        ca_bundle_file: Path of the CA bundle passed to ``requests`` as ``verify``.

    Returns:
        ``True`` if the response status code is 200 (OK), otherwise ``False``.

    Raises:
        TimeoutError: If a single request does not complete within the
            per-request timeout.

    NOTE(review): the ``retry`` decorator is expected to keep calling this
    function (sleep=2, wait_timeout=10) while it returns a falsy value or
    raises ``TimeoutError`` - confirm against timeout_sampler.
    """
    # Bound every attempt: without a timeout, requests can block indefinitely on
    # a stalled connection, which would defeat the decorator's wait_timeout.
    request_timeout_seconds = 5
    try:
        response = requests.get(
            url=f"https://{host}/health",
            headers=get_auth_headers(token=token),
            verify=ca_bundle_file,
            timeout=request_timeout_seconds,
        )
    except requests.exceptions.Timeout as err:
        # Map to the builtin TimeoutError named in the decorator's exceptions_dict
        # so a stalled attempt is retried instead of aborting the health check.
        raise TimeoutError(
            f"Guardrails health request to {host} did not complete within {request_timeout_seconds}s"
        ) from err
    return response.status_code == http.HTTPStatus.OK

0 commit comments

Comments
 (0)