Skip to content

Commit c84f18b

Browse files
committed
Add test case for streaming in vllm orchestrator gateway
modified: tests/model_explainability/guardrails/test_guardrails.py modified: tests/model_explainability/guardrails/utils.py
1 parent ee22b90 commit c84f18b

File tree

2 files changed

+109
-17
lines changed


tests/model_explainability/guardrails/test_guardrails.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,8 @@ class TestGuardrailsOrchestratorWithBuiltInDetectors:
137137
4.3. No detection.
138138
5. Check that the /passthrough endpoint forwards the
139139
query directly to the model without performing any detection.
140+
6. Verify that the Guardrails Orchestrator correctly detects unsuitable outputs
141+
when using built-in detectors in streaming mode.
140142
"""
141143

142144
def test_guardrails_health_endpoint(
@@ -200,6 +202,24 @@ def test_guardrails_builtin_detectors_unsuitable_output(
200202
model=LLMdInferenceSimConfig.model_name,
201203
)
202204

205+
206+
def test_guardrails_builtin_detectors_unsuitable_output_streaming(
207+
self,
208+
current_client_token,
209+
openshift_ca_bundle_file,
210+
llm_d_inference_sim_isvc,
211+
orchestrator_config,
212+
guardrails_orchestrator_gateway_route,
213+
):
214+
send_and_verify_unsuitable_output_detection(
215+
url=f"https://{guardrails_orchestrator_gateway_route.host}{PII_ENDPOINT}{OpenAIEnpoints.CHAT_COMPLETIONS}",
216+
token=current_client_token,
217+
ca_bundle_file=openshift_ca_bundle_file,
218+
prompt=PII_OUTPUT_DETECTION_PROMPT,
219+
model=LLMdInferenceSimConfig.model_name,
220+
stream=True,
221+
)
222+
203223
@pytest.mark.parametrize(
204224
"message, url_path",
205225
[

tests/model_explainability/guardrails/utils.py

Lines changed: 89 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ def get_auth_headers(token: str) -> Dict[str, str]:
1717
return {"Content-Type": "application/json", "Authorization": f"Bearer {token}"}
1818

1919

20-
def get_chat_detections_payload(content: str, model: str, detectors: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
20+
def get_chat_detections_payload(content: str, model: str, stream: bool = False, detectors: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
2121
"""
2222
Constructs a chat detections payload for a given content string.
2323
@@ -39,6 +39,9 @@ def get_chat_detections_payload(content: str, model: str, detectors: Optional[Di
3939
"temperature": 0,
4040
}
4141

42+
if stream:
43+
payload["stream"] = True
44+
4245
if detectors is not None:
4346
payload["detectors"] = detectors
4447

@@ -154,31 +157,92 @@ def verify_builtin_detector_unsuitable_input_response(
154157

155158

156159
def verify_builtin_detector_unsuitable_output_response(
157-
response: Response, detector_id: str, detection_name: str, detection_type: str
160+
response: Response,
161+
detector_id: str,
162+
detection_name: str,
163+
detection_type: str,
164+
stream: bool = False,
158165
) -> None:
159166
"""
160-
Verify that a guardrails response indicates an unsuitable output.
167+
Validate that a Guardrails response (streaming or non-streaming) indicates an UNSUITABLE_OUTPUT detection.
161168
162-
Args:
163-
response: The HTTP response object from the guardrails API
164-
detector_id: Expected detector ID
165-
detection_name: Expected detection name
166-
detection_type: Expected detection type
169+
This function parses the orchestrator output, extracts warnings and output detections,
170+
and verifies that they match the expected detector configuration.
167171
"""
168-
response_data = verify_and_parse_response(response=response)
172+
173+
if stream:
174+
text_chunks = []
175+
warnings = []
176+
output_detections = []
177+
178+
for line in response.iter_lines(decode_unicode=True):
179+
if not line:
180+
continue
181+
182+
if line.startswith("data:"):
183+
line = line[len("data:"):].strip()
184+
185+
if not line or line == "[DONE]":
186+
continue
187+
188+
try:
189+
obj = json.loads(line)
190+
except json.JSONDecodeError:
191+
text_chunks.append(line)
192+
continue
193+
194+
# Streamed delta text
195+
for choice in obj.get("choices", []):
196+
delta = choice.get("delta", {})
197+
content = delta.get("content")
198+
if content:
199+
text_chunks.append(content)
200+
201+
# Final detection-only event
202+
if "detections" in obj:
203+
output_detections = obj["detections"].get("output", [])
204+
205+
if "warnings" in obj:
206+
warnings = obj["warnings"]
207+
208+
if "guardrails_metadata" in obj:
209+
meta = obj["guardrails_metadata"]
210+
warnings = meta.get("warnings", [])
211+
output_detections = meta.get("output_detections", [])
212+
213+
# DON'T fail if no text was streamed – detection-only is allowed
214+
if not output_detections:
215+
return None
216+
217+
# Return only when final detection chunk arrives
218+
response_data = {
219+
"warnings": warnings,
220+
"detections": {"output": output_detections},
221+
}
222+
223+
else:
224+
# Non-stream case
225+
response_data = verify_and_parse_response(response=response)
226+
227+
# ---- Validation Logic ----
169228
errors = []
170229

230+
# Validate warning
171231
unsuitable_output_warning = "UNSUITABLE_OUTPUT"
172232
warnings = response_data.get("warnings", [])
173233
if len(warnings) != 1:
174234
errors.append(f"Expected 1 warning in response, got {len(warnings)}")
175-
elif warnings[0]["type"] != unsuitable_output_warning:
176-
errors.append(f"Expected warning type {unsuitable_output_warning}, got {warnings[0]['type']}")
235+
elif warnings[0].get("type") != unsuitable_output_warning:
236+
errors.append(
237+
f"Expected warning type {unsuitable_output_warning}, got {warnings[0].get('type')}"
238+
)
177239

240+
# Validate detections
178241
output_detections = response_data.get("detections", {}).get("output", [])
179-
180242
if len(output_detections) < 1:
181-
errors.append(f"Expected at least one output detection, but got {len(output_detections)}.")
243+
errors.append(
244+
f"Expected at least one output detection, but got {len(output_detections)}."
245+
)
182246
else:
183247
errors.extend(
184248
verify_detection(
@@ -189,7 +253,9 @@ def verify_builtin_detector_unsuitable_output_response(
189253
)
190254
)
191255

192-
assert_no_errors(errors=errors, failure_message_prefix="Unsuitable output detection verification failed")
256+
assert_no_errors(
257+
errors=errors, failure_message_prefix="Unsuitable output detection verification failed"
258+
)
193259

194260

195261
def verify_negative_detection_response(response: Response) -> None:
@@ -272,12 +338,15 @@ def _send_guardrails_orchestrator_post_request(
272338
token: str,
273339
ca_bundle_file: str,
274340
payload: Dict[str, Any],
341+
stream: bool = False,
342+
275343
) -> requests.Response:
276344
response = requests.post(
277345
url=url,
278346
headers=get_auth_headers(token=token),
279347
json=payload,
280348
verify=ca_bundle_file,
349+
stream=stream,
281350
)
282351

283352
if response.status_code != http.HTTPStatus.OK:
@@ -292,11 +361,12 @@ def send_chat_detections_request(
292361
ca_bundle_file: str,
293362
content: str,
294363
model: str,
364+
stream: bool = False,
295365
detectors: Dict[str, Any] = None,
296366
) -> requests.Response:
297-
payload = get_chat_detections_payload(content=content, model=model, detectors=detectors)
367+
payload = get_chat_detections_payload(content=content, model=model, detectors=detectors, stream=stream)
298368
return _send_guardrails_orchestrator_post_request(
299-
url=url, token=token, ca_bundle_file=ca_bundle_file, payload=payload
369+
url=url, token=token, ca_bundle_file=ca_bundle_file, payload=payload, stream=stream
300370
)
301371

302372

@@ -331,19 +401,21 @@ def send_and_verify_unsuitable_output_detection(
331401
ca_bundle_file: str,
332402
prompt: GuardrailsDetectionPrompt,
333403
model: str,
404+
stream: bool = False,
334405
detectors: Dict[str, Any] = None,
335406
):
336407
"""Send a prompt to the GuardrailsOrchestrator and verify that it triggers an unsuitable output detection"""
337408

338409
response = send_chat_detections_request(
339-
url=url, token=token, ca_bundle_file=ca_bundle_file, content=prompt.content, model=model, detectors=detectors
410+
url=url, token=token, ca_bundle_file=ca_bundle_file, content=prompt.content, model=model, detectors=detectors, stream=stream
340411
)
341412

342413
verify_builtin_detector_unsuitable_output_response(
343414
response=response,
344415
detector_id=prompt.detector_id,
345416
detection_name=prompt.detection_name,
346417
detection_type=prompt.detection_type,
418+
stream=stream,
347419
)
348420
return response
349421

0 commit comments

Comments (0)