Skip to content

Commit 5762392

Browse files
dbasunagkpunwatk
authored and committed
Reduce PVC size to accommodate running on clusters with limited storage (#869)
1 parent cae77f6 commit 5762392

File tree

3 files changed

+100
-18
lines changed

3 files changed

+100
-18
lines changed

tests/model_explainability/guardrails/test_guardrails.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,8 @@ class TestGuardrailsOrchestratorWithBuiltInDetectors:
137137
4.3. No detection.
138138
5. Check that the /passthrough endpoint forwards the
139139
query directly to the model without performing any detection.
140+
6. Verify that the Guardrails Orchestrator correctly detects unsuitable outputs
141+
when using built-in detectors in streaming mode.
140142
"""
141143

142144
def test_guardrails_health_endpoint(
@@ -200,6 +202,23 @@ def test_guardrails_builtin_detectors_unsuitable_output(
200202
model=LLMdInferenceSimConfig.model_name,
201203
)
202204

205+
def test_guardrails_builtin_detectors_unsuitable_output_streaming(
206+
self,
207+
current_client_token,
208+
openshift_ca_bundle_file,
209+
llm_d_inference_sim_isvc,
210+
orchestrator_config,
211+
guardrails_orchestrator_gateway_route,
212+
):
213+
send_and_verify_unsuitable_output_detection(
214+
url=f"https://{guardrails_orchestrator_gateway_route.host}{PII_ENDPOINT}{OpenAIEnpoints.CHAT_COMPLETIONS}",
215+
token=current_client_token,
216+
ca_bundle_file=openshift_ca_bundle_file,
217+
prompt=PII_OUTPUT_DETECTION_PROMPT,
218+
model=LLMdInferenceSimConfig.model_name,
219+
stream=True, # enable streaming
220+
)
221+
203222
@pytest.mark.parametrize(
204223
"message, url_path",
205224
[

tests/model_explainability/guardrails/utils.py

Lines changed: 80 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@
1616
def get_auth_headers(token: str) -> Dict[str, str]:
1717
return {"Content-Type": "application/json", "Authorization": f"Bearer {token}"}
1818

19-
20-
def get_chat_detections_payload(content: str, model: str, detectors: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
19+
def get_chat_detections_payload(content: str, model: str, stream: bool = False,
20+
detectors: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
2121
"""
2222
Constructs a chat detections payload for a given content string.
2323
@@ -39,6 +39,9 @@ def get_chat_detections_payload(content: str, model: str, detectors: Optional[Di
3939
"temperature": 0,
4040
}
4141

42+
if stream:
43+
payload["stream"] = True
44+
4245
if detectors is not None:
4346
payload["detectors"] = detectors
4447

@@ -154,31 +157,82 @@ def verify_builtin_detector_unsuitable_input_response(
154157

155158

156159
def verify_builtin_detector_unsuitable_output_response(
157-
response: Response, detector_id: str, detection_name: str, detection_type: str
160+
response: Response,
161+
detector_id: str,
162+
detection_name: str,
163+
detection_type: str,
164+
stream: bool = False,
158165
) -> None:
159166
"""
160167
Verify that a guardrails response indicates an unsuitable output.
161-
162-
Args:
163-
response: The HTTP response object from the guardrails API
164-
detector_id: Expected detector ID
165-
detection_name: Expected detection name
166-
detection_type: Expected detection type
168+
Supports both streaming (SSE) and non-streaming responses.
167169
"""
168-
response_data = verify_and_parse_response(response=response)
170+
171+
response_data = None
172+
173+
if stream:
174+
text_chunks = []
175+
metadata_found = False
176+
177+
for line in response.iter_lines(decode_unicode=True):
178+
if not line:
179+
continue
180+
181+
if line.startswith("data:"):
182+
data = line[len("data:"):].strip()
183+
if data == "[DONE]":
184+
break
185+
try:
186+
obj = json.loads(data)
187+
if "guardrails_metadata" in obj:
188+
meta = obj["guardrails_metadata"]
189+
warnings = meta.get("warnings", [])
190+
output_detections = meta.get("output_detections", [])
191+
192+
response_data = {
193+
"warnings": warnings,
194+
"detections": {"output": output_detections},
195+
}
196+
metadata_found = True
197+
break
198+
except json.JSONDecodeError:
199+
# treat as plain text
200+
text_chunks.append(data)
201+
202+
if not metadata_found:
203+
final_text = "".join(text_chunks).strip()
204+
if not final_text:
205+
raise AssertionError("No streamed text received")
206+
response_data = {
207+
"warnings": [],
208+
"detections": {"output": []},
209+
}
210+
211+
else:
212+
# --- NON-STREAMING MODE ---
213+
response_data = verify_and_parse_response(response=response)
214+
169215
errors = []
170216

217+
# Validate warnings
171218
unsuitable_output_warning = "UNSUITABLE_OUTPUT"
172219
warnings = response_data.get("warnings", [])
220+
173221
if len(warnings) != 1:
174222
errors.append(f"Expected 1 warning in response, got {len(warnings)}")
175-
elif warnings[0]["type"] != unsuitable_output_warning:
176-
errors.append(f"Expected warning type {unsuitable_output_warning}, got {warnings[0]['type']}")
223+
elif warnings and warnings[0].get("type") != unsuitable_output_warning:
224+
errors.append(
225+
f"Expected warning type {unsuitable_output_warning}, "
226+
f"got {warnings[0].get('type')}"
227+
)
177228

229+
# Validate detections
178230
output_detections = response_data.get("detections", {}).get("output", [])
179231

180232
if len(output_detections) < 1:
181-
errors.append(f"Expected at least one output detection, but got {len(output_detections)}.")
233+
errors.append(
234+
f"Expected at least one output detection, but got {len(output_detections)}."
235+
)
182236
else:
183237
errors.extend(
184238
verify_detection(
@@ -189,7 +243,11 @@ def verify_builtin_detector_unsuitable_output_response(
189243
)
190244
)
191245

192-
assert_no_errors(errors=errors, failure_message_prefix="Unsuitable output detection verification failed")
246+
# Final assertion
247+
assert_no_errors(
248+
errors=errors,
249+
failure_message_prefix="Unsuitable output detection verification failed"
250+
)
193251

194252

195253
def verify_negative_detection_response(response: Response) -> None:
@@ -272,12 +330,14 @@ def _send_guardrails_orchestrator_post_request(
272330
token: str,
273331
ca_bundle_file: str,
274332
payload: Dict[str, Any],
333+
stream: bool = False,
275334
) -> requests.Response:
276335
response = requests.post(
277336
url=url,
278337
headers=get_auth_headers(token=token),
279338
json=payload,
280339
verify=ca_bundle_file,
340+
stream=stream,
281341
)
282342

283343
if response.status_code != http.HTTPStatus.OK:
@@ -293,10 +353,11 @@ def send_chat_detections_request(
293353
content: str,
294354
model: str,
295355
detectors: Dict[str, Any] = None,
356+
stream: bool = False,
296357
) -> requests.Response:
297-
payload = get_chat_detections_payload(content=content, model=model, detectors=detectors)
358+
payload = get_chat_detections_payload(content=content, model=model, detectors=detectors, stream=stream)
298359
return _send_guardrails_orchestrator_post_request(
299-
url=url, token=token, ca_bundle_file=ca_bundle_file, payload=payload
360+
url=url, token=token, ca_bundle_file=ca_bundle_file, payload=payload, stream=stream
300361
)
301362

302363

@@ -332,18 +393,20 @@ def send_and_verify_unsuitable_output_detection(
332393
prompt: GuardrailsDetectionPrompt,
333394
model: str,
334395
detectors: Dict[str, Any] = None,
396+
stream: bool = False,
335397
):
336398
"""Send a prompt to the GuardrailsOrchestrator and verify that it triggers an unsuitable output detection"""
337399

338400
response = send_chat_detections_request(
339-
url=url, token=token, ca_bundle_file=ca_bundle_file, content=prompt.content, model=model, detectors=detectors
401+
url=url, token=token, ca_bundle_file=ca_bundle_file, content=prompt.content, model=model, detectors=detectors, stream=stream
340402
)
341403

342404
verify_builtin_detector_unsuitable_output_response(
343405
response=response,
344406
detector_id=prompt.detector_id,
345407
detection_name=prompt.detection_name,
346408
detection_type=prompt.detection_type,
409+
stream=stream,
347410
)
348411
return response
349412

tests/model_registry/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -456,7 +456,7 @@ def get_mr_pvc_objects(
456456
name=name,
457457
namespace=namespace,
458458
client=client,
459-
size="5Gi",
459+
size="3Gi",
460460
label=get_model_registry_db_label_dict(db_resource_name=name),
461461
teardown=teardown_resources,
462462
)

0 commit comments

Comments
 (0)