Reduce PVC size to accomodate running on clusters with limited storage (#869)

dbasunag · kpunwatk · commit 576239293021 · 2025-11-20T13:48:08.000Z
diff --git a/tests/model_explainability/guardrails/test_guardrails.py b/tests/model_explainability/guardrails/test_guardrails.py
@@ -137,6 +137,8 @@ class TestGuardrailsOrchestratorWithBuiltInDetectors:
          4.3. No detection.
         5. Check that the /passthrough endpoint forwards the
          query directly to the model without performing any detection.
+        6. Verify that the Guardrails Orchestrator correctly detects unsuitable outputs
+        when using built-in detectors in streaming mode.
     """
 
     def test_guardrails_health_endpoint(
@@ -200,6 +202,23 @@ def test_guardrails_builtin_detectors_unsuitable_output(
             model=LLMdInferenceSimConfig.model_name,
         )
 
+    def test_guardrails_builtin_detectors_unsuitable_output_streaming(
+            self,
+            current_client_token,
+            openshift_ca_bundle_file,
+            llm_d_inference_sim_isvc,
+            orchestrator_config,
+            guardrails_orchestrator_gateway_route,
+    ):
+        send_and_verify_unsuitable_output_detection(
+            url=f"https://{guardrails_orchestrator_gateway_route.host}{PII_ENDPOINT}{OpenAIEnpoints.CHAT_COMPLETIONS}",
+            token=current_client_token,
+            ca_bundle_file=openshift_ca_bundle_file,
+            prompt=PII_OUTPUT_DETECTION_PROMPT,
+            model=LLMdInferenceSimConfig.model_name,
+            stream=True,  # enable streaming
+        )
+
     @pytest.mark.parametrize(
         "message, url_path",
         [
diff --git a/tests/model_explainability/guardrails/utils.py b/tests/model_explainability/guardrails/utils.py
@@ -16,8 +16,8 @@
 def get_auth_headers(token: str) -> Dict[str, str]:
     return {"Content-Type": "application/json", "Authorization": f"Bearer {token}"}
 
-
-def get_chat_detections_payload(content: str, model: str, detectors: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
+def get_chat_detections_payload(content: str, model: str, stream: bool = False,
+    detectors: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
     """
     Constructs a chat detections payload for a given content string.
 
@@ -39,6 +39,9 @@ def get_chat_detections_payload(content: str, model: str, detectors: Optional[Di
         "temperature": 0,
     }
 
+    if stream:
+        payload["stream"] = True
+
     if detectors is not None:
         payload["detectors"] = detectors
 
@@ -154,31 +157,82 @@ def verify_builtin_detector_unsuitable_input_response(
 
 
 def verify_builtin_detector_unsuitable_output_response(
-    response: Response, detector_id: str, detection_name: str, detection_type: str
+    response: Response,
+    detector_id: str,
+    detection_name: str,
+    detection_type: str,
+    stream: bool = False,
 ) -> None:
     """
     Verify that a guardrails response indicates an unsuitable output.
-
-    Args:
-        response: The HTTP response object from the guardrails API
-        detector_id: Expected detector ID
-        detection_name: Expected detection name
-        detection_type: Expected detection type
+    Supports both streaming (SSE) and non-streaming responses.
     """
-    response_data = verify_and_parse_response(response=response)
+
+    response_data = None
+
+    if stream:
+        text_chunks = []
+        metadata_found = False
+
+        for line in response.iter_lines(decode_unicode=True):
+            if not line:
+                continue
+
+            if line.startswith("data:"):
+                data = line[len("data:"):].strip()
+                if data == "[DONE]":
+                    break
+                try:
+                    obj = json.loads(data)
+                    if "guardrails_metadata" in obj:
+                        meta = obj["guardrails_metadata"]
+                        warnings = meta.get("warnings", [])
+                        output_detections = meta.get("output_detections", [])
+
+                        response_data = {
+                            "warnings": warnings,
+                            "detections": {"output": output_detections},
+                        }
+                        metadata_found = True
+                        break
+                except json.JSONDecodeError:
+                    # treat as plain text
+                    text_chunks.append(data)
+
+        if not metadata_found:
+            final_text = "".join(text_chunks).strip()
+            if not final_text:
+                raise AssertionError("No streamed text received")
+            response_data = {
+                "warnings": [],
+                "detections": {"output": []},
+            }
+
+    else:
+        # --- NON-STREAMING MODE ---
+        response_data = verify_and_parse_response(response=response)
+
     errors = []
 
+    # Validate warnings
     unsuitable_output_warning = "UNSUITABLE_OUTPUT"
     warnings = response_data.get("warnings", [])
+
     if len(warnings) != 1:
         errors.append(f"Expected 1 warning in response, got {len(warnings)}")
-    elif warnings[0]["type"] != unsuitable_output_warning:
-        errors.append(f"Expected warning type {unsuitable_output_warning}, got {warnings[0]['type']}")
+    elif warnings and warnings[0].get("type") != unsuitable_output_warning:
+        errors.append(
+            f"Expected warning type {unsuitable_output_warning}, "
+            f"got {warnings[0].get('type')}"
+        )
 
+    # Validate detections
     output_detections = response_data.get("detections", {}).get("output", [])
 
     if len(output_detections) < 1:
-        errors.append(f"Expected at least one output detection, but got {len(output_detections)}.")
+        errors.append(
+            f"Expected at least one output detection, but got {len(output_detections)}."
+        )
     else:
         errors.extend(
             verify_detection(
@@ -189,7 +243,11 @@ def verify_builtin_detector_unsuitable_output_response(
             )
         )
 
-    assert_no_errors(errors=errors, failure_message_prefix="Unsuitable output detection verification failed")
+    # Final assertion
+    assert_no_errors(
+        errors=errors,
+        failure_message_prefix="Unsuitable output detection verification failed"
+    )
 
 
 def verify_negative_detection_response(response: Response) -> None:
@@ -272,12 +330,14 @@ def _send_guardrails_orchestrator_post_request(
     token: str,
     ca_bundle_file: str,
     payload: Dict[str, Any],
+    stream: bool = False,
 ) -> requests.Response:
     response = requests.post(
         url=url,
         headers=get_auth_headers(token=token),
         json=payload,
         verify=ca_bundle_file,
+        stream=stream,
     )
 
     if response.status_code != http.HTTPStatus.OK:
@@ -293,10 +353,11 @@ def send_chat_detections_request(
     content: str,
     model: str,
     detectors: Dict[str, Any] = None,
+    stream: bool = False,
 ) -> requests.Response:
-    payload = get_chat_detections_payload(content=content, model=model, detectors=detectors)
+    payload = get_chat_detections_payload(content=content, model=model, detectors=detectors, stream=stream)
     return _send_guardrails_orchestrator_post_request(
-        url=url, token=token, ca_bundle_file=ca_bundle_file, payload=payload
+        url=url, token=token, ca_bundle_file=ca_bundle_file, payload=payload, stream=stream
     )
 
 
@@ -332,18 +393,20 @@ def send_and_verify_unsuitable_output_detection(
     prompt: GuardrailsDetectionPrompt,
     model: str,
     detectors: Dict[str, Any] = None,
+    stream: bool = False,
 ):
     """Send a prompt to the GuardrailsOrchestrator and verify that it triggers an unsuitable output detection"""
 
     response = send_chat_detections_request(
-        url=url, token=token, ca_bundle_file=ca_bundle_file, content=prompt.content, model=model, detectors=detectors
+        url=url, token=token, ca_bundle_file=ca_bundle_file, content=prompt.content, model=model, detectors=detectors, stream=stream
     )
 
     verify_builtin_detector_unsuitable_output_response(
         response=response,
         detector_id=prompt.detector_id,
         detection_name=prompt.detection_name,
         detection_type=prompt.detection_type,
+        stream=stream,
     )
     return response
 
diff --git a/tests/model_registry/utils.py b/tests/model_registry/utils.py
@@ -456,7 +456,7 @@ def get_mr_pvc_objects(
                 name=name,
                 namespace=namespace,
                 client=client,
-                size="5Gi",
+                size="3Gi",
                 label=get_model_registry_db_label_dict(db_resource_name=name),
                 teardown=teardown_resources,
             )

Original file line number	Diff line number	Diff line change
`@@ -456,7 +456,7 @@ def get_mr_pvc_objects(`
`456`	`456`	`name=name,`
`457`	`457`	`namespace=namespace,`
`458`	`458`	`client=client,`
`459`		`- size="5Gi",`
	`459`	`+ size="3Gi",`
`460`	`460`	`label=get_model_registry_db_label_dict(db_resource_name=name),`
`461`	`461`	`teardown=teardown_resources,`
`462`	`462`	`)`