Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions tests/fixtures/guardrails.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from ocp_resources.route import Route

from utilities.constants import Labels, Annotations
from utilities.guardrails import check_guardrails_health_endpoint

GUARDRAILS_ORCHESTRATOR_NAME: str = "guardrails-orchestrator"

Expand Down Expand Up @@ -160,6 +161,17 @@ def guardrails_orchestrator_health_route(
yield guardrails_orchestrator_health_route


@pytest.fixture
def guardrails_healthcheck(
    current_client_token, openshift_ca_bundle_file, guardrails_orchestrator_health_route: Route
) -> None:
    """Gate dependent tests on the GuardrailsOrchestrator /health endpoint being healthy.

    check_guardrails_health_endpoint returns a bool; the original fixture discarded
    it, so if the retry wrapper returns a falsy result instead of raising on timeout
    (NOTE(review): confirm timeout_sampler's behavior), an unhealthy orchestrator
    would let dependent tests proceed and fail later with a less actionable error.
    Asserting the return value is safe under either retry semantics.
    """
    assert check_guardrails_health_endpoint(
        token=current_client_token,
        host=guardrails_orchestrator_health_route.host,
        ca_bundle_file=openshift_ca_bundle_file,
    ), f"GuardrailsOrchestrator health endpoint at {guardrails_orchestrator_health_route.host} is not healthy"


@pytest.fixture(scope="class")
def guardrails_orchestrator_gateway_route(
admin_client: DynamicClient,
Expand Down
23 changes: 18 additions & 5 deletions tests/fixtures/inference.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import time
from typing import Generator, Any

import pytest
Expand All @@ -12,6 +13,7 @@
from ocp_resources.service import Service
from ocp_resources.serving_runtime import ServingRuntime
from pytest_testconfig import py_config
from simple_logger.logger import get_logger

from utilities.constants import (
RuntimeTemplates,
Expand All @@ -20,9 +22,11 @@
LLMdInferenceSimConfig,
)
from utilities.inference_utils import create_isvc
from utilities.infra import get_data_science_cluster
from utilities.infra import get_data_science_cluster, wait_for_dsc_status_ready
from utilities.serving_runtime import ServingRuntimeFromTemplate

LOGGER = get_logger(name=__name__)


@pytest.fixture(scope="class")
def vllm_cpu_runtime(
Expand Down Expand Up @@ -130,8 +134,8 @@ def llm_d_inference_sim_serving_runtime(
containers=[
{
"name": "kserve-container",
"image": "quay.io/trustyai_testing/llmd-inference-sim-dataset-builtin"
"@sha256:dfaa32cf0878a2fb522133e34369412c90e8ffbfa18b690b92602cf7c019fbbe",
"image": "quay.io/trustyai_testing/llm-d-inference-sim-dataset-builtin"
"@sha256:79e525cfd57a0d72b7e71d5f1e2dd398eca9315cfbd061d9d3e535b1ae736239",
"imagePullPolicy": "Always",
"args": ["--model", LLMdInferenceSimConfig.model_name, "--port", str(LLMdInferenceSimConfig.port)],
"ports": [{"containerPort": LLMdInferenceSimConfig.port, "protocol": "TCP"}],
Expand Down Expand Up @@ -165,6 +169,7 @@ def llm_d_inference_sim_isvc(
admin_client: DynamicClient,
model_namespace: Namespace,
llm_d_inference_sim_serving_runtime: ServingRuntime,
patched_dsc_kserve_headed: DataScienceCluster,
) -> Generator[InferenceService, Any, Any]:
with create_isvc(
client=admin_client,
Expand Down Expand Up @@ -200,6 +205,14 @@ def patched_dsc_kserve_headed(
) -> Generator[DataScienceCluster, None, None]:
"""Configure KServe Services to work in Headed mode i.e. using the Service port instead of the Pod port"""
dsc = get_data_science_cluster(client=admin_client)
with ResourceEditor(patches={dsc: {"spec": {"components": {"kserve": {"rawDeploymentServiceConfig": "Headed"}}}}}):
kserve_controller_manager_deployment.wait_for_replicas()
if not dsc.instance.spec.components.kserve.rawDeploymentServiceConfig == "Headed":
with ResourceEditor(
patches={dsc: {"spec": {"components": {"kserve": {"rawDeploymentServiceConfig": "Headed"}}}}}
):
time.sleep(20) # noqa: FCN001
kserve_controller_manager_deployment.wait_for_replicas()
wait_for_dsc_status_ready(dsc_resource=dsc)
yield dsc
else:
LOGGER.info("DSC already configured for Headed mode")
yield dsc
70 changes: 20 additions & 50 deletions tests/model_explainability/guardrails/test_guardrails.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
)
from tests.model_explainability.guardrails.utils import (
create_detector_config,
check_guardrails_health_endpoint,
verify_health_info_response,
send_and_verify_unsuitable_input_detection,
send_and_verify_unsuitable_output_detection,
Expand Down Expand Up @@ -137,28 +136,14 @@ class TestGuardrailsOrchestratorWithBuiltInDetectors:
query directly to the model without performing any detection.
"""

def test_guardrails_health_endpoint(
self,
current_client_token,
openshift_ca_bundle_file,
llm_d_inference_sim_isvc,
orchestrator_config,
guardrails_orchestrator_health_route,
):
response = check_guardrails_health_endpoint(
host=guardrails_orchestrator_health_route.host,
token=current_client_token,
ca_bundle_file=openshift_ca_bundle_file,
)
assert "fms-guardrails-orchestr8" in response.text

def test_guardrails_info_endpoint(
self,
current_client_token,
openshift_ca_bundle_file,
llm_d_inference_sim_isvc,
orchestrator_config,
guardrails_orchestrator_health_route,
guardrails_healthcheck,
):
verify_health_info_response(
host=guardrails_orchestrator_health_route.host,
Expand All @@ -173,6 +158,7 @@ def test_guardrails_builtin_detectors_unsuitable_input(
llm_d_inference_sim_isvc,
orchestrator_config,
guardrails_orchestrator_gateway_route,
guardrails_healthcheck,
):
send_and_verify_unsuitable_input_detection(
url=f"https://{guardrails_orchestrator_gateway_route.host}{PII_ENDPOINT}{OpenAIEnpoints.CHAT_COMPLETIONS}",
Expand All @@ -189,6 +175,7 @@ def test_guardrails_builtin_detectors_unsuitable_output(
llm_d_inference_sim_isvc,
orchestrator_config,
guardrails_orchestrator_gateway_route,
guardrails_healthcheck,
):
send_and_verify_unsuitable_output_detection(
url=f"https://{guardrails_orchestrator_gateway_route.host}{PII_ENDPOINT}{OpenAIEnpoints.CHAT_COMPLETIONS}",
Expand Down Expand Up @@ -218,6 +205,7 @@ def test_guardrails_builtin_detectors_negative_detection(
guardrails_orchestrator_gateway_route,
message,
url_path,
guardrails_healthcheck,
):
send_and_verify_negative_detection(
url=f"https://{guardrails_orchestrator_gateway_route.host}{url_path}{OpenAIEnpoints.CHAT_COMPLETIONS}",
Expand Down Expand Up @@ -331,6 +319,7 @@ def test_guardrails_multi_detector_unsuitable_input(
guardrails_orchestrator,
otel_collector,
tempo_stack,
guardrails_healthcheck,
):
for prompt in [PROMPT_INJECTION_INPUT_DETECTION_PROMPT, HAP_INPUT_DETECTION_PROMPT]:
send_and_verify_unsuitable_input_detection(
Expand All @@ -353,6 +342,7 @@ def test_guardrails_multi_detector_negative_detection(
openshift_ca_bundle_file,
otel_collector,
tempo_stack,
guardrails_healthcheck,
):
send_and_verify_negative_detection(
url=f"https://{guardrails_orchestrator_route.host}/{CHAT_COMPLETIONS_DETECTION_ENDPOINT}",
Expand All @@ -373,6 +363,7 @@ def test_guardrails_standalone_detector_endpoint(
hap_detector_route,
otel_collector,
tempo_stack,
guardrails_healthcheck,
):
send_and_verify_standalone_detection(
url=f"https://{guardrails_orchestrator_route.host}/{STANDALONE_DETECTION_ENDPOINT}",
Expand All @@ -393,6 +384,7 @@ def test_guardrails_traces_in_tempo(
otel_collector,
tempo_stack,
tempo_traces_service_portforward,
guardrails_healthcheck,
):
"""
Ensure that OpenTelemetry traces from Guardrails Orchestrator are collected in Tempo.
Expand Down Expand Up @@ -437,29 +429,13 @@ class TestGuardrailsOrchestratorAutoConfig:
These tests verify that the GuardrailsOrchestrator works as expected when configured through the AutoConfig feature.
"""

def test_guardrails_gateway_health_endpoint(
self,
current_client_token,
llm_d_inference_sim_isvc,
prompt_injection_detector_route,
hap_detector_route,
openshift_ca_bundle_file,
guardrails_orchestrator,
guardrails_orchestrator_health_route,
):
response = check_guardrails_health_endpoint(
host=guardrails_orchestrator_health_route.host,
token=current_client_token,
ca_bundle_file=openshift_ca_bundle_file,
)
assert "fms-guardrails-orchestr8" in response.text

def test_guardrails_gateway_info_endpoint(
self,
current_client_token,
openshift_ca_bundle_file,
llm_d_inference_sim_isvc,
guardrails_orchestrator_health_route,
guardrails_healthcheck,
):
verify_health_info_response(
host=guardrails_orchestrator_health_route.host,
Expand All @@ -473,6 +449,7 @@ def test_guardrails_autoconfig_unsuitable_input(
openshift_ca_bundle_file,
llm_d_inference_sim_isvc,
guardrails_orchestrator_route,
guardrails_healthcheck,
):
for prompt in [HAP_INPUT_DETECTION_PROMPT, PROMPT_INJECTION_INPUT_DETECTION_PROMPT]:
send_and_verify_unsuitable_input_detection(
Expand All @@ -490,6 +467,7 @@ def test_guardrails_autoconfig_negative_detection(
llm_d_inference_sim_isvc,
guardrails_orchestrator_route,
openshift_ca_bundle_file,
guardrails_healthcheck,
):
send_and_verify_negative_detection(
url=f"https://{guardrails_orchestrator_route.host}/{CHAT_COMPLETIONS_DETECTION_ENDPOINT}",
Expand Down Expand Up @@ -526,29 +504,15 @@ class TestGuardrailsOrchestratorAutoConfigWithGateway:
through the AutoConfig feature to use the gateway route.
"""

def test_guardrails_autoconfig_gateway_health_endpoint(
self,
current_client_token,
llm_d_inference_sim_isvc,
prompt_injection_detector_route,
hap_detector_route,
openshift_ca_bundle_file,
guardrails_orchestrator,
guardrails_orchestrator_health_route,
):
response = check_guardrails_health_endpoint(
host=guardrails_orchestrator_health_route.host,
token=current_client_token,
ca_bundle_file=openshift_ca_bundle_file,
)
assert "fms-guardrails-orchestr8" in response.text

def test_guardrails_autoconfig_gateway_info_endpoint(
self,
current_client_token,
openshift_ca_bundle_file,
llm_d_inference_sim_isvc,
hap_detector_isvc,
prompt_injection_detector_isvc,
guardrails_orchestrator_health_route,
guardrails_healthcheck,
):
verify_health_info_response(
host=guardrails_orchestrator_health_route.host,
Expand All @@ -561,7 +525,10 @@ def test_guardrails_autoconfig_gateway_unsuitable_input(
current_client_token,
openshift_ca_bundle_file,
llm_d_inference_sim_isvc,
prompt_injection_detector_isvc,
hap_detector_isvc,
guardrails_orchestrator_gateway_route,
guardrails_healthcheck,
):
for prompt in [HAP_INPUT_DETECTION_PROMPT, PROMPT_INJECTION_INPUT_DETECTION_PROMPT]:
send_and_verify_unsuitable_input_detection(
Expand All @@ -588,10 +555,13 @@ def test_guardrails_autoconfig_gateway_negative_detection(
self,
current_client_token,
llm_d_inference_sim_isvc,
prompt_injection_detector_isvc,
hap_detector_isvc,
guardrails_orchestrator_gateway_route,
openshift_ca_bundle_file,
url_path,
message,
guardrails_healthcheck,
):
send_and_verify_negative_detection(
url=f"https://{guardrails_orchestrator_gateway_route.host}{url_path}{OpenAIEnpoints.CHAT_COMPLETIONS}",
Expand Down
26 changes: 5 additions & 21 deletions tests/model_explainability/guardrails/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,12 @@
from typing import Dict, Any, List, Optional

from timeout_sampler import retry

from utilities.guardrails import get_auth_headers
from tests.model_explainability.guardrails.constants import GuardrailsDetectionPrompt

LOGGER = get_logger(name=__name__)


def get_auth_headers(token: str) -> Dict[str, str]:
return {"Content-Type": "application/json", "Authorization": f"Bearer {token}"}


def get_chat_detections_payload(content: str, model: str, detectors: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
"""
Constructs a chat detections payload for a given content string.
Expand Down Expand Up @@ -131,7 +127,9 @@ def verify_builtin_detector_unsuitable_input_response(

warnings = response_data.get("warnings", [])
unsuitable_input_warning: str = "UNSUITABLE_INPUT"
if len(warnings) != 1:
if warnings is None:
errors.append("Expected warnings in response, got None")
elif len(warnings) != 1:
errors.append(f"Expected 1 warning in response, got {len(warnings)}")
elif warnings[0]["type"] != unsuitable_input_warning:
errors.append(f"Expected warning type {unsuitable_input_warning}, got {warnings[0]['type']}")
Expand Down Expand Up @@ -239,20 +237,6 @@ def create_detector_config(*detector_names: str) -> Dict[str, Dict[str, Any]]:
}


@retry(exceptions_dict={TimeoutError: []}, wait_timeout=120, sleep=10)
def check_guardrails_health_endpoint(
host,
token,
ca_bundle_file,
):
response = requests.get(url=f"https://{host}/health", headers=get_auth_headers(token=token), verify=ca_bundle_file)
if response.status_code == http.HTTPStatus.OK:
return response
raise TimeoutError(
f"Timeout waiting GuardrailsOrchestrator to be healthy. Response status code: {response.status_code}"
)


def verify_health_info_response(host, token, ca_bundle_file):
response = requests.get(url=f"https://{host}/info", headers=get_auth_headers(token=token), verify=ca_bundle_file)
assert response.status_code == http.HTTPStatus.OK
Expand Down Expand Up @@ -300,7 +284,7 @@ def send_chat_detections_request(
)


@retry(exceptions_dict={TimeoutError: []}, wait_timeout=120, sleep=1)
@retry(exceptions_dict={TimeoutError: []}, wait_timeout=120, sleep=4)
def send_and_verify_unsuitable_input_detection(
url: str,
token: str,
Expand Down
19 changes: 19 additions & 0 deletions utilities/guardrails.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import requests
import http
from typing import Dict

from timeout_sampler import retry


def get_auth_headers(token: str) -> Dict[str, str]:
    """Build the standard request headers: JSON content type plus bearer-token auth."""
    headers: Dict[str, str] = {"Content-Type": "application/json"}
    headers["Authorization"] = "Bearer " + token
    return headers


@retry(
    # Retry transient request failures (connection refused/reset while the
    # orchestrator pod is still coming up). The original only listed
    # TimeoutError, which this body never raises — so a startup-time
    # ConnectionError escaped the decorator immediately and defeated the retry.
    exceptions_dict={TimeoutError: [], requests.exceptions.RequestException: []},
    wait_timeout=10,
    sleep=2,
)
def check_guardrails_health_endpoint(
    host: str,
    token: str,
    ca_bundle_file: str,
) -> bool:
    """Return True when the GuardrailsOrchestrator /health endpoint answers HTTP 200.

    Args:
        host: hostname of the orchestrator health route (no scheme).
        token: bearer token for the Authorization header.
        ca_bundle_file: path to the CA bundle used to verify the TLS certificate.

    Returns:
        True if the endpoint responded with HTTP 200 OK, False otherwise.
    """
    response = requests.get(url=f"https://{host}/health", headers=get_auth_headers(token=token), verify=ca_bundle_file)
    return response.status_code == http.HTTPStatus.OK