Skip to content

Commit 906091d

Browse files
authored
feat: add tests for guardrails with huggingface detectors (#369)
* wip: test for guardrails with huggingface detectors
* wip: more work on guardrails
* test: polishing the code and adding docstring
* improve function to get chat detections payload, replace minio image
1 parent a73fc25 commit 906091d

File tree

5 files changed

+308
-35
lines changed

5 files changed

+308
-35
lines changed

tests/model_explainability/guardrails/conftest.py

Lines changed: 175 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -24,17 +24,21 @@
2424
from utilities.inference_utils import create_isvc
2525
from utilities.serving_runtime import ServingRuntimeFromTemplate
2626

27+
ORCHESTRATOR_CONFIGMAP_NAME = "fms-orchestr8-config-nlp"
28+
29+
QWEN_ISVC_NAME = "qwen-isvc"
30+
2731
GORCH_NAME = "gorch-test"
2832

2933
USER_ONE: str = "user-one"
3034
GUARDRAILS_ORCHESTRATOR_PORT: int = 8032
3135

3236

3337
@pytest.fixture(scope="class")
34-
def guardrails_orchestrator(
38+
def guardrails_orchestrator_with_builtin_detectors(
3539
admin_client: DynamicClient,
3640
model_namespace: Namespace,
37-
orchestrator_configmap: ConfigMap,
41+
gorch_with_builtin_detectors_configmap: ConfigMap,
3842
guardrails_gateway_config: ConfigMap,
3943
) -> Generator[GuardrailsOrchestrator, Any, Any]:
4044
with GuardrailsOrchestrator(
@@ -43,7 +47,7 @@ def guardrails_orchestrator(
4347
namespace=model_namespace.name,
4448
enable_built_in_detectors=True,
4549
enable_guardrails_gateway=True,
46-
orchestrator_config=orchestrator_configmap.name,
50+
orchestrator_config=gorch_with_builtin_detectors_configmap.name,
4751
guardrails_gateway_config=guardrails_gateway_config.name,
4852
replicas=1,
4953
wait_for_resource=True,
@@ -53,15 +57,36 @@ def guardrails_orchestrator(
5357
yield gorch
5458

5559

60+
@pytest.fixture(scope="class")
def guardrails_orchestrator_with_hf_detectors(
    admin_client: DynamicClient,
    model_namespace: Namespace,
    gorch_with_hf_detectors_configmap: ConfigMap,
) -> Generator[GuardrailsOrchestrator, Any, Any]:
    """Yield a GuardrailsOrchestrator wired to the HF-detectors ConfigMap.

    The orchestrator is created under ``GORCH_NAME`` in the model namespace
    with both the built-in detectors and the guardrails gateway switched off,
    and with ``gorch_with_hf_detectors_configmap`` as its orchestrator config.
    Before yielding, the fixture calls ``wait_for_replicas()`` on a Deployment
    that has the same name and namespace as the orchestrator.

    Args:
        admin_client: Client passed to the GuardrailsOrchestrator resource.
        model_namespace: Namespace in which the orchestrator is created.
        gorch_with_hf_detectors_configmap: ConfigMap whose name is used as the
            orchestrator configuration.

    Yields:
        The GuardrailsOrchestrator resource, inside its context manager.
    """
    orchestrator_cm = gorch_with_hf_detectors_configmap.name
    target_namespace = model_namespace.name

    with GuardrailsOrchestrator(
        client=admin_client,
        name=GORCH_NAME,
        namespace=target_namespace,
        enable_built_in_detectors=False,
        enable_guardrails_gateway=False,
        orchestrator_config=orchestrator_cm,
        replicas=1,
        wait_for_resource=True,
    ) as orchestrator:
        # Block until the Deployment named after the orchestrator reports its replicas.
        Deployment(
            name=orchestrator.name,
            namespace=orchestrator.namespace,
            wait_for_resource=True,
        ).wait_for_replicas()
        yield orchestrator
79+
80+
5681
@pytest.fixture(scope="class")
5782
def guardrails_orchestrator_health_route(
5883
admin_client: DynamicClient,
5984
model_namespace: Namespace,
60-
guardrails_orchestrator: GuardrailsOrchestrator,
85+
guardrails_orchestrator_with_builtin_detectors: GuardrailsOrchestrator,
6186
) -> Generator[Route, Any, Any]:
6287
yield Route(
63-
name=f"{guardrails_orchestrator.name}-health",
64-
namespace=guardrails_orchestrator.namespace,
88+
name=f"{guardrails_orchestrator_with_builtin_detectors.name}-health",
89+
namespace=guardrails_orchestrator_with_builtin_detectors.namespace,
6590
wait_for_resource=True,
6691
)
6792

@@ -70,18 +95,47 @@ def guardrails_orchestrator_health_route(
7095
def guardrails_orchestrator_route(
7196
admin_client: DynamicClient,
7297
model_namespace: Namespace,
73-
guardrails_orchestrator: GuardrailsOrchestrator,
98+
guardrails_orchestrator_with_builtin_detectors: GuardrailsOrchestrator,
7499
) -> Generator[Route, Any, Any]:
75100
yield Route(
76-
name=f"{guardrails_orchestrator.name}",
77-
namespace=guardrails_orchestrator.namespace,
101+
name=f"{guardrails_orchestrator_with_builtin_detectors.name}",
102+
namespace=guardrails_orchestrator_with_builtin_detectors.namespace,
103+
wait_for_resource=True,
104+
)
105+
106+
107+
@pytest.fixture(scope="class")
def guardrails_orchestrator_with_hf_detectors_route(
    admin_client: DynamicClient,
    model_namespace: Namespace,
    guardrails_orchestrator_with_hf_detectors: GuardrailsOrchestrator,
) -> Generator[Route, Any, Any]:
    """Yield a Route handle named after the HF-detectors orchestrator.

    The Route object uses the orchestrator's own name and namespace.
    ``admin_client`` and ``model_namespace`` are requested but not referenced
    in the body.

    Yields:
        A Route constructed with ``wait_for_resource=True``.
    """
    orchestrator = guardrails_orchestrator_with_hf_detectors
    # NOTE(review): the Route is only constructed here, not entered as a context
    # manager -- presumably it refers to a route that already exists; confirm.
    route = Route(
        name=f"{orchestrator.name}",
        namespace=orchestrator.namespace,
        wait_for_resource=True,
    )
    yield route
118+
119+
120+
@pytest.fixture(scope="class")
def prompt_injection_detector_route(
    admin_client: DynamicClient,
    model_namespace: Namespace,
    prompt_injection_detector_isvc: InferenceService,
) -> Generator[Route, Any, Any]:
    """Yield a Route object pointing at the prompt-injection detector service.

    The Route is named ``prompt-injection-detector-route``, lives in the model
    namespace, and is given the detector InferenceService's name as its
    ``service``. ``admin_client`` is requested but not referenced in the body.

    Yields:
        A Route constructed with ``wait_for_resource=True``.
    """
    detector_service = prompt_injection_detector_isvc.name
    # NOTE(review): the Route is constructed but never entered or deployed in
    # this fixture -- verify that it is created elsewhere or by the caller.
    route = Route(
        name="prompt-injection-detector-route",
        namespace=model_namespace.name,
        service=detector_service,
        wait_for_resource=True,
    )
    yield route
80132

81133

82134
@pytest.fixture(scope="class")
83135
def guardrails_orchestrator_pod(
84-
admin_client: DynamicClient, model_namespace: Namespace, guardrails_orchestrator: GuardrailsOrchestrator
136+
admin_client: DynamicClient,
137+
model_namespace: Namespace,
138+
guardrails_orchestrator_with_builtin_detectors: GuardrailsOrchestrator,
85139
) -> Pod:
86140
return list(Pod.get(namespace=model_namespace.name, label_selector=f"app.kubernetes.io/instance={GORCH_NAME}"))[0]
87141

@@ -97,7 +151,7 @@ def qwen_isvc(
97151
) -> Generator[InferenceService, Any, Any]:
98152
with create_isvc(
99153
client=admin_client,
100-
name="llm",
154+
name=QWEN_ISVC_NAME,
101155
namespace=model_namespace.name,
102156
deployment_mode=KServeDeploymentType.RAW_DEPLOYMENT,
103157
model_format="vLLM",
@@ -114,6 +168,37 @@ def qwen_isvc(
114168
yield isvc
115169

116170

171+
@pytest.fixture(scope="class")
def prompt_injection_detector_isvc(
    admin_client: DynamicClient,
    model_namespace: Namespace,
    minio_data_connection: Secret,
    huggingface_sr: ServingRuntime,
) -> Generator[InferenceService, Any, Any]:
    """Yield the ``prompt-injection-detector`` InferenceService.

    The service is created through ``create_isvc`` in raw-deployment mode, on
    the ``huggingface_sr`` runtime, with the model read from the MinIO data
    connection at ``deberta-v3-base-prompt-injection-v2``. It is pinned to one
    replica, has auth disabled, and requests no GPU.

    Args:
        admin_client: Client passed to ``create_isvc``.
        model_namespace: Namespace in which the service is created.
        minio_data_connection: Secret whose name is used as the storage key.
        huggingface_sr: ServingRuntime whose name is used as the runtime.

    Yields:
        The InferenceService, inside the ``create_isvc`` context manager.
    """
    # Requests and limits carry the same values; separate dicts avoid aliasing.
    compute = {"cpu": "1", "memory": "2Gi", "nvidia.com/gpu": "0"}
    isvc_resources = {"requests": dict(compute), "limits": dict(compute)}
    dashboard_labels = {"opendatahub.io/dashboard": "true"}

    with create_isvc(
        client=admin_client,
        name="prompt-injection-detector",
        namespace=model_namespace.name,
        deployment_mode=KServeDeploymentType.RAW_DEPLOYMENT,
        model_format="guardrails-detector-huggingface",
        runtime=huggingface_sr.name,
        storage_key=minio_data_connection.name,
        storage_path="deberta-v3-base-prompt-injection-v2",
        wait_for_predictor_pods=False,
        enable_auth=False,
        resources=isvc_resources,
        max_replicas=1,
        min_replicas=1,
        labels=dashboard_labels,
    ) as detector_isvc:
        yield detector_isvc
200+
201+
117202
@pytest.fixture(scope="class")
118203
def vllm_runtime(
119204
admin_client: DynamicClient,
@@ -146,20 +231,61 @@ def vllm_runtime(
146231

147232

148233
@pytest.fixture(scope="class")
149-
def orchestrator_configmap(
234+
def huggingface_sr(
    admin_client: DynamicClient,
    model_namespace: Namespace,
) -> Generator[ServingRuntime, Any, Any]:
    """Yield the ServingRuntime used by the Hugging Face guardrails detector.

    The runtime is named ``guardrails-detector-runtime-prompt-injection`` and
    is created in the model namespace. It declares a single
    ``kserve-container`` that starts ``uvicorn app:app`` on port 8000, and it
    advertises the ``guardrails-detector-huggingface`` model format with
    auto-selection enabled.

    Args:
        admin_client: Client passed to the ServingRuntime resource.
        model_namespace: Namespace in which the runtime is created.

    Yields:
        The ServingRuntime, inside its context manager.
    """
    uvicorn_args = [
        "--workers=4",
        "--host=0.0.0.0",
        "--port=8000",
        "--log-config=/common/log_conf.yaml",
    ]
    container_env = [
        {"name": "MODEL_DIR", "value": "/mnt/models"},
        {"name": "HF_HOME", "value": "/tmp/hf_home"},
    ]
    kserve_container = {
        "name": "kserve-container",
        "image": "quay.io/trustyai/guardrails-detector-huggingface-runtime:v0.2.0",
        "command": ["uvicorn", "app:app"],
        "args": uvicorn_args,
        "env": container_env,
        "ports": [{"containerPort": 8000, "protocol": "TCP"}],
    }
    # NOTE(review): the prometheus port annotation is 8080 while the container
    # listens on 8000 -- confirm this mismatch is intentional.
    runtime_annotations = {
        "openshift.io/display-name": "Guardrails Detector ServingRuntime for KServe",
        "opendatahub.io/recommended-accelerators": '["nvidia.com/gpu"]',
        "prometheus.io/port": "8080",
        "prometheus.io/path": "/metrics",
    }

    with ServingRuntime(
        client=admin_client,
        name="guardrails-detector-runtime-prompt-injection",
        namespace=model_namespace.name,
        containers=[kserve_container],
        supported_model_formats=[{"name": "guardrails-detector-huggingface", "autoSelect": True}],
        multi_model=False,
        annotations=runtime_annotations,
        label={"opendatahub.io/dashboard": "true"},
    ) as runtime:
        yield runtime
273+
274+
275+
@pytest.fixture(scope="class")
276+
def gorch_with_builtin_detectors_configmap(
150277
admin_client: DynamicClient,
151278
model_namespace: Namespace,
152-
qwen_isvc: InferenceService,
153279
) -> Generator[ConfigMap, Any, Any]:
154280
with ConfigMap(
155281
client=admin_client,
156-
name="fms-orchestr8-config-nlp",
282+
name=ORCHESTRATOR_CONFIGMAP_NAME,
157283
namespace=model_namespace.name,
158284
data={
159285
"config.yaml": yaml.dump({
160286
"chat_generation": {
161287
"service": {
162-
"hostname": f"{qwen_isvc.name}-predictor.{model_namespace.name}.svc.cluster.local",
288+
"hostname": f"{QWEN_ISVC_NAME}-predictor.{model_namespace.name}.svc.cluster.local",
163289
"port": GUARDRAILS_ORCHESTRATOR_PORT,
164290
}
165291
},
@@ -180,6 +306,40 @@ def orchestrator_configmap(
180306
yield cm
181307

182308

309+
@pytest.fixture(scope="class")
def gorch_with_hf_detectors_configmap(
    admin_client: DynamicClient,
    model_namespace: Namespace,
) -> Generator[ConfigMap, Any, Any]:
    """Yield the orchestrator ConfigMap that registers the HF prompt-injection detector.

    The ConfigMap is named ``ORCHESTRATOR_CONFIGMAP_NAME`` and holds a single
    ``config.yaml`` entry, produced with ``yaml.dump``. That document points
    chat generation at the ``<QWEN_ISVC_NAME>-predictor`` host and declares one
    ``prompt_injection`` detector of type ``text_contents`` on port 8000.

    Args:
        admin_client: Client passed to the ConfigMap resource.
        model_namespace: Namespace in which the ConfigMap is created.

    Yields:
        The ConfigMap, inside its context manager.
    """
    # Hostnames are short service names with no namespace suffix.
    # NOTE(review): presumably resolved inside the orchestrator's own namespace -- confirm.
    chat_generation = {
        "service": {
            "hostname": f"{QWEN_ISVC_NAME}-predictor",
            "port": GUARDRAILS_ORCHESTRATOR_PORT,
        }
    }
    prompt_injection_detector = {
        "type": "text_contents",
        "service": {
            "hostname": "prompt-injection-detector-predictor",
            "port": 8000,
        },
        "chunker_id": "whole_doc_chunker",
        "default_threshold": 0.5,
    }
    orchestrator_config = {
        "chat_generation": chat_generation,
        "detectors": {"prompt_injection": prompt_injection_detector},
    }

    with ConfigMap(
        client=admin_client,
        name=ORCHESTRATOR_CONFIGMAP_NAME,
        namespace=model_namespace.name,
        data={"config.yaml": yaml.dump(orchestrator_config)},
    ) as config_map:
        yield config_map
341+
342+
183343
@pytest.fixture(scope="class")
184344
def guardrails_gateway_config(
185345
admin_client: DynamicClient, model_namespace: Namespace

0 commit comments

Comments
 (0)