2424from utilities .inference_utils import create_isvc
2525from utilities .serving_runtime import ServingRuntimeFromTemplate
2626
# ConfigMap name shared by both orchestrator-config fixture variants
# (builtin-detectors and HF-detectors); each test class creates only one of them.
ORCHESTRATOR_CONFIGMAP_NAME = "fms-orchestr8-config-nlp"

# InferenceService name for the Qwen chat-generation model; also baked into the
# predictor hostname written to the orchestrator ConfigMaps.
QWEN_ISVC_NAME = "qwen-isvc"

# GuardrailsOrchestrator resource name; doubles as the value of the
# app.kubernetes.io/instance label selector used to find the orchestrator pod.
GORCH_NAME = "gorch-test"

# NOTE(review): not referenced in this chunk — presumably a test user identity
# consumed elsewhere in the file; confirm before removing.
USER_ONE: str = "user-one"
# Port the orchestrator config targets for the chat-generation service.
GUARDRAILS_ORCHESTRATOR_PORT: int = 8032
3135
3236
3337@pytest .fixture (scope = "class" )
34- def guardrails_orchestrator (
38+ def guardrails_orchestrator_with_builtin_detectors (
3539 admin_client : DynamicClient ,
3640 model_namespace : Namespace ,
37- orchestrator_configmap : ConfigMap ,
41+ gorch_with_builtin_detectors_configmap : ConfigMap ,
3842 guardrails_gateway_config : ConfigMap ,
3943) -> Generator [GuardrailsOrchestrator , Any , Any ]:
4044 with GuardrailsOrchestrator (
@@ -43,7 +47,7 @@ def guardrails_orchestrator(
4347 namespace = model_namespace .name ,
4448 enable_built_in_detectors = True ,
4549 enable_guardrails_gateway = True ,
46- orchestrator_config = orchestrator_configmap .name ,
50+ orchestrator_config = gorch_with_builtin_detectors_configmap .name ,
4751 guardrails_gateway_config = guardrails_gateway_config .name ,
4852 replicas = 1 ,
4953 wait_for_resource = True ,
@@ -53,15 +57,36 @@ def guardrails_orchestrator(
5357 yield gorch
5458
5559
@pytest.fixture(scope="class")
def guardrails_orchestrator_with_hf_detectors(
    admin_client: DynamicClient,
    model_namespace: Namespace,
    gorch_with_hf_detectors_configmap: ConfigMap,
) -> Generator[GuardrailsOrchestrator, Any, Any]:
    """Deploy a GuardrailsOrchestrator that relies on HuggingFace detectors.

    Built-in detectors and the guardrails gateway are both disabled; detection
    is delegated entirely to the services named in the HF-detectors ConfigMap.
    Yields the orchestrator once its Deployment reports ready replicas.
    """
    with GuardrailsOrchestrator(
        client=admin_client,
        name=GORCH_NAME,
        namespace=model_namespace.name,
        enable_built_in_detectors=False,
        enable_guardrails_gateway=False,
        orchestrator_config=gorch_with_hf_detectors_configmap.name,
        replicas=1,
        wait_for_resource=True,
    ) as orchestrator:
        deployment = Deployment(
            name=orchestrator.name,
            namespace=orchestrator.namespace,
            wait_for_resource=True,
        )
        # Block until the orchestrator pods are actually running before tests start.
        deployment.wait_for_replicas()
        yield orchestrator
79+
80+
@pytest.fixture(scope="class")
def guardrails_orchestrator_health_route(
    admin_client: DynamicClient,
    model_namespace: Namespace,
    guardrails_orchestrator_with_builtin_detectors: GuardrailsOrchestrator,
) -> Generator[Route, Any, Any]:
    """Yield the orchestrator's health-endpoint Route (``<orchestrator>-health``)."""
    orchestrator = guardrails_orchestrator_with_builtin_detectors
    yield Route(
        name=f"{orchestrator.name}-health",
        namespace=orchestrator.namespace,
        wait_for_resource=True,
    )
6792
@@ -70,18 +95,47 @@ def guardrails_orchestrator_health_route(
def guardrails_orchestrator_route(
    admin_client: DynamicClient,
    model_namespace: Namespace,
    guardrails_orchestrator_with_builtin_detectors: GuardrailsOrchestrator,
) -> Generator[Route, Any, Any]:
    """Yield the main Route exposed for the builtin-detectors orchestrator."""
    orchestrator = guardrails_orchestrator_with_builtin_detectors
    yield Route(
        name=orchestrator.name,
        namespace=orchestrator.namespace,
        wait_for_resource=True,
    )
105+
106+
@pytest.fixture(scope="class")
def guardrails_orchestrator_with_hf_detectors_route(
    admin_client: DynamicClient,
    model_namespace: Namespace,
    guardrails_orchestrator_with_hf_detectors: GuardrailsOrchestrator,
) -> Generator[Route, Any, Any]:
    """Yield the Route exposed for the HF-detectors orchestrator."""
    orchestrator = guardrails_orchestrator_with_hf_detectors
    yield Route(
        name=orchestrator.name,
        namespace=orchestrator.namespace,
        wait_for_resource=True,
    )
118+
119+
@pytest.fixture(scope="class")
def prompt_injection_detector_route(
    admin_client: DynamicClient,
    model_namespace: Namespace,
    prompt_injection_detector_isvc: InferenceService,
) -> Generator[Route, Any, Any]:
    """Yield a Route fronting the prompt-injection detector's predictor service."""
    detector_service = prompt_injection_detector_isvc.name
    yield Route(
        name="prompt-injection-detector-route",
        namespace=model_namespace.name,
        service=detector_service,
        wait_for_resource=True,
    )
80132
81133
@pytest.fixture(scope="class")
def guardrails_orchestrator_pod(
    admin_client: DynamicClient,
    model_namespace: Namespace,
    guardrails_orchestrator_with_builtin_detectors: GuardrailsOrchestrator,
) -> Pod:
    """Return the running GuardrailsOrchestrator Pod.

    Looks the pod up by the ``app.kubernetes.io/instance`` label carried by the
    orchestrator's workload. Raises a descriptive error instead of an opaque
    ``IndexError`` when no matching pod exists (e.g. the deployment never
    scheduled one).
    """
    label_selector = f"app.kubernetes.io/instance={GORCH_NAME}"
    pods = list(Pod.get(namespace=model_namespace.name, label_selector=label_selector))
    if not pods:
        raise RuntimeError(
            f"No pod found in namespace {model_namespace.name} matching {label_selector}"
        )
    return pods[0]
87141
@@ -97,7 +151,7 @@ def qwen_isvc(
97151) -> Generator [InferenceService , Any , Any ]:
98152 with create_isvc (
99153 client = admin_client ,
100- name = "llm" ,
154+ name = QWEN_ISVC_NAME ,
101155 namespace = model_namespace .name ,
102156 deployment_mode = KServeDeploymentType .RAW_DEPLOYMENT ,
103157 model_format = "vLLM" ,
@@ -114,6 +168,37 @@ def qwen_isvc(
114168 yield isvc
115169
116170
@pytest.fixture(scope="class")
def prompt_injection_detector_isvc(
    admin_client: DynamicClient,
    model_namespace: Namespace,
    minio_data_connection: Secret,
    huggingface_sr: ServingRuntime,
) -> Generator[InferenceService, Any, Any]:
    """Deploy the deberta prompt-injection detector model as a raw-deployment ISVC.

    The model is served from MinIO storage via the HuggingFace detector
    ServingRuntime; predictor pods are not awaited here.
    """
    # CPU-only sizing: the GPU request/limit is explicitly pinned to zero.
    detector_resources = {
        "requests": {"cpu": "1", "memory": "2Gi", "nvidia.com/gpu": "0"},
        "limits": {"cpu": "1", "memory": "2Gi", "nvidia.com/gpu": "0"},
    }
    with create_isvc(
        client=admin_client,
        name="prompt-injection-detector",
        namespace=model_namespace.name,
        deployment_mode=KServeDeploymentType.RAW_DEPLOYMENT,
        model_format="guardrails-detector-huggingface",
        runtime=huggingface_sr.name,
        storage_key=minio_data_connection.name,
        storage_path="deberta-v3-base-prompt-injection-v2",
        wait_for_predictor_pods=False,
        enable_auth=False,
        resources=detector_resources,
        max_replicas=1,
        min_replicas=1,
        labels={"opendatahub.io/dashboard": "true"},
    ) as inference_service:
        yield inference_service
201+
117202@pytest .fixture (scope = "class" )
118203def vllm_runtime (
119204 admin_client : DynamicClient ,
@@ -146,20 +231,61 @@ def vllm_runtime(
146231
147232
@pytest.fixture(scope="class")
def huggingface_sr(
    admin_client: DynamicClient,
    model_namespace: Namespace,
) -> Generator[ServingRuntime, Any, Any]:
    """Create the HuggingFace guardrails-detector ServingRuntime.

    Runs the trustyai detector runtime image under uvicorn on port 8000 and
    auto-selects for the ``guardrails-detector-huggingface`` model format.
    """
    kserve_container = {
        "name": "kserve-container",
        "image": "quay.io/trustyai/guardrails-detector-huggingface-runtime:v0.2.0",
        "command": ["uvicorn", "app:app"],
        "args": [
            "--workers=4",
            "--host=0.0.0.0",
            "--port=8000",
            "--log-config=/common/log_conf.yaml",
        ],
        "env": [
            {"name": "MODEL_DIR", "value": "/mnt/models"},
            {"name": "HF_HOME", "value": "/tmp/hf_home"},
        ],
        "ports": [{"containerPort": 8000, "protocol": "TCP"}],
    }
    runtime_annotations = {
        "openshift.io/display-name": "Guardrails Detector ServingRuntime for KServe",
        "opendatahub.io/recommended-accelerators": '["nvidia.com/gpu"]',
        "prometheus.io/port": "8080",
        "prometheus.io/path": "/metrics",
    }
    with ServingRuntime(
        client=admin_client,
        name="guardrails-detector-runtime-prompt-injection",
        namespace=model_namespace.name,
        containers=[kserve_container],
        supported_model_formats=[{"name": "guardrails-detector-huggingface", "autoSelect": True}],
        multi_model=False,
        annotations=runtime_annotations,
        label={"opendatahub.io/dashboard": "true"},
    ) as runtime:
        yield runtime
274+
275+ @pytest .fixture (scope = "class" )
276+ def gorch_with_builtin_detectors_configmap (
150277 admin_client : DynamicClient ,
151278 model_namespace : Namespace ,
152- qwen_isvc : InferenceService ,
153279) -> Generator [ConfigMap , Any , Any ]:
154280 with ConfigMap (
155281 client = admin_client ,
156- name = "fms-orchestr8-config-nlp" ,
282+ name = ORCHESTRATOR_CONFIGMAP_NAME ,
157283 namespace = model_namespace .name ,
158284 data = {
159285 "config.yaml" : yaml .dump ({
160286 "chat_generation" : {
161287 "service" : {
162- "hostname" : f"{ qwen_isvc . name } -predictor.{ model_namespace .name } .svc.cluster.local" ,
288+ "hostname" : f"{ QWEN_ISVC_NAME } -predictor.{ model_namespace .name } .svc.cluster.local" ,
163289 "port" : GUARDRAILS_ORCHESTRATOR_PORT ,
164290 }
165291 },
@@ -180,6 +306,40 @@ def orchestrator_configmap(
180306 yield cm
181307
182308
@pytest.fixture(scope="class")
def gorch_with_hf_detectors_configmap(
    admin_client: DynamicClient,
    model_namespace: Namespace,
) -> Generator[ConfigMap, Any, Any]:
    """Create the orchestrator ConfigMap that wires in the HF prompt-injection detector.

    Chat generation points at the Qwen predictor service; the single configured
    detector targets the prompt-injection predictor on port 8000.
    """
    orchestrator_config = {
        "chat_generation": {
            "service": {
                "hostname": f"{QWEN_ISVC_NAME}-predictor",
                "port": GUARDRAILS_ORCHESTRATOR_PORT,
            }
        },
        "detectors": {
            "prompt_injection": {
                "type": "text_contents",
                "service": {
                    "hostname": "prompt-injection-detector-predictor",
                    "port": 8000,
                },
                "chunker_id": "whole_doc_chunker",
                "default_threshold": 0.5,
            }
        },
    }
    with ConfigMap(
        client=admin_client,
        name=ORCHESTRATOR_CONFIGMAP_NAME,
        namespace=model_namespace.name,
        data={"config.yaml": yaml.dump(orchestrator_config)},
    ) as config_map:
        yield config_map
342+
183343@pytest .fixture (scope = "class" )
184344def guardrails_gateway_config (
185345 admin_client : DynamicClient , model_namespace : Namespace
0 commit comments