Skip to content

Commit ca8b9ca

Browse files
Sandeep20013, dbasunag, pre-commit-ci[bot], kpunwatk
authored and committed
Add GPU tests with vLLM runtime and Qwen model deployment for Guardrails (#1259)
* chore: branching 3.4ea1 and generating new tag (#1171) Signed-off-by: Sandeep20013 <sandeepm20013@gmail.com> * test(guardrails): add GPU Integration test using vLLM runtime Signed-off-by: Sandeep20013 <sandeepm20013@gmail.com> * update: include remaining changes Signed-off-by: Sandeep20013 <sandeepm20013@gmail.com> * chore: remove oc.tar Signed-off-by: Sandeep20013 <sandeepm20013@gmail.com> * sync Makefile with main branch Signed-off-by: Sandeep20013 <sandeepm20013@gmail.com> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: Sandeep20013 <sandeepm20013@gmail.com> * Fix fixture issues / other suggested changes Signed-off-by: Sandeep20013 <sandeepm20013@gmail.com> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: Sandeep20013 <sandeepm20013@gmail.com> * Trigger DCO check Signed-off-by: Sandeep20013 <sandeepm20013@gmail.com> * precommit.ci changes Signed-off-by: Sandeep20013 <sandeepm20013@gmail.com> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: Sandeep20013 <sandeepm20013@gmail.com> * Refactor: move shared constants to utilities/constants.py and update guardrails tests Signed-off-by: Sandeep20013 <sandeepm20013@gmail.com> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Unify GPU orchestrator config fixtures and update guardrails GPU tests Signed-off-by: Sandeep20013 <sandeepm20013@gmail.com> * Remove unnecessary guardrails_gateway_config and refactor GPU orchestrator config fixture Signed-off-by: Sandeep20013 <sandeepm20013@gmail.com> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Added docstring for orchestrator_config_gpu Signed-off-by: Sandeep20013 <sandeepm20013@gmail.com> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more 
information, see https://pre-commit.ci * tests/fixtures/guardrails.py: replace early return with if/else in orchestrator_config_gpu fixture Signed-off-by: Sandeep20013 <sandeepm20013@gmail.com> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix E501 line too long flake8 violation Signed-off-by: Sandeep20013 <sandeepm20013@gmail.com> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * test(fixtures): simplify orchestrator_config_gpu docstring Signed-off-by: Sandeep20013 <sandeepm20013@gmail.com> --------- Signed-off-by: Sandeep20013 <sandeepm20013@gmail.com> Co-authored-by: Debarati Basu-Nag <dbasunag@redhat.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Karishma Punwatkar <kpunwatk@redhat.com>
1 parent 89c943f commit ca8b9ca

File tree

7 files changed

+493
-5
lines changed

7 files changed

+493
-5
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,3 +178,4 @@ QWEN.md
178178

179179
# Must-Gather Artifacts
180180
must-gather-collected/
181+
oc.tar

tests/fixtures/guardrails.py

Lines changed: 93 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from typing import Any
33

44
import pytest
5+
import yaml
56
from _pytest.fixtures import FixtureRequest
67
from kubernetes.dynamic import DynamicClient
78
from ocp_resources.config_map import ConfigMap
@@ -12,7 +13,14 @@
1213
from ocp_resources.resource import ResourceEditor
1314
from ocp_resources.route import Route
1415

15-
from utilities.constants import Annotations, Labels
16+
from tests.fixtures.inference import get_vllm_chat_config
17+
from utilities.constants import (
18+
BUILTIN_DETECTOR_CONFIG,
19+
HAP_DETECTOR,
20+
PROMPT_INJECTION_DETECTOR,
21+
Annotations,
22+
Labels,
23+
)
1624
from utilities.guardrails import check_guardrails_health_endpoint
1725

1826
GUARDRAILS_ORCHESTRATOR_NAME: str = "guardrails-orchestrator"
@@ -46,6 +54,14 @@ def guardrails_orchestrator(
4654
orchestrator_config = request.getfixturevalue(argname="orchestrator_config")
4755
gorch_kwargs["orchestrator_config"] = orchestrator_config.name
4856

57+
elif request.param.get("orchestrator_config_gpu"):
58+
orchestrator_config = request.getfixturevalue(argname="orchestrator_config_gpu")
59+
gorch_kwargs["orchestrator_config"] = orchestrator_config.name
60+
61+
elif request.param.get("orchestrator_config_builtin_gpu"):
62+
orchestrator_config = request.getfixturevalue(argname="orchestrator_config_builtin_gpu")
63+
gorch_kwargs["orchestrator_config"] = orchestrator_config.name
64+
4965
if request.param.get("enable_guardrails_gateway"):
5066
gorch_kwargs["enable_guardrails_gateway"] = True
5167

@@ -209,3 +225,79 @@ def guardrails_orchestrator_gateway_route(
209225
wait_for_resource=True,
210226
ensure_exists=True,
211227
)
228+
229+
230+
@pytest.fixture(scope="class")
def orchestrator_config_gpu(
    request: FixtureRequest,
    admin_client: DynamicClient,
    model_namespace: Namespace,
    teardown_resources: bool,
    pytestconfig: pytest.Config,
) -> Generator[ConfigMap, Any, Any]:
    """
    Provide the "fms-orchestr8-config-nlp" ConfigMap for the GPU Guardrails Orchestrator tests.

    When the post-upgrade option is set, the ConfigMap that already exists in the
    model namespace is looked up (it must exist), yielded, and cleaned up afterwards.

    Otherwise a new ConfigMap is created. Its data is resolved from the optional
    fixture params, in this order:
        1. "orchestrator_config_data": used as-is when truthy.
        2. "use_builtin_detectors": vLLM chat config plus BUILTIN_DETECTOR_CONFIG.
        3. default: vLLM chat config plus the prompt-injection and HAP detector
           predictor services of the model namespace.
    """
    namespace_name = model_namespace.name

    if pytestconfig.option.post_upgrade:
        existing_config_map = ConfigMap(
            client=admin_client,
            name="fms-orchestr8-config-nlp",
            namespace=namespace_name,
            ensure_exists=True,
        )
        yield existing_config_map
        existing_config_map.clean_up()

    else:
        # The fixture may be requested without parametrization; fall back to an empty mapping.
        fixture_params = getattr(request, "param", {}) or {}
        config_map_data = fixture_params.get("orchestrator_config_data")

        if not config_map_data:
            if fixture_params.get("use_builtin_detectors"):
                detector_config = BUILTIN_DETECTOR_CONFIG
            else:
                # Both external detectors share the same settings and differ only in
                # the predictor service hostname derived from the detector name.
                detector_config = {
                    detector_name: {
                        "type": "text_contents",
                        "service": {
                            "hostname": f"{detector_name}-predictor.{namespace_name}.svc.cluster.local",
                            "port": 80,
                        },
                        "chunker_id": "whole_doc_chunker",
                        "default_threshold": 0.5,
                    }
                    for detector_name in (PROMPT_INJECTION_DETECTOR, HAP_DETECTOR)
                }

            orchestrator_settings = {
                "openai": get_vllm_chat_config(namespace_name),
                "detectors": detector_config,
            }
            config_map_data = {"config.yaml": yaml.dump(orchestrator_settings)}

        with ConfigMap(
            client=admin_client,
            name="fms-orchestr8-config-nlp",
            namespace=namespace_name,
            data=config_map_data,
            teardown=teardown_resources,
        ) as config_map:
            yield config_map

tests/fixtures/inference.py

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
KServeDeploymentType,
2222
LLMdInferenceSimConfig,
2323
RuntimeTemplates,
24+
VLLMGPUConfig,
2425
)
2526
from utilities.inference_utils import create_isvc
2627
from utilities.infra import get_data_science_cluster, wait_for_dsc_status_ready
@@ -245,3 +246,83 @@ def _wait_for_kserve_upgrade(dsc_resource: DataScienceCluster):
245246
else:
246247
LOGGER.info("DSC already configured for Headed mode")
247248
yield dsc
249+
250+
251+
@pytest.fixture(scope="class")
def vllm_gpu_runtime(
    admin_client: DynamicClient,
    model_namespace: Namespace,
) -> Generator[ServingRuntime, Any, Any]:
    """
    Create the "vllm-runtime-gpu" ServingRuntime in the model namespace.

    The runtime is instantiated from the VLLM_CUDA template in raw deployment
    mode, uses a digest-pinned vLLM CUDA image, and overrides the
    "kserve-container" so that it starts the vLLM OpenAI API server on port 8080
    with a limit of one "nvidia.com/gpu".
    """
    pinned_runtime_image = (
        "registry.redhat.io/rhaiis/vllm-cuda-rhel9@"
        "sha256:ec799bb5eeb7e25b4b25a8917ab5161da6b6f1ab830cbba61bba371cffb0c34d"
    )

    api_server_args = [
        "--port=8080",
        "--model=/mnt/models",
        "--tokenizer=/mnt/models",
        "--served-model-name={{.Name}}",
        "--dtype=float16",
        "--enforce-eager",
    ]

    kserve_container_spec = {
        "command": ["python", "-m", "vllm.entrypoints.openai.api_server"],
        "args": api_server_args,
        "ports": [{"containerPort": 8080, "protocol": "TCP"}],
        "resources": {"limits": {"nvidia.com/gpu": "1"}},
    }

    with ServingRuntimeFromTemplate(
        client=admin_client,
        name="vllm-runtime-gpu",
        namespace=model_namespace.name,
        template_name=RuntimeTemplates.VLLM_CUDA,
        deployment_type=KServeDeploymentType.RAW_DEPLOYMENT,
        runtime_image=pinned_runtime_image,
        containers={"kserve-container": kserve_container_spec},
    ) as serving_runtime:
        yield serving_runtime
284+
285+
286+
@pytest.fixture(scope="class")
def qwen_gpu_isvc(
    admin_client: DynamicClient,
    model_namespace: Namespace,
    vllm_gpu_runtime: ServingRuntime,
) -> Generator[InferenceService, Any, Any]:
    """
    Deploy the "qwen3b" InferenceService on top of the vLLM GPU runtime.

    The model is pulled from a digest-pinned OCI image, served in raw deployment
    mode with authentication disabled, and the fixture waits for the predictor
    pods. One "nvidia.com/gpu" is both requested and set as the limit.
    """
    model_storage_uri = (
        "oci://quay.io/trustyai_testing/models/qwen2.5-3b-instruct@"
        "sha256:6f9d9843599a9959de23c76d6b5adb556505482a7e732b2fcbca695a9c4ce545"
    )

    resource_requests = {
        "cpu": "2",
        "memory": "8Gi",
        "nvidia.com/gpu": "1",
    }
    resource_limits = {
        "cpu": "4",
        "memory": "12Gi",
        "nvidia.com/gpu": "1",
    }

    with create_isvc(
        client=admin_client,
        name="qwen3b",
        namespace=model_namespace.name,
        deployment_mode=KServeDeploymentType.RAW_DEPLOYMENT,
        model_format="vLLM",
        runtime=vllm_gpu_runtime.name,
        storage_uri=model_storage_uri,
        enable_auth=False,
        wait_for_predictor_pods=True,
        resources={"requests": resource_requests, "limits": resource_limits},
    ) as inference_service:
        yield inference_service
320+
321+
322+
def get_vllm_chat_config(namespace: str) -> dict[str, Any]:
    """
    Build the chat-generation service config pointing at the vLLM GPU deployment.

    Args:
        namespace: Namespace passed to ``VLLMGPUConfig.get_hostname`` to resolve the hostname.

    Returns:
        A dict with a single "service" key holding the "hostname" and "port" of the vLLM service.
    """
    service_endpoint = {
        "hostname": VLLMGPUConfig.get_hostname(namespace),
        "port": VLLMGPUConfig.port,
    }
    return {"service": service_endpoint}

tests/model_explainability/guardrails/constants.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,6 @@
1212
PII_ENDPOINT: str = "/pii"
1313
AUTOCONFIG_GATEWAY_ENDPOINT: str = "/all"
1414
STANDALONE_DETECTION_ENDPOINT: str = "api/v2/text/detection/content"
15-
PROMPT_INJECTION_DETECTOR: str = "prompt-injection-detector"
16-
HAP_DETECTOR: str = "hap-detector"
1715

1816

1917
@dataclass
@@ -38,6 +36,14 @@ class GuardrailsDetectionPrompt:
3836
detection_text=EXAMPLE_EMAIL_ADDRESS,
3937
)
4038

39+
# Output-detection prompt for the Qwen model: asks for an email address so the
# "regex" detector can flag it as "email_address" PII in the response.
# NOTE(review): detection_text is empty, presumably because the generated address
# is not known in advance — confirm against the test that consumes this prompt.
PII_OUTPUT_DETECTION_PROMPT_QWEN: GuardrailsDetectionPrompt = GuardrailsDetectionPrompt(
    content="Give an example of a email address for a character in a story. Only return the email address string.",
    detector_id="regex",
    detection_name="email_address",
    detection_type="pii",
    detection_text="",
)
46+
4147
PII_OUTPUT_DETECTION_PROMPT: GuardrailsDetectionPrompt = GuardrailsDetectionPrompt(
4248
content="Give me one email and nothing else.",
4349
detector_id="regex",

tests/model_explainability/guardrails/test_guardrails.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,11 @@
88
AUTOCONFIG_DETECTOR_LABEL,
99
AUTOCONFIG_GATEWAY_ENDPOINT,
1010
CHAT_COMPLETIONS_DETECTION_ENDPOINT,
11-
HAP_DETECTOR,
1211
HAP_INPUT_DETECTION_PROMPT,
1312
HARMLESS_PROMPT,
1413
PII_ENDPOINT,
1514
PII_INPUT_DETECTION_PROMPT,
1615
PII_OUTPUT_DETECTION_PROMPT,
17-
PROMPT_INJECTION_DETECTOR,
1816
PROMPT_INJECTION_INPUT_DETECTION_PROMPT,
1917
STANDALONE_DETECTION_ENDPOINT,
2018
)
@@ -29,7 +27,9 @@
2927
from tests.model_explainability.utils import validate_tai_component_images
3028
from utilities.constants import (
3129
BUILTIN_DETECTOR_CONFIG,
30+
HAP_DETECTOR,
3231
LLM_D_CHAT_GENERATION_CONFIG,
32+
PROMPT_INJECTION_DETECTOR,
3333
LLMdInferenceSimConfig,
3434
Timeout,
3535
)

0 commit comments

Comments
 (0)