
Commit 1546283

[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
1 parent: c0bcd23 · commit: 1546283

4 files changed: +45 −44 lines


tests/fixtures/guardrails.py

Lines changed: 5 additions & 2 deletions
@@ -13,8 +13,8 @@
 from ocp_resources.resource import ResourceEditor
 from ocp_resources.route import Route

-from tests.model_explainability.guardrails.constants import PROMPT_INJECTION_DETECTOR, HAP_DETECTOR
-from utilities.constants import Annotations, Labels, VLLMGPUConfig, BUILTIN_DETECTOR_CONFIG
+from tests.model_explainability.guardrails.constants import HAP_DETECTOR, PROMPT_INJECTION_DETECTOR
+from utilities.constants import BUILTIN_DETECTOR_CONFIG, Annotations, Labels, VLLMGPUConfig
 from utilities.guardrails import check_guardrails_health_endpoint

 GUARDRAILS_ORCHESTRATOR_NAME: str = "guardrails-orchestrator"
@@ -220,6 +220,7 @@ def guardrails_orchestrator_gateway_route(
         ensure_exists=True,
     )

+
 def get_vllm_chat_config(namespace: str) -> dict[str, Any]:
     return {
         "service": {
@@ -228,6 +229,7 @@ def get_vllm_chat_config(namespace: str) -> dict[str, Any]:
         }
     }

+
 @pytest.fixture(scope="class")
 def orchestrator_config_gpu(
     request: FixtureRequest,
@@ -285,6 +287,7 @@ def orchestrator_config_gpu(
     ) as cm:
         yield cm

+
 @pytest.fixture(scope="class")
 def orchestrator_config_builtin_gpu(
     request: FixtureRequest,

tests/fixtures/inference.py

Lines changed: 30 additions & 34 deletions
@@ -2,10 +2,7 @@
 from typing import Any

 import pytest
-import yaml
-from _pytest.fixtures import FixtureRequest
 from kubernetes.dynamic import DynamicClient
-from ocp_resources.config_map import ConfigMap
 from ocp_resources.data_science_cluster import DataScienceCluster
 from ocp_resources.deployment import Deployment
 from ocp_resources.inference_service import InferenceService
@@ -19,12 +16,12 @@
 from simple_logger.logger import get_logger
 from timeout_sampler import retry

-from tests.model_explainability.guardrails.constants import PROMPT_INJECTION_DETECTOR, HAP_DETECTOR
 from utilities.constants import (
     QWEN_MODEL_NAME,
     KServeDeploymentType,
     LLMdInferenceSimConfig,
-    RuntimeTemplates, VLLMGPUConfig, BUILTIN_DETECTOR_CONFIG,
+    RuntimeTemplates,
+    VLLMGPUConfig,
 )
 from utilities.inference_utils import create_isvc
 from utilities.infra import get_data_science_cluster, wait_for_dsc_status_ready
@@ -250,6 +247,7 @@ def _wait_for_kserve_upgrade(dsc_resource: DataScienceCluster):
         LOGGER.info("DSC already configured for Headed mode")
     yield dsc

+
 @pytest.fixture(scope="class")
 def vllm_gpu_runtime(
     admin_client: DynamicClient,
@@ -263,28 +261,25 @@ def vllm_gpu_runtime(
         template_name=RuntimeTemplates.VLLM_CUDA,
         deployment_type=KServeDeploymentType.RAW_DEPLOYMENT,
         runtime_image="registry.redhat.io/rhaiis/vllm-cuda-rhel9@sha256:ec799bb5eeb7e25b4b25a8917ab5161da6b6f1ab830cbba61bba371cffb0c34d",
-        containers={
-            "kserve-container": {
-                "command": ["python", "-m", "vllm.entrypoints.openai.api_server"],
-                "args": [
-                    "--port=8080",
-                    "--model=/mnt/models",
-                    "--tokenizer=/mnt/models",
-                    "--served-model-name={{.Name}}",
-                    "--dtype=float16",
-                    "--enforce-eager",
-                ],
-                "ports": [{"containerPort": 8080, "protocol": "TCP"}],
-                "resources": {
-                    "limits": {
-                        "nvidia.com/gpu": "1"
-                    }
-                },
-            }
+        containers={
+            "kserve-container": {
+                "command": ["python", "-m", "vllm.entrypoints.openai.api_server"],
+                "args": [
+                    "--port=8080",
+                    "--model=/mnt/models",
+                    "--tokenizer=/mnt/models",
+                    "--served-model-name={{.Name}}",
+                    "--dtype=float16",
+                    "--enforce-eager",
+                ],
+                "ports": [{"containerPort": 8080, "protocol": "TCP"}],
+                "resources": {"limits": {"nvidia.com/gpu": "1"}},
             }
+        },
     ) as runtime:
         yield runtime

+
 @pytest.fixture(scope="class")
 def qwen_gpu_isvc(
     admin_client: DynamicClient,
@@ -302,21 +297,22 @@ def qwen_gpu_isvc(
         storage_uri="oci://quay.io/trustyai_testing/models/qwen2.5-3b-instruct@sha256:6f9d9843599a9959de23c76d6b5adb556505482a7e732b2fcbca695a9c4ce545",
         enable_auth=False,
         wait_for_predictor_pods=True,
-        resources={
-            "requests": {
-                "cpu": "2",
-                "memory": "8Gi",
-                "nvidia.com/gpu": "1",
-            },
-            "limits": {
-                "cpu": "4",
-                "memory": "12Gi",
-                "nvidia.com/gpu": "1",
-            },
+        resources={
+            "requests": {
+                "cpu": "2",
+                "memory": "8Gi",
+                "nvidia.com/gpu": "1",
             },
+            "limits": {
+                "cpu": "4",
+                "memory": "12Gi",
+                "nvidia.com/gpu": "1",
+            },
+        },
     ) as isvc:
         yield isvc

+
 def get_vllm_chat_config(namespace: str) -> dict[str, Any]:
     return {
         "service": {

tests/model_explainability/guardrails/test_guardrails_gpu.py

Lines changed: 5 additions & 4 deletions
@@ -8,9 +8,10 @@
     HARMLESS_PROMPT,
     PII_ENDPOINT,
     PII_INPUT_DETECTION_PROMPT,
+    PII_OUTPUT_DETECTION_PROMPT_QWEN,
     PROMPT_INJECTION_DETECTOR,
     PROMPT_INJECTION_INPUT_DETECTION_PROMPT,
-    STANDALONE_DETECTION_ENDPOINT, PII_OUTPUT_DETECTION_PROMPT_QWEN,
+    STANDALONE_DETECTION_ENDPOINT,
 )
 from tests.model_explainability.guardrails.utils import (
     create_detector_config,
@@ -20,12 +21,12 @@
     send_and_verify_unsuitable_output_detection,
     verify_health_info_response,
 )
-
 from utilities.constants import (
-    VLLMGPUConfig,
+    VLLMGPUConfig,
 )
 from utilities.plugins.constant import OpenAIEnpoints

+
 @pytest.mark.parametrize(
     "model_namespace, orchestrator_config_builtin_gpu, guardrails_gateway_config, guardrails_orchestrator",
     [
@@ -160,6 +161,7 @@ def test_guardrails_builtin_detectors_negative_detection(
             model=VLLMGPUConfig.model_name,
         )

+
 @pytest.mark.gpu
 @pytest.mark.rawdeployment
 @pytest.mark.usefixtures("patched_dsc_kserve_headed", "guardrails_gateway_config")
@@ -201,7 +203,6 @@ def test_guardrails_builtin_detectors_negative_detection(
     ],
     indirect=True,
 )
-
 class TestGuardrailsOrchestratorHuggingFaceGPU:
     """
     These tests verify that the GuardrailsOrchestrator works as expected when using HuggingFace detectors

utilities/constants.py

Lines changed: 5 additions & 4 deletions
@@ -496,6 +496,8 @@ class LLMdInferenceSimConfig:
 LLM_D_CHAT_GENERATION_CONFIG: dict[str, Any] = {
     "service": {"hostname": f"{LLMdInferenceSimConfig.isvc_name}-predictor", "port": 80}
 }
+
+
 class VLLMGPUConfig:
     name: str = "vllm-gpu"
     port: int = 80
@@ -507,12 +509,11 @@ class VLLMGPUConfig:
     def get_hostname(cls, namespace: str) -> str:
         return f"{cls.isvc_name}-predictor.{namespace}.svc.cluster.local"

+
 VLLM_CHAT_GENERATION_CONFIG: dict[str, Any] = {
-    "service": {
-        "hostname": VLLMGPUConfig.get_hostname("test-guardrails-huggingface"),
-        "port": VLLMGPUConfig.port
-    }
+    "service": {"hostname": VLLMGPUConfig.get_hostname("test-guardrails-huggingface"), "port": VLLMGPUConfig.port}
 }

+
 class PodNotFound(Exception):
     """Pod not found"""
