|
9 | 9 | from ocp_resources.service import Service |
10 | 10 | from ocp_resources.serving_runtime import ServingRuntime |
11 | 11 |
|
12 | | -from utilities.constants import RuntimeTemplates, KServeDeploymentType, QWEN_MODEL_NAME |
| 12 | +from utilities.constants import ( |
| 13 | + RuntimeTemplates, |
| 14 | + KServeDeploymentType, |
| 15 | + QWEN_MODEL_NAME, |
| 16 | + LLMdInferenceSimConfig, |
| 17 | +) |
13 | 18 | from utilities.inference_utils import create_isvc |
14 | 19 | from utilities.serving_runtime import ServingRuntimeFromTemplate |
15 | 20 |
|
@@ -73,3 +78,102 @@ def qwen_isvc( |
@pytest.fixture(scope="class")
def qwen_isvc_url(qwen_isvc: InferenceService) -> str:
    """Cluster-local base URL of the Qwen predictor's OpenAI-compatible v1 API."""
    predictor_host = f"{qwen_isvc.name}-predictor.{qwen_isvc.namespace}.svc.cluster.local"
    return f"http://{predictor_host}:8032/v1"
| 81 | + |
| 82 | + |
@pytest.fixture(scope="class")
def llm_d_inference_sim_serving_runtime(
    admin_client: DynamicClient,
    model_namespace: Namespace,
) -> Generator[ServingRuntime, Any, Any]:
    """Class-scoped ServingRuntime for the LLM-d Inference Simulator.

    While llm-d-inference-sim supports any model name, the /tokenizers endpoint will only support two models
    - qwen2.5-0.5b-instruct
    - Qwen2.5-1.5B-Instruct

    For other models, ensure:
    - the correct write permissions on the Pod
    - the model name matches what is available on HuggingFace (e.g., Qwen/Qwen2.5-1.5B-Instruct)
    - you have set a writeable "--tokenizers-cache-dir"
    - the cluster can pull from HuggingFace

    """

    def _health_probe(initial_delay: int, period: int) -> dict:
        # Liveness and readiness both poll the simulator's /health endpoint;
        # only the timing parameters differ between the two probes.
        return {
            "failureThreshold": 3,
            "httpGet": {"path": "/health", "port": LLMdInferenceSimConfig.port, "scheme": "HTTP"},
            "initialDelaySeconds": initial_delay,
            "periodSeconds": period,
            "timeoutSeconds": 5,
        }

    runtime_annotations = {
        "description": "LLM-d Simulator KServe",
        "opendatahub.io/template-display-name": "LLM-d Inference Simulator Runtime",
        "openshift.io/display-name": "LLM-d Inference Simulator Runtime",
        "serving.kserve.io/enable-agent": "false",
    }
    runtime_labels = {
        "app.kubernetes.io/component": LLMdInferenceSimConfig.name,
        "app.kubernetes.io/instance": "llm-d-inference-sim-kserve",
        "app.kubernetes.io/name": "llm-d-sim",
        "app.kubernetes.io/version": "1.0.0",
        "opendatahub.io/dashboard": "true",
    }
    # NOTE(review): prometheus.io/port is hard-coded to "8000" while the container
    # listens on LLMdInferenceSimConfig.port — confirm these agree.
    pod_annotations = {
        "prometheus.io/path": "/metrics",
        "prometheus.io/port": "8000",
    }
    simulator_container = {
        "name": "kserve-container",
        "image": "quay.io/trustyai_testing/llmd-inference-sim-dataset-builtin"
        "@sha256:dfaa32cf0878a2fb522133e34369412c90e8ffbfa18b690b92602cf7c019fbbe",
        "imagePullPolicy": "Always",
        "args": ["--model", LLMdInferenceSimConfig.model_name, "--port", str(LLMdInferenceSimConfig.port)],
        "ports": [{"containerPort": LLMdInferenceSimConfig.port, "protocol": "TCP"}],
        "securityContext": {
            "allowPrivilegeEscalation": False,
        },
        "livenessProbe": _health_probe(initial_delay=15, period=20),
        "readinessProbe": _health_probe(initial_delay=5, period=10),
    }
    with ServingRuntime(
        client=admin_client,
        name=LLMdInferenceSimConfig.serving_runtime_name,
        namespace=model_namespace.name,
        annotations=runtime_annotations,
        label=runtime_labels,
        spec_annotations=pod_annotations,
        spec_labels={"opendatahub.io/dashboard": "true"},
        containers=[simulator_container],
        multi_model=False,
        supported_model_formats=[{"autoSelect": True, "name": LLMdInferenceSimConfig.name}],
    ) as runtime:
        yield runtime
| 156 | + |
| 157 | + |
@pytest.fixture(scope="class")
def llm_d_inference_sim_isvc(
    admin_client: DynamicClient,
    model_namespace: Namespace,
    llm_d_inference_sim_serving_runtime: ServingRuntime,
) -> Generator[InferenceService, Any, Any]:
    """Raw-deployment InferenceService backed by the LLM-d inference simulator runtime."""
    # Requests and limits are identical so the single simulator replica gets a
    # fixed, guaranteed footprint; separate copies avoid sharing one dict object.
    compute = {"cpu": "1", "memory": "1Gi"}
    with create_isvc(
        client=admin_client,
        name=LLMdInferenceSimConfig.isvc_name,
        namespace=model_namespace.name,
        deployment_mode=KServeDeploymentType.RAW_DEPLOYMENT,
        model_format=LLMdInferenceSimConfig.name,
        runtime=llm_d_inference_sim_serving_runtime.name,
        wait_for_predictor_pods=True,
        min_replicas=1,
        max_replicas=1,
        resources={"requests": dict(compute), "limits": dict(compute)},
    ) as inference_service:
        yield inference_service
0 commit comments