|
| 1 | +from typing import Generator, Any |
| 2 | + |
| 3 | +import pytest |
| 4 | +import yaml |
| 5 | +from kubernetes.dynamic import DynamicClient |
| 6 | +from ocp_resources.config_map import ConfigMap |
| 7 | +from ocp_resources.deployment import Deployment |
| 8 | +from ocp_resources.guardrails_orchestrator import GuardrailsOrchestrator |
| 9 | +from ocp_resources.inference_service import InferenceService |
| 10 | +from ocp_resources.namespace import Namespace |
| 11 | +from ocp_resources.persistent_volume_claim import PersistentVolumeClaim |
| 12 | +from ocp_resources.role_binding import RoleBinding |
| 13 | +from ocp_resources.route import Route |
| 14 | +from ocp_resources.secret import Secret |
| 15 | +from ocp_resources.service import Service |
| 16 | +from ocp_resources.service_account import ServiceAccount |
| 17 | +from ocp_resources.serving_runtime import ServingRuntime |
| 18 | + |
| 19 | +from tests.model_explainability.constants import ( |
| 20 | + MINIO, |
| 21 | + MINIO_ACCESS_KEY, |
| 22 | + MINIO_SECRET_KEY, |
| 23 | + MINIO_ACCESS_KEY_VALUE, |
| 24 | + MINIO_SECRET_KEY_VALUE, |
| 25 | +) |
| 26 | +from utilities.constants import KServeDeploymentType, Timeout, Ports |
| 27 | +from utilities.inference_utils import create_isvc |
| 28 | +from utilities.serving_runtime import ServingRuntimeFromTemplate |
| 29 | + |
| 30 | + |
# ServiceAccount name used by the auth/RBAC fixtures below.
USER_ONE: str = "user-one"
# Port the vLLM serving container listens on; the orchestrator config and the
# gateway config below both point at this same port.
GUARDRAILS_ORCHESTRATOR_PORT: int = 8032
| 33 | + |
| 34 | + |
@pytest.fixture(scope="class")
def guardrails_orchestrator_health_route(
    admin_client: DynamicClient, model_namespace: Namespace, guardrails_orchestrator: GuardrailsOrchestrator
) -> Generator[Route, Any, Any]:
    """Yield the orchestrator's ``<name>-health`` Route.

    The Route itself is created by the orchestrator (this fixture only depends on
    ``guardrails_orchestrator`` for ordering), so we look it up with
    ``ensure_exists`` rather than creating it.
    """
    route = Route(
        client=admin_client,  # pass the client explicitly, consistent with the other fixtures in this module
        name=f"{guardrails_orchestrator.name}-health",
        namespace=guardrails_orchestrator.namespace,
        wait_for_resource=True,
        ensure_exists=True,
    )
    yield route
| 46 | + |
| 47 | + |
@pytest.fixture(scope="class")
def guardrails_orchestrator(
    admin_client: DynamicClient,
    model_namespace: Namespace,
    orchestrator_configmap: ConfigMap,
    vllm_gateway_config: ConfigMap,
    vllm_images_configmap: ConfigMap,
) -> Generator[GuardrailsOrchestrator, Any, Any]:
    """Create a GuardrailsOrchestrator CR and wait until its Deployment is ready.

    ``vllm_images_configmap`` is not referenced in the body; it is requested only
    so the ConfigMap exists before the orchestrator is created.
    """
    with GuardrailsOrchestrator(
        client=admin_client,
        name="gorch-test",
        namespace=model_namespace.name,
        orchestrator_config=orchestrator_configmap.name,
        vllm_gateway_config=vllm_gateway_config.name,
        replicas=1,
        wait_for_resource=True,
    ) as gorch:
        # The operator creates a Deployment named after the CR; block until its
        # replicas are available so tests start against a ready orchestrator.
        orchestrator_deployment = Deployment(
            client=admin_client,  # explicit client, consistent with the other fixtures in this module
            name=gorch.name,
            namespace=gorch.namespace,
            wait_for_resource=True,
        )
        orchestrator_deployment.wait_for_replicas()
        yield gorch
| 68 | + |
| 69 | + |
@pytest.fixture(scope="class")
def qwen_llm_model(
    admin_client: DynamicClient,
    model_namespace: Namespace,
    minio_data_connection: Secret,
    vllm_runtime: ServingRuntime,
) -> Generator[InferenceService, Any, Any]:
    """Deploy Qwen2.5-0.5B-Instruct as an auth-enabled raw-deployment InferenceService."""
    isvc_kwargs = {
        "client": admin_client,
        "name": "llm",
        "namespace": model_namespace.name,
        "deployment_mode": KServeDeploymentType.RAW_DEPLOYMENT,
        "model_format": "vLLM",
        "runtime": vllm_runtime.name,
        "storage_key": minio_data_connection.name,
        "storage_path": "Qwen2.5-0.5B-Instruct",
        # Pod readiness is handled by the consumers; don't block here.
        "wait_for_predictor_pods": False,
        "enable_auth": True,
        "resources": {
            "requests": {"cpu": "1", "memory": "8Gi"},
            "limits": {"cpu": "2", "memory": "10Gi"},
        },
    }
    with create_isvc(**isvc_kwargs) as inference_service:
        yield inference_service
| 91 | + |
| 92 | + |
@pytest.fixture(scope="class")
def vllm_runtime(
    admin_client: DynamicClient,
    model_namespace: Namespace,
    minio_llm_deployment: Deployment,
    minio_service: Service,
    minio_data_connection: Secret,
) -> Generator[ServingRuntime, Any, Any]:
    """Create a CPU vLLM ServingRuntime from the vllm-runtime template.

    The MinIO fixtures are requested for ordering only: the model storage must be
    up before an InferenceService uses this runtime.
    """
    with ServingRuntimeFromTemplate(
        client=admin_client,
        name="vllm-runtime-cpu-fp16",
        namespace=model_namespace.name,
        template_name="vllm-runtime-template",
        deployment_type=KServeDeploymentType.RAW_DEPLOYMENT,
        runtime_image="quay.io/rh-aiservices-bu/vllm-cpu-openai-ubi9"
        "@sha256:d680ff8becb6bbaf83dfee7b2d9b8a2beb130db7fd5aa7f9a6d8286a58cebbfd",
        containers={
            "kserve-container": {
                "args": [
                    # f-strings format ints directly; the redundant str() call was removed
                    f"--port={GUARDRAILS_ORCHESTRATOR_PORT}",
                    "--model=/mnt/models",
                ],
                "ports": [{"containerPort": GUARDRAILS_ORCHESTRATOR_PORT, "protocol": "TCP"}],
                "volumeMounts": [{"mountPath": "/dev/shm", "name": "shm"}],
            }
        },
        # In-memory emptyDir backing /dev/shm for vLLM's shared-memory usage.
        volumes=[{"emptyDir": {"medium": "Memory", "sizeLimit": "2Gi"}, "name": "shm"}],
    ) as serving_runtime:
        yield serving_runtime
| 122 | + |
| 123 | + |
@pytest.fixture(scope="class")
def vllm_images_configmap(admin_client: DynamicClient, model_namespace: Namespace) -> Generator[ConfigMap, Any, Any]:
    """Create the ConfigMap that pins the detector/gateway sidecar images by digest."""
    regex_detector_image = (
        "quay.io/trustyai_testing/regex-detector"
        "@sha256:e9df9f7e7429e29da9b8d9920d80cdc85a496e7961f6edb19132d604a914049b"
    )
    gateway_image = (
        "quay.io/trustyai_testing/vllm-orchestrator-gateway"
        "@sha256:d0bbf2de95c69f76215a016820f294202c48721dee452b3939e36133697d5b1d"
    )
    with ConfigMap(
        client=admin_client,
        name="gorch-test-config",
        namespace=model_namespace.name,
        data={
            "regexDetectorImage": regex_detector_image,
            "vllmGatewayImage": gateway_image,
        },
    ) as images_cm:
        yield images_cm
| 138 | + |
| 139 | + |
@pytest.fixture(scope="class")
def orchestrator_configmap(
    admin_client: DynamicClient, model_namespace: Namespace, qwen_llm_model: InferenceService
) -> Generator[ConfigMap, Any, Any]:
    """Create the orchestrator config: chat generation via the Qwen ISVC, plus a regex detector."""
    predictor_hostname = f"{qwen_llm_model.name}-predictor.{model_namespace.name}.svc.cluster.local"
    orchestrator_config = {
        "chat_generation": {
            "service": {
                "hostname": predictor_hostname,
                "port": GUARDRAILS_ORCHESTRATOR_PORT,
            }
        },
        "detectors": {
            "regex": {
                "type": "text_contents",
                # Regex detector runs as a sidecar, reachable on localhost.
                "service": {"hostname": "127.0.0.1", "port": Ports.REST_PORT},
                "chunker_id": "whole_doc_chunker",
                "default_threshold": 0.5,
            }
        },
    }
    with ConfigMap(
        client=admin_client,
        name="fms-orchestr8-config-nlp",
        namespace=model_namespace.name,
        data={"config.yaml": yaml.dump(orchestrator_config)},
    ) as orchestrator_cm:
        yield orchestrator_cm
| 168 | + |
| 169 | + |
@pytest.fixture(scope="class")
def vllm_gateway_config(admin_client: DynamicClient, model_namespace: Namespace) -> Generator[ConfigMap, Any, Any]:
    """Create the gateway config: detector declarations and the pii/passthrough routes."""
    gateway_config = {
        # Gateway talks to the orchestrator over localhost.
        "orchestrator": {"host": "localhost", "port": GUARDRAILS_ORCHESTRATOR_PORT},
        "detectors": [
            {"name": "regex", "detector_params": {"regex": ["email", "ssn"]}},
            {"name": "other_detector"},
        ],
        "routes": [
            {"name": "pii", "detectors": ["regex"]},
            {"name": "passthrough", "detectors": []},
        ],
    }
    with ConfigMap(
        client=admin_client,
        name="fms-orchestr8-config-gateway",
        namespace=model_namespace.name,
        label={"app": "fmstack-nlp"},
        data={"config.yaml": yaml.dump(gateway_config)},
    ) as gateway_cm:
        yield gateway_cm
| 189 | + |
| 190 | + |
@pytest.fixture(scope="class")
def minio_llm_deployment(
    admin_client: DynamicClient,
    model_namespace: Namespace,
    llm_models_pvc: PersistentVolumeClaim,
) -> Generator[Deployment, Any, Any]:
    """Deploy MinIO seeded with the Qwen model.

    An init container downloads Qwen2.5-0.5B-Instruct from HuggingFace into the
    shared PVC; the MinIO container then serves that volume as object storage.
    """
    with Deployment(
        client=admin_client,
        name="llm-container-deployment",
        namespace=model_namespace.name,
        replicas=1,
        selector={"matchLabels": {"app": MINIO}},
        template={
            "metadata": {"labels": {"app": MINIO, "maistra.io/expose-route": "true"}, "name": MINIO},
            "spec": {
                "volumes": [
                    # Reference the PVC fixture's name instead of duplicating the
                    # hard-coded string, so a rename there cannot break this silently.
                    {"name": "model-volume", "persistentVolumeClaim": {"claimName": llm_models_pvc.name}}
                ],
                "initContainers": [
                    {
                        "name": "download-model",
                        "image": "quay.io/trustyai_testing/llm-downloader-bootstrap"
                        "@sha256:d3211cc581fe69ca9a1cb75f84e5d08cacd1854cb2d63591439910323b0cbb57",
                        # NOTE(review): fsGroup is a pod-level securityContext field in
                        # Kubernetes; its placement on a container looks suspect — confirm
                        # it has the intended effect or move it to spec.securityContext.
                        "securityContext": {"fsGroup": 1001},
                        "command": [
                            "bash",
                            "-c",
                            'model="Qwen/Qwen2.5-0.5B-Instruct"'
                            '\necho "starting download"'
                            "\n/tmp/venv/bin/huggingface-cli download $model "
                            "--local-dir /mnt/models/llms/$(basename $model)"
                            '\necho "Done!"',
                        ],
                        "resources": {"limits": {"memory": "5Gi", "cpu": "2"}},
                        "volumeMounts": [{"mountPath": "/mnt/models/", "name": "model-volume"}],
                    }
                ],
                "containers": [
                    {
                        "args": ["server", "/models"],
                        "env": [
                            {"name": MINIO_ACCESS_KEY, "value": MINIO_ACCESS_KEY_VALUE},
                            {"name": MINIO_SECRET_KEY, "value": MINIO_SECRET_KEY_VALUE},
                        ],
                        "image": "quay.io/trustyai/modelmesh-minio-examples"
                        "@sha256:65cb22335574b89af15d7409f62feffcc52cc0e870e9419d63586f37706321a5",
                        "name": MINIO,
                        "securityContext": {
                            "allowPrivilegeEscalation": False,
                            "capabilities": {"drop": ["ALL"]},
                            "seccompProfile": {"type": "RuntimeDefault"},
                        },
                        "volumeMounts": [{"mountPath": "/models/", "name": "model-volume"}],
                    }
                ],
            },
        },
        label={"app": MINIO},
        wait_for_resource=True,
    ) as deployment:
        # Model download can take a while; allow up to 10 minutes for readiness.
        deployment.wait_for_replicas(timeout=Timeout.TIMEOUT_10MIN)
        yield deployment
| 251 | + |
| 252 | + |
@pytest.fixture(scope="class")
def llm_models_pvc(
    admin_client: DynamicClient, model_namespace: Namespace
) -> Generator[PersistentVolumeClaim, Any, Any]:
    """Create the RWO filesystem PVC that holds the downloaded LLM weights."""
    with PersistentVolumeClaim(
        client=admin_client,
        name="llm-models-claim",
        namespace=model_namespace.name,
        accessmodes=PersistentVolumeClaim.AccessMode.RWO,
        volume_mode=PersistentVolumeClaim.VolumeMode.FILE,
        size="10Gi",
    ) as models_claim:
        yield models_claim
| 266 | + |
| 267 | + |
@pytest.fixture(scope="class")
def user_one_service_account(
    admin_client: DynamicClient, model_namespace: Namespace
) -> Generator[ServiceAccount, Any, Any]:
    """Create the test ServiceAccount used for authenticated requests."""
    with ServiceAccount(
        client=admin_client,
        name=USER_ONE,
        namespace=model_namespace.name,
    ) as sa:
        yield sa
| 274 | + |
| 275 | + |
@pytest.fixture(scope="class")
def user_one_rolebinding(
    admin_client: DynamicClient, model_namespace: Namespace, user_one_service_account: ServiceAccount
) -> Generator[RoleBinding, Any, Any]:
    """Bind the test ServiceAccount to the ``view`` role in the model namespace."""
    sa_subject = {"kind": "ServiceAccount", "name": user_one_service_account.name}
    # NOTE(review): "view" is commonly a ClusterRole; kind "Role" here assumes a
    # namespaced Role named "view" exists — confirm against the cluster setup.
    view_role_ref = {"apiGroup": "rbac.authorization.k8s.io", "kind": "Role", "name": "view"}
    with RoleBinding(
        client=admin_client,
        name=f"{user_one_service_account.name}-view",
        namespace=model_namespace.name,
        subjects=[sa_subject],
        role_ref=view_role_ref,
    ) as binding:
        yield binding
0 commit comments