Skip to content

Commit 3254854

Browse files
committed
feat: [Explainability] Add LMEval tests with VLLM emulator
1 parent 00bca39 commit 3254854

File tree

2 files changed

+139
-8
lines changed

2 files changed

+139
-8
lines changed

tests/model_explainability/lm_eval/conftest.py

Lines changed: 119 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
1-
from typing import Generator
1+
from typing import Generator, Any
22

33
import pytest
4+
from ocp_resources.route import Route
5+
from ocp_resources.service import Service
46
from pytest import FixtureRequest
57
from kubernetes.dynamic import DynamicClient
68
from ocp_resources.config_map import ConfigMap
@@ -15,13 +17,17 @@
1517
from utilities.constants import Labels, Timeout, Annotations
1618

1719

20+
VLLM_EMULATOR: str = "vllm-emulator"
21+
LMEVALJOB_NAME: str = "lmeval-test-job"
22+
23+
1824
@pytest.fixture(scope="function")
1925
def lmevaljob_hf(
2026
admin_client: DynamicClient, model_namespace: Namespace, patched_trustyai_operator_configmap_allow_online: ConfigMap
21-
) -> Generator[LMEvalJob, None, None]:
27+
) -> Generator[LMEvalJob, Any, Any]:
2228
with LMEvalJob(
2329
client=admin_client,
24-
name="test-job",
30+
name=LMEVALJOB_NAME,
2531
namespace=model_namespace.name,
2632
model="hf",
2733
model_args=[{"name": "pretrained", "value": "google/flan-t5-base"}],
@@ -44,10 +50,10 @@ def lmevaljob_local_offline(
4450
model_namespace: Namespace,
4551
patched_trustyai_operator_configmap_allow_online: ConfigMap,
4652
lmeval_data_downloader_pod: Pod,
47-
) -> Generator[LMEvalJob, None, None]:
53+
) -> Generator[LMEvalJob, Any, Any]:
4854
with LMEvalJob(
4955
client=admin_client,
50-
name="lmeval-test",
56+
name=LMEVALJOB_NAME,
5157
namespace=model_namespace.name,
5258
model="hf",
5359
model_args=[{"name": "pretrained", "value": "/opt/app-root/src/hf_home/flan"}],
@@ -68,7 +74,39 @@ def lmevaljob_local_offline(
6874

6975

7076
@pytest.fixture(scope="function")
def lmevaljob_vllm_emulator(
    admin_client: DynamicClient,
    model_namespace: Namespace,
    patched_trustyai_operator_configmap_allow_online: ConfigMap,
    vllm_emulator_deployment: Deployment,
    vllm_emulator_service: Service,
    vllm_emulator_route: Route,
) -> Generator[LMEvalJob, Any, Any]:
    """LMEvalJob that evaluates against the in-cluster vLLM emulator.

    Uses the `local-completions` model type so lm-eval talks to the emulator
    through the OpenAI-compatible completions endpoint exposed by the
    emulator Service. Depends on the deployment/service/route fixtures so the
    backend exists before the job is created.
    """
    # The emulator is reached via its Service DNS name on port 8000.
    completions_url = f"http://{vllm_emulator_service.name}:8000/v1/completions"
    with LMEvalJob(
        client=admin_client,
        namespace=model_namespace.name,
        name=LMEVALJOB_NAME,
        model="local-completions",
        task_list={"taskNames": ["arc_easy"]},
        log_samples=True,
        batch_size="1",
        allow_online=True,
        allow_code_execution=False,
        outputs={"pvcManaged": {"size": "5Gi"}},
        model_args=[
            {"name": "model", "value": "emulatedModel"},
            {"name": "base_url", "value": completions_url},
            {"name": "num_concurrent", "value": "1"},
            {"name": "max_retries", "value": "3"},
            {"name": "tokenized_requests", "value": "False"},
            # Tokenizer is fetched client-side by lm-eval; requires allow_online=True.
            {"name": "tokenizer", "value": "ibm-granite/granite-guardian-3.1-8b"},
        ],
    ) as job:
        yield job
106+
107+
108+
@pytest.fixture(scope="function")
109+
def patched_trustyai_operator_configmap_allow_online(admin_client: DynamicClient) -> Generator[ConfigMap, Any, Any]:
72110
namespace: str = py_config["applications_namespace"]
73111
trustyai_service_operator: str = "trustyai-service-operator"
74112

@@ -99,7 +137,7 @@ def patched_trustyai_operator_configmap_allow_online(admin_client: DynamicClient
99137
@pytest.fixture(scope="function")
100138
def lmeval_data_pvc(
101139
admin_client: DynamicClient, model_namespace: Namespace
102-
) -> Generator[PersistentVolumeClaim, None, None]:
140+
) -> Generator[PersistentVolumeClaim, Any, Any]:
103141
with PersistentVolumeClaim(
104142
client=admin_client,
105143
name="lmeval-data",
@@ -117,7 +155,7 @@ def lmeval_data_downloader_pod(
117155
admin_client: DynamicClient,
118156
model_namespace: Namespace,
119157
lmeval_data_pvc: PersistentVolumeClaim,
120-
) -> Generator[Pod, None, None]:
158+
) -> Generator[Pod, Any, Any]:
121159
with Pod(
122160
client=admin_client,
123161
namespace=model_namespace.name,
@@ -143,3 +181,76 @@ def lmeval_data_downloader_pod(
143181
) as pod:
144182
pod.wait_for_status(status=Pod.Status.SUCCEEDED, timeout=Timeout.TIMEOUT_10MIN)
145183
yield pod
184+
185+
186+
@pytest.fixture(scope="function")
def vllm_emulator_deployment(
    admin_client: DynamicClient, model_namespace: Namespace
) -> Generator[Deployment, Any, Any]:
    """Deploy a single-replica vLLM emulator used as a lightweight backend for LMEval tests."""
    # Image pinned by digest so test runs are reproducible regardless of tag moves.
    emulator_image = (
        "quay.io/trustyai_testing/vllm_emulator"
        "@sha256:4214f31bff9de6cc723da23324fb8974cea8abadcab621d85a97a3503cabbdc6"
    )
    selector_labels = {"app": VLLM_EMULATOR}
    pod_template = {
        "metadata": {
            "labels": {
                "app": VLLM_EMULATOR,
                # NOTE(review): presumably needed so Service Mesh exposes the pod
                # via a Route — confirm against cluster mesh configuration.
                "maistra.io/expose-route": "true",
            },
            "name": VLLM_EMULATOR,
        },
        "spec": {
            "containers": [
                {
                    "image": emulator_image,
                    "name": "vllm-emulator",
                    # Restricted security settings (no privilege escalation,
                    # all capabilities dropped, default seccomp profile).
                    "securityContext": {
                        "allowPrivilegeEscalation": False,
                        "capabilities": {"drop": ["ALL"]},
                        "seccompProfile": {"type": "RuntimeDefault"},
                    },
                }
            ]
        },
    }
    with Deployment(
        client=admin_client,
        namespace=model_namespace.name,
        name=VLLM_EMULATOR,
        label=selector_labels,
        selector={"matchLabels": selector_labels},
        template=pod_template,
        replicas=1,
    ) as deployment:
        yield deployment
223+
224+
225+
@pytest.fixture(scope="function")
def vllm_emulator_service(
    admin_client: DynamicClient, model_namespace: Namespace, vllm_emulator_deployment: Deployment
) -> Generator[Service, Any, Any]:
    """Expose the vLLM emulator deployment inside the cluster on port 8000."""
    # Single TCP port forwarding straight through to the emulator container.
    endpoint_port = {
        "name": f"{VLLM_EMULATOR}-endpoint",
        "port": 8000,
        "protocol": "TCP",
        "targetPort": 8000,
    }
    with Service(
        client=admin_client,
        namespace=model_namespace.name,
        name=f"{VLLM_EMULATOR}-service",
        ports=[endpoint_port],
        selector={"app": VLLM_EMULATOR},
    ) as service:
        yield service
244+
245+
246+
@pytest.fixture(scope="function")
def vllm_emulator_route(
    admin_client: DynamicClient, model_namespace: Namespace, vllm_emulator_service: Service
) -> Generator[Route, Any, Any]:
    """Create an OpenShift Route pointing at the vLLM emulator Service."""
    with Route(
        client=admin_client,
        namespace=model_namespace.name,
        name=VLLM_EMULATOR,
        service=vllm_emulator_service.name,
    ) as route:
        yield route

tests/model_explainability/lm_eval/test_lm_eval.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,3 +78,23 @@ def test_lmeval_local_offline_unitxt_tasks_flan_20newsgroups(
7878
):
7979
"""Test that verifies that LMEval can run successfully in local, offline mode using unitxt"""
8080
verify_lmevaljob_running(client=admin_client, lmevaljob=lmevaljob_local_offline)
81+
82+
83+
@pytest.mark.parametrize(
    "model_namespace",
    [
        pytest.param(
            {"name": "test-lmeval-vllm"},
        )
    ],
    indirect=True,
)
def test_lmeval_vllm_emulator(admin_client, model_namespace, lmevaljob_vllm_emulator):
    """Basic test that verifies that LMEval can run successfully against a local vLLM emulator.

    The job uses the `local-completions` model type to query the in-cluster
    emulator; it does not pull a model from HuggingFace (the previous
    docstring was a copy-paste error from the HF test).
    """
    # The operator creates a pod named after the LMEvalJob; wait for it to appear.
    lmevaljob_pod = Pod(
        client=admin_client,
        name=lmevaljob_vllm_emulator.name,
        namespace=lmevaljob_vllm_emulator.namespace,
        wait_for_resource=True,
    )
    # The evaluation passed once the job pod runs to completion.
    lmevaljob_pod.wait_for_status(status=lmevaljob_pod.Status.SUCCEEDED, timeout=Timeout.TIMEOUT_10MIN)

0 commit comments

Comments
 (0)