Skip to content

Commit 2457d9e

Browse files
authored
Merge branch 'main' into fix_code_smell
2 parents d5023ee + 2b3a900 commit 2457d9e

7 files changed

Lines changed: 299 additions & 0 deletions

File tree

conftest.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,11 @@ def pytest_addoption(parser: Parser) -> None:
122122
default=os.environ.get("TRITON_RUNTIME_IMAGE"),
123123
help="Specify the runtime image to use for the tests",
124124
)
125+
runtime_group.addoption(
126+
"--ovms-runtime-image",
127+
default=os.environ.get("OVMS_RUNTIME_IMAGE"),
128+
help="Specify the OVMS runtime image to use for the tests",
129+
)
125130

126131
# OCI Registry options
127132
ociregistry_group.addoption(

tests/conftest.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
MinIo,
4949
OCIRegistry,
5050
Protocols,
51+
RuntimeTemplates,
5152
Timeout,
5253
)
5354
from utilities.data_science_cluster_utils import update_components_in_dsc
@@ -65,6 +66,7 @@
6566
from utilities.mariadb_utils import wait_for_mariadb_operator_deployments
6667
from utilities.minio import create_minio_data_connection_secret
6768
from utilities.operator_utils import get_cluster_service_version, get_csv_related_images
69+
from utilities.serving_runtime import get_runtime_image_from_template
6870
from utilities.user_utils import get_byoidc_issuer_url, get_oidc_tokens
6971

7072
LOGGER = get_logger(name=__name__)
@@ -328,6 +330,20 @@ def triton_runtime_image(pytestconfig: pytest.Config) -> str:
328330
return runtime_image
329331

330332

333+
@pytest.fixture(scope="session")
def ovms_runtime_image(pytestconfig: pytest.Config, admin_client: DynamicClient) -> str:
    """OVMS runtime image: an explicit --ovms-runtime-image wins, else read it from the cluster template."""
    image = pytestconfig.option.ovms_runtime_image
    if not image:
        # No CLI/env override supplied; fall back to the image baked into the
        # OVMS kserve template in the applications namespace.
        image = get_runtime_image_from_template(
            client=admin_client,
            template_name=RuntimeTemplates.OVMS_KSERVE,
            namespace=py_config["applications_namespace"],
        )
    return image
345+
346+
331347
@pytest.fixture(scope="session")
332348
def use_unprivileged_client(pytestconfig: pytest.Config) -> bool:
333349
_use_unprivileged_client = py_config.get("use_unprivileged_client")

tests/model_serving/model_runtime/openvino/conftest.py

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,18 @@
66
- Creating inference services and related Kubernetes resources
77
- Managing S3 secrets and service accounts
88
- Providing test utilities like snapshots and pod resources
9+
- OVMS smoke test Pod and ConfigMap for in-cluster script execution
910
"""
1011

1112
import copy
1213
from collections.abc import Generator
14+
from pathlib import Path
1315
from typing import Any, cast
1416

1517
import pytest
1618
from kubernetes.dynamic import DynamicClient
1719
from kubernetes.dynamic.exceptions import ResourceNotFoundError
20+
from ocp_resources.config_map import ConfigMap
1821
from ocp_resources.inference_service import InferenceService
1922
from ocp_resources.namespace import Namespace
2023
from ocp_resources.pod import Pod
@@ -36,6 +39,12 @@
3639

3740
LOGGER = get_logger(name=__name__)
3841

42+
OVMS_SMOKE_SCRIPTS_DIR = Path(__file__).parent / "smoke"
43+
OVMS_SMOKE_SCRIPT_NAMES = ("ovms_smoketest.py", "smoke.py")
44+
OVMS_SMOKE_CONFIGMAP_NAME = "ovms-smoke-scripts"
45+
OVMS_SMOKE_POD_NAME = "ovms-smoke-pod"
46+
OVMS_SMOKE_SCRIPTS_MOUNT_PATH = "/scripts"
47+
3948

4049
@pytest.fixture(scope="class")
4150
def openvino_serving_runtime(
@@ -189,3 +198,108 @@ def openvino_pod_resource(
189198
if not pods:
190199
raise ResourceNotFoundError(f"No pods found for InferenceService {openvino_inference_service.name}")
191200
return pods[0]
201+
202+
203+
def _load_ovms_smoke_scripts_data() -> dict[str, str]:
    """Map each smoke script filename to its file contents, for use as ConfigMap data."""
    return {script: (OVMS_SMOKE_SCRIPTS_DIR / script).read_text() for script in OVMS_SMOKE_SCRIPT_NAMES}
210+
211+
212+
@pytest.fixture(scope="class")
def ovms_smoke_scripts_configmap(
    admin_client: DynamicClient,
    model_namespace: Namespace,
) -> Generator[ConfigMap]:
    """
    ConfigMap holding the OVMS smoke test scripts that run inside the container.

    Args:
        admin_client: Kubernetes dynamic client.
        model_namespace: Namespace for the ConfigMap.

    Yields:
        ConfigMap: ConfigMap with ovms_smoketest.py and smoke.py data.
    """
    # Script contents are read from the repo at fixture setup time.
    scripts = _load_ovms_smoke_scripts_data()
    with ConfigMap(
        client=admin_client,
        name=OVMS_SMOKE_CONFIGMAP_NAME,
        namespace=model_namespace.name,
        data=scripts,
    ) as configmap:
        yield configmap
235+
236+
237+
@pytest.fixture(scope="class")
def ovms_smoke_pod(
    admin_client: DynamicClient,
    model_namespace: Namespace,
    ovms_runtime_image: str,
    ovms_smoke_scripts_configmap: ConfigMap,
) -> Generator[Pod]:
    """
    Pod that runs OVMS smoke scripts inside OpenShift using the OVMS runtime image.

    The smoke scripts are mounted read-only via ConfigMap (not copied).
    The container runs both scripts in sequence; the Pod succeeds only if both exit 0.

    Args:
        admin_client: Kubernetes dynamic client.
        model_namespace: Namespace for the Pod.
        ovms_runtime_image: Container image for OVMS runtime (from CLI or template).
        ovms_smoke_scripts_configmap: ConfigMap with smoke script contents.

    Yields:
        Pod: The completed Pod resource (phase Succeeded when both scripts exit 0).
    """
    # Chain both scripts with && so a non-zero exit from either fails the Pod.
    shell_command = (
        f"python {OVMS_SMOKE_SCRIPTS_MOUNT_PATH}/ovms_smoketest.py && python {OVMS_SMOKE_SCRIPTS_MOUNT_PATH}/smoke.py"
    )
    # Use writable dirs under /tmp so non-root container can cache models and configs.
    # HF_HOME is the preferred cache for Hugging Face (TRANSFORMERS_CACHE is deprecated in v5).
    container_env = [
        {"name": "HOME", "value": "/tmp"},
        {"name": "HF_HOME", "value": "/tmp/hf_cache"},
        {"name": "MPLCONFIGDIR", "value": "/tmp/matplotlib"},
    ]
    # Restricted-compatible security settings: non-root, no privilege escalation,
    # all capabilities dropped, default seccomp profile.
    container_security_context = {
        "allowPrivilegeEscalation": False,
        "capabilities": {"drop": ["ALL"]},
        "runAsNonRoot": True,
        "seccompProfile": {"type": "RuntimeDefault"},
    }
    smoke_container = {
        "name": "ovms-smoke",
        "image": ovms_runtime_image,
        "command": ["/bin/sh", "-c"],
        "args": [shell_command],
        "env": container_env,
        "volumeMounts": [
            {
                "name": "smoke-scripts",
                "mountPath": OVMS_SMOKE_SCRIPTS_MOUNT_PATH,
                "readOnly": True,
            }
        ],
        "securityContext": container_security_context,
    }
    scripts_volume = {
        "name": "smoke-scripts",
        "configMap": {"name": ovms_smoke_scripts_configmap.name},
    }
    with Pod(
        client=admin_client,
        name=OVMS_SMOKE_POD_NAME,
        namespace=model_namespace.name,
        restart_policy="Never",
        containers=[smoke_container],
        volumes=[scripts_volume],
    ) as pod:
        LOGGER.info("Waiting for OVMS smoke Pod to complete")
        pod.wait_for_status(status=Pod.Status.SUCCEEDED, timeout=300)
        yield pod
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
from optimum.intel.openvino import OVModelForCausalLM
from transformers import AutoTokenizer

# What this script verifies:
# - transformers integration: AutoTokenizer exercises tokenization from the
#   Transformers library inside the runtime image.
# - optimum.intel.openvino integration: OVModelForCausalLM exercises loading,
#   converting, and running a Transformers model with OpenVINO optimizations
#   via optimum.intel.

# Small causal LM known to convert cleanly to OpenVINO.
MODEL_NAME = "gpt2"

# Tokenizer comes straight from the Transformers API; GPT-2 ships without a
# pad token, so reuse EOS for padding.
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=MODEL_NAME)
tokenizer.pad_token = tokenizer.eos_token

# export=True converts the PyTorch weights to the OpenVINO format on load
# (Optimum Intel API with OpenVINO backend).
model = OVModelForCausalLM.from_pretrained(model_id=MODEL_NAME, export=True)

# Tokenize the input prompt.
prompt = "Testing transformers and optimum.intel integration"
encoded_prompt = tokenizer(text=prompt, return_tensors="pt", padding=True)

# Generation runs through the OpenVINO backend; decoding goes back through
# the Transformers tokenizer — both integrations are exercised together.
output_ids = model.generate(
    input_ids=encoded_prompt.input_ids,
    attention_mask=encoded_prompt.attention_mask,
    max_length=40,
)
generated_text = tokenizer.decode(token_ids=output_ids[0], skip_special_tokens=True)

print("Prompt:", prompt)
print("Generated text:", generated_text)
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# What this script verifies:
# * Tokenizer compatibility: encoding and decoding round-trip correctly.
# * Core model loading: transformers loads the model and its weights.
# * Inference via pipeline: the transformers pipeline mechanism works.

# Load tokenizer and model directly through the Transformers APIs.
MODEL_NAME = "gpt2"
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(pretrained_model_name_or_path=MODEL_NAME)

# Round-trip a sample sentence through encode/decode to check tokenization.
test_text = "The transformers library on RHEL 9"
token_ids = tokenizer.encode(text=test_text, return_tensors="pt")
round_tripped = tokenizer.decode(token_ids=token_ids[0])

print("Original text:", test_text)
print("Decoded text after tokenization:", round_tripped)

# Run the high-level text-generation pipeline over the same model/tokenizer pair.
text_generator = pipeline(task="text-generation", model=model, tokenizer=tokenizer)
generated = text_generator(text_inputs=test_text, max_length=30, num_return_sequences=1)

print("\nGenerated text example:")
print(generated[0]["generated_text"])
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
"""
2+
OVMS smoke test: run smoke scripts inside OpenShift using the OVMS runtime image.
3+
4+
How the Pod works:
5+
- A Pod is created in the test namespace with restart_policy=Never.
6+
- The smoke scripts (ovms_smoketest.py, smoke.py) are mounted read-only at /scripts
7+
via a ConfigMap populated from the repo files.
8+
- The container runs: python /scripts/ovms_smoketest.py && python /scripts/smoke.py.
9+
- If both scripts exit 0, the Pod phase becomes Succeeded.
10+
- If either script fails (non-zero exit or exception), the Pod fails and the test fails.
11+
- The test asserts Pod phase Succeeded; logs available via oc logs for debugging.
12+
13+
Note:
14+
This test requires internet access to download models from Hugging Face (e.g., "gpt2").
15+
It will fail in disconnected/air-gapped environments where external model downloads are not available.
16+
"""
17+
18+
import pytest
19+
from ocp_resources.pod import Pod
20+
21+
22+
@pytest.mark.smoke
@pytest.mark.parametrize(
    "model_namespace",
    [pytest.param({"name": "ovms-smoke"}, id="ovms-smoke")],
    indirect=["model_namespace"],
)
class TestOVMSSmokeInOpenShift:
    """
    Test class for OVMS smoke execution inside OpenShift.

    Runs ovms_smoketest.py and smoke.py inside a Pod using the OVMS runtime image,
    with optional image override via --ovms-runtime-image.
    """

    def test_ovms_smoke_runs_in_openshift(self, ovms_smoke_pod: Pod) -> None:
        """
        OVMS smoke scripts run successfully inside an OpenShift Pod.

        Given the OVMS runtime image (from --ovms-runtime-image or template),
        when the smoke Pod runs ovms_smoketest.py and smoke.py in the container,
        then the Pod completes with phase Succeeded and the test passes.

        Note:
            This test requires internet access to download models from Hugging Face.
            It will fail in disconnected/air-gapped environments.

        Args:
            ovms_smoke_pod: The completed Kubernetes Pod that ran the smoke scripts.
        """
        phase = ovms_smoke_pod.instance.status.phase
        assert phase == "Succeeded", f"OVMS smoke Pod did not succeed: phase={phase}"

utilities/serving_runtime.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,49 @@
1010
from utilities.constants import ApiGroups, PortNames, Protocols, vLLM_CONFIG
1111

1212

13+
def get_runtime_image_from_template(
14+
client: DynamicClient,
15+
template_name: str,
16+
namespace: str,
17+
) -> str:
18+
"""
19+
Get the runtime image from a serving runtime template.
20+
21+
Args:
22+
client: Kubernetes dynamic client
23+
template_name: Name of the template
24+
namespace: Namespace where the template exists
25+
26+
Returns:
27+
str: Container image from the first container in the template
28+
29+
Raises:
30+
ResourceNotFoundError: If the template is not found, has no objects, or has no containers
31+
"""
32+
template = Template(
33+
client=client,
34+
name=template_name,
35+
namespace=namespace,
36+
)
37+
if not template.exists:
38+
raise ResourceNotFoundError(f"{template_name} template not found in namespace {namespace}")
39+
40+
objects = template.instance.objects
41+
if not objects:
42+
raise ResourceNotFoundError(f"{template_name} template has no objects")
43+
model_dict: dict[str, Any] = objects[0].to_dict()
44+
containers = model_dict.get("spec", {}).get("containers", [])
45+
46+
if not containers:
47+
raise ResourceNotFoundError(f"{template_name} template has no containers")
48+
49+
image = containers[0].get("image")
50+
if not image:
51+
raise ResourceNotFoundError(f"{template_name} template container has no image")
52+
53+
return image
54+
55+
1356
class ServingRuntimeFromTemplate(ServingRuntime):
1457
def __init__(
1558
self,

0 commit comments

Comments
 (0)