Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion tests/model_serving/model_server/llmd/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,7 @@ def _create_llmisvc_from_config(
namespace: str,
client: DynamicClient,
service_account: str | None = None,
teardown: bool = True,
) -> Generator[LLMInferenceService, Any]:
"""Create an LLMInferenceService from a config class."""
LOGGER.info(f"\n{config_cls.describe(namespace=namespace)}")
Expand Down Expand Up @@ -283,7 +284,7 @@ def _create_llmisvc_from_config(
"namespace": namespace,
"annotations": config_cls.annotations(),
"label": config_cls.labels(),
"teardown": True,
"teardown": teardown,
"model": model,
"replicas": config_cls.replicas,
"router": config_cls.router_config(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ class LLMISvcConfig:
container_image = None
template_config_ref = "kserve-config-llm-template"
enable_auth = False
wait_timeout = 180
wait_timeout = 240

@classmethod
def container_resources(cls):
Expand Down
91 changes: 91 additions & 0 deletions tests/model_serving/model_server/upgrade/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
import yaml
from kubernetes.dynamic import DynamicClient
from ocp_resources.config_map import ConfigMap
from ocp_resources.gateway import Gateway
from ocp_resources.inference_service import InferenceService
from ocp_resources.llm_inference_service import LLMInferenceService
from ocp_resources.namespace import Namespace
from ocp_resources.role import Role
from ocp_resources.role_binding import RoleBinding
Expand All @@ -23,6 +25,7 @@
ModelVersion,
Protocols,
RuntimeTemplates,
Timeout,
)
from utilities.inference_utils import create_isvc
from utilities.infra import (
Expand All @@ -32,6 +35,8 @@
s3_endpoint_secret,
update_configmap_data,
)
from utilities.llmd_constants import KServeGateway, LLMDGateway
from utilities.llmd_utils import create_llmd_gateway
from utilities.logger import RedactedString
from utilities.serving_runtime import ServingRuntimeFromTemplate

Expand All @@ -42,6 +47,7 @@
MODEL_CAR_UPGRADE_NAMESPACE = "upgrade-model-car"
METRICS_UPGRADE_NAMESPACE = "upgrade-metrics"
PRIVATE_ENDPOINT_UPGRADE_NAMESPACE = "upgrade-private-endpoint"
LLMD_UPGRADE_NAMESPACE = "upgrade-llmd"
S3_CONNECTION = "upgrade-connection"


Expand Down Expand Up @@ -765,3 +771,88 @@ def private_endpoint_inference_service_fixture(
**isvc_kwargs,
) as isvc:
yield isvc


# LLMD Upgrade Fixtures
@pytest.fixture(scope="session")
def llmd_namespace_fixture(
    pytestconfig: pytest.Config,
    admin_client: DynamicClient,
    teardown_resources: bool,
) -> Generator[Namespace, Any, Any]:
    """Namespace for LLMD upgrade tests.

    Pre-upgrade run: creates the namespace (left behind when
    ``teardown_resources`` is False so the post-upgrade run can reuse it).
    Post-upgrade run: re-references the existing namespace and removes it
    after the tests.

    Yields:
        Namespace: the upgrade-test namespace.
    """
    ns = Namespace(client=admin_client, name=LLMD_UPGRADE_NAMESPACE)

    if pytestconfig.option.post_upgrade:
        yield ns
        # Honor the global teardown flag here too; previously cleanup was
        # unconditional, which made post-upgrade debugging impossible with
        # teardown disabled.
        if teardown_resources:
            ns.clean_up()
    else:
        with create_ns(
            admin_client=admin_client,
            name=LLMD_UPGRADE_NAMESPACE,
            model_mesh_enabled=False,
            add_dashboard_label=True,
            teardown=teardown_resources,
        ) as ns:
            yield ns


@pytest.fixture(scope="session")
def llmd_gateway_fixture(
    pytestconfig: pytest.Config,
    admin_client: DynamicClient,
    teardown_resources: bool,
) -> Generator[Gateway, Any, Any]:
    """Shared LLMD Gateway for upgrade tests.

    Pre-upgrade run: creates the gateway and waits for it to become ready.
    Post-upgrade run: re-references the pre-created gateway and removes it
    after the tests.

    Yields:
        Gateway: the shared LLMD gateway resource.
    """
    gateway = Gateway(
        client=admin_client,
        name=LLMDGateway.DEFAULT_NAME,
        namespace=LLMDGateway.DEFAULT_NAMESPACE,
        api_group=KServeGateway.API_GROUP,
    )

    if pytestconfig.option.post_upgrade:
        yield gateway
        # Honor the global teardown flag, consistent with the create branch.
        if teardown_resources:
            gateway.clean_up()
    else:
        with create_llmd_gateway(
            client=admin_client,
            namespace=LLMDGateway.DEFAULT_NAMESPACE,
            gateway_class_name=LLMDGateway.DEFAULT_CLASS,
            wait_for_condition=True,
            timeout=Timeout.TIMEOUT_1MIN,
            teardown=teardown_resources,
        ) as gateway:
            yield gateway


@pytest.fixture(scope="session")
def llmd_inference_service_fixture(
    pytestconfig: pytest.Config,
    admin_client: DynamicClient,
    llmd_namespace_fixture: Namespace,
    llmd_gateway_fixture: Gateway,
    teardown_resources: bool,
) -> Generator[LLMInferenceService, Any, Any]:
    """LLMInferenceService using TinyLlama OCI for upgrade tests.

    Pre-upgrade run: deploys the service from ``TinyLlamaOciConfig``.
    Post-upgrade run: re-references the existing service and removes it
    after the tests.

    Yields:
        LLMInferenceService: the TinyLlama LLMD inference service.
    """
    # Imported lazily — presumably to avoid importing the llmd test package
    # (and its fixtures) at collection time; confirm before moving to top level.
    from tests.model_serving.model_server.llmd.conftest import _create_llmisvc_from_config
    from tests.model_serving.model_server.llmd.llmd_configs import TinyLlamaOciConfig

    config_cls = TinyLlamaOciConfig
    llmisvc = LLMInferenceService(
        client=admin_client,
        name=config_cls.name,
        namespace=llmd_namespace_fixture.name,
    )

    if pytestconfig.option.post_upgrade:
        yield llmisvc
        # Honor the global teardown flag, consistent with the create branch.
        if teardown_resources:
            llmisvc.clean_up()
    else:
        with _create_llmisvc_from_config(
            config_cls=config_cls,
            namespace=llmd_namespace_fixture.name,
            client=admin_client,
            teardown=teardown_resources,
        ) as llmisvc:
            yield llmisvc
117 changes: 117 additions & 0 deletions tests/model_serving/model_server/upgrade/test_upgrade_llmd.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
import pytest
from ocp_resources.gateway import Gateway
from ocp_resources.llm_inference_service import LLMInferenceService

from tests.model_serving.model_server.llmd.utils import (
parse_completion_text,
send_chat_completions,
)
from tests.model_serving.model_server.upgrade.utils import (
verify_gateway_accepted,
verify_llmd_pods_not_restarted,
verify_llmd_router_not_restarted,
)

# Select these tests via the llmd_cpu marker (CPU-only LLMD test runs).
pytestmark = [pytest.mark.llmd_cpu]

# Prompt/answer pair for the inference smoke check; the answer is matched as a
# case-insensitive substring of the completion text.
PROMPT = "What is the capital of Italy?"
EXPECTED_ANSWER = "rome"


class TestLlmdPreUpgrade:
    """Pre-upgrade phase: deploy the LLMD LLMInferenceService and confirm it serves inference."""

    @pytest.mark.pre_upgrade
    def test_llmd_llmisvc_deployed(self, llmd_inference_service_fixture: LLMInferenceService):
        """Check that the LLMInferenceService resource is present on the cluster."""
        llmisvc = llmd_inference_service_fixture
        assert llmisvc.exists, f"LLMInferenceService {llmisvc.name} does not exist"

    @pytest.mark.pre_upgrade
    def test_llmd_inference_pre_upgrade(self, llmd_inference_service_fixture: LLMInferenceService):
        """Send a chat completion request and check status code and answer text.

        Steps:
        1. POST to /v1/chat/completions with the smoke-check prompt.
        2. Expect HTTP 200.
        3. Expect the completion text to contain the expected answer.
        """
        status, body = send_chat_completions(llmisvc=llmd_inference_service_fixture, prompt=PROMPT)
        assert status == 200, f"Expected 200, got {status}: {body}"
        completion = parse_completion_text(response_body=body)
        answered = EXPECTED_ANSWER in completion.lower()
        assert answered, f"Expected '{EXPECTED_ANSWER}' in response, got: {completion}"


class TestLlmdPostUpgrade:
    """Post-upgrade phase: confirm the LLMD deployment survived the platform upgrade intact."""

    @pytest.mark.post_upgrade
    @pytest.mark.dependency(name="llmd_llmisvc_exists")
    def test_llmd_llmisvc_exists(self, llmd_inference_service_fixture: LLMInferenceService):
        """Check that the LLMInferenceService resource is still present after the upgrade."""
        llmisvc = llmd_inference_service_fixture
        assert llmisvc.exists, f"LLMInferenceService {llmisvc.name} does not exist after upgrade"

    @pytest.mark.post_upgrade
    def test_llmd_gateway_exists(self, llmd_gateway_fixture: Gateway):
        """Check that the LLMD Gateway exists and reports an Accepted=True condition."""
        verify_gateway_accepted(gateway=llmd_gateway_fixture)

    @pytest.mark.post_upgrade
    @pytest.mark.dependency(depends=["llmd_llmisvc_exists"])
    def test_llmd_workload_pods_not_restarted(
        self,
        admin_client,
        llmd_inference_service_fixture: LLMInferenceService,
    ):
        """Check that no workload container of the LLMInferenceService restarted during the upgrade."""
        verify_llmd_pods_not_restarted(
            client=admin_client,
            llmisvc=llmd_inference_service_fixture,
        )

    @pytest.mark.post_upgrade
    @pytest.mark.dependency(depends=["llmd_llmisvc_exists"])
    def test_llmd_router_scheduler_not_restarted(
        self,
        admin_client,
        llmd_inference_service_fixture: LLMInferenceService,
    ):
        """Check that the router-scheduler pod's containers did not restart during the upgrade."""
        verify_llmd_router_not_restarted(
            client=admin_client,
            llmisvc=llmd_inference_service_fixture,
        )

    @pytest.mark.post_upgrade
    @pytest.mark.dependency(depends=["llmd_llmisvc_exists"])
    def test_llmd_inference_post_upgrade(self, llmd_inference_service_fixture: LLMInferenceService):
        """Send a chat completion request after the upgrade and check status and answer text.

        Steps:
        1. POST to /v1/chat/completions with the smoke-check prompt.
        2. Expect HTTP 200.
        3. Expect the completion text to contain the expected answer.
        """
        status, body = send_chat_completions(llmisvc=llmd_inference_service_fixture, prompt=PROMPT)
        assert status == 200, f"Expected 200, got {status}: {body}"
        completion = parse_completion_text(response_body=body)
        answered = EXPECTED_ANSWER in completion.lower()
        assert answered, f"Expected '{EXPECTED_ANSWER}' in response, got: {completion}"
90 changes: 90 additions & 0 deletions tests/model_serving/model_server/upgrade/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from kubernetes.dynamic import DynamicClient
from ocp_resources.config_map import ConfigMap
from ocp_resources.gateway import Gateway
from ocp_resources.inference_service import InferenceService
from ocp_resources.llm_inference_service import LLMInferenceService
from ocp_resources.prometheus import Prometheus
from ocp_resources.route import Route

Expand Down Expand Up @@ -315,3 +317,91 @@ def verify_isvc_internal_access(isvc: InferenceService) -> str:
raise AssertionError(f"InferenceService {isvc.name} has empty URL in status.address")

return url


def verify_llmd_pods_not_restarted(
    client: DynamicClient,
    llmisvc: LLMInferenceService,
    max_restarts: int = 0,
) -> None:
    """
    Verify that workload pods for an LLMInferenceService have not restarted.

    Args:
        client: DynamicClient instance
        llmisvc: LLMInferenceService instance
        max_restarts: Maximum allowed restart count (default 0)

    Raises:
        PodContainersRestartError: If any container has restarted more than max_restarts times
    """
    # Imported here to avoid a module-level dependency on the llmd test package.
    from tests.model_serving.model_server.llmd.utils import get_llmd_workload_pods

    # pod name -> list of "container (restarts: N)" entries over the budget
    offenders: dict[str, list[str]] = {}
    for pod in get_llmd_workload_pods(client=client, llmisvc=llmisvc):
        for container in pod.instance.status.containerStatuses or []:
            if container.restartCount > max_restarts:
                offenders.setdefault(pod.name, []).append(
                    f"{container.name} (restarts: {container.restartCount})"
                )

    if offenders:
        raise PodContainersRestartError(f"LLMD workload containers restarted: {offenders}")


def verify_llmd_router_not_restarted(
    client: DynamicClient,
    llmisvc: LLMInferenceService,
    max_restarts: int = 0,
) -> None:
    """
    Verify that the router-scheduler pod for an LLMInferenceService has not restarted.

    Args:
        client: DynamicClient instance
        llmisvc: LLMInferenceService instance
        max_restarts: Maximum allowed restart count (default 0)

    Raises:
        PodContainersRestartError: If any container has restarted more than max_restarts times
    """
    # Imported here to avoid a module-level dependency on the llmd test package.
    from tests.model_serving.model_server.llmd.utils import get_llmd_router_scheduler_pod

    pod = get_llmd_router_scheduler_pod(client=client, llmisvc=llmisvc)
    if not pod:
        raise PodContainersRestartError(f"Router-scheduler pod not found for {llmisvc.name}")

    # pod name -> list of "container (restarts: N)" entries over the budget
    offenders: dict[str, list[str]] = {}
    for container in pod.instance.status.containerStatuses or []:
        if container.restartCount > max_restarts:
            offenders.setdefault(pod.name, []).append(
                f"{container.name} (restarts: {container.restartCount})"
            )

    if offenders:
        raise PodContainersRestartError(f"LLMD router-scheduler containers restarted: {offenders}")


def verify_gateway_accepted(gateway: Gateway) -> None:
    """
    Verify that a Gateway resource exists and has an Accepted condition.

    Args:
        gateway: Gateway instance

    Raises:
        AssertionError: If gateway does not exist or is not accepted
    """
    if not gateway.exists:
        raise AssertionError(f"Gateway {gateway.name} does not exist in namespace {gateway.namespace}")

    conditions = gateway.instance.status.get("conditions", [])
    for condition in conditions:
        # Accepted=True is the Gateway API signal that the controller adopted it.
        if condition.get("type") == "Accepted" and condition.get("status") == "True":
            return
    raise AssertionError(f"Gateway {gateway.name} is not Accepted. Conditions: {conditions}")
Loading