Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion tests/model_serving/model_server/llmd/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,7 @@ def _create_llmisvc_from_config(
namespace: str,
client: DynamicClient,
service_account: str | None = None,
teardown: bool = True,
) -> Generator[LLMInferenceService, Any]:
"""Create an LLMInferenceService from a config class."""
LOGGER.info(f"\n{config_cls.describe(namespace=namespace)}")
Expand Down Expand Up @@ -283,7 +284,7 @@ def _create_llmisvc_from_config(
"namespace": namespace,
"annotations": config_cls.annotations(),
"label": config_cls.labels(),
"teardown": True,
"teardown": teardown,
"model": model,
"replicas": config_cls.replicas,
"router": config_cls.router_config(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ class LLMISvcConfig:
container_image = None
template_config_ref = "kserve-config-llm-template"
enable_auth = False
wait_timeout = 180
wait_timeout = 240

@classmethod
def container_resources(cls):
Expand Down
91 changes: 91 additions & 0 deletions tests/model_serving/model_server/upgrade/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
import yaml
from kubernetes.dynamic import DynamicClient
from ocp_resources.config_map import ConfigMap
from ocp_resources.gateway import Gateway
from ocp_resources.inference_service import InferenceService
from ocp_resources.llm_inference_service import LLMInferenceService
from ocp_resources.namespace import Namespace
from ocp_resources.role import Role
from ocp_resources.role_binding import RoleBinding
Expand All @@ -23,6 +25,7 @@
ModelVersion,
Protocols,
RuntimeTemplates,
Timeout,
)
from utilities.inference_utils import create_isvc
from utilities.infra import (
Expand All @@ -32,6 +35,8 @@
s3_endpoint_secret,
update_configmap_data,
)
from utilities.llmd_constants import KServeGateway, LLMDGateway
from utilities.llmd_utils import create_llmd_gateway
from utilities.logger import RedactedString
from utilities.serving_runtime import ServingRuntimeFromTemplate

Expand All @@ -42,6 +47,7 @@
MODEL_CAR_UPGRADE_NAMESPACE = "upgrade-model-car"
METRICS_UPGRADE_NAMESPACE = "upgrade-metrics"
PRIVATE_ENDPOINT_UPGRADE_NAMESPACE = "upgrade-private-endpoint"
LLMD_UPGRADE_NAMESPACE = "upgrade-llmd"
S3_CONNECTION = "upgrade-connection"


Expand Down Expand Up @@ -765,3 +771,88 @@ def private_endpoint_inference_service_fixture(
**isvc_kwargs,
) as isvc:
yield isvc


# LLMD Upgrade Fixtures
@pytest.fixture(scope="session")
def llmd_namespace_fixture(
    pytestconfig: pytest.Config,
    admin_client: DynamicClient,
    teardown_resources: bool,
) -> Generator[Namespace, Any, Any]:
    """Namespace for LLMD upgrade tests.

    Pre-upgrade run: creates the namespace (left behind when
    ``teardown_resources`` is False so the post-upgrade run can reuse it).
    Post-upgrade run: re-references the existing namespace and removes it
    after the tests.

    Yields:
        Namespace: the upgrade-test namespace.
    """
    ns = Namespace(client=admin_client, name=LLMD_UPGRADE_NAMESPACE)

    if pytestconfig.option.post_upgrade:
        yield ns
        # Honor the global teardown flag here too; previously cleanup was
        # unconditional, which made post-upgrade debugging impossible with
        # teardown disabled.
        if teardown_resources:
            ns.clean_up()
    else:
        with create_ns(
            admin_client=admin_client,
            name=LLMD_UPGRADE_NAMESPACE,
            model_mesh_enabled=False,
            add_dashboard_label=True,
            teardown=teardown_resources,
        ) as ns:
            yield ns


@pytest.fixture(scope="session")
def llmd_gateway_fixture(
    pytestconfig: pytest.Config,
    admin_client: DynamicClient,
    teardown_resources: bool,
) -> Generator[Gateway, Any, Any]:
    """Shared LLMD Gateway for upgrade tests.

    Pre-upgrade run: creates the gateway and waits for it to become ready.
    Post-upgrade run: re-references the pre-created gateway and removes it
    after the tests.

    Yields:
        Gateway: the shared LLMD gateway resource.
    """
    gateway = Gateway(
        client=admin_client,
        name=LLMDGateway.DEFAULT_NAME,
        namespace=LLMDGateway.DEFAULT_NAMESPACE,
        api_group=KServeGateway.API_GROUP,
    )

    if pytestconfig.option.post_upgrade:
        yield gateway
        # Honor the global teardown flag, consistent with the create branch.
        if teardown_resources:
            gateway.clean_up()
    else:
        with create_llmd_gateway(
            client=admin_client,
            namespace=LLMDGateway.DEFAULT_NAMESPACE,
            gateway_class_name=LLMDGateway.DEFAULT_CLASS,
            wait_for_condition=True,
            timeout=Timeout.TIMEOUT_1MIN,
            teardown=teardown_resources,
        ) as gateway:
            yield gateway


@pytest.fixture(scope="session")
def llmd_inference_service_fixture(
    pytestconfig: pytest.Config,
    admin_client: DynamicClient,
    llmd_namespace_fixture: Namespace,
    llmd_gateway_fixture: Gateway,
    teardown_resources: bool,
) -> Generator[LLMInferenceService, Any, Any]:
    """LLMInferenceService using TinyLlama OCI for upgrade tests.

    Pre-upgrade run: deploys the service from ``TinyLlamaOciConfig``.
    Post-upgrade run: re-references the existing service and removes it
    after the tests.

    Yields:
        LLMInferenceService: the TinyLlama LLMD inference service.
    """
    # Imported lazily — presumably to avoid importing the llmd test package
    # (and its fixtures) at collection time; confirm before moving to top level.
    from tests.model_serving.model_server.llmd.conftest import _create_llmisvc_from_config
    from tests.model_serving.model_server.llmd.llmd_configs import TinyLlamaOciConfig

    config_cls = TinyLlamaOciConfig
    llmisvc = LLMInferenceService(
        client=admin_client,
        name=config_cls.name,
        namespace=llmd_namespace_fixture.name,
    )

    if pytestconfig.option.post_upgrade:
        yield llmisvc
        # Honor the global teardown flag, consistent with the create branch.
        if teardown_resources:
            llmisvc.clean_up()
    else:
        with _create_llmisvc_from_config(
            config_cls=config_cls,
            namespace=llmd_namespace_fixture.name,
            client=admin_client,
            teardown=teardown_resources,
        ) as llmisvc:
            yield llmisvc
117 changes: 117 additions & 0 deletions tests/model_serving/model_server/upgrade/test_upgrade_llmd.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
import pytest
from ocp_resources.gateway import Gateway
from ocp_resources.llm_inference_service import LLMInferenceService

from tests.model_serving.model_server.llmd.utils import (
parse_completion_text,
send_chat_completions,
)
from tests.model_serving.model_server.upgrade.utils import (
verify_gateway_accepted,
verify_llmd_pods_not_restarted,
verify_llmd_router_not_restarted,
)

# Select these tests via the llmd_cpu marker (CPU-only LLMD test runs).
pytestmark = [pytest.mark.llmd_cpu]

# Prompt/answer pair for the inference smoke check; the answer is matched as a
# case-insensitive substring of the completion text.
PROMPT = "What is the capital of Italy?"
EXPECTED_ANSWER = "rome"


class TestLlmdPreUpgrade:
    """Pre-upgrade phase: deploy the LLMD LLMInferenceService and confirm it serves inference."""

    @pytest.mark.pre_upgrade
    def test_llmd_llmisvc_deployed(self, llmd_inference_service_fixture: LLMInferenceService):
        """Check that the LLMInferenceService resource is present on the cluster."""
        llmisvc = llmd_inference_service_fixture
        assert llmisvc.exists, f"LLMInferenceService {llmisvc.name} does not exist"

    @pytest.mark.pre_upgrade
    def test_llmd_inference_pre_upgrade(self, llmd_inference_service_fixture: LLMInferenceService):
        """Send a chat completion request and check status code and answer text.

        Steps:
        1. POST to /v1/chat/completions with the smoke-check prompt.
        2. Expect HTTP 200.
        3. Expect the completion text to contain the expected answer.
        """
        status, body = send_chat_completions(llmisvc=llmd_inference_service_fixture, prompt=PROMPT)
        assert status == 200, f"Expected 200, got {status}: {body}"
        completion = parse_completion_text(response_body=body)
        answered = EXPECTED_ANSWER in completion.lower()
        assert answered, f"Expected '{EXPECTED_ANSWER}' in response, got: {completion}"


class TestLlmdPostUpgrade:
    """Post-upgrade phase: confirm the LLMD deployment survived the platform upgrade intact."""

    @pytest.mark.post_upgrade
    @pytest.mark.dependency(name="llmd_llmisvc_exists")
    def test_llmd_llmisvc_exists(self, llmd_inference_service_fixture: LLMInferenceService):
        """Check that the LLMInferenceService resource is still present after the upgrade."""
        llmisvc = llmd_inference_service_fixture
        assert llmisvc.exists, f"LLMInferenceService {llmisvc.name} does not exist after upgrade"

    @pytest.mark.post_upgrade
    def test_llmd_gateway_exists(self, llmd_gateway_fixture: Gateway):
        """Check that the LLMD Gateway exists and reports an Accepted=True condition."""
        verify_gateway_accepted(gateway=llmd_gateway_fixture)

    @pytest.mark.post_upgrade
    @pytest.mark.dependency(depends=["llmd_llmisvc_exists"])
    def test_llmd_workload_pods_not_restarted(
        self,
        admin_client,
        llmd_inference_service_fixture: LLMInferenceService,
    ):
        """Check that no workload container of the LLMInferenceService restarted during the upgrade."""
        verify_llmd_pods_not_restarted(
            client=admin_client,
            llmisvc=llmd_inference_service_fixture,
        )

    @pytest.mark.post_upgrade
    @pytest.mark.dependency(depends=["llmd_llmisvc_exists"])
    def test_llmd_router_scheduler_not_restarted(
        self,
        admin_client,
        llmd_inference_service_fixture: LLMInferenceService,
    ):
        """Check that the router-scheduler pod's containers did not restart during the upgrade."""
        verify_llmd_router_not_restarted(
            client=admin_client,
            llmisvc=llmd_inference_service_fixture,
        )

    @pytest.mark.post_upgrade
    @pytest.mark.dependency(depends=["llmd_llmisvc_exists"])
    def test_llmd_inference_post_upgrade(self, llmd_inference_service_fixture: LLMInferenceService):
        """Send a chat completion request after the upgrade and check status and answer text.

        Steps:
        1. POST to /v1/chat/completions with the smoke-check prompt.
        2. Expect HTTP 200.
        3. Expect the completion text to contain the expected answer.
        """
        status, body = send_chat_completions(llmisvc=llmd_inference_service_fixture, prompt=PROMPT)
        assert status == 200, f"Expected 200, got {status}: {body}"
        completion = parse_completion_text(response_body=body)
        answered = EXPECTED_ANSWER in completion.lower()
        assert answered, f"Expected '{EXPECTED_ANSWER}' in response, got: {completion}"
90 changes: 90 additions & 0 deletions tests/model_serving/model_server/upgrade/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from kubernetes.dynamic import DynamicClient
from ocp_resources.config_map import ConfigMap
from ocp_resources.gateway import Gateway
from ocp_resources.inference_service import InferenceService
from ocp_resources.llm_inference_service import LLMInferenceService
from ocp_resources.prometheus import Prometheus
from ocp_resources.route import Route

Expand Down Expand Up @@ -315,3 +317,91 @@ def verify_isvc_internal_access(isvc: InferenceService) -> str:
raise AssertionError(f"InferenceService {isvc.name} has empty URL in status.address")

return url


def verify_llmd_pods_not_restarted(
    client: DynamicClient,
    llmisvc: LLMInferenceService,
    max_restarts: int = 0,
) -> None:
    """
    Verify that workload pods for an LLMInferenceService have not restarted.

    Args:
        client: DynamicClient instance
        llmisvc: LLMInferenceService instance
        max_restarts: Maximum allowed restart count (default 0)

    Raises:
        PodContainersRestartError: If any container has restarted more than max_restarts times
    """
    # Imported here to avoid a module-level dependency on the llmd test package.
    from tests.model_serving.model_server.llmd.utils import get_llmd_workload_pods

    # pod name -> list of "container (restarts: N)" entries over the budget
    offenders: dict[str, list[str]] = {}
    for pod in get_llmd_workload_pods(client=client, llmisvc=llmisvc):
        for container in pod.instance.status.containerStatuses or []:
            if container.restartCount > max_restarts:
                offenders.setdefault(pod.name, []).append(
                    f"{container.name} (restarts: {container.restartCount})"
                )

    if offenders:
        raise PodContainersRestartError(f"LLMD workload containers restarted: {offenders}")


def verify_llmd_router_not_restarted(
    client: DynamicClient,
    llmisvc: LLMInferenceService,
    max_restarts: int = 0,
) -> None:
    """
    Verify that the router-scheduler pod for an LLMInferenceService has not restarted.

    Args:
        client: DynamicClient instance
        llmisvc: LLMInferenceService instance
        max_restarts: Maximum allowed restart count (default 0)

    Raises:
        PodContainersRestartError: If any container has restarted more than max_restarts times
    """
    # Imported here to avoid a module-level dependency on the llmd test package.
    from tests.model_serving.model_server.llmd.utils import get_llmd_router_scheduler_pod

    pod = get_llmd_router_scheduler_pod(client=client, llmisvc=llmisvc)
    if not pod:
        raise PodContainersRestartError(f"Router-scheduler pod not found for {llmisvc.name}")

    # pod name -> list of "container (restarts: N)" entries over the budget
    offenders: dict[str, list[str]] = {}
    for container in pod.instance.status.containerStatuses or []:
        if container.restartCount > max_restarts:
            offenders.setdefault(pod.name, []).append(
                f"{container.name} (restarts: {container.restartCount})"
            )

    if offenders:
        raise PodContainersRestartError(f"LLMD router-scheduler containers restarted: {offenders}")


def verify_gateway_accepted(gateway: Gateway) -> None:
    """
    Verify that a Gateway resource exists and has an Accepted condition.

    Args:
        gateway: Gateway instance

    Raises:
        AssertionError: If gateway does not exist or is not accepted
    """
    if not gateway.exists:
        raise AssertionError(f"Gateway {gateway.name} does not exist in namespace {gateway.namespace}")

    conditions = gateway.instance.status.get("conditions", [])
    for condition in conditions:
        # Accepted=True is the Gateway API signal that the controller adopted it.
        if condition.get("type") == "Accepted" and condition.get("status") == "True":
            return
    raise AssertionError(f"Gateway {gateway.name} is not Accepted. Conditions: {conditions}")
Loading