Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion tests/model_registry/negative_tests/test_db_migration.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from utilities.constants import DscComponents
from tests.model_registry.constants import MR_INSTANCE_NAME
from kubernetes.dynamic.client import DynamicClient
from utilities.general import wait_for_pods_by_labels
from utilities.general import wait_for_pods_by_labels, wait_for_container_status


# Module-level logger, named after this module via __name__.
LOGGER = get_logger(name=__name__)
Expand Down Expand Up @@ -52,6 +52,9 @@ def test_db_migration_negative(
expected_num_pods=1,
)
mr_pod = mr_pods[0]
LOGGER.info("Waiting for model registry pod to crash")
assert wait_for_container_status(mr_pod, "rest-container", Pod.Status.CRASH_LOOPBACK_OFF)

LOGGER.info("Checking the logs for the expected error")

log_output = mr_pod.log(container="rest-container")
Expand Down
53 changes: 50 additions & 3 deletions utilities/general.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,15 @@
import uuid

from kubernetes.dynamic import DynamicClient
from kubernetes.dynamic.exceptions import ResourceNotFoundError
from kubernetes.dynamic.exceptions import ResourceNotFoundError, NotFoundError
from ocp_resources.inference_graph import InferenceGraph
from ocp_resources.inference_service import InferenceService
from ocp_resources.pod import Pod
from simple_logger.logger import get_logger

import utilities.infra
from utilities.constants import Annotations, KServeDeploymentType, MODELMESH_SERVING
from utilities.exceptions import UnexpectedResourceCountError
from utilities.constants import Annotations, KServeDeploymentType, MODELMESH_SERVING, Timeout
from utilities.exceptions import UnexpectedResourceCountError, ResourceValueMismatch
from ocp_resources.resource import Resource
from timeout_sampler import retry

Expand Down Expand Up @@ -331,3 +331,50 @@ def generate_random_name(prefix: str = "", length: int = 8) -> str:
# random_uuid.hex is 32 characters long.
suffix = random_uuid.hex[:length]
return f"{prefix}-{suffix}" if prefix else suffix


@retry(
    wait_timeout=Timeout.TIMEOUT_15_SEC,
    sleep=1,
    exceptions_dict={ResourceValueMismatch: [], ResourceNotFoundError: [], NotFoundError: []},
)
def wait_for_container_status(pod: Pod, container_name: str, expected_status: str) -> bool:
    """
    Wait for a container to be in the expected status.

    Each invocation performs a single check of the pod's current container
    statuses. The ``retry`` decorator above is configured with a
    ``Timeout.TIMEOUT_15_SEC`` timeout, a 1 second sleep and the exception
    types listed in ``exceptions_dict``.
    NOTE(review): presumably ``retry`` re-invokes this function while one of
    those exceptions is raised and fails once the timeout expires - confirm
    against the timeout_sampler documentation.

    Args:
        pod: The pod to wait for
        container_name: The name of the container to wait for
        expected_status: The expected status. It is compared against the
            ``reason`` of the container's waiting or terminated state, or
            against the literal "Running" when the container is running.

    Returns:
        bool: True if the container is in the expected status. The function
            never returns False; a mismatch is reported by raising.

    Raises:
        ResourceValueMismatch: If the container is missing from the pod's
            container statuses, is in an unrecognized state, or is not in
            the expected status
    """

    pod_status = pod.instance.status
    # A pod may not report a status (or any containerStatuses) yet. Treat that
    # the same as "container not found" so the failure is a retryable
    # ResourceValueMismatch rather than an AttributeError/TypeError.
    container_statuses = (pod_status.get("containerStatuses") if pod_status is not None else None) or []

    container_status = next((cs for cs in container_statuses if cs.name == container_name), None)
    if container_status is None:
        raise ResourceValueMismatch(f"Container {container_name} not found in pod {pod.name}")

    state = container_status.state
    if state.waiting:
        reason = state.waiting.reason
    elif state.terminated:
        reason = state.terminated.reason
    elif state.running:
        # Running container does not have a reason
        reason = "Running"
    else:
        raise ResourceValueMismatch(
            f"{container_name} in {pod.name} is in an unrecognized or transitional state: {container_status.state}"
        )

    if reason == expected_status:
        LOGGER.info(f"Container {container_name} is in the expected status {expected_status}")
        return True

    # Report both the expected status and the actual state; the previous
    # message printed the actual state as if it were the expected status.
    raise ResourceValueMismatch(
        f"Container {container_name} is not in the expected status {expected_status}, "
        f"current state: {container_status.state}"
    )