Skip to content

Commit 2a29882

Browse files
authored
Wait for newer pod to be available after deployment deletion (#468)
1 parent d89bbce commit 2a29882

File tree

2 files changed

+49
-12
lines changed

2 files changed

+49
-12
lines changed

tests/model_registry/negative_tests/test_db_migration.py

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@
77
from utilities.constants import DscComponents
88
from tests.model_registry.constants import MR_INSTANCE_NAME
99
from kubernetes.dynamic.client import DynamicClient
10-
from utilities.general import wait_for_pods_by_labels, wait_for_container_status
11-
10+
from utilities.general import wait_for_container_status
11+
from tests.model_registry.utils import wait_for_new_running_mr_pod
1212

1313
LOGGER = get_logger(name=__name__)
1414

@@ -36,30 +36,31 @@ def test_db_migration_negative(
3636
admin_client: DynamicClient,
3737
model_registry_db_instance_pod: Pod,
3838
set_mr_db_dirty: int,
39+
model_registry_pod: Pod,
3940
delete_mr_deployment: None,
4041
):
4142
"""
4243
RHOAIENG-27505: This test is to check the migration error when the database is dirty.
4344
The test will:
4445
1. Set the dirty flag to 1 for the latest migration version
4546
2. Delete the model registry deployment
46-
3. Check the logs for the expected error
47+
3. Wait for the old pods to be terminated
48+
4. Check the logs for the expected error
4749
"""
48-
mr_pods = wait_for_pods_by_labels(
50+
LOGGER.info(f"Model registry pod: {model_registry_pod.name}")
51+
mr_pod = wait_for_new_running_mr_pod(
4952
admin_client=admin_client,
53+
orig_pod_name=model_registry_pod.name,
5054
namespace=py_config["model_registry_namespace"],
51-
label_selector=f"app={MR_INSTANCE_NAME}",
52-
expected_num_pods=1,
55+
instance_name=MR_INSTANCE_NAME,
5356
)
54-
mr_pod = mr_pods[0]
55-
LOGGER.info("Waiting for model registry pod to crash")
57+
LOGGER.info(f"Pod that should contains the container in CrashLoopBackOff state: {mr_pod.name}")
5658
assert wait_for_container_status(mr_pod, "rest-container", Pod.Status.CRASH_LOOPBACK_OFF)
5759

5860
LOGGER.info("Checking the logs for the expected error")
59-
6061
log_output = mr_pod.log(container="rest-container")
6162
expected_error = (
6263
f"Error: {{{{ALERT}}}} error connecting to datastore: Dirty database version {set_mr_db_dirty}. "
6364
"Fix and force version."
6465
)
65-
assert expected_error in log_output, "Expected error message not found in logs!"
66+
assert expected_error in log_output, f"Expected error message not found in logs!\n{log_output}"

tests/model_registry/utils.py

Lines changed: 38 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,12 @@
88
from ocp_resources.model_registry_modelregistry_opendatahub_io import ModelRegistry
99
from kubernetes.dynamic.exceptions import ResourceNotFoundError
1010
from simple_logger.logger import get_logger
11-
from timeout_sampler import TimeoutExpiredError, TimeoutSampler
11+
from timeout_sampler import TimeoutExpiredError, TimeoutSampler, retry
1212
from kubernetes.dynamic.exceptions import NotFoundError
1313
from tests.model_registry.constants import MR_DB_IMAGE_DIGEST
1414
from tests.model_registry.exceptions import ModelRegistryResourceNotFoundError
1515
from utilities.exceptions import ProtocolNotSupportedError, TooManyServicesError
16-
from utilities.constants import Protocols, Annotations
16+
from utilities.constants import Protocols, Annotations, Timeout
1717
from model_registry import ModelRegistry as ModelRegistryClient
1818
from model_registry.types import RegisteredModel
1919

@@ -235,6 +235,42 @@ def wait_for_pods_running(
235235
return None
236236

237237

238+
@retry(exceptions_dict={TimeoutError: []}, wait_timeout=Timeout.TIMEOUT_2MIN, sleep=5)
def wait_for_new_running_mr_pod(
    admin_client: DynamicClient,
    orig_pod_name: str,
    namespace: str,
    instance_name: str,
) -> Pod:
    """
    Return the pod that replaced the original model registry pod.

    A single attempt succeeds only when exactly one pod carries the
    ``app=<instance_name>`` label in ``namespace``, that pod's name differs
    from ``orig_pod_name``, and its status equals ``Pod.Status.RUNNING``.
    Any other outcome raises ``TimeoutError``, which is listed in the
    ``retry`` decorator's ``exceptions_dict`` so the attempt is repeated.

    Args:
        admin_client (DynamicClient): The admin client.
        orig_pod_name (str): Name of the pod that is expected to go away.
        namespace (str): Namespace in which the pods are looked up.
        instance_name (str): Instance name used as the ``app`` label value.

    Returns:
        Pod: The single running pod whose name is not ``orig_pod_name``.

    Raises:
        TimeoutError: Raised by an unsuccessful attempt.
            NOTE(review): what finally surfaces once the overall wait
            timeout elapses is decided by ``retry`` - confirm against the
            timeout_sampler documentation.

    """
    LOGGER.info("Waiting for pod to be replaced")
    candidates = list(
        Pod.get(
            dyn_client=admin_client,
            namespace=namespace,
            label_selector=f"app={instance_name}",
        )
    )
    # Anything other than exactly one matching pod counts as "not replaced yet".
    if len(candidates) == 1:
        new_pod = candidates[0]
        replaced = new_pod.name != orig_pod_name
        # The status is only queried once the name shows it is a different pod.
        if replaced and new_pod.status == Pod.Status.RUNNING:
            return new_pod
    raise TimeoutError(f"Timeout waiting for pod {orig_pod_name} to be replaced")
272+
273+
238274
def generate_namespace_name(file_path: str) -> str:
    """
    Derive a namespace-style name from a file path.

    The ``.py`` suffix is dropped, ``/`` and ``_`` are replaced with ``-``,
    only the last 63 characters are kept, and everything up to and including
    the first ``-`` of that remainder is discarded. If no ``-`` is present,
    the remainder is returned unchanged.

    Args:
        file_path (str): Path of the file to derive the name from.

    Returns:
        str: The derived name.
    """
    stem = file_path.removesuffix(".py")
    dashed = stem.replace("/", "-").replace("_", "-")
    tail = dashed[-63:]
    # Drop the leading segment (up to the first dash) when there is one.
    head, dash, remainder = tail.partition("-")
    return remainder if dash else head
240276

0 commit comments

Comments
 (0)