Skip to content

Commit 6bc91d0

Browse files
authored
Wait for dsc and dsci ready state in cluster_sanity check (#293)
1 parent 44907ca commit 6bc91d0

File tree

1 file changed

+27
-10
lines changed

1 file changed

+27
-10
lines changed

utilities/infra.py

Lines changed: 27 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -851,16 +851,33 @@ def wait_for_isvc_pods(client: DynamicClient, isvc: InferenceService, runtime_na
851851
return get_pods_by_isvc_label(client=client, isvc=isvc, runtime_name=runtime_name)
852852

853853

854-
def verify_dsci_status_ready(dsci_resource: DSCInitialization) -> None:
    """
    Perform a single check that the DSCInitialization resource is in READY status.

    Args:
        dsci_resource (DSCInitialization): DSCI resource to check.

    Raises:
        ResourceNotReadyError: If the resource status is not ``Status.READY``.
    """
    LOGGER.info(f"Verify DSCI {dsci_resource.name} are {dsci_resource.Status.READY}.")

    # Happy path first: nothing else to do when the resource is already ready.
    if dsci_resource.status == dsci_resource.Status.READY:
        return

    raise ResourceNotReadyError(f"DSCI {dsci_resource.name} is not ready.\nStatus: {dsci_resource.instance.status}")
854+
@retry(
    wait_timeout=120,
    sleep=5,
    exceptions_dict={ResourceNotReadyError: []},
)
def wait_for_dsci_status_ready(dsci_resource: DSCInitialization) -> bool:
    """
    Check that the DSCInitialization resource is in READY status.

    The function is wrapped with ``retry`` (``wait_timeout=120``, ``sleep=5``) and
    ``ResourceNotReadyError`` is registered in ``exceptions_dict``, so a not-ready
    result is signalled by raising that exception from the body.

    NOTE(review): if ``retry`` raises its own timeout exception once ``wait_timeout``
    elapses, callers that only catch ``ResourceNotReadyError`` will not handle it -
    confirm against the ``retry`` implementation.

    Args:
        dsci_resource (DSCInitialization): DSCI resource to check.

    Returns:
        bool: True when the resource status equals ``Status.READY``.

    Raises:
        ResourceNotReadyError: If the resource status is not ``Status.READY`` when checked.
    """
    LOGGER.info(f"Wait for DSCI {dsci_resource.name} to be in {dsci_resource.Status.READY} status.")

    # Guard clause: report the current status so the failure is diagnosable.
    if dsci_resource.status != dsci_resource.Status.READY:
        raise ResourceNotReadyError(
            f"DSCI {dsci_resource.name} is not ready.\nCurrent status: {dsci_resource.instance.status}"
        )

    return True
858867

859868

860-
def verify_dsc_status_ready(dsc_resource: DataScienceCluster) -> None:
    """
    Perform a single check that the DataScienceCluster resource is in READY status.

    Args:
        dsc_resource (DataScienceCluster): DSC resource to check.

    Raises:
        ResourceNotReadyError: If the resource status is not ``Status.READY``.
    """
    LOGGER.info(f"Verify DSC {dsc_resource.name} are {dsc_resource.Status.READY}.")

    # Happy path first: nothing else to do when the resource is already ready.
    if dsc_resource.status == dsc_resource.Status.READY:
        return

    raise ResourceNotReadyError(f"DSC {dsc_resource.name} is not ready.\nStatus: {dsc_resource.instance.status}")
869+
@retry(
    wait_timeout=120,
    sleep=5,
    exceptions_dict={ResourceNotReadyError: []},
)
def wait_for_dsc_status_ready(dsc_resource: DataScienceCluster) -> bool:
    """
    Check that the DataScienceCluster resource is in READY status.

    The function is wrapped with ``retry`` (``wait_timeout=120``, ``sleep=5``) and
    ``ResourceNotReadyError`` is registered in ``exceptions_dict``, so a not-ready
    result is signalled by raising that exception from the body.

    NOTE(review): if ``retry`` raises its own timeout exception once ``wait_timeout``
    elapses, callers that only catch ``ResourceNotReadyError`` will not handle it -
    confirm against the ``retry`` implementation.

    Args:
        dsc_resource (DataScienceCluster): DSC resource to check.

    Returns:
        bool: True when the resource status equals ``Status.READY``.

    Raises:
        ResourceNotReadyError: If the resource status is not ``Status.READY`` when checked.
    """
    # Log wording aligned with wait_for_dsci_status_ready ("to be in <status> status").
    LOGGER.info(f"Wait for DSC {dsc_resource.name} to be in {dsc_resource.Status.READY} status.")
    if dsc_resource.status == dsc_resource.Status.READY:
        return True

    # Include the current status so the failure is diagnosable from the error alone.
    raise ResourceNotReadyError(
        f"DSC {dsc_resource.name} is not ready.\nCurrent status: {dsc_resource.instance.status}"
    )
864881

865882

866883
def verify_cluster_sanity(
@@ -898,8 +915,8 @@ def verify_cluster_sanity(
898915
LOGGER.warning(f"Skipping RHOAI resource checks, got {skip_rhoai_check}")
899916

900917
else:
901-
verify_dsci_status_ready(dsci_resource=dsci_resource)
902-
verify_dsc_status_ready(dsc_resource=dsc_resource)
918+
wait_for_dsci_status_ready(dsci_resource=dsci_resource)
919+
wait_for_dsc_status_ready(dsc_resource=dsc_resource)
903920

904921
except (ResourceNotReadyError, NodeUnschedulableError, NodeNotReadyError) as ex:
905922
error_msg = f"Cluster sanity check failed: {str(ex)}"

0 commit comments

Comments
 (0)