Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
78 commits
Select commit Hold shift + click to select a range
b362382
Create size-labeler.yml
rnetser Dec 18, 2024
3c6a875
Delete .github/workflows/size-labeler.yml
rnetser Dec 18, 2024
ccb63af
Merge branch 'main' of github.com:rnetser/opendatahub-tests
rnetser Dec 24, 2024
da0c898
Merge branch 'main' of https://github.com/opendatahub-io/opendatahub-…
rnetser Dec 25, 2024
94a82ec
Merge branch 'main' of https://github.com/opendatahub-io/opendatahub-…
rnetser Dec 26, 2024
c0c82dd
Merge branch 'main' of https://github.com/opendatahub-io/opendatahub-…
rnetser Dec 27, 2024
5feb447
Merge branch 'main' of https://github.com/opendatahub-io/opendatahub-…
rnetser Dec 30, 2024
19b9c56
Merge branch 'main' of https://github.com/opendatahub-io/opendatahub-…
rnetser Dec 31, 2024
e22ac1a
Merge branch 'main' of https://github.com/opendatahub-io/opendatahub-…
rnetser Dec 31, 2024
56ab9c5
Merge branch 'main' of https://github.com/opendatahub-io/opendatahub-…
rnetser Dec 31, 2024
5a17f03
Merge branch 'main' of https://github.com/opendatahub-io/opendatahub-…
rnetser Dec 31, 2024
ef5fe65
Merge branch 'main' of https://github.com/opendatahub-io/opendatahub-…
rnetser Dec 31, 2024
1875a44
Merge branch 'main' of https://github.com/opendatahub-io/opendatahub-…
rnetser Jan 1, 2025
840d442
Merge branch 'main' of https://github.com/opendatahub-io/opendatahub-…
rnetser Jan 2, 2025
c0d4436
Merge branch 'main' of https://github.com/opendatahub-io/opendatahub-…
rnetser Jan 2, 2025
ba7971a
Merge branch 'main' of https://github.com/opendatahub-io/opendatahub-…
rnetser Jan 3, 2025
fd73a94
Merge branch 'main' of https://github.com/opendatahub-io/opendatahub-…
rnetser Jan 7, 2025
bde0493
Merge branch 'main' of https://github.com/opendatahub-io/opendatahub-…
rnetser Jan 8, 2025
d3cd799
Merge branch 'main' of https://github.com/opendatahub-io/opendatahub-…
rnetser Jan 9, 2025
710befa
Merge branch 'main' of https://github.com/opendatahub-io/opendatahub-…
rnetser Jan 12, 2025
a662364
Merge branch 'main' of https://github.com/opendatahub-io/opendatahub-…
rnetser Jan 16, 2025
579c283
Merge branch 'main' of https://github.com/opendatahub-io/opendatahub-…
rnetser Jan 16, 2025
927cbb0
Merge branch 'main' of https://github.com/opendatahub-io/opendatahub-…
rnetser Jan 19, 2025
99e242e
Merge branch 'main' of https://github.com/opendatahub-io/opendatahub-…
rnetser Jan 22, 2025
5b83bab
Merge branch 'main' of https://github.com/opendatahub-io/opendatahub-…
rnetser Jan 24, 2025
4b5b007
Merge branch 'main' of https://github.com/opendatahub-io/opendatahub-…
rnetser Jan 27, 2025
b8e5dee
Merge branch 'main' of https://github.com/opendatahub-io/opendatahub-…
rnetser Jan 30, 2025
0039df0
Merge branch 'main' of https://github.com/opendatahub-io/opendatahub-…
rnetser Jan 30, 2025
17938d6
model mesh - add auth tests
rnetser Feb 2, 2025
44a3120
xx
rnetser Feb 2, 2025
a418727
Merge branch 'main' of https://github.com/opendatahub-io/opendatahub-…
rnetser Feb 3, 2025
d291c32
Merge branch 'main' of https://github.com/opendatahub-io/opendatahub-…
rnetser Feb 4, 2025
b6650d8
Merge branch 'main' of https://github.com/opendatahub-io/opendatahub-…
rnetser Feb 5, 2025
8b9f838
Merge branch 'main' of https://github.com/opendatahub-io/opendatahub-…
rnetser Feb 7, 2025
d53a04c
Merge branch 'main' of https://github.com/opendatahub-io/opendatahub-…
rnetser Feb 10, 2025
db89111
Merge branch 'main' of https://github.com/opendatahub-io/opendatahub-…
rnetser Feb 10, 2025
773d81a
Merge branch 'main' of https://github.com/opendatahub-io/opendatahub-…
rnetser Feb 12, 2025
01be6e9
Merge branch 'main' of https://github.com/opendatahub-io/opendatahub-…
rnetser Feb 13, 2025
71df8d5
Merge branch 'main' of https://github.com/opendatahub-io/opendatahub-…
rnetser Feb 16, 2025
8896bd7
Merge branch 'main' of https://github.com/opendatahub-io/opendatahub-…
rnetser Feb 18, 2025
0eeb162
Merge branch 'main' of https://github.com/opendatahub-io/opendatahub-…
rnetser Feb 18, 2025
38e8a69
ci: on main upstream xxxxxxxxxxx
rnetser Feb 19, 2025
875f597
ci: reabsingxxxxxxxxxxxxxx
rnetser Feb 20, 2025
c90abf6
ci: reabsingxxxxxxxxxxxxxx
rnetser Feb 20, 2025
4a26346
ci: Merge branch 'main' of https://github.com/opendatahub-io/opendata…
rnetser Feb 24, 2025
8302ed1
ci: Merge branch 'main' of https://github.com/opendatahub-io/opendata…
rnetser Feb 25, 2025
ff1b155
ci: rebase on main
rnetser Mar 6, 2025
a04a3c4
ci: merge main branch
rnetser Mar 11, 2025
d1cb99b
ci: merge branch main
rnetser Mar 12, 2025
87faa7b
ci: merge branch main
rnetser Mar 17, 2025
dc8490d
ci: merge branch main
rnetser Mar 17, 2025
5856788
ci: merge main branch
rnetser Mar 18, 2025
a865aa8
ci: merge main branch
rnetser Mar 18, 2025
e8be67d
ci: merge main branch
rnetser Mar 18, 2025
e8c8b38
ci: merge main branch
rnetser Mar 20, 2025
667cb70
ci: merge main branch
rnetser Mar 20, 2025
a122d95
ci: merge main branch
rnetser Mar 20, 2025
defca5d
ci: merge main branch
rnetser Mar 21, 2025
d8879ae
ci: merge with main
rnetser Mar 24, 2025
a0477ca
ci: merge with main
rnetser Mar 24, 2025
0b37eb7
ci: merge with main
rnetser Mar 24, 2025
79b9288
ci: merge with main
rnetser Mar 24, 2025
3b9d6a1
ci: merge with main
rnetser Mar 25, 2025
a91c002
ci: merge with main
rnetser Mar 26, 2025
ba7a2a4
ci: merge main branch
rnetser Mar 27, 2025
7fd9c92
ci: merge main branch
rnetser Mar 27, 2025
96c2146
ci: merge main branch
rnetser Mar 31, 2025
dcdfc7a
ci: merge main branch
rnetser Mar 31, 2025
beaa1e5
ci: merge main branch
rnetser Apr 2, 2025
4229d23
feat: fail fast on no pods
rnetser Apr 2, 2025
f5a3256
ci: merge main branch
rnetser Apr 2, 2025
13d13d3
ci: merge main branch
rnetser Apr 3, 2025
e6883af
fix: update fail fast
rnetser Apr 3, 2025
10335e4
fix: update fail fast
rnetser Apr 3, 2025
66d2523
ci: merge main branch
rnetser Apr 5, 2025
6bedc2e
fix: update fail fast
rnetser Apr 6, 2025
d8a0dcc
fix: removed unused exception
rnetser Apr 6, 2025
50b3e59
ci: merge main branch
rnetser Apr 7, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
from utilities.inference_utils import Inference, UserInference

LOGGER = get_logger(name=__name__)
TIMEOUT_30SEC: int = 30

TRUSTYAI_SERVICE_NAME: str = "trustyai-service"

Expand Down
1 change: 1 addition & 0 deletions utilities/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,7 @@ class Nvidia:


class Timeout:
TIMEOUT_30SEC: int = 30
TIMEOUT_1MIN: int = 60
TIMEOUT_2MIN: int = 2 * TIMEOUT_1MIN
TIMEOUT_4MIN: int = 4 * TIMEOUT_1MIN
Expand Down
2 changes: 1 addition & 1 deletion utilities/inference_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -399,7 +399,7 @@ def run_inference_flow(
except JSONDecodeError:
return {"output": out}

@retry(wait_timeout=30, sleep=5)
@retry(wait_timeout=Timeout.TIMEOUT_30SEC, sleep=5)
def run_inference(self, cmd: str) -> str:
"""
Run inference command
Expand Down
40 changes: 31 additions & 9 deletions utilities/infra.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
from utilities.constants import KServeDeploymentType
from utilities.constants import Annotations
from utilities.exceptions import FailedPodsError
from timeout_sampler import TimeoutExpiredError, TimeoutSampler
from timeout_sampler import TimeoutExpiredError, TimeoutSampler, retry
import utilities.general

LOGGER = get_logger(name=__name__)
Expand Down Expand Up @@ -576,17 +576,20 @@ def verify_no_failed_pods(
timeout: int = Timeout.TIMEOUT_5MIN,
) -> None:
"""
Verify no failed pods.
Verify pods created and no failed pods.

Args:
client (DynamicClient): DynamicClient object
isvc (InferenceService): InferenceService object
runtime_name (str): ServingRuntime name
timeout (int): Time to wait for the pod.

Raises:
FailedPodsError: If any pod is in failed state
FailedPodsError: If any pod is in failed state

"""
wait_for_isvc_pods(client=client, isvc=isvc, runtime_name=runtime_name)

LOGGER.info("Verifying no failed pods")
for pods in TimeoutSampler(
wait_timeout=timeout,
Expand All @@ -612,12 +615,16 @@ def verify_no_failed_pods(
pod_status = pod.instance.status

if pod_status.containerStatuses:
for container_status in pod_status.containerStatuses:
for container_status in pod_status.get("containerStatuses", []) + pod_status.get(
"initContainerStatuses", []
):
is_waiting_pull_back_off = (
wait_state := container_status.state.waiting
) and wait_state.reason in (
pod.Status.IMAGE_PULL_BACK_OFF,
pod.Status.CRASH_LOOPBACK_OFF,
pod.Status.ERR_IMAGE_PULL,
"InvalidImageName",
)

is_terminated_error = (
Expand All @@ -630,11 +637,6 @@ def verify_no_failed_pods(
if is_waiting_pull_back_off or is_terminated_error:
failed_pods[pod.name] = pod_status

if init_container_status := pod_status.initContainerStatuses:
if container_terminated := init_container_status[0].lastState.terminated:
if container_terminated.reason == "Error":
failed_pods[pod.name] = pod_status

elif pod_status.phase in (
pod.Status.CRASH_LOOPBACK_OFF,
pod.Status.FAILED,
Expand Down Expand Up @@ -781,3 +783,23 @@ def wait_for_serverless_pods_deletion(resource: Project | Namespace, admin_clien
):
LOGGER.info(f"Waiting for {KServeDeploymentType.SERVERLESS} pod {pod.name} to be deleted")
pod.wait_deleted(timeout=Timeout.TIMEOUT_1MIN)


@retry(wait_timeout=Timeout.TIMEOUT_30SEC, sleep=1, exceptions_dict={ResourceNotFoundError: []})
def wait_for_isvc_pods(
    client: DynamicClient,
    isvc: InferenceService,
    runtime_name: str | None = None,
) -> list[Pod]:
    """
    Look up the pods that belong to an InferenceService, waiting for them to appear.

    The lookup is delegated to ``get_pods_by_isvc_label``. The ``retry`` decorator
    is configured with a 30-second timeout, a 1-second sleep, and
    ``ResourceNotFoundError`` in its exceptions dict.
    NOTE(review): presumably the lookup is re-run while it raises
    ``ResourceNotFoundError``, until the timeout expires - confirm against the
    ``timeout_sampler.retry`` documentation.

    Args:
        client (DynamicClient): DynamicClient object
        isvc (InferenceService): InferenceService object
        runtime_name (str | None): ServingRuntime name, forwarded to the pod lookup

    Returns:
        list[Pod]: A list of all matching pods

    Raises:
        TimeoutExpiredError: If pods do not exist
    """
    LOGGER.info("Waiting for pods to be created")
    isvc_pods = get_pods_by_isvc_label(client=client, isvc=isvc, runtime_name=runtime_name)
    return isvc_pods