Skip to content
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
808a6a2
fixing TestServerlessScaleToZero test and adding new wait_for_deploym…
brettmthompson Apr 25, 2025
d35c80f
Merge branch 'main' into bugfix/fix-test-scale-to-zero
brettmthompson Apr 25, 2025
51699a7
removing wait_for_deployments func and reworking wait_for_inference_d…
brettmthompson Apr 28, 2025
060709f
Merge branch 'main' into bugfix/fix-test-scale-to-zero
brettmthompson Apr 28, 2025
42811bf
Merge branch 'main' into bugfix/fix-test-scale-to-zero
brettmthompson Apr 29, 2025
cfc0b19
Merge branch 'main' into bugfix/fix-test-scale-to-zero
brettmthompson May 5, 2025
3b694bf
Merge branch 'main' into bugfix/fix-test-scale-to-zero
brettmthompson May 8, 2025
1f08694
Merge branch 'main' into bugfix/fix-test-scale-to-zero
brettmthompson May 9, 2025
bf5a484
Merge branch 'main' into bugfix/fix-test-scale-to-zero
brettmthompson May 12, 2025
df9a84a
Merge branch 'main' into bugfix/fix-test-scale-to-zero
brettmthompson May 12, 2025
dbf255a
Merge branch 'main' into bugfix/fix-test-scale-to-zero
brettmthompson May 12, 2025
6ff24e1
Merge branch 'main' into bugfix/fix-test-scale-to-zero
brettmthompson May 12, 2025
cd36a73
adding new UnexpectedResourceCountError and now using dyn_client inpu…
brettmthompson May 13, 2025
e4ad9e5
Merge branch 'main' into bugfix/fix-test-scale-to-zero
brettmthompson May 15, 2025
e4e618e
Merge branch 'main' into bugfix/fix-test-scale-to-zero
brettmthompson May 16, 2025
a462abf
Merge branch 'main' into bugfix/fix-test-scale-to-zero
brettmthompson May 19, 2025
43ff06a
Merge branch 'main' into bugfix/fix-test-scale-to-zero
brettmthompson May 20, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import pytest
from ocp_resources.deployment import Deployment

from tests.model_serving.model_server.serverless.utils import verify_no_inference_pods
from tests.model_serving.model_server.utils import verify_inference_response
Expand All @@ -10,9 +9,9 @@
Protocols,
RunTimeConfigs,
)
from utilities.exceptions import DeploymentValidationError
from utilities.inference_utils import Inference
from utilities.manifests.onnx import ONNX_INFERENCE_CONFIG
from utilities.infra import wait_for_inference_deployment_replicas

pytestmark = [
pytest.mark.serverless,
Expand Down Expand Up @@ -95,14 +94,9 @@ def test_no_serverless_pods_when_no_traffic(self, unprivileged_client, inference
@pytest.mark.order(5)
def test_serverless_pods_after_scale_to_one_replica(self, unprivileged_client, inference_service_patched_replicas):
"""Verify pod is running after scaling to 1 replica"""
for deployment in Deployment.get(
wait_for_inference_deployment_replicas(
Comment thread
brettmthompson marked this conversation as resolved.
client=unprivileged_client,
namespace=inference_service_patched_replicas.namespace,
):
if deployment.labels["serving.knative.dev/configurationGeneration"] == "3":
deployment.wait_for_replicas()
return

raise DeploymentValidationError(
f"Inference Service {inference_service_patched_replicas.name} new deployment not found"
isvc=inference_service_patched_replicas,
expected_num_deployments=1,
labels="serving.knative.dev/configurationGeneration=3",
)
88 changes: 59 additions & 29 deletions utilities/infra.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,9 @@
FailedPodsError,
ResourceNotReadyError,
)
from timeout_sampler import TimeoutExpiredError, TimeoutSampler, retry
from timeout_sampler import TimeoutExpiredError, TimeoutSampler, TimeoutWatch, retry
import utilities.general
from ocp_resources.utils.constants import DEFAULT_CLUSTER_RETRY_EXCEPTIONS

LOGGER = get_logger(name=__name__)

Expand Down Expand Up @@ -151,13 +152,14 @@ def create_ns(
wait_for_serverless_pods_deletion(resource=ns, admin_client=client)


def wait_for_replicas_in_deployment(deployment: Deployment, replicas: int) -> None:
def wait_for_replicas_in_deployment(deployment: Deployment, replicas: int, timeout: int = Timeout.TIMEOUT_2MIN) -> None:
"""
Wait for replicas in deployment to be updated in spec.

Args:
deployment (Deployment): Deployment object
replicas (int): number of replicas to be set in spec.replicas
timeout (int): Time to wait for the model deployment.

Raises:
TimeoutExpiredError: If replicas are not updated in spec.
Expand All @@ -167,7 +169,7 @@ def wait_for_replicas_in_deployment(deployment: Deployment, replicas: int) -> No

try:
for sample in TimeoutSampler(
wait_timeout=Timeout.TIMEOUT_2MIN,
wait_timeout=timeout,
sleep=5,
func=lambda: deployment.instance,
):
Expand All @@ -186,6 +188,8 @@ def wait_for_inference_deployment_replicas(
isvc: InferenceService,
runtime_name: str | None = None,
expected_num_deployments: int = 1,
labels: str = "",
deployed: bool = True,
timeout: int = Timeout.TIMEOUT_5MIN,
) -> list[Deployment]:
"""
Expand All @@ -196,49 +200,75 @@ def wait_for_inference_deployment_replicas(
isvc (InferenceService): InferenceService object
runtime_name (str): ServingRuntime name.
expected_num_deployments (int): Expected number of deployments per InferenceService.
labels (str): Comma-separated list of labels, in key=value format, used to filter deployments.
deployed (bool): True for replicas deployed, False for no replicas.
timeout (int): Time to wait for the model deployment.

Returns:
list[Deployment]: List of Deployment objects for InferenceService.

Raises:
TimeoutExpiredError: If an exception is raised when retrieving deployments or
timeout expires when checking replicas.
ResourceNotUniqueError: If a greater number of deployments exist than expected after timeout.
ResourceNotFoundError: If fewer deployments exist than expected after timeout.
Comment thread
brettmthompson marked this conversation as resolved.
Outdated

"""
timeout_watcher = TimeoutWatch(timeout=timeout)
ns = isvc.namespace
label_selector = utilities.general.create_isvc_label_selector_str(
isvc=isvc, resource_type="deployment", runtime_name=runtime_name
)
if labels:
label_selector += f",{labels}"
Comment thread
dbasunag marked this conversation as resolved.

deployments = list(
Deployment.get(
deployment_list = []
try:
for deployments in TimeoutSampler(
wait_timeout=timeout_watcher.remaining_time(),
Comment thread
dbasunag marked this conversation as resolved.
sleep=5,
exceptions_dict=DEFAULT_CLUSTER_RETRY_EXCEPTIONS,
func=Deployment.get,
label_selector=label_selector,
client=client,
Comment thread
brettmthompson marked this conversation as resolved.
Outdated
namespace=isvc.namespace,
)
)
namespace=ns,
):
deployment_list = list(deployments)
Comment thread
rnetser marked this conversation as resolved.
if len(deployment_list) == expected_num_deployments:
break
except TimeoutExpiredError as e:
# If the last exception raised prior to the timeout expiring is None, this means that
# the deployments were successfully retrieved, but the expected number was not found.
if e.last_exp is None:
if len(deployment_list) > expected_num_deployments:
raise ResourceNotUniqueError(
f"Too many predictor deployments found in namespace {ns} after timeout. "
f"Expected {expected_num_deployments}, but found {len(deployment_list)}."
)
raise ResourceNotFoundError(
f"Predictor deployment(s) not found in namespace {ns} after timeout. "
f"Expected {expected_num_deployments}, but found {len(deployment_list)}."
)
raise

LOGGER.info("Waiting for inference deployment replicas to complete")
if len(deployments) == expected_num_deployments:
for deployment in deployments:
if deployment.exists:
# Raw deployment: if min replicas is more than 1, wait for min replicas
# to be set in deployment spec by HPA
if (
isvc.instance.metadata.annotations.get("serving.kserve.io/deploymentMode")
== KServeDeploymentType.RAW_DEPLOYMENT
):
wait_for_replicas_in_deployment(
deployment=deployments[0],
replicas=isvc.instance.spec.predictor.get("minReplicas", 1),
)

deployment.wait_for_replicas(timeout=timeout)

return deployments
for deployment in deployment_list:
if deployment.exists:
Comment thread
dbasunag marked this conversation as resolved.
# Raw deployment: if min replicas is more than 1, wait for min replicas
# to be set in deployment spec by HPA
if (
isvc.instance.metadata.annotations.get("serving.kserve.io/deploymentMode")
== KServeDeploymentType.RAW_DEPLOYMENT
):
wait_for_replicas_in_deployment(
deployment=deployment,
replicas=isvc.instance.spec.predictor.get("minReplicas", 1),
timeout=timeout_watcher.remaining_time(),
)

elif len(deployments) > expected_num_deployments:
raise ResourceNotUniqueError(f"Multiple predictor deployments found in namespace {ns}")
deployment.wait_for_replicas(deployed=deployed, timeout=timeout_watcher.remaining_time())

else:
raise ResourceNotFoundError(f"Predictor deployment not found in namespace {ns}")
return deployment_list


@contextmanager
Expand Down