1+ from __future__ import annotations
2+
13from typing import Any
24
5+ from kubernetes .dynamic import DynamicClient
36from ocp_resources .inference_service import InferenceService
47from simple_logger .logger import get_logger
58from timeout_sampler import TimeoutExpiredError , TimeoutSampler
69
710from tests .model_serving .model_server .utils import verify_inference_response
811from utilities .constants import Timeout
912from utilities .exceptions import InferenceCanaryTrafficError
13+ from utilities .infra import get_pods_by_isvc_label
1014
1115LOGGER = get_logger (name = __name__ )
1216
@@ -51,7 +55,8 @@ def verify_canary_traffic(
5155 protocol : str ,
5256 model_name : str ,
5357 iterations : int ,
54- percentage : int ,
58+ expected_percentage : int ,
59+ tolerance : int = 0 ,
5560) -> None :
5661 """
5762 Verify canary traffic percentage against inference_config.
@@ -63,15 +68,17 @@ def verify_canary_traffic(
6368 protocol (str): Protocol.
6469 model_name (str): Model name.
6570 iterations (int): Number of iterations.
66- percentage (int): Percentage of canary rollout.
71+ expected_percentage (int): Percentage of canary rollout.
72+ tolerance (int): Tolerance of traffic percentage distribution;
73+ difference between actual and expected percentage.
6774
6875 Raises:
6976 InferenceCanaryTrafficError: If canary rollout is not updated
7077
7178 """
7279 successful_inferences = 0
7380
74- for _ in range (iterations ):
81+ for iteration in range (iterations ):
7582 try :
7683 verify_inference_response (
7784 inference_service = isvc ,
@@ -81,16 +88,42 @@ def verify_canary_traffic(
8188 model_name = model_name ,
8289 use_default_query = True ,
8390 )
91+ LOGGER .info (f"Successful inference. Iteration: { iteration + 1 } " )
8492
8593 successful_inferences += 1
8694
87- except Exception :
88- continue
95+ except Exception as ex :
96+ LOGGER . warning ( f"Inference failed. Error: { ex } . Previous model was used." )
8997
98+ LOGGER .info (f"Number of inference requests to the new model: { successful_inferences } " )
9099 successful_inferences_percentage = successful_inferences / iterations * 100
91100
92- if successful_inferences_percentage != percentage :
101+ diff_percentage = abs (expected_percentage - successful_inferences_percentage )
102+
103+ if successful_inferences == 0 or diff_percentage > tolerance :
93104 raise InferenceCanaryTrafficError (
94105 f"Percentage of inference requests { successful_inferences_percentage } "
95- f"to the new model does not match the expected percentage { percentage } . "
106+ f"to the new model does not match the expected percentage { expected_percentage } . "
96107 )
108+
109+
def inference_service_pods_sampler(
    client: DynamicClient, isvc: InferenceService, timeout: int, sleep: int = 1
) -> TimeoutSampler:
    """
    Create a TimeoutSampler that polls the pods backing an InferenceService.

    Each sampling attempt calls get_pods_by_isvc_label with the given client
    and InferenceService; the sampler keeps retrying until `timeout` expires.

    Args:
        client (DynamicClient): DynamicClient object
        isvc (InferenceService): InferenceService object
        timeout (int): Timeout in seconds
        sleep (int): Seconds to wait between sampling attempts; defaults to 1
            (the previous hard-coded interval, kept for backward compatibility)

    Returns:
        TimeoutSampler: TimeoutSampler object wrapping get_pods_by_isvc_label

    """
    return TimeoutSampler(
        wait_timeout=timeout,
        sleep=sleep,
        func=get_pods_by_isvc_label,
        client=client,
        isvc=isvc,
    )
0 commit comments