11import re
2- import time
32
43import pandas as pd
54import structlog
87from ocp_resources .lm_eval_job import LMEvalJob
98from ocp_resources .pod import Pod
109from pyhelper_utils .general import tts
11- from timeout_sampler import TimeoutExpiredError
10+ from timeout_sampler import TimeoutExpiredError , TimeoutSampler
1211
1312from utilities .constants import Timeout
14- from utilities .exceptions import PodLogMissMatchError , UnexpectedFailureError
13+ from utilities .exceptions import (
14+ PodLogMissMatchError ,
15+ ResourceNotFoundError ,
16+ UnexpectedFailureError ,
17+ UnexpectedResourceCountError ,
18+ )
19+ from utilities .general import collect_pod_information
1520
1621LOGGER = structlog .get_logger (name = __name__ )
1722
@@ -116,7 +121,6 @@ def wait_for_vllm_model_ready(
116121 inference_service_name : str ,
117122 max_wait_time : int = 600 ,
118123 check_interval : int = 10 ,
119- stabilization_wait : int = 10 ,
120124) -> Pod :
121125 """Wait for vLLM model to download and be ready to serve requests.
122126
@@ -126,12 +130,12 @@ def wait_for_vllm_model_ready(
126130 inference_service_name: Name of the inference service
127131 max_wait_time: Maximum time to wait in seconds
128132 check_interval: Time between checks in seconds
129- stabilization_wait: Seconds to wait after model is ready for server stabilization
130133
131134 Returns:
132135 The predictor pod once model is ready
133136
134137 Raises:
138+ ResourceNotFoundError: If no predictor pod is found
135139 UnexpectedFailureError: If model fails to load or pod encounters errors
136140 """
137141 LOGGER .info ("Waiting for vLLM model to download and load..." )
@@ -140,49 +144,46 @@ def wait_for_vllm_model_ready(
140144 Pod .get (
141145 dyn_client = client ,
142146 namespace = namespace ,
143- label_selector = f"serving.kserve.io/inferenceservice={ inference_service_name } " ,
147+ label_selector = f"serving.kserve.io/inferenceservice={ inference_service_name } ,component=predictor " ,
144148 )
145149 )
146150
147151 if not predictor_pods :
148- raise UnexpectedFailureError ( "No predictor pod found for inference service" )
152+ raise ResourceNotFoundError ( f "No predictor pod found for inference service ' { inference_service_name } '. " )
149153
150- predictor_pods = [pod for pod in predictor_pods if "predictor" in pod .name ]
151-
152- if not predictor_pods :
153- raise UnexpectedFailureError ("No predictor pod found for inference service" )
154+ if len (predictor_pods ) != 1 :
155+ raise UnexpectedResourceCountError (
156+ f"Expected exactly 1 predictor pod for inference service '{ inference_service_name } ', "
157+ f"but found { len (predictor_pods )} : { [pod .name for pod in predictor_pods ]} "
158+ )
154159
155160 predictor_pod = predictor_pods [0 ]
156161 LOGGER .info (f"Predictor pod: { predictor_pod .name } " )
157162
158- elapsed_time = 0
159- model_loaded = False
160-
161- while elapsed_time < max_wait_time :
163+ def _check_model_ready () -> bool :
162164 try :
163165 pod_logs = predictor_pod .log (container = "kserve-container" )
164-
165166 if "Uvicorn running on" in pod_logs or "Application startup complete" in pod_logs :
166167 LOGGER .info ("vLLM server is running and ready!" )
167- model_loaded = True
168- break
168+ return True
169169 else :
170- LOGGER .info (f"Model still loading... (waited { elapsed_time } s)" )
170+ LOGGER .info ("Model still loading.." )
171+ return False
171172 except (ApiException , OSError ) as e :
172173 LOGGER .info (f"Could not get pod logs yet: { e } " )
174+ return False
173175
174- time .sleep (check_interval )
175- elapsed_time += check_interval
176-
177- if not model_loaded :
178- try :
179- full_logs = predictor_pod .log (container = "kserve-container" )
180- LOGGER .error (f"vLLM pod failed to start within { max_wait_time } s. Full logs:\n { full_logs } " )
181- except (ApiException , OSError ) as e :
182- LOGGER .error (f"Could not retrieve pod logs: { e } " )
183- raise UnexpectedFailureError (f"vLLM model failed to load within { max_wait_time } seconds" )
184-
185- LOGGER .info (f"Model loaded! Waiting { stabilization_wait } more seconds for server stabilization." )
186- time .sleep (stabilization_wait )
176+ try :
177+ for sample in TimeoutSampler (
178+ wait_timeout = max_wait_time ,
179+ sleep = check_interval ,
180+ func = _check_model_ready ,
181+ ):
182+ if sample :
183+ break
184+ except TimeoutExpiredError as e :
185+ LOGGER .error (f"vLLM pod failed to start within { max_wait_time } seconds" )
186+ collect_pod_information (pod = predictor_pod )
187+ raise UnexpectedFailureError (f"vLLM model failed to load within { max_wait_time } seconds" ) from e
187188
188189 return predictor_pod
0 commit comments