@@ -88,17 +88,33 @@ def docker_cleanup(connection):
8888 connection .run ("docker rm -f $(docker ps -aq)" , warn = True )
8989
9090
def wait_for_container_ready(connection, timeout: int = 1000, poll_interval: float = 10.0) -> bool:
    """
    Wait for the model-serving endpoint to become ready by polling it.

    Repeatedly sends a small completion request to the local vLLM endpoint
    via curl on the remote host; a successful curl exit is treated as
    readiness. NOTE(review): `curl -s` exits 0 for any HTTP response,
    including 4xx/5xx — presumably the endpoint refuses connections until
    the model is loaded, so exit status alone is the readiness signal;
    confirm against the serving container's behavior.

    Args:
        connection: Fabric-style connection object exposing ``run()``.
        timeout: Maximum number of seconds to keep polling.
        poll_interval: Seconds to sleep between polls (new, defaulted, so
            existing callers are unaffected).

    Returns:
        True if the endpoint responded before the timeout, False otherwise.
    """
    # Hoisted out of the loop: the command never changes between polls.
    curl_cmd = """
    curl -s http://localhost:8000/v1/completions \
        -H "Content-Type: application/json" \
        -d '{
            "model": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
            "prompt": "Hello",
            "max_tokens": 10
        }'
    """
    start_time = time.time()
    while time.time() - start_time < timeout:
        try:
            # warn=True turns a non-zero curl exit into a failed Result
            # instead of an exception; only transport-level errors
            # (e.g. dropped SSH connection) reach the except below.
            result = connection.run(curl_cmd, hide=True, warn=True)
            if result.ok:
                print("Model endpoint is responding")
                return True
        except Exception:
            # Best-effort: a transient connection failure just means
            # "not ready yet" — retry after the backoff below.
            pass
        # Back off between polls instead of hammering the endpoint in a
        # tight loop (the previous revision had no sleep here at all).
        time.sleep(poll_interval)
    return False
103119
104120
@@ -181,8 +197,10 @@ def test_vllm_benchmark_on_multi_node(head_connection, worker_connection, image_
181197 timeout = 300 ,
182198 asynchronous = True ,
183199 )
200+
184201 print ("Waiting for model to be ready..." )
185- time .sleep (1000 )
202+ if not wait_for_container_ready (head_connection , timeout = 1000 ):
203+ raise Exception ("Container failed to become ready within timeout period" )
186204 print ("Model serving started successfully" )
187205
188206 # Run benchmark
0 commit comments