Skip to content

Commit f10b6df

Browse files
add delay and model ready waiter
1 parent cea0e46 commit f10b6df

File tree

1 file changed

+26
-8
lines changed

1 file changed

+26
-8
lines changed

test/vllm/ec2/test_artifacts/test_ec2.py

Lines changed: 26 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -88,17 +88,33 @@ def docker_cleanup(connection):
8888
connection.run("docker rm -f $(docker ps -aq)", warn=True)
8989

9090

91-
def wait_for_container_ready(
    connection,
    timeout: int = 1000,
    poll_interval: float = 10,
    model: str = "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
    endpoint: str = "http://localhost:8000/v1/completions",
) -> bool:
    """
    Wait until the model's completions endpoint answers successfully.

    Repeatedly runs a small ``curl`` completion request through
    ``connection.run`` until it succeeds or ``timeout`` seconds elapse.

    Args:
        connection: Object exposing ``run(cmd, hide=..., warn=...)`` whose
            result has an ``ok`` attribute.
        timeout: Maximum total number of seconds to keep polling.
        poll_interval: Seconds to pause between two probes.
        model: Model name sent in the completion request payload.
        endpoint: URL of the completions endpoint probed from the host
            behind ``connection``.

    Returns:
        True as soon as a probe succeeds, False if the timeout expires first.
    """
    import json
    import shlex

    payload = json.dumps({"model": model, "prompt": "Hello", "max_tokens": 10})
    deadline = time.monotonic() + timeout

    while True:
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            return False

        # --fail makes curl exit non-zero on HTTP 4xx/5xx; with plain -s an
        # error response would still be reported as success.
        # --max-time keeps a single hung request from overrunning the deadline.
        request_cap = max(1, int(min(remaining, 60)))
        curl_cmd = (
            f"curl --silent --fail --max-time {request_cap} "
            f"{shlex.quote(endpoint)} "
            "-H 'Content-Type: application/json' "
            f"-d {shlex.quote(payload)}"
        )

        try:
            result = connection.run(curl_cmd, hide=True, warn=True)
        except Exception as exc:
            # Best-effort probe: a transient failure to run the command must
            # not abort the wait, but it should be visible in the test log.
            print(f"Model endpoint check failed: {exc}")
        else:
            if result.ok:
                print("Model endpoint is responding")
                return True

        # Pause between probes, never sleeping past the deadline.
        time.sleep(max(0, min(poll_interval, deadline - time.monotonic())))
103119

104120

@@ -181,8 +197,10 @@ def test_vllm_benchmark_on_multi_node(head_connection, worker_connection, image_
181197
timeout=300,
182198
asynchronous=True,
183199
)
200+
184201
print("Waiting for model to be ready...")
185-
time.sleep(1000)
202+
if not wait_for_container_ready(head_connection, timeout=1000):
203+
raise Exception("Container failed to become ready within timeout period")
186204
print("Model serving started successfully")
187205

188206
# Run benchmark

0 commit comments

Comments
 (0)