Skip to content

Commit ee7d7db

Browse files
add single script
1 parent 5e99d0b commit ee7d7db

File tree

2 files changed

+76
-22
lines changed

2 files changed

+76
-22
lines changed

test/vllm/ec2/test_artifacts/test_ec2.py

Lines changed: 16 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -148,29 +148,26 @@ def test_vllm_benchmark_on_multi_node(head_connection, worker_connection, image_
148148
head_ip = head_connection.run("hostname -i").stdout.strip()
149149

150150
print("Starting head node...")
151-
head_connection.run(f"./head_node_setup.sh {image_uri} {hf_token}")
152-
153-
print("Starting worker node...")
154-
worker_connection.run(f"./worker_node_setup.sh {image_uri} {head_ip}")
151+
time.sleep(1000)
152+
head_connection.run(f"./head_node_setup.sh {image_uri} {hf_token} {model_name}")
155153

156-
head_container_id = get_container_id(head_connection, image_uri)
157-
print("Starting model serving inside Ray container...")
154+
# print("Starting worker node...")
155+
# worker_connection.run(f"./worker_node_setup.sh {image_uri} {head_ip}")
158156

159-
time.sleep(400)
157+
# head_container_id = get_container_id(head_connection, image_uri)
158+
# print("Starting model serving inside Ray container...")
160159

161-
commands_serving = [
162-
"tmux new-session -d -s vllm_serve",
163-
"tmux ls",
164-
"tmux attach-session -t vllm_serve",
165-
f'docker exec -it {head_container_id} /bin/bash -c "vllm serve {model_name} \
166-
--tensor-parallel-size 8 \
167-
--pipeline-parallel-size 2 \
168-
--max-num-batched-tokens 16384"',
169-
]
170-
head_connection.run("; ".join(commands_serving))
160+
# commands_serving = [
161+
# "tmux new-session -d -s vllm_serve",
162+
# "tmux ls",
163+
# "tmux attach-session -t vllm_serve",
164+
# f'docker exec -it {head_container_id} /bin/bash -c "vllm serve {model_name} \
165+
# --tensor-parallel-size 8 \
166+
# --pipeline-parallel-size 2 \
167+
# --max-num-batched-tokens 16384"',
168+
# ]
171169

172-
print("Waiting for model to load (15 minutes)...")
173-
time.sleep(1000)
170+
# head_connection.run(";".join(commands_serving), asynchronous=True)
174171

175172
print("Running benchmark...")
176173
benchmark_cmd = create_benchmark_command(model_name)

test/vllm/ec2/utils/head_node_setup.sh

Lines changed: 60 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,31 @@
11
#!/bin/bash
22

3-
# Usage: ./head_node_setup.sh <image_uri> <hf_token>
3+
# Usage: ./head_node_setup.sh <image_uri> <hf_token> <model_name>
44
set -e
55

66
log() {
77
echo "[$(date '+%Y-%m-%d %H:%M:%S')] $1"
88
}
99

10+
if [ "$#" -ne 3 ]; then
11+
log "Error: Required parameters missing"
12+
log "Usage: $0 <image_uri> <hf_token> <model_name>"
13+
exit 1
14+
fi
15+
1016
IMAGE_URI=$1
1117
HF_TOKEN=$2
18+
MODEL_NAME=$3
1219
HEAD_IP=$(hostname -i)
20+
WORKER_IP=$(ssh compute2 "hostname -I" | awk '{print $1}')
1321

14-
log "Starting head node setup..."
22+
log "Starting cluster setup..."
1523
log "Image URI: $IMAGE_URI"
1624
log "Head IP: $HEAD_IP"
25+
log "Worker IP: $WORKER_IP"
1726

1827
# Start head node in tmux session and capture container ID
28+
log "Starting head node..."
1929
tmux new-session -d -s ray_head "bash /fsx/vllm-dlc/vllm/examples/online_serving/run_cluster.sh \
2030
$IMAGE_URI $HEAD_IP \
2131
--head \
@@ -28,5 +38,52 @@ tmux new-session -d -s ray_head "bash /fsx/vllm-dlc/vllm/examples/online_serving
2838
--ulimit memlock=-1:-1 \
2939
-p 8000:8000"
3040

31-
log "Head node started"
41+
# Wait for head node to start and get container ID
42+
sleep 10
43+
HEAD_CONTAINER_ID=$(docker ps -q --filter "ancestor=$IMAGE_URI" --filter "status=running" | head -n 1)
44+
45+
if [ -z "$HEAD_CONTAINER_ID" ]; then
46+
log "Error: Failed to get head container ID"
47+
exit 1
48+
fi
49+
50+
log "Head node started with container ID: $HEAD_CONTAINER_ID"
51+
52+
# Start worker node via SSH
53+
log "Starting worker node..."
54+
ssh compute2 "tmux new-session -d -s ray_worker 'bash /fsx/vllm-dlc/vllm/examples/online_serving/run_cluster.sh \
55+
$IMAGE_URI \
56+
$HEAD_IP \
57+
--worker \
58+
/fsx/.cache/huggingface \
59+
-e VLLM_HOST_IP=$WORKER_IP \
60+
-e FI_PROVIDER=efa \
61+
-e FI_EFA_USE_DEVICE_RDMA=1 \
62+
--device=/dev/infiniband/ \
63+
--ulimit memlock=-1:-1'"
64+
65+
log "Worker node setup initiated"
66+
67+
# Wait for worker to connect
68+
sleep 20
69+
70+
# Start vllm serve on head node
71+
log "Starting vLLM serve..."
72+
docker exec -it $HEAD_CONTAINER_ID /bin/bash -c "vllm serve $MODEL_NAME \
73+
--tensor-parallel-size 8 \
74+
--pipeline-parallel-size 2 \
75+
--max-num-batched-tokens 16384"
76+
77+
sleep 1000
78+
79+
log "vLLM serve started"
80+
log "vLLM service should now be running on port 8000"
3281

82+
curl http://localhost:8000/v1/chat/completions \
83+
-H "Content-Type: application/json" \
84+
-d '{
85+
"model": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
86+
"messages": [{"role": "user", "content": "Hello, how are you?"}]
87+
}'
88+
89+
log "Setup complete. vLLM service should now be running."

0 commit comments

Comments (0)