 #!/bin/bash
 
-# Usage: ./head_node_setup.sh <image_uri> <hf_token>
+# Usage: ./head_node_setup.sh <image_uri> <hf_token> <model_name>
 set -e
 
 log() {
     echo "[$(date '+%Y-%m-%d %H:%M:%S')] $1"
 }
 
+if [ "$#" -ne 3 ]; then
+    log "Error: Required parameters missing"
+    log "Usage: $0 <image_uri> <hf_token> <model_name>"
+    exit 1
+fi
+
 IMAGE_URI=$1
 HF_TOKEN=$2
+MODEL_NAME=$3
 HEAD_IP=$(hostname -i)
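+# Assumes passwordless SSH to the worker host "compute2" and that the first
+# address reported by hostname -I is the cluster-facing interface.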
+WORKER_IP=$(ssh compute2 "hostname -I" | awk '{print $1}')
 
-log "Starting head node setup..."
+log "Starting cluster setup..."
 log "Image URI: $IMAGE_URI"
 log "Head IP: $HEAD_IP"
+log "Worker IP: $WORKER_IP"
 
 # Start head node in tmux session and capture container ID
+log "Starting head node..."
 tmux new-session -d -s ray_head "bash /fsx/vllm-dlc/vllm/examples/online_serving/run_cluster.sh \
     $IMAGE_URI $HEAD_IP \
     --head \
@@ -28,5 +38,52 @@ tmux new-session -d -s ray_head "bash /fsx/vllm-dlc/vllm/examples/online_serving
     --ulimit memlock=-1:-1 \
     -p 8000:8000"
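+# run_cluster.sh launches the container with Ray in head mode; publishing port
+# 8000 is what exposes the OpenAI-compatible server outside the container.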
 
-log "Head node started"
+# Wait for head node to start and get container ID
+sleep 10
+HEAD_CONTAINER_ID=$(docker ps -q --filter "ancestor=$IMAGE_URI" --filter "status=running" | head -n 1)
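+# Note: the ancestor filter assumes no other container from $IMAGE_URI is
+# running on this host; otherwise head -n 1 may grab the wrong ID.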
+
+if [ -z "$HEAD_CONTAINER_ID" ]; then
+    log "Error: Failed to get head container ID"
+    exit 1
+fi
+
+log "Head node started with container ID: $HEAD_CONTAINER_ID"
+
+# Start worker node via SSH
+log "Starting worker node..."
+ssh compute2 "tmux new-session -d -s ray_worker 'bash /fsx/vllm-dlc/vllm/examples/online_serving/run_cluster.sh \
+    $IMAGE_URI \
+    $HEAD_IP \
+    --worker \
+    /fsx/.cache/huggingface \
+    -e VLLM_HOST_IP=$WORKER_IP \
+    -e FI_PROVIDER=efa \
+    -e FI_EFA_USE_DEVICE_RDMA=1 \
+    --device=/dev/infiniband/ \
+    --ulimit memlock=-1:-1'"
+
+log "Worker node setup initiated"
+
+# Wait for worker to connect
+sleep 20
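+# Optional sanity check (a sketch; assumes the ray CLI is available inside the
+# image): docker exec "$HEAD_CONTAINER_ID" ray status should report two nodes
+# once the worker has joined.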
+
+# Start vllm serve on the head node, detached so this script can continue
+log "Starting vLLM serve..."
+docker exec -d "$HEAD_CONTAINER_ID" /bin/bash -c "vllm serve $MODEL_NAME \
+    --tensor-parallel-size 8 \
+    --pipeline-parallel-size 2 \
+    --max-num-batched-tokens 16384"
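+# Tensor parallel 8 x pipeline parallel 2 = 16 GPUs, i.e. all 8 GPUs on each
+# of the two nodes, with one pipeline stage per node.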
+
+# Poll the health endpoint until the server is ready; loading a large model
+# across two nodes can take several minutes
+until curl -sf http://localhost:8000/health > /dev/null; do
+    log "Waiting for vLLM server to become ready..."
+    sleep 30
+done
+
+log "vLLM server is up on port 8000"
 
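+# Smoke-test the endpoint with a single chat completion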
+curl http://localhost:8000/v1/chat/completions \
+    -H "Content-Type: application/json" \
+    -d "{
+        \"model\": \"$MODEL_NAME\",
+        \"messages\": [{\"role\": \"user\", \"content\": \"Hello, how are you?\"}]
+    }"
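+# A successful call returns an OpenAI-style JSON response; the reply text is
+# in choices[0].message.content.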
+
+log "Setup complete. vLLM is serving $MODEL_NAME on port 8000."
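+
+# Example invocation (image URI and token values are illustrative):
+#   ./head_node_setup.sh <account>.dkr.ecr.<region>.amazonaws.com/vllm:latest hf_xxx deepseek-ai/DeepSeek-R1-Distill-Qwen-32B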