Skip to content

Commit 14e317b

Browse files
authored
Merge pull request #771 from NVIDIA/am/dynamo-ports
Improve reliability in ports selection for Dynamo on Slurm
2 parents bebc6ef + 1dc5574 commit 14e317b

File tree

2 files changed

+30
-5
lines changed

2 files changed

+30
-5
lines changed

doc/workloads/ai_dynamo.rst

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,11 @@ Before running the AI Dynamo workload on a Kubernetes cluster, ensure that the c
2727
helm upgrade -n default -i kai-scheduler oci://ghcr.io/nvidia/kai-scheduler/kai-scheduler:0.0.0-4c29820
2828
2929
Launch and Monitor the Job
30-
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
30+
~~~~~~~~~~~~~~~~~~~~~~~~~~
31+
32+
.. note::
33+
34+
Both CloudAI and Dynamo will try to access the HuggingFace Hub. To avoid ``429 Too Many Requests`` errors and to access models that require authentication, it is recommended to define the ``HF_TOKEN`` environment variable before invoking CloudAI.
3135

3236
.. code-block:: bash
3337

src/cloudai/workloads/ai_dynamo/ai_dynamo.sh

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ dynamo_args["node-setup-cmd"]=""
3030
dynamo_args["prefill-cmd"]="python3 -m dynamo.vllm --is-prefill-worker"
3131
dynamo_args["decode-cmd"]="python3 -m dynamo.vllm"
3232
dynamo_args["ingress-cmd"]="python -m dynamo.frontend --router-mode kv"
33-
dynamo_args["port"]=8080
33+
dynamo_args["port"]=$((8080 + SLURM_JOBID % 100))
3434
dynamo_args["endpoint"]="v1/chat/completions"
3535
dynamo_args["model"]="deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
3636
dynamo_args["etcd-port"]=2379
@@ -641,15 +641,25 @@ function launch_decode()
641641
wait_for_etcd
642642

643643
local workers_per_node=${dynamo_args["decode-workers-per-node"]}
644-
log "Using workers per node: $workers_per_node"
644+
local tp_size=${decode_args["--${dynamo_args["tp-arg-name"]}"]}
645+
local base_nixl_port=${VLLM_NIXL_SIDE_CHANNEL_PORT:-5557}
646+
local base_kv_event_port=${DYN_VLLM_KV_EVENT_PORT:-20080}
647+
log "Launching $workers_per_node decode worker(s) with unique port ranges"
645648
646649
for i in $(seq 0 $(( $workers_per_node - 1 ))); do
647650
local gpu_list=$(_gpu_list_for_worker "${dynamo_args["decode-gpus-per-worker"]}" "$i")
648651
local log_file=$(_log_file_for_worker "decode" "$i")
652+
# Each worker needs unique port ranges to avoid ZMQ conflicts:
653+
# - NIXL side channel: base_port + (worker_index * tp_size) for TP ranks
654+
# - KV event port: one per worker
655+
local nixl_port=$((base_nixl_port + (i * tp_size)))
656+
local kv_event_port=$((base_kv_event_port + i))
649657
650-
log "Launching decode worker $i on GPUs $gpu_list"
658+
log "Launching decode worker $i on GPUs $gpu_list (NIXL port: $nixl_port, KV event port: $kv_event_port)"
651659
log "Decode cmd: ${dynamo_args["decode-cmd"]} $(array_to_args decode_args) ${decode_args["--extra-args"]}"
652660
CUDA_VISIBLE_DEVICES=$gpu_list \
661+
VLLM_NIXL_SIDE_CHANNEL_PORT=$nixl_port \
662+
DYN_VLLM_KV_EVENT_PORT=$kv_event_port \
653663
${dynamo_args["decode-cmd"]} \
654664
$(array_to_args decode_args) ${decode_args["--extra-args"]} > $log_file 2>&1 &
655665
done
@@ -669,14 +679,25 @@ function launch_prefill()
669679
wait_for_etcd
670680
671681
local workers_per_node=${dynamo_args["prefill-workers-per-node"]}
682+
local tp_size=${prefill_args["--${dynamo_args["tp-arg-name"]}"]}
683+
local base_nixl_port=${VLLM_NIXL_SIDE_CHANNEL_PORT:-5557}
684+
local base_kv_event_port=${DYN_VLLM_KV_EVENT_PORT:-20080}
685+
log "Launching $workers_per_node prefill worker(s) with unique port ranges"
672686

673687
for i in $(seq 0 $(( $workers_per_node - 1 ))); do
674688
local gpu_list=$(_gpu_list_for_worker "${dynamo_args["prefill-gpus-per-worker"]}" "$i")
675689
local log_file=$(_log_file_for_worker "prefill" "$i")
690+
# Each worker needs unique port ranges to avoid ZMQ conflicts:
691+
# - NIXL side channel: base_port + (worker_index * tp_size) for TP ranks
692+
# - KV event port: one per worker
693+
local nixl_port=$((base_nixl_port + (i * tp_size)))
694+
local kv_event_port=$((base_kv_event_port + i))
676695

677-
log "Launching prefill worker $i on GPUs $gpu_list"
696+
log "Launching prefill worker $i on GPUs $gpu_list (NIXL port: $nixl_port, KV event port: $kv_event_port)"
678697
log "Prefill cmd: ${dynamo_args["prefill-cmd"]} $(array_to_args prefill_args) ${prefill_args["--extra-args"]}"
679698
CUDA_VISIBLE_DEVICES=$gpu_list \
699+
VLLM_NIXL_SIDE_CHANNEL_PORT=$nixl_port \
700+
DYN_VLLM_KV_EVENT_PORT=$kv_event_port \
680701
${dynamo_args["prefill-cmd"]} \
681702
$(array_to_args prefill_args) ${prefill_args["--extra-args"]} > $log_file 2>&1 &
682703
done

0 commit comments

Comments
 (0)