@@ -30,7 +30,7 @@ dynamo_args["node-setup-cmd"]=""
# Default launcher settings. Keys are looked up verbatim elsewhere in this
# script (e.g. ${dynamo_args["decode-cmd"]}), so they must not carry padding.
dynamo_args["prefill-cmd"]="python3 -m dynamo.vllm --is-prefill-worker"
dynamo_args["decode-cmd"]="python3 -m dynamo.vllm"
dynamo_args["ingress-cmd"]="python -m dynamo.frontend --router-mode kv"

#######################################
# Print the default value for dynamo_args["port"].
# The port is 8080 plus the last two decimal digits of the Slurm job id, so
# the result is always in 8080..8179.
# NOTE(review): presumably this spreads concurrent jobs that share a host
# across different ports -- confirm; two jobs whose ids are congruent
# modulo 100 still collide.
# Globals:   SLURM_JOBID (read; optional)
# Outputs:   the port number on stdout
#######################################
function _default_port()
{
  local job_id=${SLURM_JOBID:-0}
  # Outside Slurm (unset/empty) or with a malformed id, fall back to the
  # base port instead of failing under `set -u` or on an arithmetic error.
  [[ $job_id =~ ^[0-9]+$ ]] || job_id=0
  # 10# forces base 10 so a zero-padded id is not parsed as octal.
  echo $(( 8080 + 10#$job_id % 100 ))
}

dynamo_args["port"]=$(_default_port)
dynamo_args["endpoint"]="v1/chat/completions"
dynamo_args["model"]="deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
dynamo_args["etcd-port"]=2379
@@ -641,15 +641,25 @@ function launch_decode()
# Launch every decode worker assigned to this node in the background, each on
# its own GPU set, log file and port range. Blocks on wait_for_etcd first.
wait_for_etcd

local workers_per_node=${dynamo_args["decode-workers-per-node"]}
local tp_size=${decode_args["--${dynamo_args["tp-arg-name"]}"]}
# tp_size is the stride between workers' NIXL ports. Bash arithmetic treats an
# empty or non-numeric name as 0, which would silently hand every worker the
# same port, so fall back to a stride of 1 unless it is a positive integer.
[[ $tp_size =~ ^[1-9][0-9]*$ ]] || tp_size=1
# Caller-provided values act as the base of the range; otherwise use defaults.
local base_nixl_port=${VLLM_NIXL_SIDE_CHANNEL_PORT:-5557}
local base_kv_event_port=${DYN_VLLM_KV_EVENT_PORT:-20080}
# NOTE(review): launch_prefill starts from the same two base ports; if prefill
# and decode workers can ever share a node their ranges overlap -- confirm.
log "Launching $workers_per_node decode worker(s) with unique port ranges"

local i
for (( i = 0; i < workers_per_node; i++ )); do
  local gpu_list=$(_gpu_list_for_worker "${dynamo_args["decode-gpus-per-worker"]}" "$i")
  local log_file=$(_log_file_for_worker "decode" "$i")
  # Each worker needs unique port ranges to avoid ZMQ conflicts:
  #  - NIXL side channel: base_port + (worker_index * tp_size), leaving
  #    tp_size consecutive ports per worker (assumed: one per TP rank --
  #    confirm against the vLLM NIXL connector).
  #  - KV event port: one per worker.
  local nixl_port=$(( base_nixl_port + i * tp_size ))
  local kv_event_port=$(( base_kv_event_port + i ))

  log "Launching decode worker $i on GPUs $gpu_list (NIXL port: $nixl_port, KV event port: $kv_event_port)"
  log "Decode cmd: ${dynamo_args["decode-cmd"]} $(array_to_args decode_args) ${decode_args["--extra-args"]}"
  # The command string and argument expansions are intentionally unquoted so
  # they word-split into argv; only the redirect target is quoted.
  CUDA_VISIBLE_DEVICES=$gpu_list \
  VLLM_NIXL_SIDE_CHANNEL_PORT=$nixl_port \
  DYN_VLLM_KV_EVENT_PORT=$kv_event_port \
  ${dynamo_args["decode-cmd"]} \
  $(array_to_args decode_args) ${decode_args["--extra-args"]} > "$log_file" 2>&1 &
done
@@ -669,14 +679,25 @@ function launch_prefill()
# Launch every prefill worker assigned to this node in the background, each on
# its own GPU set, log file and port range. Blocks on wait_for_etcd first.
wait_for_etcd

local workers_per_node=${dynamo_args["prefill-workers-per-node"]}
local tp_size=${prefill_args["--${dynamo_args["tp-arg-name"]}"]}
# tp_size is the stride between workers' NIXL ports. Bash arithmetic treats an
# empty or non-numeric name as 0, which would silently hand every worker the
# same port, so fall back to a stride of 1 unless it is a positive integer.
[[ $tp_size =~ ^[1-9][0-9]*$ ]] || tp_size=1
# Caller-provided values act as the base of the range; otherwise use defaults.
local base_nixl_port=${VLLM_NIXL_SIDE_CHANNEL_PORT:-5557}
local base_kv_event_port=${DYN_VLLM_KV_EVENT_PORT:-20080}
# NOTE(review): launch_decode starts from the same two base ports; if prefill
# and decode workers can ever share a node their ranges overlap -- confirm.
log "Launching $workers_per_node prefill worker(s) with unique port ranges"

local i
for (( i = 0; i < workers_per_node; i++ )); do
  local gpu_list=$(_gpu_list_for_worker "${dynamo_args["prefill-gpus-per-worker"]}" "$i")
  local log_file=$(_log_file_for_worker "prefill" "$i")
  # Each worker needs unique port ranges to avoid ZMQ conflicts:
  #  - NIXL side channel: base_port + (worker_index * tp_size), leaving
  #    tp_size consecutive ports per worker (assumed: one per TP rank --
  #    confirm against the vLLM NIXL connector).
  #  - KV event port: one per worker.
  local nixl_port=$(( base_nixl_port + i * tp_size ))
  local kv_event_port=$(( base_kv_event_port + i ))

  log "Launching prefill worker $i on GPUs $gpu_list (NIXL port: $nixl_port, KV event port: $kv_event_port)"
  log "Prefill cmd: ${dynamo_args["prefill-cmd"]} $(array_to_args prefill_args) ${prefill_args["--extra-args"]}"
  # The command string and argument expansions are intentionally unquoted so
  # they word-split into argv; only the redirect target is quoted.
  CUDA_VISIBLE_DEVICES=$gpu_list \
  VLLM_NIXL_SIDE_CHANNEL_PORT=$nixl_port \
  DYN_VLLM_KV_EVENT_PORT=$kv_event_port \
  ${dynamo_args["prefill-cmd"]} \
  $(array_to_args prefill_args) ${prefill_args["--extra-args"]} > "$log_file" 2>&1 &
done
0 commit comments