@@ -33,8 +33,6 @@ dynamo_args["ingress-cmd"]="python -m dynamo.frontend --router-mode kv"
3333dynamo_args[" port" ]=8080
3434dynamo_args[" endpoint" ]=" v1/chat/completions"
3535dynamo_args[" model" ]=" deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
36- dynamo_args[" etcd-cmd" ]=" etcd --log-level debug"
37- dynamo_args[" nats-cmd" ]=" nats-server -js"
3836dynamo_args[" etcd-port" ]=2379
3937dynamo_args[" nats-port" ]=4222
4038dynamo_args[" workspace-path" ]=" /workspace"
@@ -47,8 +45,12 @@ dynamo_args["tp-arg-name"]="tensor-parallel-size"
4745dynamo_args[" pp-arg-name" ]=" pipeline-parallel-size"
4846dynamo_args[" multiple-prefill-workers-per-node" ]=" true"
4947dynamo_args[" multiple-decode-workers-per-node" ]=" true"
50- dynamo_args[" prefill-initialized-regex" ]=" prefill.*initialized"
51- dynamo_args[" decode-initialized-regex" ]=" decode.*initialized"
48+ dynamo_args[" prefill-initialized-regex" ]=" Worker.*has.been.initialized"
49+ dynamo_args[" decode-initialized-regex" ]=" Worker.*has.been.initialized"
50+
51+ dynamo_args[" etcd-cmd" ]=" etcd --log-level debug"
52+ dynamo_args[" nats-cmd" ]=" nats-server -js"
53+ dynamo_args[" genai-perf-cmd" ]=" genai-perf profile"
5254
5355# sglang-specific optional ports. Ignored by vllm.
5456dynamo_args[" sgl-http-port" ]=9001
@@ -310,15 +312,21 @@ _compute_worker_allocation_vllm() {
310312 dynamo_args[" decode-gpus-per-worker" ]=$num_gpus
311313 fi
312314
315+ log " DECODE: num GPUs: $num_gpus , GPUs per worker: ${dynamo_args["decode-gpus-per-worker"]} "
316+ log " PREFILL: num GPUs: $num_gpus , GPUs per worker: ${dynamo_args["prefill-gpus-per-worker"]} "
313317 dynamo_args[" prefill-workers-per-node" ]=$(( num_gpus / dynamo_args["prefill- gpus- per- worker"] ))
314318 dynamo_args[" decode-workers-per-node" ]=$(( num_gpus / dynamo_args["decode- gpus- per- worker"] ))
319+ log " DECODE: workers per node: ${dynamo_args["decode-workers-per-node"]} "
320+ log " PREFILL: workers per node: ${dynamo_args["prefill-workers-per-node"]} "
315321
316322 if [[ -n " ${prefill_args["--num-nodes"]} " ]]; then
317323 dynamo_args[" num-prefill-nodes" ]=${prefill_args["--num-nodes"]}
318324 fi
319325 if [[ -n " ${decode_args["--num-nodes"]} " ]]; then
320326 dynamo_args[" num-decode-nodes" ]=${decode_args["--num-nodes"]}
321327 fi
328+ log " NUM PREFILL NODES: ${dynamo_args["num-prefill-nodes"]} "
329+ log " NUM DECODE NODES: ${dynamo_args["num-decode-nodes"]} "
322330}
323331
324332_compute_worker_allocation () {
@@ -597,7 +605,7 @@ validate_environment() {
597605
598606function launch_etcd()
599607{
600- log " Launching etcd"
608+ log " Launching etcd with cmd: ${dynamo_args["etcd-cmd"]} --listen-client-urls http://0.0.0.0: ${dynamo_args["etcd-port"]} --advertise-client-urls http://0.0.0.0: ${dynamo_args["etcd-port"]} "
601609 ${dynamo_args["etcd-cmd"]} \
602610 --listen-client-urls http://0.0.0.0:${dynamo_args["etcd-port"]} \
603611 --advertise-client-urls http://0.0.0.0:${dynamo_args["etcd-port"]} \
@@ -606,7 +614,7 @@ function launch_etcd()
606614
#######################################
# Run the NATS server command and capture its output in a log file.
# The command is not backgrounded here, so this function returns only
# when the command exits.
# Globals:
#   dynamo_args (read) - "nats-cmd" is the command line to run,
#                        "nats-port" is the value passed after -p
#   RESULTS_DIR (read) - directory that receives nats.log
# Outputs:
#   One message through log describing the command being launched;
#   stdout and stderr of the command go to ${RESULTS_DIR}/nats.log
# Returns:
#   Exit status of the NATS command.
#######################################
function launch_nats()
{
  log "Launching nats with cmd: ${dynamo_args["nats-cmd"]} -p ${dynamo_args["nats-port"]}"
  # nats-cmd stores the executable together with its flags in one string, so
  # it is deliberately left unquoted to be word-split into separate arguments.
  # The port and the redirect target are quoted: an unquoted redirect target
  # that expands to several words aborts with "ambiguous redirect".
  # shellcheck disable=SC2086
  ${dynamo_args["nats-cmd"]} -p "${dynamo_args["nats-port"]}" > "${RESULTS_DIR}/nats.log" 2>&1
}
612620
@@ -633,12 +641,14 @@ function launch_decode()
633641 wait_for_etcd
634642
635643 local workers_per_node=${dynamo_args["decode-workers-per-node"]}
644+ log " Using workers per node: $workers_per_node "
636645
637646 for i in $( seq 0 $(( $workers_per_node - 1 )) ) ; do
638647 local gpu_list=$( _gpu_list_for_worker " ${dynamo_args["decode-gpus-per-worker"]} " " $i " )
639648 local log_file=$( _log_file_for_worker " decode" " $i " )
640649
641650 log " Launching decode worker $i on GPUs $gpu_list "
651+ log " Decode cmd: ${dynamo_args["decode-cmd"]} $( array_to_args decode_args) ${decode_args["--extra-args"]} "
642652 CUDA_VISIBLE_DEVICES=$gpu_list \
643653 ${dynamo_args["decode-cmd"]} \
644654 $( array_to_args decode_args) ${decode_args["--extra-args"]} > $log_file 2>&1 &
@@ -665,6 +675,7 @@ function launch_prefill()
665675 local log_file=$( _log_file_for_worker " prefill" " $i " )
666676
667677 log " Launching prefill worker $i on GPUs $gpu_list "
678+ log " Prefill cmd: ${dynamo_args["prefill-cmd"]} $( array_to_args prefill_args) ${prefill_args["--extra-args"]} "
668679 CUDA_VISIBLE_DEVICES=$gpu_list \
669680 ${dynamo_args["prefill-cmd"]} \
670681 $( array_to_args prefill_args) ${prefill_args["--extra-args"]} > $log_file 2>&1 &
@@ -680,11 +691,12 @@ function wait_for_dynamo_frontend()
680691 local have_prefill=$( _count_initialized_prefill)
681692 local have_decode=$( _count_initialized_decode)
682693
694+ log " Initialized: prefill ${have_prefill} /${want_prefill} ; decode ${have_decode} /${want_decode} "
695+
683696 if [[ $have_prefill -ge $want_prefill && $have_decode -ge $want_decode ]]; then
684697 break
685698 fi
686699
687- log " Initialized: prefill ${have_prefill} /${want_prefill} ; decode ${have_decode} /${want_decode} "
688700 exit_on_error
689701 sleep 30
690702 done
@@ -710,7 +722,7 @@ function launch_genai_perf()
710722 echo " Response: $resp "
711723
712724 local genai_perf_arguments=$( array_to_args genai_perf_args)
713- log " Launching genai-perf with args: $genai_perf_arguments ${genai_perf_args["--extra-args"]} "
725+ log " Launching genai-perf with cmd: ${dynamo_args["genai-perf-cmd"]} $genai_perf_arguments ${genai_perf_args["--extra-args"]} "
714726
715727 ${dynamo_args["genai-perf-cmd"]} ${genai_perf_arguments} ${genai_perf_args["--extra-args"]} > ${RESULTS_DIR} /genai_perf.log 2>&1
716728
0 commit comments