Skip to content

Commit ffc62a2

Browse files
authored
fix(examples): use torchrun for vllm data parallel (#1234)
1 parent a120909 commit ffc62a2

2 files changed

Lines changed: 25 additions & 8 deletions

File tree

examples/models/vllm_qwen35.sh

Lines changed: 12 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@ MODEL="Qwen/Qwen3.5-397B-A17B"
44
TASKS="mmmu_val,mme"
55

66
TENSOR_PARALLEL_SIZE=8
7+
DATA_PARALLEL_SIZE=1
78
GPU_MEMORY_UTILIZATION=0.85
89
BATCH_SIZE=16
910
MAX_MODEL_LEN=262144
@@ -15,9 +16,17 @@ REASONING_PARSER="qwen3"
1516
OUTPUT_PATH="./logs/qwen35_vllm"
1617
LOG_SUFFIX="qwen35_vllm"
1718

18-
CMD="uv run python -m lmms_eval \
19+
LAUNCHER="uv run python -m lmms_eval"
20+
MODEL_ARGS="model=${MODEL},tensor_parallel_size=${TENSOR_PARALLEL_SIZE},gpu_memory_utilization=${GPU_MEMORY_UTILIZATION},max_model_len=${MAX_MODEL_LEN},reasoning_parser=${REASONING_PARSER}"
21+
22+
if [ "${DATA_PARALLEL_SIZE}" -gt 1 ]; then
23+
LAUNCHER="uv run python -m torch.distributed.run --standalone --nproc_per_node=$((TENSOR_PARALLEL_SIZE * DATA_PARALLEL_SIZE)) -m lmms_eval"
24+
MODEL_ARGS="${MODEL_ARGS},data_parallel_size=${DATA_PARALLEL_SIZE}"
25+
fi
26+
27+
CMD="${LAUNCHER} \
1928
--model vllm \
20-
--model_args model=${MODEL},tensor_parallel_size=${TENSOR_PARALLEL_SIZE},gpu_memory_utilization=${GPU_MEMORY_UTILIZATION},max_model_len=${MAX_MODEL_LEN},reasoning_parser=${REASONING_PARSER} \
29+
--model_args ${MODEL_ARGS} \
2130
--tasks ${TASKS} \
2231
--batch_size ${BATCH_SIZE} \
2332
--log_samples --log_samples_suffix ${LOG_SUFFIX} \
@@ -27,4 +36,4 @@ echo "Running command:"
2736
echo "$CMD"
2837
echo ""
2938

30-
eval $CMD
39+
eval "$CMD"

examples/models/vllm_qwen3vl.sh

Lines changed: 13 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -26,9 +26,10 @@
2626
MODEL="Qwen/Qwen3-VL-30B-A3B-Instruct"
2727

2828
# Parallelization Settings
29-
# Adjust based on your GPU configuration
29+
# Adjust based on your GPU configuration.
30+
# If DATA_PARALLEL_SIZE > 1, this script automatically switches to torchrun.
3031
TENSOR_PARALLEL_SIZE=4 # Number of GPUs for tensor parallelism
31-
DATA_PARALLEL_SIZE=1 # Number of GPUs for data parallelism
32+
DATA_PARALLEL_SIZE=1 # Number of model replicas for data parallelism
3233

3334
# Memory and Performance Settings
3435
GPU_MEMORY_UTILIZATION=0.85 # Fraction of GPU memory to use (0.0 - 1.0)
@@ -68,10 +69,17 @@ echo "Batch Size: $BATCH_SIZE"
6869
echo "Output Path: $OUTPUT_PATH"
6970
echo "=========================================="
7071

71-
# Build the command
72-
CMD="uv run python -m lmms_eval \
72+
LAUNCHER="uv run python -m lmms_eval"
73+
MODEL_ARGS="model=${MODEL},tensor_parallel_size=${TENSOR_PARALLEL_SIZE},gpu_memory_utilization=${GPU_MEMORY_UTILIZATION}"
74+
75+
if [ "${DATA_PARALLEL_SIZE}" -gt 1 ]; then
76+
LAUNCHER="uv run python -m torch.distributed.run --standalone --nproc_per_node=$((TENSOR_PARALLEL_SIZE * DATA_PARALLEL_SIZE)) -m lmms_eval"
77+
MODEL_ARGS="${MODEL_ARGS},data_parallel_size=${DATA_PARALLEL_SIZE}"
78+
fi
79+
80+
CMD="${LAUNCHER} \
7381
--model vllm \
74-
--model_args model=${MODEL},tensor_parallel_size=${TENSOR_PARALLEL_SIZE},data_parallel_size=${DATA_PARALLEL_SIZE},gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
82+
--model_args ${MODEL_ARGS} \
7583
--tasks ${TASKS} \
7684
--batch_size ${BATCH_SIZE} \
7785
--output_path ${OUTPUT_PATH}"

0 commit comments

Comments
 (0)