|
26 | 26 | MODEL="Qwen/Qwen3-VL-30B-A3B-Instruct" |
27 | 27 |
|
28 | 28 | # Parallelization Settings |
29 | | -# Adjust based on your GPU configuration |
| 29 | +# Adjust based on your GPU configuration. |
| 30 | +# If DATA_PARALLEL_SIZE > 1, this script automatically switches to torchrun (single node) and launches TENSOR_PARALLEL_SIZE * DATA_PARALLEL_SIZE processes. |
30 | 31 | TENSOR_PARALLEL_SIZE=4 # Number of GPUs for tensor parallelism |
31 | | -DATA_PARALLEL_SIZE=1 # Number of GPUs for data parallelism |
| 32 | +DATA_PARALLEL_SIZE=1 # Number of model replicas for data parallelism |
32 | 33 |
|
33 | 34 | # Memory and Performance Settings |
34 | 35 | GPU_MEMORY_UTILIZATION=0.85 # Fraction of GPU memory to use (0.0 - 1.0) |
@@ -68,10 +69,17 @@ echo "Batch Size: $BATCH_SIZE" |
68 | 69 | echo "Output Path: $OUTPUT_PATH" |
69 | 70 | echo "==========================================" |
70 | 71 |
|
71 | | -# Build the command |
72 | | -CMD="uv run python -m lmms_eval \ |
| 72 | +LAUNCHER="uv run python -m lmms_eval" |
| 73 | +MODEL_ARGS="model=${MODEL},tensor_parallel_size=${TENSOR_PARALLEL_SIZE},gpu_memory_utilization=${GPU_MEMORY_UTILIZATION}" |
| 74 | + |
| 75 | +if [ "${DATA_PARALLEL_SIZE}" -gt 1 ]; then |
| 76 | + LAUNCHER="uv run python -m torch.distributed.run --standalone --nproc_per_node=$((TENSOR_PARALLEL_SIZE * DATA_PARALLEL_SIZE)) -m lmms_eval" |
| 77 | + MODEL_ARGS="${MODEL_ARGS},data_parallel_size=${DATA_PARALLEL_SIZE}" |
| 78 | +fi |
| 79 | + |
| 80 | +CMD="${LAUNCHER} \ |
73 | 81 | --model vllm \ |
74 | | - --model_args model=${MODEL},tensor_parallel_size=${TENSOR_PARALLEL_SIZE},data_parallel_size=${DATA_PARALLEL_SIZE},gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ |
| 82 | + --model_args ${MODEL_ARGS} \ |
75 | 83 | --tasks ${TASKS} \ |
76 | 84 | --batch_size ${BATCH_SIZE} \ |
77 | 85 | --output_path ${OUTPUT_PATH}" |
|
0 commit comments