|
#!/bin/bash
# Launch Qwen/Qwen3-ASR-1.7B (1.7B multilingual ASR, 52 langs incl.
# 22 Chinese dialects, built on Qwen3-Omni foundation) on one Clariden
# GH200 node with vLLM, DP=4 TP=1 (4 independent replicas, one per GPU).
# Suitable for high-throughput batch / streaming ASR over many audio clips.
#
# Qwen3-ASR uses the Qwen3ASRForConditionalGeneration architecture, which
# is registered in stock vLLM 0.19+ (no vllm-omni needed). The generic
# `vllm.toml` env points at the ci/vllm_cuda13 image (vLLM 0.19.1rc1,
# transformers 5.5.4, torchaudio 2.11) which has the full audio arch set
# and the newer Qwen3ASRConfig schema (with thinker_config). The image
# is missing librosa/audioread (vLLM's audio file loader), so we install
# them at launch via --pre-launch-cmds.
#
# Model weights (downloaded separately):
#   /capstor/store/cscs/swissai/infra01/MLLM/audio_asr/Qwen3-ASR-1.7B/
#
# NOTE(review): the --slurm-environment path below is relative; presumably
# it is resolved against the current working directory, so run this script
# from the repository root -- confirm against the `sml` CLI.

# Abort on the first failing command, on unset variables, and on failures
# inside pipelines, so a broken setup never submits a half-configured job.
set -euo pipefail

# Location of the pre-downloaded model weights.
readonly MODEL_DIR=/capstor/store/cscs/swissai/infra01/MLLM/audio_asr/Qwen3-ASR-1.7B

# Single source of truth for the port: it is handed both to `sml`
# (--worker-port) and to vLLM (--port), and the two values must agree.
readonly PORT=8080

# The served name is suffixed with the submitting user's login name.
# Assigned separately from `readonly` so that a failing `whoami` is not
# masked and trips `set -e` instead of yielding a name ending in "-".
served_model_name="Qwen/Qwen3-ASR-1.7B-$(whoami)"
readonly served_model_name

# Arguments forwarded to vLLM. Kept as an array so each flag sits on its
# own line without backslash continuations inside a quoted string.
framework_args=(
  --model "${MODEL_DIR}"
  --served-model-name "${served_model_name}"
  --data-parallel-size 4
  --tensor-parallel-size 1
  --host 0.0.0.0
  --port "${PORT}"
  --dtype bfloat16
  --max-model-len 32768
  --trust-remote-code
)

# `sml` takes the framework arguments as ONE string, so the array is joined
# with "${framework_args[*]}" (space-separated under the default IFS).
sml advanced \
  --firecrest-system clariden \
  --partition normal \
  --slurm-nodes 1 \
  --slurm-time 6:00:00 \
  --serving-framework vllm \
  --worker-port "${PORT}" \
  --slurm-environment src/swiss_ai_model_launch/assets/envs/vllm.toml \
  --pre-launch-cmds "pip install librosa audioread" \
  --framework-args "${framework_args[*]}"
0 commit comments