Skip to content

Commit a6108e7

Browse files
authored
vllm-omni example (#39)
1 parent e5029ca commit a6108e7

2 files changed

Lines changed: 47 additions & 0 deletions

File tree

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
#!/bin/bash
# Example launch: serve Qwen3-Omni-30B-A3B-Captioner with vLLM through `sml advanced`.
# Requests 1 Slurm node for 6 hours and points at the vllm_qwen3_omni.toml environment file.
# Everything inside --framework-args is one double-quoted string (the trailing
# backslashes only join the lines); presumably it is forwarded verbatim to the
# serving framework -- confirm against `sml advanced --help`.
# $(whoami) is expanded by the shell when this script runs, so the served model
# name ends with the invoking user's login name.
# NOTE(review): --tensor-parallel-size 4 assumes the single node exposes at least 4 GPUs -- confirm.
2+
sml advanced \
3+
--slurm-nodes 1 \
4+
--slurm-time 6:00:00 \
5+
--serving-framework vllm \
6+
--slurm-environment src/swiss_ai_model_launch/assets/envs/vllm_qwen3_omni.toml \
7+
--framework-args "--model /capstor/store/cscs/swissai/infra01/hf_models/models/swiss-ai/Qwen/Qwen3-Omni-30B-A3B-Captioner \
8+
--served-model-name Qwen/Qwen3-Omni-30B-A3B-Captioner-$(whoami) \
9+
--tensor-parallel-size 4 \
10+
--host 0.0.0.0 \
11+
--port 8080 \
12+
--dtype bfloat16 --max-model-len 32768 --trust-remote-code"
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
# Container environment definition: image, bind mounts, working directory,
# environment variables and hook annotations.
# NOTE(review): presumably this is the vllm_qwen3_omni.toml file passed to
# --slurm-environment by the example launch script -- confirm the file name.
image = "/capstor/store/cscs/swissai/infra01/container-images/vllm-qwen-omni-13.0.sqsh"
2+
3+
# Entries of the form "A:B" name two paths; presumably host path A is mounted at
# container path B, and single-path entries are mounted at the same path -- confirm.
# The last four entries map individual shared libraries from /usr/lib64 to /usr/lib.
mounts = [
4+
"/capstor/store/cscs/swissai/infra01/ocf-share:/ocfbin",
5+
"/capstor",
6+
"/iopsstor",
7+
"/usr/lib64/libhwloc.so.15:/usr/lib/libhwloc.so.15",
8+
"/usr/lib64/libpciaccess.so.0:/usr/lib/libpciaccess.so.0",
9+
"/usr/lib64/libxml2.so.2:/usr/lib/libxml2.so.2",
10+
"/usr/lib64/libnuma.so.1:/usr/lib/libnuma.so.1",
11+
]
12+
13+
workdir = "/opt"
14+
15+
# Environment variables for NCCL (NCCL_*), libfabric/CXI (FI_*), the OFI NCCL
# plugin (OFI_NCCL_*) and vLLM (VLLM_*). Every value is written as a string,
# including numeric and flag-like ones; keep them quoted.
[env]
16+
# NCCL_DEBUG = "INFO" # uncomment for debugging
17+
# NCCL_DEBUG_SUBSYS = "INIT,NET" # uncomment for debugging
18+
NCCL_NET = "AWS Libfabric"
19+
NCCL_CROSS_NIC = "1"
20+
NCCL_NET_GDR_LEVEL = "PHB"
21+
NCCL_SOCKET_IFNAME = "hsn"
22+
# NOTE(review): the leading "^" presumably excludes the LL128 protocol rather
# than selecting it -- confirm against the NCCL environment-variable docs.
NCCL_PROTO = "^LL128"
23+
FI_CXI_COMPAT = "0"
24+
FI_MR_CACHE_MONITOR = "userfaultfd"
25+
FI_CXI_RX_MATCH_MODE = "software"
26+
FI_CXI_DEFAULT_CQ_SIZE = "131072"
27+
FI_CXI_DEFAULT_TX_SIZE = "32768"
28+
FI_CXI_DISABLE_HOST_REGISTER = "1"
29+
OFI_NCCL_DISABLE_DMABUF = "1"
30+
VLLM_ALLREDUCE_USE_SYMM_MEM = "0"
31+
32+
# Enables the aws_ofi_nccl hook (cuda13 variant) and the cxi hook. The flags are
# the string "true", not TOML booleans.
# NOTE(review): the "cuda13" variant presumably has to match the CUDA version
# inside the image -- confirm.
[annotations]
33+
com.hooks.aws_ofi_nccl.enabled = "true"
34+
com.hooks.aws_ofi_nccl.variant = "cuda13"
35+
com.hooks.cxi.enabled = "true"

0 commit comments

Comments
 (0)