-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathsglang_glm.toml
More file actions
36 lines (32 loc) · 1.05 KB
/
sglang_glm.toml
File metadata and controls
36 lines (32 loc) · 1.05 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# Container image to start from (a .sqsh file on the /capstor filesystem).
image = "/capstor/store/cscs/swissai/infra01/container-images/sglang_glm5_cuda13.sqsh"

# Host paths exposed inside the container.
# Entries written as "host_path:container_path" are remapped; bare paths
# presumably appear at the same location in the container (confirm against
# the container engine documentation).
mounts = [
    # Shared tooling directory, visible in the container as /ocfbin.
    "/capstor/store/cscs/swissai/infra01/ocf-share:/ocfbin",
    # Storage filesystems.
    "/capstor",
    "/iopsstor",
    # Host shared libraries from /usr/lib64, placed under /usr/lib in the
    # container. NOTE(review): presumably required by the fabric hooks enabled
    # in [annotations]; confirm before removing any of them.
    "/usr/lib64/libhwloc.so.15:/usr/lib/libhwloc.so.15",
    "/usr/lib64/libpciaccess.so.0:/usr/lib/libpciaccess.so.0",
    "/usr/lib64/libxml2.so.2:/usr/lib/libxml2.so.2",
    "/usr/lib64/libnuma.so.1:/usr/lib/libnuma.so.1",
]

# Initial working directory inside the container.
workdir = "/opt"
# Environment variables for the container: NCCL settings, libfabric/CXI
# provider settings (FI_* / OFI_*), and framework toggles. All values are
# strings because they are exported as environment variables.
[env]
# NOTE(review): both NCCL_DEBUG lines below are ACTIVE, so NCCL debug logging
# is currently enabled. Comment them out if verbose logs are not wanted.
NCCL_DEBUG = "INFO" # active: comment out to disable NCCL debug logging
NCCL_DEBUG_SUBSYS = "INIT,NET" # active: limits debug output to the INIT and NET subsystems
# Name of the NCCL network plugin to use; pairs with the aws_ofi_nccl hook
# enabled in [annotations].
NCCL_NET = "AWS Libfabric"
NCCL_CROSS_NIC = "1"
NCCL_NET_GDR_LEVEL = "PHB"
# Interface name prefix for NCCL socket traffic; presumably the high-speed
# network interfaces on the host (hsn0, hsn1, ...) - confirm on the target nodes.
NCCL_SOCKET_IFNAME = "hsn"
# A leading "^" excludes the listed protocol, so LL128 is disabled here.
NCCL_PROTO = "^LL128"
# libfabric CXI provider tuning. These values are site-specific; see the
# fi_cxi man page before changing them.
FI_CXI_COMPAT = "0"
FI_MR_CACHE_MONITOR = "userfaultfd"
FI_CXI_RX_MATCH_MODE = "software"
FI_CXI_DEFAULT_CQ_SIZE = "131072"
FI_CXI_DEFAULT_TX_SIZE = "32768"
FI_CXI_DISABLE_HOST_REGISTER = "1"
OFI_NCCL_DISABLE_DMABUF = "1"
# Framework toggles. "0" presumably turns the feature off - confirm against
# the framework's own documentation.
SGL_ENABLE_JIT_DEEPGEMM = "0"
# NOTE(review): this is a VLLM_-prefixed variable in an SGLang configuration;
# verify that something in this image actually reads it.
VLLM_ALLREDUCE_USE_SYMM_MEM = "0"
# Annotations that switch on container runtime hooks. Values are the strings
# "true"/"cuda13", not TOML booleans; keep them quoted unless the consumer is
# confirmed to accept native types.
[annotations]
# aws_ofi_nccl hook. The variant presumably has to match the CUDA version of
# the image (the image file name ends in "cuda13") - confirm when changing either.
com.hooks.aws_ofi_nccl.enabled = "true"
com.hooks.aws_ofi_nccl.variant = "cuda13"
# CXI hook, enabled alongside the FI_CXI_* settings in [env].
com.hooks.cxi.enabled = "true"