# taxonomy.yaml — Controlled vocabulary for vLLM recipes
# All hardware_profiles, tasks, and strategies referenced in recipes must be defined here.
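#
# Note: vram_gb is the node aggregate (per-chip memory × gpu_count), e.g.
# 8 × 80 GB = 640 for h100.
#
# Illustrative cross-reference (sketch only; `meta.hardware` is the field
# named in the TPU note below, while `task` and `strategy` are assumed field
# names, not normative): a recipe might select entries from this file like so:
#
#   meta:
#     hardware: [h200, b200]     # keys under hardware_profiles
#     task: text                 # an id from tasks
#     strategy: single_node_tp   # an entry from strategies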
hardware_profiles:
  # ── NVIDIA Hopper ──
  h100:
    brand: NVIDIA
    generation: hopper
    display_name: "H100"
    description: "NVIDIA H100 80 GB NVLink · 8-GPU node"
    gpu_count: 8
    vram_gb: 640
    multi_node: false
  h200:
    brand: NVIDIA
    generation: hopper
    display_name: "H200"
    description: "NVIDIA H200 SXM 141 GB HBM3e · 8-GPU HGX node"
    gpu_count: 8
    vram_gb: 1128
    multi_node: false
  # ── NVIDIA Blackwell ──
  # Specs follow the SemiAnalysis InferenceX GPU table (inferencex.semianalysis.com/gpu-specs).
  b200:
    brand: NVIDIA
    generation: blackwell
    display_name: "B200"
    description: "NVIDIA B200 SXM 180 GB HBM3e · 8-GPU HGX B200 node"
    gpu_count: 8
    vram_gb: 1440
    multi_node: false
  gb200:
    brand: NVIDIA
    generation: blackwell
    display_name: "GB200 NVL4"
    description: "NVIDIA GB200 Grace-Blackwell compute tray · 192 GB HBM3e/GPU · 4 Blackwell GPUs (NVL72 tray unit)"
    gpu_count: 4
    vram_gb: 768
    multi_node: false
  # ── NVIDIA Blackwell Ultra ──
  b300:
    brand: NVIDIA
    generation: blackwell
    display_name: "B300"
    description: "NVIDIA B300 SXM 268 GB HBM3e · 8-GPU HGX B300 node (Blackwell Ultra)"
    gpu_count: 8
    vram_gb: 2144
    multi_node: false
  gb300:
    brand: NVIDIA
    generation: blackwell
    display_name: "GB300 NVL4"
    description: "NVIDIA GB300 Grace-Blackwell Ultra compute tray · 288 GB HBM3e/GPU · 4 GPUs (NVL72 tray unit)"
    gpu_count: 4
    vram_gb: 1152
    multi_node: false
  # ── AMD Instinct ──
  mi300x:
    brand: AMD
    generation: amd
    display_name: "MI300X"
    description: "AMD Instinct MI300X 192 GB HBM3 · 8-GPU OAM node (CDNA 3)"
    gpu_count: 8
    vram_gb: 1536
    multi_node: false
  mi325x:
    brand: AMD
    generation: amd
    display_name: "MI325X"
    description: "AMD Instinct MI325X 256 GB HBM3e · 8-GPU OAM node (CDNA 3)"
    gpu_count: 8
    vram_gb: 2048
    multi_node: false
  mi355x:
    brand: AMD
    generation: amd
    display_name: "MI355X"
    description: "AMD Instinct MI355X 288 GB HBM3e · 8-GPU OAM node (CDNA 4)"
    gpu_count: 8
    vram_gb: 2304
    multi_node: false
  # ── Google Cloud TPU ──
  # `restricted: true` hides these from the hardware picker unless the recipe
  # explicitly lists them in `meta.hardware` — TPU support requires a separate
  # vLLM build and is only declared on the recipes that have been validated.
  trillium:
    brand: Google
    generation: tpu
    display_name: "TPU v6e (Trillium)"
    description: "Google Cloud TPU v6e Trillium · 32 GB HBM/chip · served via vLLM TPU"
    gpu_count: 8
    vram_gb: 256
    multi_node: false
    restricted: true
  ironwood:
    brand: Google
    generation: tpu
    display_name: "TPU v7 (Ironwood)"
    description: "Google Cloud TPU v7 Ironwood · 192 GB HBM/chip · served via vLLM TPU"
    gpu_count: 1
    vram_gb: 192
    multi_node: false
    restricted: true
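  # Illustrative opt-in for a restricted profile (sketch only): per the note
  # above, a recipe validated on Trillium must list the profile explicitly in
  # its front matter for it to appear in the hardware picker:
  #
  #   meta:
  #     hardware: [trillium]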
tasks:
  - id: text
    display_name: "Text"
    description: "Text-only models: chat, reasoning, code generation, tool calling"
  - id: multimodal
    display_name: "Multimodal"
    description: "Vision, audio, or other modalities beyond text"
  - id: omni
    display_name: "Omni"
    description: "Any-to-any models: text + vision + audio + generation"
  - id: embedding
    display_name: "Embedding"
    description: "Embedding, reranker, classifier, scoring models"
strategies:
  - single_node_tp
  - single_node_tep
  - single_node_dep
  - multi_node_tp
  - multi_node_tep
  - multi_node_dep
  - multi_node_tp_pp
  - pd_cluster
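
# Strategy-name legend (informal; these abbreviations follow common vLLM
# deployment terminology and are not defined in this file): tp = tensor
# parallel, pp = pipeline parallel, tep = tensor + expert parallel,
# dep = data + expert parallel, pd_cluster = disaggregated prefill/decode
# cluster. As a rough illustration, single_node_tp on an 8-GPU profile
# typically corresponds to something like:
#   vllm serve <model> --tensor-parallel-size 8
# and multi_node_tp_pp across two nodes to something like:
#   vllm serve <model> --tensor-parallel-size 8 --pipeline-parallel-size 2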