# taxonomy.yaml — Controlled vocabulary for vLLM recipes
# All hardware_profiles, tasks, and strategies referenced in recipes must be defined here.
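#
# Note: vram_gb is the node aggregate (per-chip memory × gpu_count), e.g.
# 8 × 80 GB = 640 for h100.
#
# Illustrative cross-reference (sketch only; `meta.hardware` is the field
# named in the TPU note below, while `task` and `strategy` are assumed field
# names, not normative): a recipe might select entries from this file like so:
#
#   meta:
#     hardware: [h200, b200]     # keys under hardware_profiles
#     task: text                 # an id from tasks
#     strategy: single_node_tp   # an entry from strategies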
hardware_profiles:
  # ── NVIDIA Hopper ──
  h100:
    brand: NVIDIA
    generation: hopper
    display_name: "H100"
    description: "NVIDIA H100 80 GB NVLink · 8-GPU node"
    gpu_count: 8
    vram_gb: 640
    multi_node: false
  h200:
    brand: NVIDIA
    generation: hopper
    display_name: "H200"
    description: "NVIDIA H200 SXM 141 GB HBM3e · 8-GPU HGX node"
    gpu_count: 8
    vram_gb: 1128
    multi_node: false
  # ── NVIDIA Blackwell ──
  # Specs follow the SemiAnalysis InferenceX GPU table (inferencex.semianalysis.com/gpu-specs).
  b200:
    brand: NVIDIA
    generation: blackwell
    display_name: "B200"
    description: "NVIDIA B200 SXM 180 GB HBM3e · 8-GPU HGX B200 node"
    gpu_count: 8
    vram_gb: 1440
    multi_node: false
  gb200:
    brand: NVIDIA
    generation: blackwell
    display_name: "GB200 NVL4"
    description: "NVIDIA GB200 Grace-Blackwell compute tray · 192 GB HBM3e/GPU · 4 Blackwell GPUs (NVL72 tray unit)"
    gpu_count: 4
    vram_gb: 768
    multi_node: false
  # ── NVIDIA Blackwell Ultra ──
  b300:
    brand: NVIDIA
    generation: blackwell
    display_name: "B300"
    description: "NVIDIA B300 SXM 268 GB HBM3e · 8-GPU HGX B300 node (Blackwell Ultra)"
    gpu_count: 8
    vram_gb: 2144
    multi_node: false
  gb300:
    brand: NVIDIA
    generation: blackwell
    display_name: "GB300 NVL4"
    description: "NVIDIA GB300 Grace-Blackwell Ultra compute tray · 288 GB HBM3e/GPU · 4 GPUs (NVL72 tray unit)"
    gpu_count: 4
    vram_gb: 1152
    multi_node: false
  # ── AMD Instinct ──
  mi300x:
    brand: AMD
    generation: amd
    display_name: "MI300X"
    description: "AMD Instinct MI300X 192 GB HBM3 · 8-GPU OAM node (CDNA 3)"
    gpu_count: 8
    vram_gb: 1536
    multi_node: false
  mi325x:
    brand: AMD
    generation: amd
    display_name: "MI325X"
    description: "AMD Instinct MI325X 256 GB HBM3e · 8-GPU OAM node (CDNA 3)"
    gpu_count: 8
    vram_gb: 2048
    multi_node: false
  mi355x:
    brand: AMD
    generation: amd
    display_name: "MI355X"
    description: "AMD Instinct MI355X 288 GB HBM3e · 8-GPU OAM node (CDNA 4)"
    gpu_count: 8
    vram_gb: 2304
    multi_node: false
  # ── Google Cloud TPU ──
  # `restricted: true` hides these from the hardware picker unless the recipe
  # explicitly lists them in `meta.hardware` — TPU support requires a separate
  # vLLM build and is only declared on the recipes that have been validated.
  trillium:
    brand: Google
    generation: tpu
    display_name: "TPU v6e (Trillium)"
    description: "Google Cloud TPU v6e Trillium · 32 GB HBM/chip · served via vLLM TPU"
    gpu_count: 8
    vram_gb: 256
    multi_node: false
    restricted: true
  ironwood:
    brand: Google
    generation: tpu
    display_name: "TPU v7 (Ironwood)"
    description: "Google Cloud TPU v7 Ironwood · 192 GB HBM/chip · served via vLLM TPU"
    gpu_count: 1
    vram_gb: 192
    multi_node: false
    restricted: true
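  # Illustrative opt-in for a restricted profile (sketch only): per the note
  # above, a recipe validated on Trillium must list the profile explicitly in
  # its front matter for it to appear in the hardware picker:
  #
  #   meta:
  #     hardware: [trillium]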
tasks:
  - id: text
    display_name: "Text"
    description: "Text-only models: chat, reasoning, code generation, tool calling"
  - id: multimodal
    display_name: "Multimodal"
    description: "Vision, audio, or other modalities beyond text"
  - id: omni
    display_name: "Omni"
    description: "Any-to-any models: text + vision + audio + generation"
  - id: embedding
    display_name: "Embedding"
    description: "Embedding, reranker, classifier, scoring models"
strategies:
  - single_node_tp
  - single_node_tep
  - single_node_dep
  - multi_node_tp
  - multi_node_tep
  - multi_node_dep
  - multi_node_tp_pp
  - pd_cluster
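
# Strategy-name legend (informal; these abbreviations follow common vLLM
# deployment terminology and are not defined in this file): tp = tensor
# parallel, pp = pipeline parallel, tep = tensor + expert parallel,
# dep = data + expert parallel, pd_cluster = disaggregated prefill/decode
# cluster. As a rough illustration, single_node_tp on an 8-GPU profile
# typically corresponds to something like:
#   vllm serve <model> --tensor-parallel-size 8
# and multi_node_tp_pp across two nodes to something like:
#   vllm serve <model> --tensor-parallel-size 8 --pipeline-parallel-size 2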