Skip to content

Commit 221df76

Browse files
committed
MiniMax-M2.5 B200: add EP, FP8 KV cache, disable radix cache
Based on the validated benchmark configs in SemiAnalysisAI/InferenceX#1010, tp:4/ep:4 and tp:2/ep:2 are now confirmed for B200. This commit also enables 2-GPU selection for B200 and adds --kv-cache-dtype fp8_e4m3 and --disable-radix-cache as B200-specific flags, per the benchmark script.

Signed-off-by: Faradawn Yang <73060648+faradawn@users.noreply.github.com>
1 parent 5a8713a commit 221df76

File tree

1 file changed

+18
-11
lines changed
  • src/components/autoregressive/MiniMaxM25ConfigGenerator

1 file changed

+18
-11
lines changed

src/components/autoregressive/MiniMaxM25ConfigGenerator/index.js

Lines changed: 18 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -28,14 +28,15 @@ const MiniMaxM25ConfigGenerator = () => {
2828
title: 'GPU Count',
2929
getDynamicItems: (values) => {
3030
const isAMD = values.hardware === 'mi300x' || values.hardware === 'mi325x' || values.hardware === 'mi355x';
31+
const isB200 = values.hardware === 'b200';
3132

32-
// Show 2 GPU option for all hardware, but only enabled for AMD GPUs
33+
// Show 2 GPU option for all hardware, but only enabled for AMD GPUs and B200
3334
return [
3435
{
3536
id: '2gpu',
3637
label: '2',
3738
default: isAMD, // Default for all AMD GPUs
38-
disabled: !isAMD // Only enabled for AMD GPUs
39+
disabled: !isAMD && !isB200 // Only enabled for AMD GPUs and B200
3940
},
4041
{
4142
id: '4gpu',
@@ -75,10 +76,12 @@ const MiniMaxM25ConfigGenerator = () => {
7576
generateCommand: function (values) {
7677
const { hardware, gpuCount, thinking, toolcall } = values;
7778

78-
// Validate 2-GPU configuration (only AMD supports 2 GPUs)
7979
const isAMD = hardware === 'mi300x' || hardware === 'mi325x' || hardware === 'mi355x';
80-
if (gpuCount === '2gpu' && !isAMD) {
81-
return '# Please select compatible hardware\n# 2-GPU requires AMD MI300X/MI325X/MI355X';
80+
const isB200 = hardware === 'b200';
81+
82+
// Validate 2-GPU configuration (only AMD and B200 support 2 GPUs)
83+
if (gpuCount === '2gpu' && !isAMD && !isB200) {
84+
return '# Please select compatible hardware\n# 2-GPU requires AMD MI300X/MI325X/MI355X or B200';
8285
}
8386

8487
const modelName = `${this.modelFamily}/MiniMax-M2.5`;
@@ -88,21 +91,19 @@ const MiniMaxM25ConfigGenerator = () => {
8891
cmd += ` --model-path ${modelName}`;
8992

9093
// TP and EP size based on GPU count
91-
// NVIDIA: EP only for 8-GPU configuration
92-
// AMD: EP=TP for all configurations
94+
// NVIDIA (non-B200): EP only for 8-GPU configuration
95+
// B200 and AMD: EP=TP for all configurations
9396
if (gpuCount === '8gpu') {
9497
cmd += ` \\\n --tp 8`;
9598
cmd += ` \\\n --ep 8`;
9699
} else if (gpuCount === '4gpu') {
97100
cmd += ` \\\n --tp 4`;
98-
// Only add EP for AMD GPUs
99-
if (isAMD) {
101+
if (isAMD || isB200) {
100102
cmd += ` \\\n --ep 4`;
101103
}
102104
} else if (gpuCount === '2gpu') {
103105
cmd += ` \\\n --tp 2`;
104-
// Only add EP for AMD GPUs (MI355X only supports 2 GPU)
105-
if (isAMD) {
106+
if (isAMD || isB200) {
106107
cmd += ` \\\n --ep 2`;
107108
}
108109
}
@@ -126,6 +127,12 @@ const MiniMaxM25ConfigGenerator = () => {
126127
cmd += ` \\\n --attention-backend triton`;
127128
}
128129

130+
// Add B200-specific configurations (FP8 KV cache, disable radix cache)
131+
if (isB200) {
132+
cmd += ` \\\n --kv-cache-dtype fp8_e4m3`;
133+
cmd += ` \\\n --disable-radix-cache`;
134+
}
135+
129136
return cmd;
130137
}
131138
};

0 commit comments

Comments
 (0)