Skip to content

Commit 60626d4

Browse files
committed
Add 1k1k and adjust other configs concurrency
1 parent 4a13ebb commit 60626d4

3 files changed

Lines changed: 8 additions & 109 deletions

File tree

recipes/vllm/kimi-k2.5/1k1k/disagg-gb200-2p1d-dep4-dep16.yaml renamed to recipes/vllm/kimi-k2.5/1k1k/disagg-gb200-1p1d-dep4-dep16.yaml

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
name: "kimi-vllm-disagg-gb200-2p1d-dep4-dep16"
1+
name: "kimi-vllm-disagg-gb200-1p1d-dep4-dep16"
2 2

3 3
model:
4 4
path: "kimi-k2.5-nvfp4"
@@ -14,9 +14,9 @@ setup_script: vllm-container-deps.sh
14 14
resources:
15 15
gpu_type: "gb200"
16 16
gpus_per_node: 4
17-
prefill_nodes: 2
17+
prefill_nodes: 1
18 18
decode_nodes: 4
19-
prefill_workers: 2
19+
prefill_workers: 1
20 20
decode_workers: 1
21 21
gpus_per_prefill: 4
22 22
gpus_per_decode: 16
@@ -54,7 +54,7 @@ backend:
54 54
data-parallel-rpc-port: 13345
55 55
enable-expert-parallel: true
56 56
max-model-len: 3072
57-
max-num-seqs: 1024
57+
max-num-seqs: 4096
58 58
enforce-eager: true
59 59
compilation-config: '{"custom_ops":["+quant_fp8","+rms_norm","+rotary_embedding"],"pass_config":{"fuse_attn_quant":true,"fuse_allreduce_rms":true}}'
60 60
max-num-batched-tokens: 16384
@@ -78,7 +78,7 @@ backend:
78 78
data-parallel-rpc-port: 13345
79 79
enable-expert-parallel: true
80 80
max-model-len: 3072
81-
max-num-seqs: 256
81+
max-num-seqs: 4096
82 82
max-num-batched-tokens: 10240
83 83
safetensors-load-strategy: "prefetch"
84 84
trust-remote-code: true
@@ -91,11 +91,11 @@ backend:
91 91
compilation-config: '{"cudagraph_mode":"FULL_DECODE_ONLY","custom_ops":["+quant_fp8","+rms_norm","+rotary_embedding"],"pass_config":{"fuse_attn_quant":true,"fuse_allreduce_rms":true}}'
92 92
gpu-memory-utilization: 0.9
93 93
stream-interval: 50
94-
max-cudagraph-capture-size: 256
94+
max-cudagraph-capture-size: 512
95 95

96 96
benchmark:
97 97
type: "sa-bench"
98 98
isl: 1024
99 99
osl: 1024
100-
concurrencies: "512x1024x2048"
100+
concurrencies: "256x512x1024x2048x3072x4096"
101 101
req_rate: "inf"

recipes/vllm/kimi-k2.5/1k1k/disagg-gb200-1p4d-dep4-tep4.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -94,5 +94,5 @@ benchmark:
94 94
type: "sa-bench"
95 95
isl: 1024
96 96
osl: 1024
97-
concurrencies: "4x8x16x32x64x128x256"
97+
concurrencies: "4x8x16x32x64x128"
98 98
req_rate: "inf"

recipes/vllm/kimi-k2.5/1k1k/disagg-gb200-3p1d-dep4-dep16.yaml

Lines changed: 0 additions & 101 deletions
This file was deleted.

0 commit comments

Comments (0)