Skip to content

Commit c2fe8b0

Browse files
Authored by qiaoxj07 (Xianjie) — inferred from the Signed-off-by line below
[https://nvbugs/5405041][fix] Update wide-ep doc (NVIDIA#6933)
Signed-off-by: Xianjie <5410381+qiaoxj07@users.noreply.github.com>
1 parent 1c1d5d2 commit c2fe8b0

File tree

1 file changed

+7
-5
lines changed

1 file changed

+7
-5
lines changed

examples/wide_ep/slurm_scripts/submit.sh

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ model_dir=<model_dir> # Path to the model checkpoint
1212
repo_dir=<repo_dir> # Path to the repo to install TensorRT-LLM, if this is empty, the pre-installed version will be used
1313

1414
mtp_size=0
15-
ntasks_per_node=4 # 4 GPUs per GB200 node
15+
ntasks_per_node=4 # 4 GPUs per GB200 node, 8 GPUs per B200 node
1616

1717
isl=1024
1818
osl=1024
@@ -23,8 +23,9 @@ streaming=true
2323
for b in 1 64 1024; do
2424
for eplb_num_slots in 0 256 288; do
2525
concurrency=$((b * 16))
26-
ctx_num=$(((concurrency + 5499)/5500))
27-
total_node_num=$((ctx_num + 4))
26+
ctx_node_num=$(((concurrency + 5499)/5500)) # $(((concurrency + 10999)/11000)) for B200
27+
ctx_num=${ctx_node_num} # $((ctx_node_num * 2)) for B200
28+
total_node_num=$((ctx_node_num + 4)) # $((ctx_node_num + 2)) for B200
2829
ntasks=$((total_node_num * ntasks_per_node))
2930

3031
args=(
@@ -58,8 +59,9 @@ done
5859
# dep32 eplb288
5960
for b in 512; do
6061
concurrency=$((b * 32))
61-
ctx_num=$(((concurrency + 5499)/5500))
62-
total_node_num=$((ctx_num + 8))
62+
ctx_node_num=$(((concurrency + 5499)/5500)) # $(((concurrency + 10999)/11000)) for B200
63+
ctx_num=${ctx_node_num} # $((ctx_node_num * 2)) for B200
64+
total_node_num=$((ctx_node_num + 8)) # $((ctx_node_num + 4)) for B200
6365
ntasks=$((total_node_num * ntasks_per_node))
6466
eplb_num_slots=288
6567

0 commit comments

Comments (0)