File tree Expand file tree Collapse file tree 1 file changed +7
-5
lines changed
examples/wide_ep/slurm_scripts Expand file tree Collapse file tree 1 file changed +7
-5
lines changed Original file line number Diff line number Diff line change @@ -12,7 +12,7 @@ model_dir=<model_dir> # Path to the model checkpoint
1212repo_dir=<repo_dir> # Path to the repo to install TensorRT-LLM, if this is empty, the pre-installed version will be used
1313
1414mtp_size=0
15- ntasks_per_node=4 # 4 GPUs per GB200 node
15+ ntasks_per_node=4 # 4 GPUs per GB200 node, 8 GPUs per B200 node
1616
1717isl=1024
1818osl=1024
@@ -23,8 +23,9 @@ streaming=true
2323for b in 1 64 1024; do
2424 for eplb_num_slots in 0 256 288; do
2525 concurrency=$(( b * 16 ))
26- ctx_num=$(( (concurrency + 5499 )/ 5500 ))
27- total_node_num=$(( ctx_num + 4 ))
26+ ctx_node_num=$(( (concurrency + 5499 )/ 5500 )) # $(((concurrency + 10999)/11000)) for B200
27+ ctx_num=${ctx_node_num} # $((ctx_node_num * 2)) for B200
28+ total_node_num=$(( ctx_node_num + 4 )) # $((ctx_node_num + 2)) for B200
2829 ntasks=$(( total_node_num * ntasks_per_node))
2930
3031 args=(
5859# dep32 eplb288
5960for b in 512; do
6061 concurrency=$(( b * 32 ))
61- ctx_num=$(( (concurrency + 5499 )/ 5500 ))
62- total_node_num=$(( ctx_num + 8 ))
62+ ctx_node_num=$(( (concurrency + 5499 )/ 5500 )) # $(((concurrency + 10999)/11000)) for B200
63+ ctx_num=${ctx_node_num} # $((ctx_node_num * 2)) for B200
64+ total_node_num=$(( ctx_node_num + 8 )) # $((ctx_node_num + 4)) for B200
6365 ntasks=$(( total_node_num * ntasks_per_node))
6466 eplb_num_slots=288
6567
You can’t perform that action at this time.
0 commit comments