Skip to content

Commit d515c3f

Browse files
committed
make max cuda graph bs smaller to accelerate ci
1 parent dd2e4be commit d515c3f

16 files changed

+41
-17
lines changed

tests/test_mimo_7B_mtp_only_grad.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -91,6 +91,7 @@ def execute():
9191
"--rollout-num-gpus-per-engine 2 "
9292
"--rollout-num-gpus 8 "
9393
"--sglang-mem-fraction-static 0.8 "
94+
"--sglang-cuda-graph-max-bs 8 "
9495
"--sglang-enable-metrics "
9596
"--sglang-speculative-algorithm EAGLE "
9697
"--sglang-speculative-num-steps 2 "

tests/test_moonlight_16B_A3B.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,7 @@ def execute():
3232
"--rm-type math "
3333
"--num-rollout 3 "
3434
"--rollout-batch-size 8 "
35-
"--n-samples-per-prompt 8 "
35+
"--n-samples-per-prompt 4 "
3636
"--rollout-max-response-len 4096 "
3737
"--rollout-temperature 1 "
3838
"--global-batch-size 32 "
@@ -80,7 +80,10 @@ def execute():
8080
)
8181

8282
sglang_args = (
83-
"--rollout-num-gpus-per-engine 2 " "--sglang-mem-fraction-static 0.8 " "--sglang-max-running-requests 512 "
83+
"--rollout-num-gpus-per-engine 2 "
84+
"--sglang-mem-fraction-static 0.8 "
85+
"--sglang-cuda-graph-max-bs 32 "
86+
"--sglang-max-running-requests 512 "
8487
)
8588

8689
ci_args = "--ci-test "

tests/test_moonlight_16B_A3B_r3.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,7 @@ def execute():
3232
"--rm-type math "
3333
"--num-rollout 3 "
3434
"--rollout-batch-size 8 "
35-
"--n-samples-per-prompt 8 "
35+
"--n-samples-per-prompt 4 "
3636
"--rollout-max-response-len 4096 "
3737
"--rollout-temperature 1 "
3838
"--global-batch-size 32 "
@@ -82,7 +82,10 @@ def execute():
8282
)
8383

8484
sglang_args = (
85-
"--rollout-num-gpus-per-engine 2 " "--sglang-mem-fraction-static 0.8 " "--sglang-max-running-requests 512 "
85+
"--rollout-num-gpus-per-engine 2 "
86+
"--sglang-mem-fraction-static 0.8 "
87+
"--sglang-cuda-graph-max-bs 32 "
88+
"--sglang-max-running-requests 512 "
8689
)
8790

8891
ci_args = "--ci-test "

tests/test_quick_start_glm4_9B.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,7 @@ def execute():
3030
"--rm-type deepscaler "
3131
"--num-rollout 3 "
3232
"--rollout-batch-size 8 "
33-
"--n-samples-per-prompt 8 "
33+
"--n-samples-per-prompt 4 "
3434
"--rollout-max-response-len 8192 "
3535
"--rollout-temperature 1 "
3636
"--global-batch-size 32 "
@@ -80,7 +80,7 @@ def execute():
8080
"--adam-beta2 0.98 "
8181
)
8282

83-
sglang_args = "--rollout-num-gpus-per-engine 2 " "--use-slime-router "
83+
sglang_args = "--rollout-num-gpus-per-engine 2 " "--sglang-cuda-graph-max-bs 32 " "--use-slime-router "
8484

8585
ci_args = "--ci-test "
8686

tests/test_qwen2.5_0.5B_debug_rollout_then_train.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -88,7 +88,9 @@ def execute_rollout_only(debug_data_dir: str):
8888
"""Phase 1: rollout-only, save data."""
8989

9090
sglang_args = (
91-
"--rollout-num-gpus-per-engine 1 " f"--sglang-mem-fraction-static {0.6 if TIGHT_DEVICE_MEMORY else 0.7} "
91+
"--rollout-num-gpus-per-engine 1 "
92+
f"--sglang-mem-fraction-static {0.6 if TIGHT_DEVICE_MEMORY else 0.7} "
93+
"--sglang-cuda-graph-max-bs 32 "
9294
)
9395

9496
phase1_args = (

tests/test_qwen2.5_0.5B_gsm8k_async_short.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -75,6 +75,7 @@ def execute():
7575
sglang_args = (
7676
"--rollout-num-gpus-per-engine 1 "
7777
f"--sglang-mem-fraction-static {0.55 if TIGHT_DEVICE_MEMORY else 0.65} "
78+
"--sglang-cuda-graph-max-bs 32 "
7879
"--sglang-enable-metrics "
7980
)
8081

tests/test_qwen2.5_0.5B_gsm8k_short.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -75,6 +75,7 @@ def execute():
7575
sglang_args = (
7676
"--rollout-num-gpus-per-engine 1 "
7777
f"--sglang-mem-fraction-static {0.6 if TIGHT_DEVICE_MEMORY else 0.7} "
78+
"--sglang-cuda-graph-max-bs 32 "
7879
"--sglang-enable-metrics "
7980
)
8081

tests/test_qwen2.5_0.5B_opd_sglang.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -157,6 +157,7 @@ def launch_teacher():
157157
sglang_args = (
158158
"--rollout-num-gpus-per-engine 1 "
159159
f"--sglang-mem-fraction-static {0.6 if TIGHT_DEVICE_MEMORY else 0.7} "
160+
"--sglang-cuda-graph-max-bs 32 "
160161
"--sglang-enable-metrics "
161162
)
162163

tests/test_qwen3_0.6B_megatron_fsdp_align.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,7 @@ def execute():
3535
"--rm-type deepscaler "
3636
"--num-rollout 1 "
3737
"--rollout-batch-size 8 "
38-
"--n-samples-per-prompt 8 "
38+
"--n-samples-per-prompt 4 "
3939
"--rollout-max-response-len 8192 "
4040
"--rollout-temperature 1 "
4141
"--global-batch-size 64 "
@@ -62,7 +62,10 @@ def execute():
6262
)
6363

6464
sglang_args = (
65-
"--rollout-num-gpus-per-engine 1 " "--sglang-chunked-prefill-size 4096 " "--sglang-mem-fraction-static 0.75 "
65+
"--rollout-num-gpus-per-engine 1 "
66+
"--sglang-chunked-prefill-size 4096 "
67+
"--sglang-mem-fraction-static 0.75 "
68+
"--sglang-cuda-graph-max-bs 32 "
6669
)
6770

6871
ci_args = "--ci-test "

tests/test_qwen3_0.6B_parallel_check.py

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -57,7 +57,12 @@ def execute():
5757
"--adam-beta2 0.98 "
5858
)
5959

60-
sglang_args = "--rollout-num-gpus-per-engine 2 " "--rollout-num-gpus 8 " "--sglang-mem-fraction-static 0.8 "
60+
sglang_args = (
61+
"--rollout-num-gpus-per-engine 2 "
62+
"--rollout-num-gpus 8 "
63+
"--sglang-mem-fraction-static 0.8 "
64+
"--sglang-cuda-graph-max-bs 32 "
65+
)
6166

6267
ci_args = "--ci-test "
6368

0 commit comments

Comments
 (0)