Skip to content

Commit 6358178

Browse files
authored
Merge branch 'feature/ci' into feat/mlflow-tracking
2 parents e5e7059 + a982f76 commit 6358178

File tree

3 files changed

+11
-11
lines changed

3 files changed

+11
-11
lines changed

.github/workflows/pr-test.yml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -95,7 +95,7 @@ jobs:
9595
strategy:
9696
fail-fast: false
9797
matrix:
98-
info: [{"num_gpus": 4, "test_file": "test_qwen3_4B_fsdp_true_on_policy.py --colocated"}, {"num_gpus": 8, "test_file": "test_qwen3_vl_4B_fsdp.py"}, {"num_gpus": 2, "test_file": "test_qwen3_0.6B_fsdp_distributed.py"}, {"num_gpus": 4, "test_file": "test_qwen3_0.6B_megatron_fsdp_align.py"}]
98+
info: [{"num_gpus": 4, "test_file": "test_qwen3_4B_fsdp_true_on_policy.py --colocated"}, {"num_gpus": 8, "test_file": "test_qwen3_vl_4B_fsdp_true_on_policy.py"}, {"num_gpus": 2, "test_file": "test_qwen3_0.6B_fsdp_distributed.py"}, {"num_gpus": 4, "test_file": "test_qwen3_0.6B_megatron_fsdp_align.py"}]
9999
defaults:
100100
run:
101101
working-directory: ${{ github.workspace }}
@@ -330,7 +330,7 @@ jobs:
330330
strategy:
331331
fail-fast: false
332332
matrix:
333-
info: [{"num_gpus": 4, "test_file": "test_qwen2.5_0.5B_gsm8k_async_short.py"}, {"num_gpus": 4, "test_file": "test_qwen2.5_0.5B_gsm8k_short.py"}, {"num_gpus": 2, "test_file": "test_qwen3_0.6B_fsdp_colocated_2xGPU.py"}, {"num_gpus": 2, "test_file": "test_qwen3_4B_fsdp_true_on_policy.py"}, {"num_gpus": 8, "test_file": "test_qwen3_vl_4B_fsdp.py"}, {"num_gpus": 2, "test_file": "test_qwen3_0.6B_fsdp_distributed.py"}, {"num_gpus": 8, "test_file": "test_quick_start_glm4_9B.py"}, {"num_gpus": 8, "test_file": "test_qwen3_30B_A3B.py"}, {"num_gpus": 8, "test_file": "test_qwen3_4B_ppo.py"}, {"num_gpus": 8, "test_file": "test_moonlight_16B_A3B.py"}, {"num_gpus": 8, "test_file": "test_mimo_7B_mtp_only_grad.py"}, {"num_gpus": 8, "test_file": "test_qwen3_0.6B_parallel_check.py"}, {"num_gpus": 4, "test_file": "test_qwen3_0.6B_megatron_fsdp_align.py"}, {"num_gpus": 8, "test_file": "test_qwen3_4B_ckpt.py"}, {"num_gpus": 8, "test_file": "test_qwen3_4B_ckpt.py --async-save"}, {"num_gpus": 2, "test_file": "test_qwen2.5_0.5B_gsm8k.py"}, {"num_gpus": 2, "test_file": "test_qwen2.5_0.5B_gsm8k_async.py"}]
333+
info: [{"num_gpus": 4, "test_file": "test_qwen2.5_0.5B_gsm8k_async_short.py"}, {"num_gpus": 4, "test_file": "test_qwen2.5_0.5B_gsm8k_short.py"}, {"num_gpus": 2, "test_file": "test_qwen3_0.6B_fsdp_colocated_2xGPU.py"}, {"num_gpus": 2, "test_file": "test_qwen3_4B_fsdp_true_on_policy.py"}, {"num_gpus": 8, "test_file": "test_qwen3_vl_4B_fsdp_true_on_policy.py"}, {"num_gpus": 2, "test_file": "test_qwen3_0.6B_fsdp_distributed.py"}, {"num_gpus": 8, "test_file": "test_quick_start_glm4_9B.py"}, {"num_gpus": 8, "test_file": "test_qwen3_30B_A3B.py"}, {"num_gpus": 8, "test_file": "test_qwen3_4B_ppo.py"}, {"num_gpus": 8, "test_file": "test_moonlight_16B_A3B.py"}, {"num_gpus": 8, "test_file": "test_mimo_7B_mtp_only_grad.py"}, {"num_gpus": 8, "test_file": "test_qwen3_0.6B_parallel_check.py"}, {"num_gpus": 4, "test_file": "test_qwen3_0.6B_megatron_fsdp_align.py"}, {"num_gpus": 8, "test_file": "test_qwen3_4B_ckpt.py"}, {"num_gpus": 8, "test_file": "test_qwen3_4B_ckpt.py --async-save"}, {"num_gpus": 2, "test_file": "test_qwen2.5_0.5B_gsm8k.py"}, {"num_gpus": 2, "test_file": "test_qwen2.5_0.5B_gsm8k_async.py"}]
334334
defaults:
335335
run:
336336
working-directory: ${{ github.workspace }}

.github/workflows/pr-test.yml.j2

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
'label': 'run-ci-fsdp',
1111
'tests': [
1212
{'test_file': 'test_qwen3_4B_fsdp_true_on_policy.py --colocated', 'num_gpus': 4},
13-
{'test_file': 'test_qwen3_vl_4B_fsdp.py', 'num_gpus': 8},
13+
{'test_file': 'test_qwen3_vl_4B_fsdp_true_on_policy.py', 'num_gpus': 8},
1414
{'test_file': 'test_qwen3_0.6B_fsdp_distributed.py', 'num_gpus': 2},
1515
{'test_file': 'test_qwen3_0.6B_megatron_fsdp_align.py', 'num_gpus': 4},
1616
],
@@ -59,7 +59,7 @@
5959
{'test_file': 'test_qwen2.5_0.5B_gsm8k_short.py', 'num_gpus': 4},
6060
{'test_file': 'test_qwen3_0.6B_fsdp_colocated_2xGPU.py', 'num_gpus': 2},
6161
{'test_file': 'test_qwen3_4B_fsdp_true_on_policy.py', 'num_gpus': 2},
62-
{'test_file': 'test_qwen3_vl_4B_fsdp.py', 'num_gpus': 8},
62+
{'test_file': 'test_qwen3_vl_4B_fsdp_true_on_policy.py', 'num_gpus': 8},
6363
{'test_file': 'test_qwen3_0.6B_fsdp_distributed.py', 'num_gpus': 2},
6464
{'test_file': 'test_quick_start_glm4_9B.py', 'num_gpus': 8},
6565
{'test_file': 'test_qwen3_30B_A3B.py', 'num_gpus': 8},

tests/test_qwen3_vl_4B_fsdp.py renamed to tests/test_qwen3_vl_4B_fsdp_true_on_policy.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -68,13 +68,13 @@ def execute():
6868
"--sglang-mem-fraction-static 0.6 "
6969
"--sglang-decode-log-interval 1000 "
7070
"--sglang-enable-metrics "
71-
# "--sglang-enable-deterministic-inference "
72-
# "--sglang-rl-on-policy-target fsdp "
71+
"--sglang-enable-deterministic-inference "
72+
"--sglang-rl-on-policy-target fsdp "
7373
"--sglang-attention-backend fa3 "
7474
"--attn-implementation flash_attention_3 "
7575
"--sglang-cuda-graph-bs 1 2 4 8 16 24 32 40 48 56 64 "
76-
# "--deterministic-mode "
77-
# "--true-on-policy-mode "
76+
"--deterministic-mode "
77+
"--true-on-policy-mode "
7878
)
7979

8080
ci_args = "--ci-test "
@@ -96,9 +96,9 @@ def execute():
9696
)
9797

9898
extra_env_vars = {
99-
# "NCCL_ALGO": "allreduce:tree",
100-
# "NVTE_ALLOW_NONDETERMINISTIC_ALGO": "0",
101-
# "CUBLAS_WORKSPACE_CONFIG": ":4096:8",
99+
"NCCL_ALGO": "allreduce:tree",
100+
"NVTE_ALLOW_NONDETERMINISTIC_ALGO": "0",
101+
"CUBLAS_WORKSPACE_CONFIG": ":4096:8",
102102
"CUDA_DEVICE_MAX_CONNECTIONS": "1",
103103
}
104104

0 commit comments

Comments
 (0)