Merge branch 'feature/ci' into feat/mlflow-tracking

mouad-hpc · web-flow · commit 635817812003 · 2026-02-16T17:28:25.000-08:00
diff --git a/.github/workflows/pr-test.yml b/.github/workflows/pr-test.yml
@@ -95,7 +95,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        info: [{"num_gpus": 4, "test_file": "test_qwen3_4B_fsdp_true_on_policy.py --colocated"}, {"num_gpus": 8, "test_file": "test_qwen3_vl_4B_fsdp.py"}, {"num_gpus": 2, "test_file": "test_qwen3_0.6B_fsdp_distributed.py"}, {"num_gpus": 4, "test_file": "test_qwen3_0.6B_megatron_fsdp_align.py"}]
+        info: [{"num_gpus": 4, "test_file": "test_qwen3_4B_fsdp_true_on_policy.py --colocated"}, {"num_gpus": 8, "test_file": "test_qwen3_vl_4B_fsdp_true_on_policy.py"}, {"num_gpus": 2, "test_file": "test_qwen3_0.6B_fsdp_distributed.py"}, {"num_gpus": 4, "test_file": "test_qwen3_0.6B_megatron_fsdp_align.py"}]
     defaults:
       run:
         working-directory: ${{ github.workspace }}
@@ -330,7 +330,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        info: [{"num_gpus": 4, "test_file": "test_qwen2.5_0.5B_gsm8k_async_short.py"}, {"num_gpus": 4, "test_file": "test_qwen2.5_0.5B_gsm8k_short.py"}, {"num_gpus": 2, "test_file": "test_qwen3_0.6B_fsdp_colocated_2xGPU.py"}, {"num_gpus": 2, "test_file": "test_qwen3_4B_fsdp_true_on_policy.py"}, {"num_gpus": 8, "test_file": "test_qwen3_vl_4B_fsdp.py"}, {"num_gpus": 2, "test_file": "test_qwen3_0.6B_fsdp_distributed.py"}, {"num_gpus": 8, "test_file": "test_quick_start_glm4_9B.py"}, {"num_gpus": 8, "test_file": "test_qwen3_30B_A3B.py"}, {"num_gpus": 8, "test_file": "test_qwen3_4B_ppo.py"}, {"num_gpus": 8, "test_file": "test_moonlight_16B_A3B.py"}, {"num_gpus": 8, "test_file": "test_mimo_7B_mtp_only_grad.py"}, {"num_gpus": 8, "test_file": "test_qwen3_0.6B_parallel_check.py"}, {"num_gpus": 4, "test_file": "test_qwen3_0.6B_megatron_fsdp_align.py"}, {"num_gpus": 8, "test_file": "test_qwen3_4B_ckpt.py"}, {"num_gpus": 8, "test_file": "test_qwen3_4B_ckpt.py --async-save"}, {"num_gpus": 2, "test_file": "test_qwen2.5_0.5B_gsm8k.py"}, {"num_gpus": 2, "test_file": "test_qwen2.5_0.5B_gsm8k_async.py"}]
+        info: [{"num_gpus": 4, "test_file": "test_qwen2.5_0.5B_gsm8k_async_short.py"}, {"num_gpus": 4, "test_file": "test_qwen2.5_0.5B_gsm8k_short.py"}, {"num_gpus": 2, "test_file": "test_qwen3_0.6B_fsdp_colocated_2xGPU.py"}, {"num_gpus": 2, "test_file": "test_qwen3_4B_fsdp_true_on_policy.py"}, {"num_gpus": 8, "test_file": "test_qwen3_vl_4B_fsdp_true_on_policy.py"}, {"num_gpus": 2, "test_file": "test_qwen3_0.6B_fsdp_distributed.py"}, {"num_gpus": 8, "test_file": "test_quick_start_glm4_9B.py"}, {"num_gpus": 8, "test_file": "test_qwen3_30B_A3B.py"}, {"num_gpus": 8, "test_file": "test_qwen3_4B_ppo.py"}, {"num_gpus": 8, "test_file": "test_moonlight_16B_A3B.py"}, {"num_gpus": 8, "test_file": "test_mimo_7B_mtp_only_grad.py"}, {"num_gpus": 8, "test_file": "test_qwen3_0.6B_parallel_check.py"}, {"num_gpus": 4, "test_file": "test_qwen3_0.6B_megatron_fsdp_align.py"}, {"num_gpus": 8, "test_file": "test_qwen3_4B_ckpt.py"}, {"num_gpus": 8, "test_file": "test_qwen3_4B_ckpt.py --async-save"}, {"num_gpus": 2, "test_file": "test_qwen2.5_0.5B_gsm8k.py"}, {"num_gpus": 2, "test_file": "test_qwen2.5_0.5B_gsm8k_async.py"}]
     defaults:
       run:
         working-directory: ${{ github.workspace }}
diff --git a/.github/workflows/pr-test.yml.j2 b/.github/workflows/pr-test.yml.j2
@@ -10,7 +10,7 @@
       'label': 'run-ci-fsdp',
       'tests': [
         {'test_file': 'test_qwen3_4B_fsdp_true_on_policy.py --colocated', 'num_gpus': 4},
-        {'test_file': 'test_qwen3_vl_4B_fsdp.py', 'num_gpus': 8},
+        {'test_file': 'test_qwen3_vl_4B_fsdp_true_on_policy.py', 'num_gpus': 8},
         {'test_file': 'test_qwen3_0.6B_fsdp_distributed.py', 'num_gpus': 2},
         {'test_file': 'test_qwen3_0.6B_megatron_fsdp_align.py', 'num_gpus': 4},
       ],
@@ -59,7 +59,7 @@
         {'test_file': 'test_qwen2.5_0.5B_gsm8k_short.py', 'num_gpus': 4},
         {'test_file': 'test_qwen3_0.6B_fsdp_colocated_2xGPU.py', 'num_gpus': 2},
         {'test_file': 'test_qwen3_4B_fsdp_true_on_policy.py', 'num_gpus': 2},
-        {'test_file': 'test_qwen3_vl_4B_fsdp.py', 'num_gpus': 8},
+        {'test_file': 'test_qwen3_vl_4B_fsdp_true_on_policy.py', 'num_gpus': 8},
         {'test_file': 'test_qwen3_0.6B_fsdp_distributed.py', 'num_gpus': 2},
         {'test_file': 'test_quick_start_glm4_9B.py', 'num_gpus': 8},
         {'test_file': 'test_qwen3_30B_A3B.py', 'num_gpus': 8},
diff --git a/tests/test_qwen3_vl_4B_fsdp_true_on_policy.py b/tests/test_qwen3_vl_4B_fsdp_true_on_policy.py
@@ -68,13 +68,13 @@ def execute():
         "--sglang-mem-fraction-static 0.6 "
         "--sglang-decode-log-interval 1000 "
         "--sglang-enable-metrics "
-        # "--sglang-enable-deterministic-inference "
-        # "--sglang-rl-on-policy-target fsdp "
+        "--sglang-enable-deterministic-inference "
+        "--sglang-rl-on-policy-target fsdp "
         "--sglang-attention-backend fa3 "
         "--attn-implementation flash_attention_3 "
         "--sglang-cuda-graph-bs 1 2 4 8 16 24 32 40 48 56 64 "
-        # "--deterministic-mode "
-        # "--true-on-policy-mode "
+        "--deterministic-mode "
+        "--true-on-policy-mode "
     )
 
     ci_args = "--ci-test "
@@ -96,9 +96,9 @@ def execute():
     )
 
     extra_env_vars = {
-        # "NCCL_ALGO": "allreduce:tree",
-        # "NVTE_ALLOW_NONDETERMINISTIC_ALGO": "0",
-        # "CUBLAS_WORKSPACE_CONFIG": ":4096:8",
+        "NCCL_ALGO": "allreduce:tree",
+        "NVTE_ALLOW_NONDETERMINISTIC_ALGO": "0",
+        "CUBLAS_WORKSPACE_CONFIG": ":4096:8",
         "CUDA_DEVICE_MAX_CONNECTIONS": "1",
     }