[ci] CI fixes for NovaSky-AI#1769 (NovaSky-AI#1795)

erictang000 · web-flow · commit 11cb3a1ebc91 · 2026-06-16T13:21:22.000-07:00
- fix cpu ci (mock the megatron `unwrap_model` import in the cpu ci tests)
- bump anyscale version from 0.24.79 to 0.26.103 for h100 CI to
try to resolve bucket access issue with k8s
- add language_model_only flag for qwen3.5 models in `test_megatron_extractor_consistency.py`
- bump timeout for megatron models test from 1000 -> 1500 to account for
new qwen3.5-0.8b seq packing test
diff --git a/.github/workflows/gpu_ci_h100.yaml b/.github/workflows/gpu_ci_h100.yaml
@@ -53,7 +53,7 @@ jobs:
         with:
           activate-environment: true
       - name: Install basic dependencies
-        run: uv pip install anyscale==0.24.79 typer==0.9.0
+        run: uv pip install anyscale==0.26.103 typer==0.9.0
       # Run h100 tests via anyscale staging (compute config llm-team-h100-4x:1)
       - name: GPU tests
         env:
diff --git a/.github/workflows/gpu_skyrl_train_megatron_models.yaml b/.github/workflows/gpu_skyrl_train_megatron_models.yaml
@@ -64,5 +64,5 @@ jobs:
         run: |
           COMMIT_SHA="${{ github.event.pull_request.head.sha || github.sha }}"
           JOB_NAME="skyrl-train-gpu-ci-megatron-models-${COMMIT_SHA:0:7}-${{ github.run_id }}"
-          anyscale job submit -f ci/anyscale_gpu_ci_skyrl_train_megatron_models.yaml --name "$JOB_NAME" --timeout 1000
-          anyscale job wait --cloud sky-anyscale-aws-us-east-1 --name "$JOB_NAME" --timeout 1000
+          anyscale job submit -f ci/anyscale_gpu_ci_skyrl_train_megatron_models.yaml --name "$JOB_NAME" --timeout 1500
+          anyscale job wait --cloud sky-anyscale-aws-us-east-1 --name "$JOB_NAME" --timeout 1500
diff --git a/tests/backends/skyrl_train/distributed/test_preprocess_packed_seqs_cp.py b/tests/backends/skyrl_train/distributed/test_preprocess_packed_seqs_cp.py
@@ -61,6 +61,7 @@ class _PackedSeqParams:
 _mock_modules["megatron.core.optimizer"].ChainedOptimizer = MagicMock
 _mock_modules["megatron.core.transformer.module"].Float16Module = MagicMock
 _mock_modules["megatron.core.utils"].get_attr_wrapped_model = MagicMock()
+_mock_modules["megatron.core.utils"].unwrap_model = MagicMock()
 _mock_modules["megatron.core.transformer.moe.moe_utils"].clear_aux_losses_tracker = MagicMock()
 _mock_modules["megatron.core.transformer.moe.moe_utils"].reduce_aux_losses_tracker_across_ranks = MagicMock()
 _mock_modules["megatron.core.transformer.moe.moe_utils"].get_moe_layer_wise_logging_tracker = MagicMock()
diff --git a/tests/backends/skyrl_train/distributed/test_preprocess_packed_seqs_multiseq.py b/tests/backends/skyrl_train/distributed/test_preprocess_packed_seqs_multiseq.py
@@ -53,6 +53,7 @@ class _PackedSeqParams:
 _mock_modules["megatron.core.transformer.moe.moe_utils"].get_moe_layer_wise_logging_tracker = MagicMock()
 _mock_modules["megatron.core.transformer.moe.moe_utils"].reduce_aux_losses_tracker_across_ranks = MagicMock()
 _mock_modules["megatron.core.utils"].get_attr_wrapped_model = MagicMock()
+_mock_modules["megatron.core.utils"].unwrap_model = MagicMock()
 
 
 @pytest.fixture(scope="module", autouse=True)
diff --git a/tests/backends/skyrl_train/gpu/gpu_ci/megatron/test_megatron_extractor_consistency.py b/tests/backends/skyrl_train/gpu/gpu_ci/megatron/test_megatron_extractor_consistency.py
@@ -16,7 +16,7 @@
 ``name``).
 
 Run with::
-    uv run --isolated --extra megatron --extra dev pytest -s -vvv tests/backends/skyrl_train/gpu/gpu_ci/test_megatron_extractor_consistency.py
+    uv run --isolated --extra megatron --extra dev pytest -s -vvv tests/backends/skyrl_train/gpu/gpu_ci/megatron/test_megatron_extractor_consistency.py
 
 """
 
@@ -100,6 +100,9 @@ def _make_ref_cfg(model_name: str) -> SkyRLTrainConfig:
         cfg.trainer.ref.megatron_config.transformer_config_kwargs["mtp_num_layers"] = 0
     if is_moe:
         cfg.trainer.gradient_checkpointing_use_reentrant = True
+    if "qwen3.5" in model_name.lower():  # use LM only path for qwen3.5
+        cfg.trainer.ref.language_model_only = True
+        cfg.generator.inference_engine.language_model_only = True
     validate_cfg(cfg)
     return cfg
 
diff --git a/tests/train/test_packing_round_trip.py b/tests/train/test_packing_round_trip.py
@@ -71,6 +71,7 @@ class _PackedSeqParams:
 _mock_modules["megatron.core.transformer.moe.moe_utils"].get_moe_layer_wise_logging_tracker = MagicMock()
 _mock_modules["megatron.core.transformer.moe.moe_utils"].reduce_aux_losses_tracker_across_ranks = MagicMock()
 _mock_modules["megatron.core.utils"].get_attr_wrapped_model = MagicMock()
+_mock_modules["megatron.core.utils"].unwrap_model = MagicMock()
 
 
 @pytest.fixture(scope="module", autouse=True)