File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change 5353 with :
5454 activate-environment : true
5555 - name : Install basic dependencies
56- run : uv pip install anyscale==0.24.79 typer==0.9.0
56+ run : uv pip install anyscale==0.26.103 typer==0.9.0
5757 # Run h100 tests via anyscale staging (compute config llm-team-h100-4x:1)
5858 - name : GPU tests
5959 env :
Original file line number Diff line number Diff line change 6464 run : |
6565 COMMIT_SHA="${{ github.event.pull_request.head.sha || github.sha }}"
6666 JOB_NAME="skyrl-train-gpu-ci-megatron-models-${COMMIT_SHA:0:7}-${{ github.run_id }}"
67- anyscale job submit -f ci/anyscale_gpu_ci_skyrl_train_megatron_models.yaml --name "$JOB_NAME" --timeout 1000
68- anyscale job wait --cloud sky-anyscale-aws-us-east-1 --name "$JOB_NAME" --timeout 1000
67+ anyscale job submit -f ci/anyscale_gpu_ci_skyrl_train_megatron_models.yaml --name "$JOB_NAME" --timeout 1500
68+ anyscale job wait --cloud sky-anyscale-aws-us-east-1 --name "$JOB_NAME" --timeout 1500
Original file line number Diff line number Diff line change @@ -61,6 +61,7 @@ class _PackedSeqParams:
6161_mock_modules ["megatron.core.optimizer" ].ChainedOptimizer = MagicMock
6262_mock_modules ["megatron.core.transformer.module" ].Float16Module = MagicMock
6363_mock_modules ["megatron.core.utils" ].get_attr_wrapped_model = MagicMock ()
64+ _mock_modules ["megatron.core.utils" ].unwrap_model = MagicMock ()
6465_mock_modules ["megatron.core.transformer.moe.moe_utils" ].clear_aux_losses_tracker = MagicMock ()
6566_mock_modules ["megatron.core.transformer.moe.moe_utils" ].reduce_aux_losses_tracker_across_ranks = MagicMock ()
6667_mock_modules ["megatron.core.transformer.moe.moe_utils" ].get_moe_layer_wise_logging_tracker = MagicMock ()
Original file line number Diff line number Diff line change @@ -53,6 +53,7 @@ class _PackedSeqParams:
5353_mock_modules ["megatron.core.transformer.moe.moe_utils" ].get_moe_layer_wise_logging_tracker = MagicMock ()
5454_mock_modules ["megatron.core.transformer.moe.moe_utils" ].reduce_aux_losses_tracker_across_ranks = MagicMock ()
5555_mock_modules ["megatron.core.utils" ].get_attr_wrapped_model = MagicMock ()
56+ _mock_modules ["megatron.core.utils" ].unwrap_model = MagicMock ()
5657
5758
5859@pytest .fixture (scope = "module" , autouse = True )
Original file line number Diff line number Diff line change 1616``name``).
1717
1818Run with::
19- uv run --isolated --extra megatron --extra dev pytest -s -vvv tests/backends/skyrl_train/gpu/gpu_ci/test_megatron_extractor_consistency.py
19+ uv run --isolated --extra megatron --extra dev pytest -s -vvv tests/backends/skyrl_train/gpu/gpu_ci/megatron/test_megatron_extractor_consistency.py
2020
2121"""
2222
@@ -100,6 +100,9 @@ def _make_ref_cfg(model_name: str) -> SkyRLTrainConfig:
100100 cfg .trainer .ref .megatron_config .transformer_config_kwargs ["mtp_num_layers" ] = 0
101101 if is_moe :
102102 cfg .trainer .gradient_checkpointing_use_reentrant = True
103+ if "qwen3.5" in model_name .lower (): # use LM only path for qwen3.5
104+ cfg .trainer .ref .language_model_only = True
105+ cfg .generator .inference_engine .language_model_only = True
103106 validate_cfg (cfg )
104107 return cfg
105108
Original file line number Diff line number Diff line change @@ -71,6 +71,7 @@ class _PackedSeqParams:
7171_mock_modules ["megatron.core.transformer.moe.moe_utils" ].get_moe_layer_wise_logging_tracker = MagicMock ()
7272_mock_modules ["megatron.core.transformer.moe.moe_utils" ].reduce_aux_losses_tracker_across_ranks = MagicMock ()
7373_mock_modules ["megatron.core.utils" ].get_attr_wrapped_model = MagicMock ()
74+ _mock_modules ["megatron.core.utils" ].unwrap_model = MagicMock ()
7475
7576
7677@pytest .fixture (scope = "module" , autouse = True )
You can’t perform that action at this time.
0 commit comments