Skip to content

Commit 65e1970

Browse files
committed
fix
1 parent 2266d86 commit 65e1970

File tree

1 file changed

+2
-2
lines changed

1 file changed

+2
-2
lines changed

swift/megatron/argument/megatron_args.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -720,12 +720,12 @@ def __post_init__(self):
720 720   os.environ.setdefault('CUDA_DEVICE_MAX_CONNECTIONS', '1')
721 721   if self.recompute_granularity == 'none':
722 722   self.recompute_granularity = None
723     - if self.apply_wd_to_qk_layernorm and self.hf_model_type != 'qwen3_next':
724     - raise ValueError('apply_wd_to_qk_layernorm is only supported for qwen3_next')
725 723   self._set_default()
726 724   self.model_info, self.model_meta = get_model_info_meta(
727 725   self.model, model_type=self.model_type, use_hf=self.use_hf, hub_token=self.hub_token)
728 726   self.model_type = self.model_info.model_type
    727 + if self.apply_wd_to_qk_layernorm and self.model_type != 'qwen3_next':
    728 + raise ValueError('apply_wd_to_qk_layernorm is only supported for qwen3_next')
729 729   if self.pipeline_model_parallel_size == 1 and (self.decoder_first_pipeline_num_layers is not None
730 730   or self.decoder_last_pipeline_num_layers is not None):
731 731   raise ValueError('pipeline_model_parallel_size must be greater than 1 if you want to set '

0 commit comments

Comments (0)