Skip to content

Commit 6af77cc

Browse files
authored
fix: parse moe_layer_freq from string to list in Megatron config adapter (#245)
1 parent 27a9ee5 commit 6af77cc

File tree

2 files changed

+5
-5
lines changed

2 files changed

+5
-5
lines changed

primus/modules/trainer/megatron/trainer.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -895,7 +895,11 @@ def update_primus_config(
895895
args.iterations_to_skip = []
896896

897897
# support moe_freq_type
898-
# args.moe_layer_freq = moe_freq_type(args.moe_layer_freq)
898+
if isinstance(args.moe_layer_freq, str):
899+
try:
900+
args.moe_layer_freq = eval(args.moe_layer_freq)
901+
except Exception:
902+
raise ValueError(f"Invalid moe_layer_freq format: {args.moe_layer_freq}")
899903

900904
if args.mock_data:
901905
args.data_path = None

tests/trainer/test_megatron_trainer.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -145,16 +145,12 @@ def test_deepseek_v2_lite(self):
145145
exp_path="examples/megatron/configs/MI300X/deepseek_v2_lite-pretrain.yaml",
146146
env_override={},
147147
extra_args=[
148-
"--num_layers",
149-
"4",
150148
"--train_iters",
151149
"3",
152150
"--micro_batch_size",
153151
"1",
154152
"--global_batch_size",
155153
"8",
156-
"--moe_layer_freq",
157-
"[0]*1+[1]*3",
158154
"--expert_model_parallel_size",
159155
"8",
160156
],

0 commit comments

Comments
 (0)