We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 5c9ea4c · commit 60a3947 · Copy full SHA for 60a3947
1 file changed
src/megatron/bridge/recipes/moonlight/moonlight_16b.py
@@ -156,7 +156,7 @@ def moonlight_16b_pretrain_config() -> ConfigContainer:
156
cfg.comm_overlap = CommOverlapConfig(tp_comm_overlap=False)
157
cfg.comm_overlap.delay_wgrad_compute = False
158
cfg.comm_overlap.overlap_moe_expert_parallel_comm = False
159
- cfg.model.moe_shared_expert_overlap = True
+ cfg.model.moe_shared_expert_overlap = False
160
161
# Checkpoint config
162
cfg.checkpoint.save_interval = 2000
0 commit comments