File tree Expand file tree Collapse file tree
scripts/performance/configs/deepseek
Original file line number | Diff line number | Diff line change
@@ -118,6 +118,9 @@ def deepseek_v3_pretrain_config_gb200(
118 118       cfg = pretrain_config()
119 119       cfg.mixed_precision = precision_config
120 120
    121 +     if cfg.mixed_precision.fp8_recipe == "mxfp8":
    122 +         cfg.model.fp8_output_proj = True
    123 +
121 124       # Apply model-specific settings that were previously passed as constructor args
122 125       cfg.model.pipeline_model_parallel_size = base_cfg.pipeline_model_parallel_size
123 126       cfg.model.virtual_pipeline_model_parallel_size = base_cfg.virtual_pipeline_model_parallel_size
Original file line number | Diff line number | Diff line change
 95  95       recompute_modules=["mla_up_proj"],
 96  96   )
 97  97   DEEPSEEK_V3_PRETRAIN_CONFIG_GB200_FP8_CS_V1 = DEEPSEEK_V3_PRETRAIN_CONFIG_GB200_V1
 98     - DEEPSEEK_V3_PRETRAIN_CONFIG_GB200_FP8_MX_V1 = DEEPSEEK_V3_PRETRAIN_CONFIG_GB200_V1
     98 + DEEPSEEK_V3_PRETRAIN_CONFIG_GB200_FP8_MX_V1 = replace(
     99 +     DEEPSEEK_V3_PRETRAIN_CONFIG_GB200_V1,
    100 +     cuda_graph_impl="full_iteration",
    101 +     cuda_graph_scope=[],
    102 +     moe_a2a_overlap=True,
    103 +     cutedsl_fused_grouped_mlp=True,
    104 +     recompute_modules=["mla_up_proj"],
    105 + )
 99 106   DEEPSEEK_V3_PRETRAIN_CONFIG_GB200_NVFP4_V1 = DEEPSEEK_V3_PRETRAIN_CONFIG_GB200_V1
100 107
101 108
199 206       global_batch_size=4096,
200 207   )
201 208   DEEPSEEK_V3_PRETRAIN_CONFIG_GB200_FP8_CS_V2 = DEEPSEEK_V3_PRETRAIN_CONFIG_GB200_V2
202     - DEEPSEEK_V3_PRETRAIN_CONFIG_GB200_FP8_MX_V2 = DEEPSEEK_V3_PRETRAIN_CONFIG_GB200_V2
    209 + DEEPSEEK_V3_PRETRAIN_CONFIG_GB200_FP8_MX_V2 = replace(
    210 +     DEEPSEEK_V3_PRETRAIN_CONFIG_GB200_FP8_MX_V1,
    211 +     global_batch_size=4096,
    212 + )
203 213   DEEPSEEK_V3_PRETRAIN_CONFIG_GB200_NVFP4_V2 = DEEPSEEK_V3_PRETRAIN_CONFIG_GB200_V2
204 214
205 215
204214
205215
You can’t perform that action at this time.
0 commit comments