@@ -100,7 +100,8 @@ def test_llama3_1_8B_FP8(self):
100100 "--model.n_layers" ,
101101 "4" ,
102102 "--training.steps" ,
103- "3" "--primus_turbo.enable_primus_turbo" ,
103+ "3" ,
104+ "--primus_turbo.enable_primus_turbo" ,
104105 "False" ,
105106 ],
106107 )
@@ -195,40 +196,40 @@ def test_qwen3_32B(self):
195196 ],
196197 )
197198
198- def test_deepseek_v3_16b (self ):
199- run_script (
200- self .__class__ .__name__ ,
201- "deepseek_v3_16b" ,
202- "examples/torchtitan/configs/MI300X/deepseek_v3_16b-pretrain.yaml" ,
203- extra_args = [
204- "--model.n_layers" ,
205- "4" ,
206- "--model.n_dense_layers" ,
207- "1" ,
208- "--training.steps" ,
209- "3" ,
210- "--primus_turbo.enable_primus_turbo" ,
211- "False" ,
212- "--model.moe_args.use_grouped_mm" ,
213- "False" ,
214- ],
215- )
216-
217- def test_deepseek_v3_671b (self ):
218- run_script (
219- self .__class__ .__name__ ,
220- "deepseek_v3_671b" ,
221- "examples/torchtitan/configs/MI300X/deepseek_v3_671b-pretrain.yaml" ,
222- extra_args = [
223- "--model.n_layers" ,
224- "4" ,
225- "--model.n_dense_layers" ,
226- "1" ,
227- "--training.steps" ,
228- "3" ,
229- "--primus_turbo.enable_primus_turbo" ,
230- "False" ,
231- "--model.moe_args.use_grouped_mm" ,
232- "False" ,
233- ],
234- )
199+ # def test_deepseek_v3_16b(self):
200+ # run_script(
201+ # self.__class__.__name__,
202+ # "deepseek_v3_16b",
203+ # "examples/torchtitan/configs/MI300X/deepseek_v3_16b-pretrain.yaml",
204+ # extra_args=[
205+ # "--model.n_layers",
206+ # "4",
207+ # "--model.n_dense_layers",
208+ # "1",
209+ # "--training.steps",
210+ # "3",
211+ # "--primus_turbo.enable_primus_turbo",
212+ # "False",
213+ # "--model.moe_args.use_grouped_mm",
214+ # "False",
215+ # ],
216+ # )
217+
218+ # def test_deepseek_v3_671b(self):
219+ # run_script(
220+ # self.__class__.__name__,
221+ # "deepseek_v3_671b",
222+ # "examples/torchtitan/configs/MI300X/deepseek_v3_671b-pretrain.yaml",
223+ # extra_args=[
224+ # "--model.n_layers",
225+ # "4",
226+ # "--model.n_dense_layers",
227+ # "1",
228+ # "--training.steps",
229+ # "3",
230+ # "--primus_turbo.enable_primus_turbo",
231+ # "False",
232+ # "--model.moe_args.use_grouped_mm",
233+ # "False",
234+ # ],
235+ # )
0 commit comments