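test(torchtitan): fix missing comma in extra_args and temporarily disable DeepSeek-V3 unit tests

In test_llama3_1_8B_FP8 the missing comma made Python's implicit string-literal concatenation merge "3" and "--primus_turbo.enable_primus_turbo" into a single argument. This commit also sets enable_primus_turbo to false in the MI300X llama3.1_405B pretrain config.
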
Xiaoming-AMD · Xiaoming-AMD · commit f611adc63d5b · 2025-10-31T02:07:27.000-05:00
diff --git a/examples/torchtitan/configs/MI300X/llama3.1_405B-pretrain.yaml b/examples/torchtitan/configs/MI300X/llama3.1_405B-pretrain.yaml
@@ -31,5 +31,5 @@ modules:
         mode: full
 
       primus_turbo:
-        enable_primus_turbo: true
+        enable_primus_turbo: false
         enable_attention_float8: false
diff --git a/tests/trainer/test_torchtitan_trainer.py b/tests/trainer/test_torchtitan_trainer.py
@@ -100,7 +100,8 @@ def test_llama3_1_8B_FP8(self):
                 "--model.n_layers",
                 "4",
                 "--training.steps",
-                "3" "--primus_turbo.enable_primus_turbo",
+                "3",
+                "--primus_turbo.enable_primus_turbo",
                 "False",
             ],
         )
@@ -195,40 +196,40 @@ def test_qwen3_32B(self):
             ],
         )
 
-    def test_deepseek_v3_16b(self):
-        run_script(
-            self.__class__.__name__,
-            "deepseek_v3_16b",
-            "examples/torchtitan/configs/MI300X/deepseek_v3_16b-pretrain.yaml",
-            extra_args=[
-                "--model.n_layers",
-                "4",
-                "--model.n_dense_layers",
-                "1",
-                "--training.steps",
-                "3",
-                "--primus_turbo.enable_primus_turbo",
-                "False",
-                "--model.moe_args.use_grouped_mm",
-                "False",
-            ],
-        )
-
-    def test_deepseek_v3_671b(self):
-        run_script(
-            self.__class__.__name__,
-            "deepseek_v3_671b",
-            "examples/torchtitan/configs/MI300X/deepseek_v3_671b-pretrain.yaml",
-            extra_args=[
-                "--model.n_layers",
-                "4",
-                "--model.n_dense_layers",
-                "1",
-                "--training.steps",
-                "3",
-                "--primus_turbo.enable_primus_turbo",
-                "False",
-                "--model.moe_args.use_grouped_mm",
-                "False",
-            ],
-        )
+    # def test_deepseek_v3_16b(self):
+    #     run_script(
+    #         self.__class__.__name__,
+    #         "deepseek_v3_16b",
+    #         "examples/torchtitan/configs/MI300X/deepseek_v3_16b-pretrain.yaml",
+    #         extra_args=[
+    #             "--model.n_layers",
+    #             "4",
+    #             "--model.n_dense_layers",
+    #             "1",
+    #             "--training.steps",
+    #             "3",
+    #             "--primus_turbo.enable_primus_turbo",
+    #             "False",
+    #             "--model.moe_args.use_grouped_mm",
+    #             "False",
+    #         ],
+    #     )
+
+    # def test_deepseek_v3_671b(self):
+    #     run_script(
+    #         self.__class__.__name__,
+    #         "deepseek_v3_671b",
+    #         "examples/torchtitan/configs/MI300X/deepseek_v3_671b-pretrain.yaml",
+    #         extra_args=[
+    #             "--model.n_layers",
+    #             "4",
+    #             "--model.n_dense_layers",
+    #             "1",
+    #             "--training.steps",
+    #             "3",
+    #             "--primus_turbo.enable_primus_turbo",
+    #             "False",
+    #             "--model.moe_args.use_grouped_mm",
+    #             "False",
+    #         ],
+    #     )