Skip to content

Commit 2957b05

Browse files
lint fixing
1 parent 95d94ba commit 2957b05

File tree

4 files changed

+13
-8
lines changed

4 files changed

+13
-8
lines changed

primus/backends/megatron/core/extensions/primus_turbo.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -731,7 +731,9 @@ def __init__(
         )
         args = get_args()

-        if (args.patch_zero_bubble and args.enable_zero_bubble) or (args.patch_moe_overlap and args.overlap_moe_expert_parallel_comm):
+        if (args.patch_zero_bubble and args.enable_zero_bubble) or (
+            args.patch_moe_overlap and args.overlap_moe_expert_parallel_comm
+        ):
             from .zbpp_gemm import grouped_gemm_with_weight_gradient_store

             self.grouped_gemm = functools.partial(

primus/backends/megatron/core/extensions/zbpp_gemm.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ def forward(
     ):
         if wgrad_gemm_backend_func is None:
             wgrad_gemm_backend_func = group_gemm_backend_func
-        ctx.use_main_grad = hasattr(weight, 'main_grad') and weight.main_grad is not None
+        ctx.use_main_grad = hasattr(weight, "main_grad") and weight.main_grad is not None
        if ctx.use_main_grad:
            ctx.weight_main_grad = weight.main_grad
            ctx.weight_shape_ori = weight.shape

primus/backends/megatron/core/models/common/model_chunk_schedule_plan.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,14 @@
 ###############################################################################

 import torch
-
-from megatron.core.pipeline_parallel.utils import (
-    get_comm_stream,
-)
-
 from megatron.core.models.common.model_chunk_schedule_plan import (
     TransformerLayerSchedulePlan,
+)
+from megatron.core.models.common.model_chunk_schedule_plan import (
     TransformerModelChunkSchedulePlan as TransformerModelChunkSchedulePlanBase,
 )
+from megatron.core.pipeline_parallel.utils import get_comm_stream
+
 from primus.backends.megatron.core.pipeline_parallel.zerobubble.zbpp_utils import (
     WeightGradStore,
 )

primus/modules/trainer/megatron/pre_trainer.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -234,14 +234,18 @@ def forward_step(self, data_iterator, model: GPTModel, return_schedule_plan=Fals
             args.overlap_moe_expert_parallel_comm
         ), "overlap_moe_expert_parallel_comm must be enabled to return the schedule plan"
         if args.patch_moe_overlap:
-            assert not args.delay_wgrad_compute, "Primus MoE overlap handles wgrad separately from the original Megatron implementation"
+            assert (
+                not args.delay_wgrad_compute
+            ), "Primus MoE overlap handles wgrad separately from the original Megatron implementation"
             from primus.backends.megatron.core.pipeline_parallel.zerobubble.zbpp_utils import (
                 WeightGradStore,
             )
+
             WeightGradStore.enable_split_bw()
             from primus.backends.megatron.core.models.common.model_chunk_schedule_plan import (
                 TransformerModelChunkSchedulePlan,
             )
+
         schedule_plan = TransformerModelChunkSchedulePlan(
             model, tokens, position_ids, attention_mask, labels=labels, loss_mask=loss_mask
         )

0 commit comments

Comments
 (0)