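Fix lint: strip trailing whitespace, remove unused imports, drop a redundant f-string prefix, and normalize end-of-file newlines (also updates the third_party/torchtitan submodule pointer)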

yuankaichen-amd · yuankaichen-amd · commit 10c106b39986 · 2025-11-10T11:40:10.000-06:00
diff --git a/primus/core/projection/memory_projection/__init__.py b/primus/core/projection/memory_projection/__init__.py
@@ -2,4 +2,4 @@
 
 __all__ = [
     launch_projection_from_cli,
-]
+]
diff --git a/primus/core/projection/memory_projection/projection.py b/primus/core/projection/memory_projection/projection.py
@@ -1,8 +1,6 @@
-import argparse
 import os
-import sys
 from pathlib import Path
- 
+
 from primus.core.launcher.parser import PrimusParser
 from primus.core.projection.training_config import convert_primus_config_to_projection_config
 from primus.core.projection.module_profilers.language_model import build_profiler, get_language_model_profiler_spec
@@ -11,7 +9,7 @@
 def print_profiler_hierarchy(profiler, batch_size, seq_len, rank=None, name="root", depth=0, visited=None):
     """
     Recursively print the profiler hierarchy with num_params and activation_memory for each component.
-    
+
     Args:
         profiler: The profiler instance to print
         batch_size: Batch size for activation memory calculation
@@ -23,15 +21,15 @@ def print_profiler_hierarchy(profiler, batch_size, seq_len, rank=None, name="roo
     """
     if visited is None:
         visited = set()
-    
+
     # Avoid infinite recursion if profilers reference each other
     profiler_id = id(profiler)
     if profiler_id in visited:
         return
     visited.add(profiler_id)
-    
+
     indent = "  " * depth
-    
+
     # Calculate metrics for this profiler
     try:
         if depth == 0:
@@ -44,7 +42,7 @@ def print_profiler_hierarchy(profiler, batch_size, seq_len, rank=None, name="roo
             print(f"{indent}[{name}]")
             print(f"{indent}  Params: {num_params / 1e9:.6f} Billion ({num_params:,})")
             print(f"{indent}  Activation Memory: {activation_mem / 1024 / 1024 / 1024:.4f} GB")
-        
+
         # Recursively process sub_profilers if they exist
         if hasattr(profiler, 'sub_profilers') and profiler.sub_profilers:
             for sub_name, sub_profiler in profiler.sub_profilers.items():
@@ -75,16 +73,16 @@ def launch_projection_from_cli(args, overrides):
     seq_len = training_config.runtime_config.sequence_length
     batch_size = training_config.runtime_config.micro_batch_size
     rank = int(os.getenv('RANK', '0'))
-    
+
     # Print recursive profiler hierarchy with detailed breakdown
     print("\n" + "=" * 100)
     print(f"[Primus:Projection] Component-wise Profiling Results (Rank {rank}):")
     print("=" * 100)
     print()
-    
+
     # Print the complete hierarchy recursively
     print_profiler_hierarchy(model_profiler, batch_size, seq_len, rank=rank, name="LanguageModelProfiler", depth=0)
-    
+
     # Get overall totals from the model profiler for this rank
     num_params = model_profiler.estimated_num_params(rank=rank)
     activation_memory = model_profiler.estimated_activation_memory(batch_size, seq_len)
@@ -98,4 +96,4 @@ def launch_projection_from_cli(args, overrides):
           f"{activation_memory / 1024 / 1024 / 1024:.4f} GB")
     print(f"  Projected Total Memory: "
           f"{(num_params * num_bytes_per_param + activation_memory) / 1024 / 1024 / 1024:.4f} GB")
-    print("=" * 100)
+    print("=" * 100)
diff --git a/primus/core/projection/module_profilers/attention.py b/primus/core/projection/module_profilers/attention.py
@@ -6,8 +6,6 @@
 
 
 from primus.core.projection.base_module_profiler import BaseModuleProfiler
-from primus.core.projection.profiler_spec import ModuleProfilerSpec
-from primus.core.projection.training_config import TrainingConfig
 
 
 class AttentionProfiler(BaseModuleProfiler):
diff --git a/primus/core/projection/module_profilers/embedding.py b/primus/core/projection/module_profilers/embedding.py
@@ -6,8 +6,6 @@
 
 
 from primus.core.projection.base_module_profiler import BaseModuleProfiler
-from primus.core.projection.profiler_spec import ModuleProfilerSpec
-from primus.core.projection.training_config import TrainingConfig
 
 
 class EmbeddingProfiler(BaseModuleProfiler):
@@ -17,5 +15,5 @@ def estimated_num_params(self, rank: int | None = None) -> int:
     def estimated_activation_memory(self, batch_size: int, seq_len: int) -> int:
         return  (batch_size * seq_len //
                  self.config.model_parallel_config.tensor_model_parallel_size //
-                 self.config.model_parallel_config.context_model_parallel_size * 
+                 self.config.model_parallel_config.context_model_parallel_size *
                  self.config.model_config.hidden_size * 2)  # bf16
diff --git a/primus/core/projection/module_profilers/language_model.py b/primus/core/projection/module_profilers/language_model.py
@@ -90,7 +90,7 @@ def get_layers_for_rank(
         total_stages = pp_size
         if num_virtual_pipeline_stages is not None:
             total_stages = total_stages * num_virtual_pipeline_stages
-            
+
         if n_layers % total_stages != 0:
             raise ValueError(
                 f"Total number of layers ({n_layers}) must be divisible by "
@@ -100,22 +100,22 @@ def get_layers_for_rank(
         model_parallel_size = pp_size * tp_size * cp_size * ep_size
         model_parallel_rank = global_rank % model_parallel_size
         pp_rank = model_parallel_rank // (tp_size * cp_size * ep_size)
-        
+
         # Calculate how many layers are in each virtual stage (chunk)
         layers_per_virtual_stage = n_layers // total_stages
-        
+
         # A physical pp_rank hosts multiple virtual stages in an interleaved fashion.
         # pp_rank 0 gets virtual stages: 0, pp_size, 2*pp_size, ...
         # pp_rank 1 gets virtual stages: 1, pp_size+1, 2*pp_size+1, ...
         my_virtual_stages = range(pp_rank, total_stages, pp_size)
-        
+
         assigned_layers = []
         for vs_index in my_virtual_stages:
             start_layer = vs_index * layers_per_virtual_stage
             end_layer = (vs_index + 1) * layers_per_virtual_stage - 1
             for layer in range(start_layer, end_layer + 1):
                 assigned_layers.append(layer)
-            
+
         return assigned_layers
 
     def get_dp_size(self) -> int:
@@ -160,7 +160,6 @@ def estimated_num_params(self, rank: int | None = None) -> int:
             total_params += self.sub_profilers["calc_loss"].estimated_num_params(rank)
         return total_params
 
-
     def estimated_activation_memory(self, batch_size: int, seq_len: int) -> int:
         total_act = 0
         pp_size = self.config.model_parallel_config.pipeline_model_parallel_size
@@ -186,4 +185,4 @@ def estimated_activation_memory(self, batch_size: int, seq_len: int) -> int:
         ga = self.config.runtime_config.global_batch_size // self.get_dp_size()
         gs_saving = 1 if ga > pp_size else ga / pp_size
         total_act *= gs_saving * interleaved_schedule_memory_penalty
-        return total_act
+        return total_act
diff --git a/primus/core/projection/module_profilers/loss.py b/primus/core/projection/module_profilers/loss.py
@@ -6,8 +6,6 @@
 
 
 from primus.core.projection.base_module_profiler import BaseModuleProfiler
-from primus.core.projection.profiler_spec import ModuleProfilerSpec
-from primus.core.projection.training_config import TrainingConfig
 
 
 class LossProfiler(BaseModuleProfiler):
diff --git a/primus/core/projection/module_profilers/moe_mlp.py b/primus/core/projection/module_profilers/moe_mlp.py
@@ -16,7 +16,7 @@ def estimated_num_params(self, rank: int | None = None) -> int:
             moe_ffn = self.config.model_config.moe_ffn_hidden_size
         else:
             moe_ffn = self.config.model_config.ffn_hidden_size
-        
+
         # For SwiGLU: 3 projections per expert (gate, up, down)
         # For standard FFN: 2 projections per expert (up, down)
         num_ffn_projections = 3 if self.config.model_config.swiglu else 2
@@ -31,7 +31,7 @@ def estimated_num_params(self, rank: int | None = None) -> int:
         if self.config.model_config.moe_shared_expert_intermediate_size is not None:
             shared_sz = self.config.model_config.moe_shared_expert_intermediate_size
         shared_params = num_ffn_projections * self.config.model_config.hidden_size * shared_sz
-        
+
         return all_experts_params + shared_params
 
     def estimated_activation_memory(self, batch_size: int, seq_len: int) -> int:
@@ -60,4 +60,4 @@ def get_moe_mlp_profiler_spec(config: TrainingConfig) -> ModuleProfilerSpec:
         profiler=MoEMLPProfiler,
         config=config,
         sub_profiler_specs=None,
-    )
+    )
diff --git a/primus/core/projection/module_profilers/output_layer.py b/primus/core/projection/module_profilers/output_layer.py
@@ -15,5 +15,5 @@ def estimated_num_params(self, rank: int | None = None) -> int:
     def estimated_activation_memory(self, batch_size: int, seq_len: int) -> int:
         return  (batch_size * seq_len //
                  self.config.model_parallel_config.tensor_model_parallel_size //
-                 self.config.model_parallel_config.context_model_parallel_size * 
+                 self.config.model_parallel_config.context_model_parallel_size *
                  self.config.model_config.padded_vocab_size * 2)  # bf16
diff --git a/primus/core/projection/training_config.py b/primus/core/projection/training_config.py
@@ -139,4 +139,4 @@ def convert_primus_config_to_projection_config(primus_config) -> TrainingConfig:
         model_parallel_config=model_parallel_config,
     )
 
-    return training_config
+    return training_config
diff --git a/primus/tools/benchmark/deepseek_dense_gemm_bench_args.py b/primus/tools/benchmark/deepseek_dense_gemm_bench_args.py
@@ -32,4 +32,3 @@ def add_gemm_parser(parser: argparse.ArgumentParser):
     parser.add_argument("--output-file", default="./gemm-deepseek_report.md")
     parser.add_argument("--append", action="store_true", help="Append to existing report")
     return parser
-
diff --git a/primus/tools/benchmark/dense_gemm_bench.py b/primus/tools/benchmark/dense_gemm_bench.py
@@ -104,7 +104,7 @@ def build_gemm_preamble(args, shape_defs: List[Tuple[str, List[int]]]) -> str:
         "",
         f"- Model: {args.model or 'Custom'}",
         f"- Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
-        f"- Cluster: amd-aig-poolside",
+        "- Cluster: amd-aig-poolside",
         f"- Duration per shape: {args.duration} sec",
         "",
         "## Configuration",
diff --git a/primus/tools/benchmark/dense_gemm_bench_args.py b/primus/tools/benchmark/dense_gemm_bench_args.py
@@ -24,4 +24,3 @@ def add_gemm_parser(parser: argparse.ArgumentParser):
     parser.add_argument("--output-file", default="./gemm-dense_report.md")
     parser.add_argument("--duration", type=int, default=3, help="Benchmark duration per shape (sec)")
     return parser
-
diff --git a/primus/tools/benchmark/gemm_bench_args.py b/primus/tools/benchmark/gemm_bench_args.py
@@ -27,4 +27,3 @@ def add_gemm_parser(parser: argparse.ArgumentParser):
     )
 
     return parser
-
diff --git a/primus/tools/benchmark/strided_allgather_bench_args.py b/primus/tools/benchmark/strided_allgather_bench_args.py
@@ -61,4 +61,3 @@ def add_arguments(parser: argparse.ArgumentParser):
     )
 
     return parser
-
diff --git a/third_party/torchtitan b/third_party/torchtitan
@@ -1 +1 @@
-Subproject commit 99c0cb28f615d99290273afa1da01fd72f01f1a5
+Subproject commit 5fb7cc2e3bbb9b9dc0ab7af34ed5cc58b5f32021

Original file line number	Diff line number	Diff line change
`@@ -139,4 +139,4 @@ def convert_primus_config_to_projection_config(primus_config) -> TrainingConfig:`
`139`	`139`	`model_parallel_config=model_parallel_config,`
`140`	`140`	`)`
`141`	`141`
`142`		`- return training_config`
	`142`	`+ return training_config`