@@ -42,7 +42,7 @@ class MetaData():
     rotary_cos: Optional[torch.Tensor] = None
     rotary_interleaved: bool = False
     rotary_conjunction: bool = False
-    
+
 
     def __repr__(self) -> str:
         return (f"MetaData(\n"
@@ -161,7 +161,7 @@ def generate_varlen_tensor(
     if batch_size is None:
         valid_batch_sizes = [bs for bs in [1, 2, 4, 8, 16, 32, 64] if bs <= total_seqlen]
         batch_size = random.choice(valid_batch_sizes)
-    
+
     # get seqlens
     if equal_seqlens:
         seqlens = torch.full(
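Side note on the varlen convention this function feeds: all sequences are packed into one (total_seqlen, num_heads, head_dim) tensor, and `cu_seqlens` holds the cumulative boundaries. A minimal sketch of that relationship (the `make_cu_seqlens` helper is hypothetical, added here only for illustration):

```python
import torch

def make_cu_seqlens(seqlens: torch.Tensor) -> torch.Tensor:
    # Hypothetical helper: prepend 0 and take the running sum so that
    # sequence i occupies rows cu_seqlens[i]:cu_seqlens[i + 1] of the
    # packed (total_seqlen, num_heads, head_dim) tensor.
    return torch.nn.functional.pad(torch.cumsum(seqlens, dim=0), (1, 0))

seqlens = torch.tensor([3, 5, 2], dtype=torch.int32)
cu_seqlens = make_cu_seqlens(seqlens)  # tensor([ 0,  3,  8, 10])
max_seqlen = int(seqlens.max())        # 5
```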
@@ -241,14 +241,14 @@ def input_helper(
         TOTAL_SEQLENS_Q = BATCH * N_CTX_Q
         TOTAL_SEQLENS_K = BATCH * N_CTX_K
         equal_seqlens = False
-    
+
     # gen tensors
     # TODO: the gen functions should maybe have different gen modes like random, ones, increasing seqlen
     q, cu_seqlens_q, max_seqlen_q = generate_varlen_tensor(TOTAL_SEQLENS_Q, HQ, D_HEAD, batch_size=BATCH, dtype=dtype, device=device, equal_seqlens=equal_seqlens, DEBUG_INPUT=DEBUG_INPUT)
     k, cu_seqlens_k, max_seqlen_k = generate_varlen_tensor(TOTAL_SEQLENS_K, HK, D_HEAD, batch_size=BATCH, dtype=dtype, device=device, equal_seqlens=equal_seqlens, DEBUG_INPUT=DEBUG_INPUT)
     v, _, _ = generate_varlen_tensor(TOTAL_SEQLENS_K, HK, D_HEAD, batch_size=BATCH, dtype=dtype, device=device, equal_seqlens=equal_seqlens, DEBUG_INPUT=DEBUG_INPUT)
     do = torch.ones_like(q) if DEBUG_INPUT else torch.randn_like(q)
-    
+
     # setup metadata
     if DEBUG_INPUT:
         sm_scale = 1
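With DEBUG_INPUT=True this hunk pins sm_scale to 1 and the upstream gradient `do` to all-ones, which makes forward/backward mismatches reproducible. The non-debug branch is not shown here; the conventional attention default is 1/sqrt(head_dim), sketched below as an assumption rather than this PR's code:

```python
import math

D_HEAD = 64  # illustrative head dimension

# Conventional attention softmax scale; assumed default for the branch
# not shown in this hunk.
sm_scale = 1.0 / math.sqrt(D_HEAD)
```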
@@ -313,7 +313,7 @@ def input_helper(
 
         return qkv, do, metadata
     else:
-        assert False, f"Unsupported packing mode: {packing}"
+        raise AssertionError(f"Unsupported packing mode: {packing}")
 
 # -------------------------------
 # Alibi
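The assert-to-raise conversions in this commit are not cosmetic: `assert` statements are stripped when Python runs with `-O`, so an `assert False` guard silently disappears, while an explicit `raise` always fires. A minimal illustration:

```python
def check_with_assert(packing: str) -> None:
    # Stripped entirely under `python -O`; bad input falls through silently.
    assert False, f"Unsupported packing mode: {packing}"

def check_with_raise(packing: str) -> None:
    # Survives -O and always surfaces the error.
    raise AssertionError(f"Unsupported packing mode: {packing}")
```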
@@ -366,21 +366,21 @@ def get_shape_from_layout(
     elif layout == 'thd':
         total_seqlen, num_heads, head_dim = x.shape
         if cu_seqlens is None:
-            raise ValueError("cu_seqlens must be provided for varlen (thd) layout") 
+            raise ValueError("cu_seqlens must be provided for varlen (thd) layout")
         if max_seqlen is None:
             raise ValueError("max_seqlen must be provided for varlen (thd) layout")
-        
+
         batch, max_seqlen_final, num_heads, head_dim = len(cu_seqlens) - 1, max_seqlen, num_heads, head_dim
     else:
-        assert False, "Got unsupported layout."
+        raise AssertionError("Got unsupported layout.")
 
     return batch, max_seqlen_final, num_heads, head_dim
 
 
 def get_shapes_from_layout(q, k, layout, cu_seqlens_q=None, cu_seqlens_k=None, max_seqlen_q=None, max_seqlen_k=None):
     batch_q, seqlen_q, nheads_q, head_size_q = get_shape_from_layout(q, layout, cu_seqlens_q, max_seqlen_q)
     batch_k, seqlen_k, nheads_k, head_size_k = get_shape_from_layout(k, layout, cu_seqlens_k, max_seqlen_k)
-    
+
     # assert
     assert batch_q == batch_k
     assert head_size_q == head_size_k
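In the 'thd' branch above, the batch size falls out of the boundary vector: N + 1 offsets delimit N packed sequences. A small worked example under that convention:

```python
import torch

# Three packed sequences of lengths 3, 5, 2 -> total_seqlen = 10.
num_heads, head_dim = 4, 64
x = torch.randn(10, num_heads, head_dim)
cu_seqlens = torch.tensor([0, 3, 8, 10])

batch = len(cu_seqlens) - 1  # 3 sequences from 4 boundary offsets
max_seqlen = 5               # must be passed in; not derivable from x alone
```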
@@ -389,13 +389,13 @@ def get_shapes_from_layout(q, k, layout, cu_seqlens_q = None, cu_seqlens_k = Non
 
 def get_stride_from_layout(x: torch.Tensor, layout: Literal["bshd", "bhsd", "thd"]):
     if layout == 'thd':
-        strides = (0, x.stride(1), x.stride(0), x.stride(2)) 
+        strides = (0, x.stride(1), x.stride(0), x.stride(2))
     elif layout == 'bhsd':
         strides = (x.stride(0), x.stride(1), x.stride(2), x.stride(3))
     elif layout == 'bshd':
         strides = (x.stride(0), x.stride(2), x.stride(1), x.stride(3))
     else:
-        assert False, 'Got unsupported layout.'
+        raise AssertionError('Got unsupported layout.')
     return strides
 
 def get_shape_and_strides_from_layout(x: torch.Tensor, layout: Literal["bshd", "bhsd", "thd"], cu_seqlens: Optional[torch.Tensor] = None, max_seqlen: Optional[int] = None):
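`get_stride_from_layout` normalizes every layout to (batch, head, seq, dim) stride order; 'thd' has no batch dimension, so its batch stride is pinned to 0 and batch offsets come from `cu_seqlens` instead. For a contiguous bshd tensor the reordering looks like this:

```python
import torch

x = torch.randn(2, 128, 8, 64)  # bshd: (batch, seq, head, dim)

# Reorder to (batch, head, seq, dim) stride order, as in the bshd branch.
strides = (x.stride(0), x.stride(2), x.stride(1), x.stride(3))
print(strides)  # (65536, 64, 512, 1)
```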
@@ -458,22 +458,22 @@ def write_dropout_mask(x, tensor_name = "tensor"):
     if True:
         BLOCK_M = 64
         BLOCK_N = 64
-        
+
         # Calculate number of blocks in each dimension
         m_blocks = math.ceil(seqlen_m / BLOCK_M)
         n_blocks = math.ceil(seqlen_n / BLOCK_N)
-        
+
         # Process each block
         for m_block in range(m_blocks):
             # Calculate row range for current block
             row_start = m_block * BLOCK_M
             row_end = min(row_start + BLOCK_M, seqlen_m)
-            
+
             for n_block in range(n_blocks):
                 # Calculate column range for current block
                 col_start = n_block * BLOCK_N
                 col_end = min(col_start + BLOCK_N, seqlen_n)
-                
+
                 # Extract and write the current block
                 for row_idx in range(row_start, row_end):
                     row_data = dropout_mask[row_idx][col_start:col_end]
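The dump loop walks the mask in 64x64 tiles so the text output lines up with what each kernel program instance sees. A standalone sketch of the same traversal order (names are mine, not the PR's):

```python
import math

def iter_blocks(seqlen_m: int, seqlen_n: int, BLOCK_M: int = 64, BLOCK_N: int = 64):
    # Yield (row_start, row_end, col_start, col_end) per tile, clamped at
    # the edges, in the order the loop above visits them.
    for m_block in range(math.ceil(seqlen_m / BLOCK_M)):
        row_start = m_block * BLOCK_M
        row_end = min(row_start + BLOCK_M, seqlen_m)
        for n_block in range(math.ceil(seqlen_n / BLOCK_N)):
            col_start = n_block * BLOCK_N
            col_end = min(col_start + BLOCK_N, seqlen_n)
            yield row_start, row_end, col_start, col_end
```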