THUDM
diff --git a/examples/fully_async/fully_async_rollout.py b/examples/fully_async/fully_async_rollout.py
Lines changed: 2 additions & 2 deletions
diff --git a/examples/multi_agent/agent_system.py b/examples/multi_agent/agent_system.py
Lines changed: 4 additions & 4 deletions
diff --git a/examples/on_policy_distillation/on_policy_distillation.py b/examples/on_policy_distillation/on_policy_distillation.py
Lines changed: 3 additions & 2 deletions
diff --git a/examples/retool/generate_with_retool.py b/examples/retool/generate_with_retool.py
Lines changed: 4 additions & 4 deletions
diff --git a/examples/search-r1/generate_with_search.py b/examples/search-r1/generate_with_search.py
Lines changed: 1 addition & 1 deletion
diff --git a/examples/search-r1/qa_em_format.py b/examples/search-r1/qa_em_format.py
Lines changed: 1 addition & 1 deletion
diff --git a/examples/train_infer_mismatch_helper/mis.py b/examples/train_infer_mismatch_helper/mis.py
Lines changed: 15 additions & 7 deletions
diff --git a/pyproject.toml b/pyproject.toml
Lines changed: 9 additions & 2 deletions
diff --git a/slime/backends/fsdp_utils/actor.py b/slime/backends/fsdp_utils/actor.py
Lines changed: 5 additions & 5 deletions
diff --git a/slime/backends/fsdp_utils/update_weight_utils.py b/slime/backends/fsdp_utils/update_weight_utils.py
Lines changed: 2 additions & 2 deletions
@@ -90,8 +90,8 @@ async def continuous_worker_loop(self):
 
                         # Add completion callback
                         def make_callback(gid):
-                            def task_done_callback(task):
-                                result = task.result()
+                            def task_done_callback(done_task):
+                                result = done_task.result()
                                 self.output_queue.put((gid, result))
 
                             return task_done_callback
 
@@ -85,7 +85,7 @@ def __init__(self):
 
     async def run(self, args, prompt, max_retries: int = 1, key: str = None) -> str:
         """Runs the agent by sending a prompt to the LLM."""
-        for i in range(max_retries):
+        for _i in range(max_retries):
             try:
                 response = await generate_response(args, prompt, key=key)
                 return response
@@ -200,7 +200,7 @@ async def run_agent_system(args, sample):
     results = await asyncio.gather(*tasks, return_exceptions=True)
 
     rewards = await batched_async_rm(args, args.results_dict["solver"])
-    for sample, reward in zip(args.results_dict["solver"], rewards):
+    for sample, reward in zip(args.results_dict["solver"], rewards, strict=False):
         sample.reward = reward
 
     previous_solutions = [item for item in results if isinstance(item, str)]
@@ -223,12 +223,12 @@ def reward_adjustment(samples, reward_weight):
 
     # 处理异常结果
     rewrited_solutions = []
-    for i, result in enumerate(rewrited_solutions_raw):
+    for _i, result in enumerate(rewrited_solutions_raw):
         if isinstance(result, str):
             rewrited_solutions.append(result)
 
     rewards = await batched_async_rm(args, args.results_dict["rewriter"])
-    for sample, reward in zip(args.results_dict["rewriter"], rewards):
+    for sample, reward in zip(args.results_dict["rewriter"], rewards, strict=False):
         sample.reward = reward
 
     if len(rewrited_solutions) == 0:
 
@@ -30,10 +30,11 @@ def post_process_rewards(args, samples: list[Sample], **kwargs):
         for reward in rewards
     ]
     teacher_log_probs = [
-        t_log_prob[-response_length:] for t_log_prob, response_length in zip(teacher_log_probs, response_lengths)
+        t_log_prob[-response_length:]
+        for t_log_prob, response_length in zip(teacher_log_probs, response_lengths, strict=False)
     ]
 
-    for sample, t_log_probs in zip(samples, teacher_log_probs):
+    for sample, t_log_probs in zip(samples, teacher_log_probs, strict=False):
         sample.teacher_log_probs = t_log_probs
 
     return teacher_log_probs, teacher_log_probs
@@ -4,8 +4,8 @@
 
 try:
     from jinja2 import Template
-except ImportError:
-    raise ImportError("Jinja2 is required. Please install it with: pip install jinja2")
+except ImportError as e:
+    raise ImportError("Jinja2 is required. Please install it with: pip install jinja2") from e
 
 from slime.rollout.sglang_rollout import GenerateState
 from slime.utils.http_utils import post
@@ -14,8 +14,8 @@
 # Import reward models
 try:
     from slime.rollout.rm_hub.math_dapo_utils import compute_score as math_dapo_compute_score
-except ImportError:
-    raise ImportError("MathDapo is not installed")
+except ImportError as e:
+    raise ImportError("MathDapo is not installed") from e
 
 # Import tool sandbox functionality
 from tool_sandbox import SEMAPHORE, TOOL_CONFIGS, tool_registry
 
@@ -157,7 +157,7 @@ async def generate(args, sample: Sample, sampling_params) -> Sample:
     loss_mask = []
     rollout_log_probs = [] if SEARCH_R1_CONFIGS["return_logprob"] else None
 
-    for turn_idx in range(SEARCH_R1_CONFIGS["max_turns"]):
+    for _turn_idx in range(SEARCH_R1_CONFIGS["max_turns"]):
         payload = {
             "text": prompt + response,
             "sampling_params": sampling_params,
 
@@ -78,7 +78,7 @@ def is_valid_sequence(text):
     state = "start"  # start -> think -> search -> information -> think -> ... -> answer -> end
 
     # 3. Check each part
-    for i, part in enumerate(parts):
+    for _i, part in enumerate(parts):
         # Skip empty parts
         if not part.strip():
             continue
 
@@ -145,7 +145,7 @@ def compute_mis_weights(
         len(train_log_probs) == len(rollout_log_probs) == len(loss_masks)
     ), f"Input lists must have the same number of sequences: {len(train_log_probs)} vs {len(rollout_log_probs)} vs {len(loss_masks)}"
 
-    for i, (train, rollout, loss_mask) in enumerate(zip(train_log_probs, rollout_log_probs, loss_masks)):
+    for i, (train, rollout, loss_mask) in enumerate(zip(train_log_probs, rollout_log_probs, loss_masks, strict=False)):
         assert (
             train.shape == rollout.shape == loss_mask.shape
         ), f"Sequence {i}: shapes must match - train: {train.shape}, rollout: {rollout.shape}, loss_mask: {loss_mask.shape}"
@@ -164,15 +164,19 @@ def compute_log_ratio(raw_log_diff: torch.Tensor, mask: torch.Tensor, level: str
         else:
             raise ValueError(f"Invalid level: {level}")
 
-    for train_log_prob, rollout_log_prob, loss_mask in zip(train_log_probs, rollout_log_probs, loss_masks):
+    for train_log_prob, rollout_log_prob, loss_mask in zip(
+        train_log_probs, rollout_log_probs, loss_masks, strict=False
+    ):
         add_ppl_metrics(train_log_prob, rollout_log_prob, loss_mask, metrics)
 
     # only calculate mismatch metrics if TIS is not used
     if not args.use_tis:
         return None, loss_masks, metrics
 
     # handle each sequence independently
-    for train_log_prob, rollout_log_prob, loss_mask in zip(train_log_probs, rollout_log_probs, loss_masks):
+    for train_log_prob, rollout_log_prob, loss_mask in zip(
+        train_log_probs, rollout_log_probs, loss_masks, strict=False
+    ):
         loss_mask = loss_mask.float()
         raw_log_ratio_diff = train_log_prob - rollout_log_prob
         modified_mask = loss_mask.clone().float()
@@ -228,14 +232,14 @@ def compute_log_ratio(raw_log_diff: torch.Tensor, mask: torch.Tensor, level: str
         tis_level = args.tis_level if args.use_tis else "token"
         if tis_level == "token":
             # Token-level: normalize over all token weights
-            total_weights_sum = sum(masked_sum(w, m) for w, m in zip(all_weights, loss_masks))
+            total_weights_sum = sum(masked_sum(w, m) for w, m in zip(all_weights, loss_masks, strict=False))
             total_mask_count = sum(m.sum() for m in loss_masks)
             weights_mean = total_weights_sum / torch.clamp_min(total_mask_count, 1)
         elif tis_level == "sequence":
             # Sequence-level: normalize over sequence weights (one weight per sequence)
             # For each sequence, compute mean over valid tokens (they all have the same weight)
             # then average across sequences
-            seq_weights_means = [masked_mean(w, m) for w, m in zip(all_weights, loss_masks)]
+            seq_weights_means = [masked_mean(w, m) for w, m in zip(all_weights, loss_masks, strict=False)]
             weights_mean = sum(seq_weights_means) / len(seq_weights_means)
         else:
             raise ValueError(f"Unsupported tis_level: {tis_level}")
@@ -279,11 +283,15 @@ def compute_mis_weights_with_cp(
     # Gather cp slice from other cp ranks
     full_rollout_log_probs = [
         all_gather_with_cp(log_prob, total_length, response_length)
-        for log_prob, total_length, response_length in zip(rollout_log_probs, total_lengths, response_lengths)
+        for log_prob, total_length, response_length in zip(
+            rollout_log_probs, total_lengths, response_lengths, strict=False
+        )
     ]
     full_old_log_probs = [
         all_gather_with_cp(old_log_prob, total_length, response_length)
-        for old_log_prob, total_length, response_length in zip(train_log_probs, total_lengths, response_lengths)
+        for old_log_prob, total_length, response_length in zip(
+            train_log_probs, total_lengths, response_lengths, strict=False
+        )
     ]
 
     # Main logic for is (decoupled)
 
@@ -23,9 +23,16 @@ src_paths = ["slime", "slime_plugins"]
 line_length = 119
 
 [tool.ruff]
-line-length = 119
+line-length = 320  # TODO
+select = [
+    "E",      # Pycodestyle Errors (Structural/Fundamental Errors like bad indentation)
+    "F",      # Pyflakes (Core Errors: Unused imports, undefined names)
+    "B",      # Flake8-Bugbear (Logic Bugs: Variable shadowing, dangerous default arguments)
+    # "UP",     # pyupgrade (Modernization and compatibility issues) # TODO
+]
 ignore = [
-    "E402",
+    "E402", # module-import-not-at-top-of-file
+    "E501", # Line too long # TODO handle it later
 ]
 
 [tool.pytest.ini_options]
 
@@ -256,7 +256,7 @@ def _fsdp2_load_full_state_dict(self, model, full_state, device_mesh, cpu_offloa
         set_model_state_dict(model, full_state, options=options)
 
         # set_model_state_dict will not broadcast buffers, so we need to broadcast them manually.
-        for name, buf in model.named_buffers():
+        for _name, buf in model.named_buffers():
             dist.broadcast(buf, src=0)
 
         if is_cpu_offload:
@@ -476,7 +476,7 @@ def _log_rollout_data(self, rollout_id: int, rollout_data, packed_batches):
             if metric_key not in packed_batches[0]:
                 continue
             val = torch.tensor([0.0], device=torch.cuda.current_device())
-            for mbs_id, batches in enumerate(packed_batches):
+            for _mbs_id, batches in enumerate(packed_batches):
                 unpacked_batches = unpack_sequences(batches)
                 for unpacked_batch in unpacked_batches:
                     if isinstance(unpacked_batch[metric_key], torch.Tensor):
@@ -598,11 +598,11 @@ def _train_step(self, packed_batch, reported_accum, mbs_id, grad_accum):
 
             seq_kls = [
                 ((log_ratio_i * mask_i).sum() / mask_i.sum().clamp_min(1))
-                for log_ratio_i, mask_i in zip(log_ratio_splits, loss_masks)
+                for log_ratio_i, mask_i in zip(log_ratio_splits, loss_masks, strict=False)
             ]
 
             ppo_kl_list = []
-            for seq_kl, length in zip(seq_kls, response_lengths):
+            for seq_kl, length in zip(seq_kls, response_lengths, strict=False):
                 ppo_kl_list.append(seq_kl.expand(length))
 
             ppo_kl = torch.cat(ppo_kl_list)
@@ -976,7 +976,7 @@ def sum_of_sample_mean(x: torch.Tensor, response_lengths: list[int], loss_masks:
     return sum(
         [
             (x_i * loss_mask_i).sum() / torch.clamp_min(loss_mask_i.sum(), 1)
-            for x_i, loss_mask_i in zip(x.split(response_lengths, dim=0), loss_masks)
+            for x_i, loss_mask_i in zip(x.split(response_lengths, dim=0), loss_masks, strict=False)
         ]
     )
 
 
@@ -128,7 +128,7 @@ def update_bucket_weights(self, named_tensors) -> None:
 
         # Create flattened bucket for each dtype group
         serialized_tensors = []
-        for dtype, named_tensors in named_tensors_by_dtypes.items():
+        for _dtype, named_tensors in named_tensors_by_dtypes.items():
             flattened_tensor_bucket = FlattenedTensorBucket(named_tensors=named_tensors)
             metadata = flattened_tensor_bucket.get_metadata()
             flattened_tensor_data = {
@@ -241,7 +241,7 @@ def update_bucket_weights(self, named_tensors) -> None:
 
         handles = []
         # Broadcast parameters one by one with memory management
-        for name, param in named_tensors:
+        for _name, param in named_tensors:
             torch.cuda.empty_cache()
             # Ensure tensor is contiguous and on the right device
             param_data = param.data.contiguous()
Original file line number	Diff line number	Diff line change
`@@ -30,10 +30,11 @@ def post_process_rewards(args, samples: list[Sample], **kwargs):`
`30`	`30`	`for reward in rewards`
`31`	`31`	`]`
`32`	`32`	`teacher_log_probs = [`
`33`		`- t_log_prob[-response_length:] for t_log_prob, response_length in zip(teacher_log_probs, response_lengths)`
	`33`	`+ t_log_prob[-response_length:]`
	`34`	`+ for t_log_prob, response_length in zip(teacher_log_probs, response_lengths, strict=False)`
`34`	`35`	`]`
`35`	`36`
`36`		`- for sample, t_log_probs in zip(samples, teacher_log_probs):`
	`37`	`+ for sample, t_log_probs in zip(samples, teacher_log_probs, strict=False):`
`37`	`38`	`sample.teacher_log_probs = t_log_probs`
`38`	`39`
`39`	`40`	`return teacher_log_probs, teacher_log_probs`