
Commit dc2e583

Enable flake8-bugbear (#530)
1 parent b42d95b commit dc2e583

12 files changed: +39 -35 lines changed

benchmark/data_frame_text_benchmark.py (+1 -1)

@@ -529,7 +529,7 @@ def main_torch(
                 "Currently Trompt with finetuning is too expensive")
         model_cls = Trompt
         stype_encoder_dicts = []
-        for i in range(train_cfg["num_layers"]):
+        for _ in range(train_cfg["num_layers"]):
             stype_encoder_dicts.append(
                 get_stype_encoder_dict(text_stype, text_encoder,
                                        train_tensor_frame))
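
This is flake8-bugbear's B007 fix ("loop control variable not used within the loop body"): the counter is never read, so renaming it to `_` makes that explicit. A self-contained sketch of the pattern (hypothetical values, not repository code):

    stype_encoder_dicts = []
    num_layers = 3  # hypothetical stand-in for train_cfg["num_layers"]
    for _ in range(num_layers):         # `_` marks the value as deliberately unused
        stype_encoder_dicts.append({})  # stand-in for get_stype_encoder_dict(...)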

pyproject.toml (+6 -7)

@@ -70,7 +70,13 @@ name="torch_frame"
 
 [tool.ruff] # https://docs.astral.sh/ruff/rules
 target-version = "py39"
+src = ["torch_frame", "test", "examples", "benchmark"]
+line-length = 80
+indent-width = 4
+
+[tool.ruff.lint]
 select = [
+    "B", # flake8-bugbear
     "D", # pydocstyle
     "UP", # pyupgrade
 ]
@@ -83,13 +89,6 @@ ignore = [
     "D107", # Ignore "Missing docstring in __init__"
     "D205", # Ignore "blank line required between summary line and description"
 ]
-src = ["torch_frame"]
-line-length = 80
-indent-width = 4
-
-# [tool.ruff.per-files-ignores]
-
-
 
 [tool.ruff.lint.pydocstyle]
 convention = "google"
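
Lint-specific options (`select` and the `ignore` list) now live under `[tool.ruff.lint]`, where current Ruff releases expect them, and adding "B" to `select` enables the flake8-bugbear rule family for the listed source trees; the remaining file diffs in this commit are the resulting per-rule fixes. As a rough illustration (a sketch, not repository code), this is the kind of pattern the B rules catch, and a deliberate exception can still be kept with an inline suppression:

    import time


    # B008: function call in an argument default, evaluated only once at
    # definition time rather than on every call.
    def log(message: str, when: float = time.time()) -> None:
        print(when, message)


    # Keep a single, intentional occurrence with an inline suppression:
    def log_at_import(message: str, when: float = time.time()) -> None:  # noqa: B008
        print(when, message)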

torch_frame/config/image_embedder.py (-3)

@@ -17,9 +17,6 @@ class ImageEmbedder(ABC):
     override :meth:`forward_retrieve` which takes the paths to images and
     return a list of :obj:`PIL.Image.Image`.
     """
-    def __init__(self, *args, **kwargs):
-        pass
-
     def forward_retrieve(self, path_to_images: list[str]) -> list[Image.Image]:
         r"""Retrieval function that reads a list of images from
         a list of file paths with the :obj:`RGB` mode.
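
Dropping the do-nothing `__init__` is most likely a response to bugbear's B027 check, which flags empty, non-abstract methods on abstract base classes; either way, the method added nothing over the inherited `object.__init__`. A condensed sketch of the rule (hypothetical class, not repository code):

    from abc import ABC, abstractmethod


    class Embedder(ABC):
        def reset(self) -> None:  # B027: empty method in an ABC with no @abstractmethod
            pass

        @abstractmethod
        def embed(self, paths: list) -> list:
            ...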

torch_frame/data/dataset.py (+2 -3)

@@ -3,7 +3,6 @@
 import copy
 import functools
 import os.path as osp
-from abc import ABC
 from collections import defaultdict
 from typing import Any
 
@@ -324,7 +323,7 @@ def __call__(
         return self._merge_feat(tf)
 
 
-class Dataset(ABC):
+class Dataset:
     r"""A base class for creating tabular datasets.
 
     Args:
@@ -382,7 +381,7 @@ def __init__(
         col_to_image_embedder_cfg: dict[str, ImageEmbedderConfig]
         | ImageEmbedderConfig | None = None,
         col_to_time_format: str | None | dict[str, str | None] = None,
-    ):
+    ) -> None:
         self.df = df
         self.target_col = target_col
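
`Dataset` apparently declares no `@abstractmethod`, so inheriting from `ABC` trips bugbear's B024 ("abstract base class has no abstract methods"); dropping the base class and its now-unused import clears the warning without changing behavior, and the `-> None` annotation on `__init__` is a small typing addition. A condensed sketch of B024 (hypothetical classes):

    from abc import ABC


    class Base(ABC):  # B024: claims to be abstract, but nothing here is abstract
        def run(self) -> None:
            print("running")


    class PlainBase:  # a plain class states the same intent more honestly
        def run(self) -> None:
            print("running")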

torch_frame/data/multi_embedding_tensor.py (+2 -2)

@@ -194,7 +194,7 @@ def _single_index_select(
                 values=values,
                 offset=offset,
             )
-        assert False, "Should not reach here."
+        raise AssertionError("Should not reach here.")
 
     def fillna_col(
         self,
@@ -290,4 +290,4 @@ def cat(
         offset = torch.tensor(offset_list)
         return MultiEmbeddingTensor(num_rows, num_cols, values, offset)
 
-        assert False, "Should not reach here."
+        raise AssertionError("Should not reach here.")

torch_frame/data/multi_tensor.py (+4 -4)

@@ -59,7 +59,7 @@ def size(self, dim: int) -> int:
             return self.num_rows
         elif dim == 1:
             return self.num_cols
-        assert False, "Should not reach here."
+        raise AssertionError("Should not reach here.")
 
     def dim(self) -> int:
         return self.ndim
@@ -243,7 +243,7 @@ def index_select(self, index: Tensor, dim: int) -> _MultiTensor:
             return self._row_index_select(idx)
         elif dim == 1:
             return self._col_index_select(idx)
-        assert False, "Should not reach here."
+        raise AssertionError("Should not reach here.")
 
     def _row_index_select(self, index: Tensor) -> _MultiTensor:
         raise NotImplementedError
@@ -300,7 +300,7 @@ def narrow(self, dim: int, start: int, length: int) -> _MultiTensor:
             return self._row_narrow(start, length)
         elif dim == 1:
             return self._col_narrow(start, length)
-        assert False, "Should not reach here."
+        raise AssertionError("Should not reach here.")
 
     def _row_narrow(self, start: int, length: int) -> _MultiTensor:
         raise NotImplementedError
@@ -339,7 +339,7 @@ def select(
             torch.tensor(index, dtype=torch.long, device=self.device),
             dim=dim,
         )
-        assert False, "Should not reach here."
+        raise AssertionError("Should not reach here.")
 
     def _single_index_select(self, index: int, dim: int) -> _MultiTensor:
         raise NotImplementedError

torch_frame/datasets/data_frame_benchmark.py (+1 -1)

@@ -777,7 +777,7 @@ def __init__(
 
         # Check the scale
         if dataset.num_rows < 5000:
-            assert False
+            raise AssertionError()
         elif dataset.num_rows < 50000:
             assert scale == "small"
         elif dataset.num_rows < 500000:

torch_frame/datasets/fake.py (+3 -2)

@@ -58,7 +58,7 @@ def __init__(
         self,
         num_rows: int,
         with_nan: bool = False,
-        stypes: list[stype] = [stype.categorical, stype.numerical],
+        stypes: list[stype] | None = None,
         create_split: bool = False,
         task_type: TaskType = TaskType.REGRESSION,
         col_to_text_embedder_cfg: dict[str, TextEmbedderConfig]
@@ -69,6 +69,7 @@ def __init__(
         | ImageEmbedderConfig | None = None,
         tmp_path: str | None = None,
     ) -> None:
+        stypes = stypes or [stype.categorical, stype.numerical]
         assert len(stypes) > 0
         df_dict: dict[str, list | np.ndarray]
         arr: list | np.ndarray
@@ -137,7 +138,7 @@ def __init__(
         if stype.sequence_numerical in stypes:
             for col_name in ['seq_num_1', 'seq_num_2']:
                 arr = []
-                for i in range(num_rows):
+                for _ in range(num_rows):
                     sequence_length = random.randint(1, 5)
                     sequence = [
                         random.random() for _ in range(sequence_length)
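
The signature change is the textbook bugbear B006 fix: a mutable default such as `[stype.categorical, stype.numerical]` is built once at function-definition time and shared by every call, so a caller that mutates it affects all later calls. Defaulting to `None` and resolving it in the body gives each call a fresh list; the `for i` -> `for _` change further down is the B007 cleanup described earlier. A self-contained sketch of the hazard:

    def append_bad(value, acc=[]):   # B006: one list object reused across calls
        acc.append(value)
        return acc


    def append_good(value, acc=None):
        acc = acc or []              # mirrors `stypes = stypes or [...]` above
        acc.append(value)
        return acc


    print(append_bad(1), append_bad(2))    # [1, 2] [1, 2] -- shared state leaks
    print(append_good(1), append_good(2))  # [1] [2]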

torch_frame/datasets/huggingface_dataset.py (+3 -2)

@@ -86,8 +86,9 @@ def __init__(
     ) -> None:
         try:
             from datasets import DatasetDict, load_dataset
-        except ImportError: # pragma: no cover
-            raise ImportError("Please run `pip install datasets` at first.")
+        except ImportError as e: # pragma: no cover
+            raise ImportError(
+                "Please run `pip install datasets` first.") from e
         dataset = load_dataset(path, name=name)
         if not isinstance(dataset, DatasetDict):
             raise ValueError(f"{self.__class__} only supports `DatasetDict`")

torch_frame/gbdt/gbdt.py (+9 -3)

@@ -68,8 +68,14 @@ def is_fitted(self) -> bool:
         r"""Whether the GBDT is already fitted."""
         return self._is_fitted
 
-    def tune(self, tf_train: TensorFrame, tf_val: TensorFrame, num_trials: int,
-             *args, **kwargs):
+    def tune(
+        self,
+        tf_train: TensorFrame,
+        tf_val: TensorFrame,
+        num_trials: int,
+        *args,
+        **kwargs,
+    ) -> None:
         r"""Fit the model by performing hyperparameter tuning using Optuna. The
         number of trials is specified by num_trials.
 
@@ -85,7 +91,7 @@ def tune(self, tf_train: TensorFrame, tf_val: TensorFrame, num_trials: int,
             raise RuntimeError("tf_train.y must be a Tensor, but None given.")
         if tf_val.y is None:
             raise RuntimeError("tf_val.y must be a Tensor, but None given.")
-        self._tune(tf_train, tf_val, num_trials=num_trials, *args, **kwargs)
+        self._tune(tf_train, tf_val, *args, num_trials=num_trials, **kwargs)
         self._is_fitted = True
 
     def predict(self, tf_test: TensorFrame) -> Tensor:
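
The call-site change fixes bugbear's B026 ("star-arg unpacking after a keyword argument"): writing `num_trials=num_trials, *args` is legal, but the unpacked positionals are still placed before the keyword when the call is evaluated, which misreads easily and can even raise "got multiple values" errors when the keyword also matches a positional parameter; putting `*args` first matches the real evaluation order. The exploded signature and `-> None` are formatting and typing touch-ups. A small sketch (hypothetical function with a keyword-only `num_trials` for clarity):

    def tune(a, b, *args, num_trials=None, **kwargs):
        return a, b, args, num_trials, kwargs


    # Discouraged (B026): star-arg unpacking written after a keyword argument.
    #   tune(1, 2, num_trials=5, *(3, 4))
    # The equivalent call, written in evaluation order:
    print(tune(1, 2, *(3, 4), num_trials=5))  # (1, 2, (3, 4), 5, {})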

torch_frame/utils/infer_stype.py (+3 -3)

@@ -150,9 +150,9 @@ def infer_series_stype(ser: Series) -> stype | None:
             try:
                 min_count_list.append(
                     _min_count(
-                        ser.apply(
-                            lambda row: MultiCategoricalTensorMapper.
-                            split_by_sep(row, sep)).explode()))
+                        ser.apply(lambda row, sep=sep:
+                                  MultiCategoricalTensorMapper.
+                                  split_by_sep(row, sep)).explode()))
             except Exception as e:
                 logging.warn(
                     "Mapping series into multicategorical stype "

torch_frame/utils/io.py (+5 -4)

@@ -110,11 +110,12 @@ def load(
                           "compatible in your case.")
         match = re.search(r'add_safe_globals\(.*?\)', error_msg)
         if match is not None:
-            warnings.warn(f"{warn_msg} Please use "
-                          f"`torch.serialization.{match.group()}` to "
-                          f"allowlist this global.")
+            warnings.warn(
+                f"{warn_msg} Please use "
+                f"`torch.serialization.{match.group()}` to "
+                f"allowlist this global.", stacklevel=2)
         else:
-            warnings.warn(warn_msg)
+            warnings.warn(warn_msg, stacklevel=2)
 
         tf_dict, col_stats = torch.load(path, weights_only=False)
     else:
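
Passing `stacklevel=2` addresses bugbear's B028 ("no explicit stacklevel" on `warnings.warn`): by default the warning is reported against the line inside the library that issued it, whereas `stacklevel=2` attributes it to the caller, which is usually the location a user actually needs to see. A minimal sketch:

    import warnings


    def load(path: str) -> None:
        # With stacklevel=2 the emitted warning points at the caller's line,
        # not at this line inside load().
        warnings.warn(f"Falling back to an unsafe load for {path!r}.", stacklevel=2)


    load("model.pt")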
