From c2548c49b5a8e7cc58efa08507d2d9d402fabf6c Mon Sep 17 00:00:00 2001 From: ashum9 Date: Mon, 6 Apr 2026 01:43:12 +0530 Subject: [PATCH 1/4] Fix Benchmarking duplicate estimator labels --- pyaptamer/benchmarking/_base.py | 31 +++++++++++-- .../tests/test_benchmarking_core.py | 46 +++++++++++++++++++ 2 files changed, 74 insertions(+), 3 deletions(-) create mode 100644 pyaptamer/benchmarking/tests/test_benchmarking_core.py diff --git a/pyaptamer/benchmarking/_base.py b/pyaptamer/benchmarking/_base.py index 7fc1bdca..58f5f45d 100644 --- a/pyaptamer/benchmarking/_base.py +++ b/pyaptamer/benchmarking/_base.py @@ -95,6 +95,30 @@ def _to_scorers(self, metrics): scorers[name] = make_scorer(metric) return scorers + def _get_estimator_names(self): + """Return stable display names for estimators. + + If multiple estimators share the same class, append a 1-based index to keep + their result rows distinct. + """ + class_names = [estimator.__class__.__name__ for estimator in self.estimators] + class_counts = { + class_name: class_names.count(class_name) for class_name in set(class_names) + } + class_positions = {class_name: 0 for class_name in class_counts} + + names = [] + for estimator in self.estimators: + class_name = estimator.__class__.__name__ + if class_counts[class_name] == 1: + names.append(class_name) + continue + + class_positions[class_name] += 1 + names.append(f"{class_name}[{class_positions[class_name]}]") + + return names + def _to_df(self, results): """Convert nested results to a unified DataFrame.""" records = [] @@ -127,10 +151,11 @@ def run(self): """ self.scorers_ = self._to_scorers(self.metrics) results = {} + estimator_names = self._get_estimator_names() - for estimator in self.estimators: - est_name = estimator.__class__.__name__ - + for estimator, est_name in zip( + self.estimators, estimator_names, strict=False + ): cv_results = cross_validate( estimator, self.X, diff --git a/pyaptamer/benchmarking/tests/test_benchmarking_core.py b/pyaptamer/benchmarking/tests/test_benchmarking_core.py new file mode 100644 index 00000000..c0b682d1 --- /dev/null +++ b/pyaptamer/benchmarking/tests/test_benchmarking_core.py @@ -0,0 +1,46 @@ +import numpy as np +from sklearn.dummy import DummyClassifier, DummyRegressor +from sklearn.metrics import accuracy_score + +from pyaptamer.benchmarking._base import Benchmarking + + +def test_benchmarking_keeps_duplicate_estimator_classes_distinct(): + """Estimators with the same class should not overwrite one another.""" + X = np.array([[0], [1], [0], [1], [0], [1], [0], [1]]) + y = np.array([0, 1, 0, 1, 0, 1, 0, 1]) + + bench = Benchmarking( + estimators=[ + DummyClassifier(strategy="most_frequent"), + DummyClassifier(strategy="stratified", random_state=0), + ], + metrics=[accuracy_score], + X=X, + y=y, + cv=2, + ) + + summary = bench.run() + + assert ("DummyClassifier[1]", "accuracy_score") in summary.index + assert ("DummyClassifier[2]", "accuracy_score") in summary.index + assert len(summary) == 2 + + +def test_benchmarking_preserves_unique_estimator_names(): + """Different estimator classes should keep their original class names.""" + bench = Benchmarking( + estimators=[ + DummyClassifier(strategy="most_frequent"), + DummyRegressor(strategy="mean"), + ], + metrics=[accuracy_score], + X=np.array([[0], [1]]), + y=np.array([0, 1]), + cv=2, + ) + + names = bench._get_estimator_names() + + assert names == ["DummyClassifier", "DummyRegressor"] From b635024aaafa4204c027489d72cae93403b7cc39 Mon Sep 17 00:00:00 2001 From: ashum9 Date: Mon, 6 Apr 2026 02:17:10 +0530 Subject: [PATCH 2/4] MNT: tighten Benchmarking naming logic --- pyaptamer/benchmarking/_base.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/pyaptamer/benchmarking/_base.py b/pyaptamer/benchmarking/_base.py index 58f5f45d..b4adf659 100644 --- a/pyaptamer/benchmarking/_base.py +++ b/pyaptamer/benchmarking/_base.py @@ -1,6 +1,8 @@ __author__ = "satvshr" __all__ = ["Benchmarking"] +from collections import Counter + import numpy as np import pandas as pd from sklearn.metrics import make_scorer @@ -102,9 +104,7 @@ def _get_estimator_names(self): their result rows distinct. """ class_names = [estimator.__class__.__name__ for estimator in self.estimators] - class_counts = { - class_name: class_names.count(class_name) for class_name in set(class_names) - } + class_counts = Counter(class_names) class_positions = {class_name: 0 for class_name in class_counts} names = [] @@ -153,9 +153,7 @@ def run(self): results = {} estimator_names = self._get_estimator_names() - for estimator, est_name in zip( - self.estimators, estimator_names, strict=False - ): + for estimator, est_name in zip(self.estimators, estimator_names, strict=True): cv_results = cross_validate( estimator, self.X, From 45ff5b25504df4310865810c9334ef6b13050f4d Mon Sep 17 00:00:00 2001 From: ashum9 Date: Sat, 11 Apr 2026 03:00:13 +0530 Subject: [PATCH 3/4] MNT: use underscore suffix for duplicate estimator labels --- pyaptamer/benchmarking/_base.py | 6 +++--- pyaptamer/benchmarking/tests/test_benchmarking_core.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pyaptamer/benchmarking/_base.py b/pyaptamer/benchmarking/_base.py index b4adf659..ec3de8d8 100644 --- a/pyaptamer/benchmarking/_base.py +++ b/pyaptamer/benchmarking/_base.py @@ -100,8 +100,8 @@ def _to_scorers(self, metrics): def _get_estimator_names(self): """Return stable display names for estimators. - If multiple estimators share the same class, append a 1-based index to keep - their result rows distinct. + If multiple estimators share the same class, append a deterministic 1-based + suffix (e.g., ``DummyClassifier_1``) to keep their result rows distinct. """ class_names = [estimator.__class__.__name__ for estimator in self.estimators] class_counts = Counter(class_names) @@ -115,7 +115,7 @@ def _get_estimator_names(self): continue class_positions[class_name] += 1 - names.append(f"{class_name}[{class_positions[class_name]}]") + names.append(f"{class_name}_{class_positions[class_name]}") return names diff --git a/pyaptamer/benchmarking/tests/test_benchmarking_core.py b/pyaptamer/benchmarking/tests/test_benchmarking_core.py index c0b682d1..04b1548b 100644 --- a/pyaptamer/benchmarking/tests/test_benchmarking_core.py +++ b/pyaptamer/benchmarking/tests/test_benchmarking_core.py @@ -23,8 +23,8 @@ def test_benchmarking_keeps_duplicate_estimator_classes_distinct(): summary = bench.run() - assert ("DummyClassifier[1]", "accuracy_score") in summary.index - assert ("DummyClassifier[2]", "accuracy_score") in summary.index + assert ("DummyClassifier_1", "accuracy_score") in summary.index + assert ("DummyClassifier_2", "accuracy_score") in summary.index assert len(summary) == 2 From ea7cb09fae78dae020db8ed3a6eb71d07ee6847e Mon Sep 17 00:00:00 2001 From: ashum9 Date: Mon, 13 Apr 2026 14:59:05 +0530 Subject: [PATCH 4/4] MNT: satisfy ruff pre-commit in Benchmarking --- pyaptamer/benchmarking/_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyaptamer/benchmarking/_base.py b/pyaptamer/benchmarking/_base.py index ec3de8d8..18d1c870 100644 --- a/pyaptamer/benchmarking/_base.py +++ b/pyaptamer/benchmarking/_base.py @@ -105,7 +105,7 @@ def _get_estimator_names(self): """ class_names = [estimator.__class__.__name__ for estimator in self.estimators] class_counts = Counter(class_names) - class_positions = {class_name: 0 for class_name in class_counts} + class_positions = dict.fromkeys(class_counts, 0) names = [] for estimator in self.estimators: