def _get_estimator_names(self):
    """Return stable, unique display names for the estimators.

    An estimator whose class occurs exactly once keeps its bare class name.
    Estimators that share a class receive a deterministic 1-based suffix in
    input order (e.g., ``DummyClassifier_1``, ``DummyClassifier_2``) so their
    result rows stay distinct.

    A suffixed name that is already taken -- for instance by an estimator
    whose class is literally called ``DummyClassifier_1`` -- is skipped and
    the next free suffix is used, so the returned names never collide.

    Returns
    -------
    list of str
        One name per entry of ``self.estimators``, in the same order.
    """
    class_names = [estimator.__class__.__name__ for estimator in self.estimators]
    class_counts = Counter(class_names)

    # Bare names of singleton classes are reserved first, so a generated
    # ``<class>_<k>`` can never shadow a real class that has that exact name.
    used_names = {name for name, count in class_counts.items() if count == 1}
    last_suffix = dict.fromkeys(class_counts, 0)

    names = []
    for class_name in class_names:
        if class_counts[class_name] == 1:
            names.append(class_name)
            continue

        # Advance to the next suffix that is still free for this class.
        suffix = last_suffix[class_name] + 1
        candidate = f"{class_name}_{suffix}"
        while candidate in used_names:
            suffix += 1
            candidate = f"{class_name}_{suffix}"

        last_suffix[class_name] = suffix
        used_names.add(candidate)
        names.append(candidate)

    return names
import numpy as np
from sklearn.dummy import DummyClassifier, DummyRegressor
from sklearn.metrics import accuracy_score

from pyaptamer.benchmarking._base import Benchmarking


def test_benchmarking_keeps_duplicate_estimator_classes_distinct():
    """Two estimators of one class must each produce their own result row."""
    labels = np.tile([0, 1], 4)
    features = labels.reshape(-1, 1)

    same_class_estimators = [
        DummyClassifier(strategy="most_frequent"),
        DummyClassifier(strategy="stratified", random_state=0),
    ]
    benchmark = Benchmarking(
        estimators=same_class_estimators,
        metrics=[accuracy_score],
        X=features,
        y=labels,
        cv=2,
    )

    summary = benchmark.run()

    # One (estimator, metric) row is expected per estimator instance.
    expected_rows = [
        ("DummyClassifier_1", "accuracy_score"),
        ("DummyClassifier_2", "accuracy_score"),
    ]
    for row in expected_rows:
        assert row in summary.index
    assert len(summary) == len(expected_rows)


def test_benchmarking_preserves_unique_estimator_names():
    """Estimators of distinct classes keep their plain class names."""
    distinct_estimators = [
        DummyClassifier(strategy="most_frequent"),
        DummyRegressor(strategy="mean"),
    ]
    benchmark = Benchmarking(
        estimators=distinct_estimators,
        metrics=[accuracy_score],
        X=np.array([[0], [1]]),
        y=np.array([0, 1]),
        cv=2,
    )

    # Only the naming helper is exercised; no cross-validation is run here.
    assert benchmark._get_estimator_names() == ["DummyClassifier", "DummyRegressor"]