Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 26 additions & 3 deletions pyaptamer/benchmarking/_base.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
__author__ = "satvshr"
__all__ = ["Benchmarking"]

from collections import Counter

import numpy as np
import pandas as pd
from sklearn.metrics import make_scorer
Expand Down Expand Up @@ -95,6 +97,28 @@ def _to_scorers(self, metrics):
scorers[name] = make_scorer(metric)
return scorers

def _get_estimator_names(self):
"""Return stable display names for estimators.

If multiple estimators share the same class, append a 1-based index to keep
their result rows distinct.
"""
class_names = [estimator.__class__.__name__ for estimator in self.estimators]
class_counts = Counter(class_names)
class_positions = {class_name: 0 for class_name in class_counts}

names = []
for estimator in self.estimators:
class_name = estimator.__class__.__name__
if class_counts[class_name] == 1:
names.append(class_name)
continue

class_positions[class_name] += 1
names.append(f"{class_name}[{class_positions[class_name]}]")

return names

def _to_df(self, results):
"""Convert nested results to a unified DataFrame."""
records = []
Expand Down Expand Up @@ -127,10 +151,9 @@ def run(self):
"""
self.scorers_ = self._to_scorers(self.metrics)
results = {}
estimator_names = self._get_estimator_names()

for estimator in self.estimators:
est_name = estimator.__class__.__name__

for estimator, est_name in zip(self.estimators, estimator_names, strict=True):
cv_results = cross_validate(
estimator,
self.X,
Expand Down
46 changes: 46 additions & 0 deletions pyaptamer/benchmarking/tests/test_benchmarking_core.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import numpy as np
from sklearn.dummy import DummyClassifier, DummyRegressor
from sklearn.metrics import accuracy_score

from pyaptamer.benchmarking._base import Benchmarking


def test_benchmarking_keeps_duplicate_estimator_classes_distinct():
"""Estimators with the same class should not overwrite one another."""
X = np.array([[0], [1], [0], [1], [0], [1], [0], [1]])
y = np.array([0, 1, 0, 1, 0, 1, 0, 1])

bench = Benchmarking(
estimators=[
DummyClassifier(strategy="most_frequent"),
DummyClassifier(strategy="stratified", random_state=0),
],
metrics=[accuracy_score],
X=X,
y=y,
cv=2,
)

summary = bench.run()

assert ("DummyClassifier[1]", "accuracy_score") in summary.index

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This notation isn't quite right and could be confusing in downstream usage: the user would have to make a call like `df.loc["DummyClassifier[1]"]`. I would suggest indexing along with the strategy, or something similar, so that the name looks like `DummyClassifier_1`; a name based on a pattern would be fine as well.

assert ("DummyClassifier[2]", "accuracy_score") in summary.index
assert len(summary) == 2


def test_benchmarking_preserves_unique_estimator_names():

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Was this also a problem that you solved, or are you adding a random test?

"""Different estimator classes should keep their original class names."""
bench = Benchmarking(
estimators=[
DummyClassifier(strategy="most_frequent"),
DummyRegressor(strategy="mean"),
],
metrics=[accuracy_score],
X=np.array([[0], [1]]),
y=np.array([0, 1]),
cv=2,
)

names = bench._get_estimator_names()

assert names == ["DummyClassifier", "DummyRegressor"]
Loading