feat: Compare CrossValidationReports #1512
@@ -1,24 +1,31 @@
 from __future__ import annotations
 
 import time
+from collections import Counter
 from collections.abc import Iterable
-from typing import TYPE_CHECKING, Any, Literal, Optional, Union
+from typing import TYPE_CHECKING, Any, Literal, Optional, Union, cast
 
 import joblib
 import numpy as np
 from numpy.typing import ArrayLike
 
 from skore.externals._pandas_accessors import DirNamesMixin
 from skore.sklearn._base import _BaseReport
+from skore.sklearn._cross_validation.report import CrossValidationReport
 from skore.sklearn._estimator.report import EstimatorReport
 from skore.utils._progress_bar import progress_decorator
 
 if TYPE_CHECKING:
     from skore.sklearn._estimator.metrics_accessor import _MetricsAccessor
 
+ReportType = Literal["EstimatorReport", "CrossValidationReport"]
+
 
 class ComparisonReport(_BaseReport, DirNamesMixin):
-    """Report for comparison of instances of :class:`skore.EstimatorReport`.
+    """Report for comparing reports.
+
+    This object can be used to compare several :class:`skore.EstimatorReport`s, or
+    several :class:`~skore.CrossValidationReport`s.
 
     .. caution:: Reports passed to `ComparisonReport` are not copied. If you pass
        a report to `ComparisonReport`, and then modify the report outside later, it

@@ -28,13 +35,9 @@ class ComparisonReport(_BaseReport, DirNamesMixin):
 
     Parameters
     ----------
-    reports : list of :class:`~skore.EstimatorReport` instances or dict
-        Estimator reports to compare.
-
-        * If `reports` is a list, the class name of each estimator is used.
-        * If `reports` is a dict, it is expected to have estimator names as keys
-          and :class:`~skore.EstimatorReport` instances as values.
-          If the keys are not strings, they will be converted to strings.
+    reports : list of reports or dict
+        Reports to compare. If a dict, keys will be used to label the estimators;
+        if a list, the labels are computed from the estimator class names.
 
     n_jobs : int, default=None
         Number of jobs to run in parallel. Training the estimators and computing

@@ -46,11 +49,14 @@ class ComparisonReport(_BaseReport, DirNamesMixin):
 
     Attributes
     ----------
-    estimator_reports_ : list of :class:`~skore.EstimatorReport`
-        The compared estimator reports.
+    reports_ : list of :class:`~skore.EstimatorReport` or list of
+        :class:`~skore.CrossValidationReport`
+        The compared reports.
 
     report_names_ : list of str
-        The names of the compared estimator reports.
+        The names of the compared estimators. If the names are not customized (i.e. the
+        class names are used), a de-duplication process is used to make sure that the
+        names are distinct.
 
     See Also
     --------

@@ -85,80 +91,168 @@ class ComparisonReport(_BaseReport, DirNamesMixin):
     ...     y_test=y_test
     ... )
     >>> report = ComparisonReport([estimator_report_1, estimator_report_2])
+    >>> report.report_names_
+    ['LogisticRegression_1', 'LogisticRegression_2']
     >>> report = ComparisonReport(
     ...     {"model1": estimator_report_1, "model2": estimator_report_2}
     ... )
+    >>> report.report_names_
+    ['model1', 'model2']
+
+    >>> from sklearn.datasets import make_classification
+    >>> from sklearn.linear_model import LogisticRegression
+    >>> from skore import ComparisonReport, CrossValidationReport
+    >>> X, y = make_classification(random_state=42)
+    >>> estimator_1 = LogisticRegression()
+    >>> estimator_2 = LogisticRegression(C=2)  # Different regularization
+    >>> report_1 = CrossValidationReport(estimator_1, X, y)
+    >>> report_2 = CrossValidationReport(estimator_2, X, y)
+    >>> report = ComparisonReport([report_1, report_2])
+    >>> report = ComparisonReport({"model1": report_1, "model2": report_2})
Comment on lines +101 to +111

Contributor (author): The only docstring where we showcase …

Member: As docstring it is fine. I will check in which example it is natural to use it.
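For illustration, a minimal sketch (not part of the diff) of inspecting such a comparison, reusing report_1 and report_2 from the docstring above; the de-duplicated names assume both estimators report the class name LogisticRegression:

    # Sketch: with a list, labels come from the estimator class names, de-duplicated.
    comparison = ComparisonReport([report_1, report_2])
    comparison.report_names_        # e.g. ['LogisticRegression_1', 'LogisticRegression_2']

    # Sketch: with a dict, the keys are used as labels directly.
    comparison = ComparisonReport({"model1": report_1, "model2": report_2})
    comparison.report_names_        # ['model1', 'model2']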
||||||||||||
| """ | ||||||||||||
|
|
||||||||||||
| _ACCESSOR_CONFIG: dict[str, dict[str, str]] = { | ||||||||||||
| "metrics": {"name": "metrics"}, | ||||||||||||
| } | ||||||||||||
| metrics: _MetricsAccessor | ||||||||||||
|
|
||||||||||||
| def __init__( | ||||||||||||
| self, | ||||||||||||
| reports: Union[list[EstimatorReport], dict[str, EstimatorReport]], | ||||||||||||
| *, | ||||||||||||
| n_jobs: Optional[int] = None, | ||||||||||||
| ) -> None: | ||||||||||||
| """ | ||||||||||||
| ComparisonReport instance initializer. | ||||||||||||
| _reports_type: ReportType | ||||||||||||
|
|
||||||||||||
| @staticmethod | ||||||||||||
| def _validate_reports( | ||||||||||||
| reports: Union[ | ||||||||||||
| list[EstimatorReport], | ||||||||||||
| dict[str, EstimatorReport], | ||||||||||||
| list[CrossValidationReport], | ||||||||||||
| dict[str, CrossValidationReport], | ||||||||||||
| ], | ||||||||||||
| ) -> tuple[ | ||||||||||||
| Union[list[EstimatorReport], list[CrossValidationReport]], | ||||||||||||
| list[str], | ||||||||||||
| ReportType, | ||||||||||||
| ]: | ||||||||||||
||||||||||||
| """Validate that reports are in the right format for comparison. | ||||||||||||
|
|
||||||||||||
| Notes | ||||||||||||
| ----- | ||||||||||||
| We check that the estimator reports can be compared: | ||||||||||||
| - all reports are estimator reports, | ||||||||||||
| - all estimators are in the same ML use case, | ||||||||||||
| - all estimators have non-empty X_test and y_test, | ||||||||||||
| - all estimators have the same X_test and y_test. | ||||||||||||
| Parameters | ||||||||||||
| ---------- | ||||||||||||
| reports : list of reports or dict | ||||||||||||
| The reports to be validated. | ||||||||||||
|
|
||||||||||||
| Returns | ||||||||||||
| ------- | ||||||||||||
| list of EstimatorReport or list of CrossValidationReport | ||||||||||||
| The validated reports. | ||||||||||||
| list of str | ||||||||||||
| The report names, either taken from dict keys or computed from the estimator | ||||||||||||
| class names. | ||||||||||||
| {"EstimatorReport", "CrossValidationReport"} | ||||||||||||
| The inferred type of the reports that will be compared. | ||||||||||||
| """ | ||||||||||||
| if not isinstance(reports, Iterable): | ||||||||||||
| raise TypeError(f"Expected reports to be an iterable; got {type(reports)}") | ||||||||||||
| raise TypeError( | ||||||||||||
| f"Expected reports to be a list or dict; got {type(reports)}" | ||||||||||||
| ) | ||||||||||||
|
|
||||||||||||
| if len(reports) < 2: | ||||||||||||
| raise ValueError("At least 2 instances of EstimatorReport are needed") | ||||||||||||
| raise ValueError( | ||||||||||||
| f"Expected at least 2 reports to compare; got {len(reports)}" | ||||||||||||
| ) | ||||||||||||
|
|
||||||||||||
| report_names = ( | ||||||||||||
| list(map(str, reports.keys())) if isinstance(reports, dict) else None | ||||||||||||
| ) | ||||||||||||
| reports = list(reports.values()) if isinstance(reports, dict) else reports | ||||||||||||
| if isinstance(reports, list): | ||||||||||||
| report_names = None | ||||||||||||
| reports_list = reports | ||||||||||||
| else: # dict | ||||||||||||
| report_names = list(reports.keys()) | ||||||||||||
| for key in report_names: | ||||||||||||
| if not isinstance(key, str): | ||||||||||||
| raise TypeError( | ||||||||||||
| f"Expected all report names to be strings; got {type(key)}" | ||||||||||||
| ) | ||||||||||||
| reports_list = cast( | ||||||||||||
| Union[list[EstimatorReport], list[CrossValidationReport]], | ||||||||||||
| list(reports.values()), | ||||||||||||
| ) | ||||||||||||
|
|
||||||||||||
| if not all(isinstance(report, EstimatorReport) for report in reports): | ||||||||||||
| raise TypeError("Expected instances of EstimatorReport") | ||||||||||||
| reports_type: ReportType | ||||||||||||
| if all(isinstance(report, EstimatorReport) for report in reports_list): | ||||||||||||
| reports_list = cast(list[EstimatorReport], reports_list) | ||||||||||||
| reports_type = "EstimatorReport" | ||||||||||||
|
|
||||||||||||
| # FIXME: We should only check y_test since it is all we need to tell us | ||||||||||||
| # whether we have a distinct ML task at hand. | ||||||||||||
| test_dataset_hashes = { | ||||||||||||
|
Member: Can you add a FIXME: "We should only check y_test since it is the variable that tells us whether we have a distinct ML task at hand."

Contributor (author): Same, this constraint of "same test data" is not completely necessary to me.

Contributor (author): Do you want to just do the change? (Suggested change)

Contributor (author): Added comment.

Member: Yes exactly. However, I would potentially make it in a subsequent PR for sure.
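To make the constraint concrete, a minimal sketch (not from the diff; it reuses the train/test variables from the cache_predictions docstring example, and the slicing is an arbitrary illustration) of the error raised when two EstimatorReports carry different test sets:

    # Sketch: reports evaluated on different test data cannot be compared.
    report_a = EstimatorReport(
        LogisticRegression(), X_train=X_train, y_train=y_train,
        X_test=X_test[:10], y_test=y_test[:10],
    )
    report_b = EstimatorReport(
        LogisticRegression(), X_train=X_train, y_train=y_train,
        X_test=X_test[10:], y_test=y_test[10:],
    )
    ComparisonReport([report_a, report_b])
    # ValueError: Expected all estimators to have the same testing data.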
+                joblib.hash((report.X_test, report.y_test))
+                for report in reports_list
+                if not ((report.X_test is None) and (report.y_test is None))
+            }
+            if len(test_dataset_hashes) > 1:
+                raise ValueError(
+                    "Expected all estimators to have the same testing data."
+                )
+
+        elif all(isinstance(report, CrossValidationReport) for report in reports_list):
+            reports_list = cast(list[CrossValidationReport], reports_list)
+            reports_type = "CrossValidationReport"
+        else:
+            raise TypeError(
+                f"Expected list or dict of {EstimatorReport.__name__} "
+                f"or list of dict of {CrossValidationReport.__name__}"
+            )
 
-        test_dataset_hashes = {
-            joblib.hash((report.X_test, report.y_test))
-            for report in reports
-            if not ((report.X_test is None) and (report.y_test is None))
-        }
-        if len(test_dataset_hashes) > 1:
-            raise ValueError("Expected all estimators to have the same testing data.")
+        if len(set(id(report) for report in reports_list)) < len(reports_list):
+            raise ValueError("Expected reports to be distinct objects")
Comment on lines +202 to +203

Contributor (author): This new constraint results from #1536; indeed, if the same report is passed twice, the progress bar is reset at the end of the first CVReport computation (see below), so when the second CVReport computation starts, the progress object is set to what is assigned in src/skore/utils/_progress_bar.py, lines 85 to 86 (at 836f171).

Contributor (author): Note that right now this constraint is only really useful when comparing CVReports, not when comparing EstimatorReports. I didn't put it in the if-block because I can imagine that one day EstimatorReports might have more progress bars of their own.
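A minimal sketch (not from the diff) of the guard in action, reusing report_1, estimator_1, X and y from the class docstring example above:

    # Sketch: the same report object passed twice is rejected, since the shared
    # progress-bar state would be reset by the first run and reused by the second.
    ComparisonReport([report_1, report_1])
    # ValueError: Expected reports to be distinct objects

    # Two separately constructed reports are distinct objects, so this is allowed.
    ComparisonReport([
        CrossValidationReport(estimator_1, X, y),
        CrossValidationReport(LogisticRegression(), X, y),
    ])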
 
-        ml_tasks = {report: report._ml_task for report in reports}
+        ml_tasks = {report: report._ml_task for report in reports_list}
         if len(set(ml_tasks.values())) > 1:
             raise ValueError(
                 f"Expected all estimators to have the same ML usecase; got {ml_tasks}"
             )
 
         if report_names is None:
-            self.report_names_ = [report.estimator_name_ for report in reports]
+            deduped_report_names = _deduplicate_report_names(
+                [report.estimator_name_ for report in reports_list]
+            )
         else:
-            self.report_names_ = report_names
+            deduped_report_names = report_names
 
+        return reports_list, deduped_report_names, reports_type
 
-        self.estimator_reports_ = reports
+    def __init__(
+        self,
+        reports: Union[
+            list[EstimatorReport],
+            dict[str, EstimatorReport],
+            list[CrossValidationReport],
+            dict[str, CrossValidationReport],
+        ],
+        *,
+        n_jobs: Optional[int] = None,
+    ) -> None:
+        """
+        ComparisonReport instance initializer.
+
+        Notes
+        -----
+        We check that the estimator reports can be compared:
+        - all reports are estimator reports,
+        - all estimators are in the same ML use case,
+        - all estimators have non-empty X_test and y_test,
+        - all estimators have the same X_test and y_test.
+        """
+        self.reports_, self.report_names_, self._reports_type = (
+            ComparisonReport._validate_reports(reports)
+        )
 
         # used to know if a parent launches a progress bar manager
         self._progress_info: Optional[dict[str, Any]] = None
         self._parent_progress = None
 
         # NEEDED FOR METRICS ACCESSOR
         self.n_jobs = n_jobs
         self._rng = np.random.default_rng(time.time_ns())
         self._hash = self._rng.integers(
             low=np.iinfo(np.int64).min, high=np.iinfo(np.int64).max
         )
         self._cache: dict[tuple[Any, ...], Any] = {}
-        self._ml_task = self.estimator_reports_[0]._ml_task
+        self._ml_task = self.reports_[0]._ml_task
 
     def clear_cache(self) -> None:
         """Clear the cache.
@@ -193,7 +287,7 @@ def clear_cache(self) -> None:
         >>> report._cache
         {}
         """
-        for report in self.estimator_reports_:
+        for report in self.reports_:
             report.clear_cache()
         self._cache = {}
 
@@ -222,7 +316,7 @@ def cache_predictions(
         >>> from sklearn.datasets import make_classification
         >>> from sklearn.linear_model import LogisticRegression
         >>> from sklearn.model_selection import train_test_split
-        >>> from skore import ComparisonReport
+        >>> from skore import ComparisonReport, EstimatorReport
         >>> X, y = make_classification(random_state=42)
         >>> X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
         >>> estimator_1 = LogisticRegression()
@@ -255,22 +349,21 @@ def cache_predictions(
         progress = self._progress_info["current_progress"]
         main_task = self._progress_info["current_task"]
 
-        total_estimators = len(self.estimator_reports_)
+        total_estimators = len(self.reports_)
         progress.update(main_task, total=total_estimators)
 
-        for estimator_report in self.estimator_reports_:
+        for report in self.reports_:
             # Pass the progress manager to child tasks
-            estimator_report._parent_progress = progress
-            estimator_report.cache_predictions(
-                response_methods=response_methods, n_jobs=n_jobs
-            )
+            report._parent_progress = progress
+            report.cache_predictions(response_methods=response_methods, n_jobs=n_jobs)
             progress.update(main_task, advance=1, refresh=True)
 
     def get_predictions(
         self,
         *,
         data_source: Literal["train", "test", "X_y"],
         response_method: Literal["predict", "predict_proba", "decision_function"],
+        X: Optional[ArrayLike] = None,
         pos_label: Optional[Any] = None,
     ) -> ArrayLike:
         """Get estimator's predictions.
@@ -290,6 +383,10 @@ def get_predictions(
         response_method : {"predict", "predict_proba", "decision_function"}
             The response method to use.
 
+        X : array-like of shape (n_samples, n_features), optional
+            When `data_source` is "X_y", the input features on which to compute the
+            response method.
+
         pos_label : int, float, bool or str, default=None
             The positive class when it comes to binary classification. When
             `response_method="predict_proba"`, it will select the column corresponding

@@ -343,9 +440,10 @@ def get_predictions(
             report.get_predictions(
                 data_source=data_source,
                 response_method=response_method,
+                X=X,
                 pos_label=pos_label,
             )
-            for report in self.estimator_reports_
+            for report in self.reports_
         ]
 
     ####################################################################################
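For context, a hedged usage sketch of the extended signature (not from the diff; `comparison` is an already-built ComparisonReport and `X_external` is a hypothetical array of shape (n_samples, n_features)); per the list comprehension above, one set of predictions is returned per compared report:

    # Sketch: predictions on each report's stored test set.
    predictions = comparison.get_predictions(
        data_source="test", response_method="predict"
    )

    # Sketch: with data_source="X_y", the new `X` argument supplies the features.
    external_predictions = comparison.get_predictions(
        data_source="X_y", response_method="predict", X=X_external
    )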
@@ -363,3 +461,47 @@ def _get_help_legend(self) -> str:
     def __repr__(self) -> str:
         """Return a string representation."""
         return f"{self.__class__.__name__}(...)"
+
+
+def _deduplicate_report_names(report_names: list[str]) -> list[str]:
+    """De-duplicate report names that appear several times.
+
+    Leave the other report names alone.
+
+    Parameters
+    ----------
+    report_names : list of str
+        The list of report names to be checked.
+
+    Returns
+    -------
+    list of str
+        The de-duplicated list of report names.
+
+    Examples
+    --------
+    >>> _deduplicate_report_names(['a', 'b'])
+    ['a', 'b']
+    >>> _deduplicate_report_names(['a', 'a'])
+    ['a_1', 'a_2']
+    >>> _deduplicate_report_names(['a', 'b', 'a'])
+    ['a_1', 'b', 'a_2']
+    >>> _deduplicate_report_names(['a', 'b', 'a', 'b'])
+    ['a_1', 'b_1', 'a_2', 'b_2']
+    >>> _deduplicate_report_names([])
+    []
+    >>> _deduplicate_report_names(['a'])
+    ['a']
+    """
+    counts = Counter(report_names)
+    if len(report_names) == len(counts):
+        return report_names
+
+    names = report_names.copy()
+    seen: Counter = Counter()
+    for i in range(len(names)):
+        name = names[i]
+        seen[name] += 1
+        if counts[name] > 1:
+            names[i] = f"{name}_{seen[name]}"
+    return names