Commit 44cd390
fix: handle sklearn deprecations

- `needs_proba` and `needs_threshold` arguments for `make_scorer()` are deprecated in favor of `response_method`.
- `LinearSVC` and `LinearSVR` now need to have `dual` explicitly set to "auto".
- `AdaBoostClassifier` now needs to have `algorithm` set to "SAMME".
- Update custom metrics documentation.
- Update tests and test data.
1 parent 6ca87a2 commit 44cd390
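
In scikit-learn terms, the first bullet boils down to the following scorer migration (a minimal sketch, not code from this commit; average_precision_score stands in for any metric that needs probabilities):

from sklearn.metrics import average_precision_score, make_scorer

# Before (now deprecated): request probability estimates with a boolean flag
# scorer = make_scorer(average_precision_score, needs_proba=True)

# After: name the estimator method that should produce the predictions
scorer = make_scorer(average_precision_score, response_method="predict_proba")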

File tree

7 files changed (+38, -23 lines)

doc/custom_metrics.rst (+16, -5)

@@ -12,15 +12,27 @@ Writing Custom Metric Functions
 
 First, let's look at how to write valid custom metric functions. A valid custom metric function
 must take two array-like positional arguments: the first being the true labels or scores, and the
-second being the predicted labels or scores. This function can also take three optional keyword arguments:
+second being the predicted labels or scores. This function can also take two optional keyword arguments:
 
 1. ``greater_is_better``: a boolean keyword argument that indicates whether a higher value of the metric indicates better performance (``True``) or vice versa (``False``). The default value is ``True``.
-2. ``needs_proba``: a boolean keyword argument that indicates whether the metric function requires probability estimates. The default value is ``False``.
-3. ``needs_threshold``: a boolean keyword argument that indicates whether the metric function takes a continuous decision certainty. The default value is ``False``.
+
+2. ``response_method``: a string keyword argument that specifies the response method to use to get predictions from an estimator. Possible values are:
+
+   - ``"predict"``: uses the estimator's `predict() <https://scikit-learn.org/stable/glossary.html#term-predict>`__ method to get class labels
+   - ``"predict_proba"``: uses the estimator's `predict_proba() <https://scikit-learn.org/stable/glossary.html#term-predict_proba>`__ method to get class probabilities
+   - ``"decision_function"``: uses the estimator's `decision_function() <https://scikit-learn.org/stable/glossary.html#term-decision_function>`__ method to get continuous decision function values
+   - If the value is a list or tuple of the above strings, the scorer uses the first method in the list that the estimator implements.
+   - If the value is ``None``, it is the same as ``"predict"``.
+
+   The default value for ``response_method`` is ``None``.
 
 Note that these keyword arguments are identical to the keyword arguments for the `sklearn.metrics.make_scorer() <https://scikit-learn.org/stable/modules/generated/sklearn.metrics.make_scorer.html#sklearn.metrics.make_scorer>`_ function and serve the same purpose.
 
-In short, custom metric functions take two required positional arguments (order matters) and three optional keyword arguments. Here's a simple example of a custom metric function: F\ :sub:`β` with β=0.75 defined in a file called ``custom.py``.
+.. important::
+
+    Previous versions of SKLL offered the ``needs_proba`` and ``needs_threshold`` keyword arguments for custom metrics, but these are now deprecated in scikit-learn and replaced by the ``response_method`` keyword argument. To replicate the behavior of ``needs_proba=True``, use ``response_method="predict_proba"``; to replicate ``needs_threshold=True``, use ``response_method=("decision_function", "predict_proba")``.
+
+In short, custom metric functions take two required positional arguments (order matters) and two optional keyword arguments. Here's a simple example of a custom metric function: F\ :sub:`β` with β=0.75 defined in a file called ``custom.py``.
 
 .. code-block:: python
    :caption: custom.py
@@ -30,7 +42,6 @@ In short, custom metric functions take two required positional arguments (order
 def f075(y_true, y_pred):
     return fbeta_score(y_true, y_pred, beta=0.75)
 
-
 Obviously, you may write much more complex functions that aren't directly
 available in scikit-learn. Once you have written your metric function, the next
 step is to use it in your SKLL experiment.
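
Putting the documented keyword arguments to use, a probability-based custom metric would now be written as follows (a sketch; the file and function names are invented, not part of this commit):

# custom_prob.py: hypothetical custom SKLL metric that declares, via its
# response_method default, that it wants probability estimates
from sklearn.metrics import average_precision_score


def avg_precision(y_true, y_pred, response_method="predict_proba"):
    # y_pred holds probabilities because the generated scorer calls the
    # estimator's predict_proba() method
    return average_precision_score(y_true, y_pred)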

skll/learner/__init__.py (+6, -2)

@@ -252,10 +252,12 @@ def __init__(
                 "produce probabilities, results will not be exactly "
                 "replicable when using SVC and probability mode."
             )
+        elif issubclass(self._model_type, AdaBoostClassifier):
+            self._model_kwargs["algorithm"] = "SAMME"
+            self._model_kwargs["n_estimators"] = 500
         elif issubclass(
             self._model_type,
             (
-                AdaBoostClassifier,
                 AdaBoostRegressor,
                 BaggingClassifier,
                 BaggingRegressor,
@@ -268,6 +270,8 @@ def __init__(
             self._model_kwargs["n_estimators"] = 500
         elif issubclass(self._model_type, DummyClassifier):
             self._model_kwargs["strategy"] = "prior"
+        elif issubclass(self._model_type, (LinearSVC, LinearSVR)):
+            self._model_kwargs["dual"] = "auto"
         elif issubclass(self._model_type, SVR):
             self._model_kwargs["cache_size"] = 1000
             self._model_kwargs["gamma"] = "scale"
@@ -950,7 +954,7 @@ def train(
             metrics_module = import_module("skll.metrics")
             metric_func = getattr(metrics_module, "correlation")
             _CUSTOM_METRICS[new_grid_objective] = make_scorer(
-                metric_func, corr_type=grid_objective, needs_proba=True
+                metric_func, corr_type=grid_objective, response_method="predict_proba"
             )
             grid_objective = new_grid_objective
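
Outside of SKLL, these new defaults correspond directly to the following scikit-learn constructor calls (shown only to make the change concrete):

from sklearn.ensemble import AdaBoostClassifier
from sklearn.svm import LinearSVC, LinearSVR

# "SAMME.R" is deprecated, so the boosting algorithm is pinned explicitly
clf = AdaBoostClassifier(algorithm="SAMME", n_estimators=500)

# dual="auto" lets sklearn choose the primal or dual formulation
svc = LinearSVC(dual="auto")
svr = LinearSVR(dual="auto")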

skll/metrics.py (+1, -1)

@@ -298,7 +298,7 @@ def register_custom_metric(custom_metric_path: PathOrStr, custom_metric_name: str
     # extract any "special" keyword arguments from the metric function
     metric_func_parameters = signature(metric_func).parameters
     make_scorer_kwargs = {}
-    for make_scorer_kwarg in ["greater_is_better", "needs_proba", "needs_threshold"]:
+    for make_scorer_kwarg in ["greater_is_better", "response_method"]:
         if make_scorer_kwarg in metric_func_parameters:
             parameter = metric_func_parameters.get(make_scorer_kwarg)
             if parameter is not None:
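
The loop above reads scorer settings off the metric function's own signature; here is a self-contained sketch of that pattern (the example metric is made up):

from inspect import Parameter, signature


def example_metric(y_true, y_pred, greater_is_better=False,
                   response_method="predict_proba"):
    return 0.0  # placeholder body; only the signature matters here


make_scorer_kwargs = {}
for make_scorer_kwarg in ["greater_is_better", "response_method"]:
    parameter = signature(example_metric).parameters.get(make_scorer_kwarg)
    # keep only keyword arguments that declare an explicit default value
    if parameter is not None and parameter.default is not Parameter.empty:
        make_scorer_kwargs[make_scorer_kwarg] = parameter.default

print(make_scorer_kwargs)
# -> {'greater_is_better': False, 'response_method': 'predict_proba'}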

tests/configs/test_send_warnings_to_log.template.cfg (+1, -1)

@@ -4,7 +4,7 @@ task=cross_validate
 
 [Input]
 featuresets=[["test_send_warnings_to_log"]]
-learners=["LinearSVC"]
+learners=["DummyClassifier"]
 suffix=.jsonlines
 num_cv_folds=2

tests/other/custom_metrics.py (+8, -7)

@@ -1,3 +1,4 @@
+"""Custom metrics for testing purposes."""
 from sklearn.metrics import (
     average_precision_score,
     f1_score,
@@ -8,31 +9,31 @@
 )
 
 
-def f075_macro(y_true, y_pred):
+def f075_macro(y_true, y_pred):  # noqa: D103
     return fbeta_score(y_true, y_pred, beta=0.75, average="macro")
 
 
-def ratio_of_ones(y_true, y_pred):
+def ratio_of_ones(y_true, y_pred):  # noqa: D103
     true_ones = [label for label in y_true if label == 1]
     pred_ones = [label for label in y_pred if label == 1]
     return len(pred_ones) / (len(true_ones) + len(pred_ones))
 
 
-def r2(y_true, y_pred):
+def r2(y_true, y_pred):  # noqa: D103
     return r2_score(y_true, y_pred)
 
 
-def one_minus_precision(y_true, y_pred, greater_is_better=False):
+def one_minus_precision(y_true, y_pred, greater_is_better=False):  # noqa: D103
     return 1 - precision_score(y_true, y_pred, average="binary")
 
 
-def one_minus_f1_macro(y_true, y_pred, greater_is_better=False):
+def one_minus_f1_macro(y_true, y_pred, greater_is_better=False):  # noqa: D103
     return 1 - f1_score(y_true, y_pred, average="macro")
 
 
-def fake_prob_metric(y_true, y_pred, needs_proba=True):
+def fake_prob_metric(y_true, y_pred, response_method="predict_proba"):  # noqa: D103
     return average_precision_score(y_true, y_pred)
 
 
-def fake_prob_metric_multiclass(y_true, y_pred, needs_proba=True):
+def fake_prob_metric_multiclass(y_true, y_pred, response_method="predict_proba"):  # noqa: D103
     return roc_auc_score(y_true, y_pred, average="macro", multi_class="ovo")
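
Given the register_custom_metric() signature shown in the skll/metrics.py hunk above, the test metrics in this file would be registered along these lines (a usage sketch, not a test from this commit):

from skll.metrics import register_custom_metric

# the response_method="predict_proba" default in fake_prob_metric's
# signature is read off the function signature and forwarded to make_scorer()
register_custom_metric("tests/other/custom_metrics.py", "fake_prob_metric")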

tests/test_classification.py (+3, -3)

@@ -542,7 +542,7 @@ def test_sparse_predict(self):  # noqa: D103
             (0.45, 0.52),
             (0.52, 0.5),
             (0.48, 0.5),
-            (0.49, 0.5),
+            (0.5, 0.5),
             (0.54, 0.5),
             (0.43, 0),
             (0.53, 0.57),
@@ -814,8 +814,8 @@ def check_adaboost_predict(self, base_estimator, algorithm, expected_score):
     def test_adaboost_predict(self):  # noqa: D103
         for base_estimator_name, algorithm, expected_score in zip(
             ["MultinomialNB", "DecisionTreeClassifier", "SGDClassifier", "SVC"],
-            ["SAMME.R", "SAMME.R", "SAMME", "SAMME"],
-            [0.46, 0.52, 0.46, 0.5],
+            ["SAMME", "SAMME", "SAMME", "SAMME"],
+            [0.49, 0.52, 0.46, 0.5],
         ):
             yield self.check_adaboost_predict, base_estimator_name, algorithm, expected_score

tests/test_output.py (+3, -4)

@@ -1208,12 +1208,11 @@ def test_send_warnings_to_log(self):
         # Check experiment log output
         # The experiment log file should contain warnings related
         # to the use of sklearn
-        with open(output_dir / "test_send_warnings_to_log_LinearSVC.log") as f:
+        with open(output_dir / "test_send_warnings_to_log_DummyClassifier.log") as f:
             log_content = f.read()
         convergence_sklearn_warning_re = re.compile(
-            r"WARNING - [^\n]+sklearn.svm._base\.py:\d+: ConvergenceWarning:"
-            r"Liblinear failed to converge, increase the number of iterations"
-            r"\."
+            r"WARNING - [^\n]+sklearn.metrics._classification\.py:\d+: "
+            r"UndefinedMetricWarning:Precision is ill-defined and being set to 0.0"
         )
         assert convergence_sklearn_warning_re.search(log_content) is not None
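
As a quick sanity check, the updated pattern matches a log line of the expected shape (the sample line below is fabricated for illustration):

import re

warning_re = re.compile(
    r"WARNING - [^\n]+sklearn.metrics._classification\.py:\d+: "
    r"UndefinedMetricWarning:Precision is ill-defined and being set to 0.0"
)
sample_line = (
    "WARNING - /env/lib/sklearn/metrics/_classification.py:1234: "
    "UndefinedMetricWarning:Precision is ill-defined and being set to 0.0"
)
assert warning_re.search(sample_line) is not None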
