Skip to content

Commit a3e6db4

Browse files
Fix warnings from scikit-learn 1.8+ and introduce FixedPipeline (#850)
* p
* fixing number
1 parent 1d8a638 commit a3e6db4

File tree

4 files changed

+71
-16
lines changed

4 files changed

+71
-16
lines changed

docs/source/whats_new.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ Version 1.5 (Source - GitHub)
3131

3232
⚕️ Code health
3333
^^^^^^^^^^^^^^
34-
34+
- Fixing warnings from the latest scikit-learn version within the Preprocessing logic (:gh:`850` by `Bruno Aristimunha`_)
3535

3636

3737
Version 1.4.2 (Stable - PyPi)

moabb/datasets/base.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,9 @@
1515
import mne_bids
1616
import numpy as np
1717
import pandas as pd
18-
from sklearn.pipeline import Pipeline
1918

2019
from moabb.datasets.bids_interface import StepType, _interface_map
21-
from moabb.datasets.preprocessing import SetRawAnnotations
20+
from moabb.datasets.preprocessing import FixedPipeline, SetRawAnnotations
2221

2322

2423
log = logging.getLogger(__name__)
@@ -393,7 +392,7 @@ def __init__(
393392
self.unit_factor = unit_factor
394393

395394
def _create_process_pipeline(self):
396-
return Pipeline(
395+
return FixedPipeline(
397396
[
398397
(
399398
StepType.RAW,
@@ -620,7 +619,7 @@ def _get_single_subject_data_using_cache(
620619
self,
621620
subject,
622621
path=cache_config.path,
623-
process_pipeline=Pipeline(cached_steps),
622+
process_pipeline=FixedPipeline(cached_steps),
624623
verbose=cache_config.verbose,
625624
)
626625

@@ -667,7 +666,7 @@ def _get_single_subject_data_using_cache(
667666
self,
668667
subject,
669668
path=cache_config.path,
670-
process_pipeline=Pipeline(
669+
process_pipeline=FixedPipeline(
671670
cached_steps + remaining_steps[: step_idx + 1]
672671
),
673672
verbose=cache_config.verbose,

moabb/datasets/preprocessing.py

Lines changed: 56 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,50 @@
66
import mne
77
import numpy as np
88
from sklearn.base import BaseEstimator, TransformerMixin
9-
from sklearn.pipeline import FunctionTransformer, Pipeline
9+
from sklearn.pipeline import FunctionTransformer, Pipeline, _name_estimators
1010
from sklearn.utils._repr_html.estimator import _VisualBlock
1111

1212

1313
log = logging.getLogger(__name__)
1414

1515

16+
class FixedPipeline(Pipeline):
17+
"""A Pipeline that is always considered fitted.
18+
19+
This is useful for pre-processing pipelines that don't require fitting,
20+
as they only apply fixed transformations (e.g., filtering, epoching).
21+
This avoids the FutureWarning from sklearn 1.8+ about unfitted pipelines.
22+
"""
23+
24+
def __sklearn_is_fitted__(self):
25+
"""Return True to indicate this pipeline is always considered fitted."""
26+
return True
27+
28+
29+
def make_fixed_pipeline(*steps, memory=None, verbose=False):
30+
"""Create a FixedPipeline that is always considered fitted.
31+
32+
This is a drop-in replacement for sklearn's make_pipeline that creates
33+
a pipeline marked as fitted, suitable for fixed transformations.
34+
35+
Parameters
36+
----------
37+
*steps : list of estimators
38+
List of (name, transform) tuples that are chained in the pipeline.
39+
memory : str or object with the joblib.Memory interface, default=None
40+
Used to cache the fitted transformers of the pipeline.
41+
verbose : bool, default=False
42+
If True, the time elapsed while fitting each step will be printed.
43+
44+
Returns
45+
-------
46+
p : FixedPipeline
47+
A FixedPipeline object.
48+
"""
49+
50+
return FixedPipeline(_name_estimators(steps), memory=memory, verbose=verbose)
51+
52+
1653
def _is_none_pipeline(pipeline):
1754
"""Check if a pipeline is the result of make_pipeline(None)"""
1855
return (
@@ -44,6 +81,11 @@ def transform(self, X, y=None):
4481
def fit(self, X, y=None):
4582
for _, t in self.transformers:
4683
t.fit(X)
84+
return self
85+
86+
def __sklearn_is_fitted__(self):
87+
"""Return True to indicate this transformer is always considered fitted."""
88+
return True
4789

4890
def _sk_visual_block_(self):
4991
"""Tell sklearn’s diagrammer to lay us out in parallel."""
@@ -65,7 +107,11 @@ def __init__(self):
65107
# when using the pipeline
66108

67109
def fit(self, X, y=None):
68-
pass
110+
return self
111+
112+
def __sklearn_is_fitted__(self):
113+
"""Return True to indicate this transformer is always considered fitted."""
114+
return True
69115

70116
def _sk_visual_block_(self):
71117
"""Tell sklearn’s diagrammer to lay us out in parallel."""
@@ -103,6 +149,7 @@ class SetRawAnnotations(FixedTransformer):
103149
"""
104150

105151
def __init__(self, event_id, interval: Tuple[float, float]):
152+
super().__init__()
106153
assert isinstance(event_id, dict) # not None
107154
self.event_id = event_id
108155
values = _get_event_id_values(self.event_id)
@@ -153,6 +200,7 @@ class RawToEvents(FixedTransformer):
153200
"""
154201

155202
def __init__(self, event_id: dict[str, int], interval: Tuple[float, float]):
203+
super().__init__()
156204
assert isinstance(event_id, dict) # not None
157205
self.event_id = event_id
158206
self.interval = interval
@@ -212,6 +260,7 @@ def __init__(
212260
stop_offset,
213261
marker=1,
214262
):
263+
super().__init__()
215264
self.length = length
216265
self.stride = stride
217266
self.start_offset = start_offset
@@ -245,12 +294,16 @@ def transform(self, raw: mne.io.BaseRaw, y=None):
245294

246295

247296
class EpochsToEvents(FixedTransformer):
297+
def __init__(self):
298+
super().__init__()
299+
248300
def transform(self, epochs, y=None):
249301
return epochs.events
250302

251303

252304
class EventsToLabels(FixedTransformer):
253305
def __init__(self, event_id):
306+
super().__init__()
254307
self.event_id = event_id
255308

256309
def transform(self, events, y=None):
@@ -269,6 +322,7 @@ def __init__(
269322
channels: List[str] = None,
270323
interpolate_missing_channels: bool = False,
271324
):
325+
super().__init__()
272326
assert isinstance(event_id, dict) # not None
273327
self.event_id = event_id
274328
self.tmin = tmin

moabb/paradigms/base.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,21 +6,23 @@
66
import mne
77
import numpy as np
88
import pandas as pd
9-
from sklearn.pipeline import Pipeline, make_pipeline
9+
from sklearn.pipeline import Pipeline
1010
from sklearn.preprocessing import FunctionTransformer
1111

1212
from moabb.datasets.base import BaseDataset
1313
from moabb.datasets.bids_interface import StepType
1414
from moabb.datasets.preprocessing import (
1515
EpochsToEvents,
1616
EventsToLabels,
17+
FixedPipeline,
1718
ForkPipelines,
1819
RawToEpochs,
1920
RawToEvents,
2021
SetRawAnnotations,
2122
get_crop_pipeline,
2223
get_filter_pipeline,
2324
get_resample_pipeline,
25+
make_fixed_pipeline,
2426
)
2527

2628

@@ -203,20 +205,20 @@ def make_process_pipelines(
203205
]
204206
)
205207
steps.append((StepType.ARRAY, array_events_pipeline))
206-
process_pipelines.append(Pipeline(steps))
208+
process_pipelines.append(FixedPipeline(steps))
207209
return process_pipelines
208210

209211
def make_labels_pipeline(self, dataset, return_epochs=False, return_raws=False):
210212
"""Returns the pipeline that extracts the labels from the
211213
output of the postprocess_pipeline.
212214
Refer to the arguments of :func:`get_data` for more information."""
213215
if return_epochs:
214-
labels_pipeline = make_pipeline(
216+
labels_pipeline = make_fixed_pipeline(
215217
EpochsToEvents(),
216218
EventsToLabels(event_id=self.used_events(dataset)),
217219
)
218220
elif return_raws:
219-
labels_pipeline = make_pipeline(
221+
labels_pipeline = make_fixed_pipeline(
220222
self._get_events_pipeline(dataset),
221223
EventsToLabels(event_id=self.used_events(dataset)),
222224
)
@@ -424,10 +426,10 @@ def _get_epochs_pipeline(self, return_epochs, return_raws, dataset):
424426
steps.append(
425427
(
426428
"epoching",
427-
make_pipeline(
429+
make_fixed_pipeline(
428430
ForkPipelines(
429431
[
430-
("raw", make_pipeline(None)),
432+
("raw", make_fixed_pipeline(None)),
431433
("events", self._get_events_pipeline(dataset)),
432434
]
433435
),
@@ -448,7 +450,7 @@ def _get_epochs_pipeline(self, return_epochs, return_raws, dataset):
448450
steps.append(("resample", get_resample_pipeline(self.resample)))
449451
if return_epochs: # needed to concatenate epochs
450452
steps.append(("load_data", FunctionTransformer(methodcaller("load_data"))))
451-
return Pipeline(steps)
453+
return FixedPipeline(steps)
452454

453455
def _get_array_pipeline(
454456
self, return_epochs, return_raws, dataset, processing_pipeline
@@ -466,7 +468,7 @@ def _get_array_pipeline(
466468
steps.append(("postprocess_pipeline", processing_pipeline))
467469
if len(steps) == 0:
468470
return None
469-
return Pipeline(steps)
471+
return FixedPipeline(steps)
470472

471473
def match_all(
472474
self,

0 commit comments

Comments (0)