Merged
2 changes: 1 addition & 1 deletion docs/source/whats_new.rst
@@ -31,7 +31,7 @@ Version 1.5 (Source - GitHub)

⚕️ Code health
^^^^^^^^^^^^^^

+- Fixing warnings from the latest scikit-learn version within the Preprocessing logic (:gh:`850` by `Bruno Aristimunha`_)


Version 1.4.2 (Stable - PyPi)
9 changes: 4 additions & 5 deletions moabb/datasets/base.py
@@ -15,10 +15,9 @@
import mne_bids
import numpy as np
import pandas as pd
-from sklearn.pipeline import Pipeline

from moabb.datasets.bids_interface import StepType, _interface_map
-from moabb.datasets.preprocessing import SetRawAnnotations
+from moabb.datasets.preprocessing import FixedPipeline, SetRawAnnotations


log = logging.getLogger(__name__)
@@ -393,7 +392,7 @@ def __init__(
self.unit_factor = unit_factor

def _create_process_pipeline(self):
-return Pipeline(
+return FixedPipeline(
[
(
StepType.RAW,
@@ -620,7 +619,7 @@ def _get_single_subject_data_using_cache(
self,
subject,
path=cache_config.path,
-process_pipeline=Pipeline(cached_steps),
+process_pipeline=FixedPipeline(cached_steps),
verbose=cache_config.verbose,
)

@@ -667,7 +666,7 @@ def _get_single_subject_data_using_cache(
self,
subject,
path=cache_config.path,
-process_pipeline=Pipeline(
+process_pipeline=FixedPipeline(
cached_steps + remaining_steps[: step_idx + 1]
),
verbose=cache_config.verbose,
58 changes: 56 additions & 2 deletions moabb/datasets/preprocessing.py
@@ -6,13 +6,50 @@
import mne
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
-from sklearn.pipeline import FunctionTransformer, Pipeline
+from sklearn.pipeline import FunctionTransformer, Pipeline, _name_estimators
from sklearn.utils._repr_html.estimator import _VisualBlock


log = logging.getLogger(__name__)


class FixedPipeline(Pipeline):
"""A Pipeline that is always considered fitted.
This is useful for pre-processing pipelines that don't require fitting,
as they only apply fixed transformations (e.g., filtering, epoching).
This avoids the FutureWarning from sklearn 1.8+ about unfitted pipelines.
"""

def __sklearn_is_fitted__(self):
"""Return True to indicate this pipeline is always considered fitted."""
return True
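
A minimal sketch of the mechanism, assuming only scikit-learn (illustrative, not part of the diff): check_is_fitted defers to __sklearn_is_fitted__ when an estimator defines it, so a FixedPipeline passes fitted checks without fit() ever being called:

from sklearn.preprocessing import FunctionTransformer
from sklearn.utils.validation import check_is_fitted

# FixedPipeline as defined above; FunctionTransformer is itself stateless.
pipe = FixedPipeline([("identity", FunctionTransformer())])
check_is_fitted(pipe)  # no NotFittedError raised: __sklearn_is_fitted__ -> True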


def make_fixed_pipeline(*steps, memory=None, verbose=False):
"""Create a FixedPipeline that is always considered fitted.
This is a drop-in replacement for sklearn's make_pipeline that creates
a pipeline marked as fitted, suitable for fixed transformations.
Parameters
----------
*steps : list of estimators
List of (name, transform) tuples that are chained in the pipeline.
Review comment by Copilot AI (Dec 9, 2025):

The parameter documentation is incorrect. The function uses _name_estimators(steps), which means it expects bare estimators, not tuples. The docstring should say:

*steps : list of estimators
    List of estimators that are chained in the pipeline. These are automatically named.

This matches sklearn's make_pipeline behavior, which takes estimators directly rather than named tuples (unlike the Pipeline constructor).

Suggested change:
-    List of (name, transform) tuples that are chained in the pipeline.
+    List of estimators that are chained in the pipeline. These are automatically named.
memory : str or object with the joblib.Memory interface, default=None
Used to cache the fitted transformers of the pipeline.
verbose : bool, default=False
If True, the time elapsed while fitting each step will be printed.
Returns
-------
p : FixedPipeline
A FixedPipeline object.
"""

return FixedPipeline(_name_estimators(steps), memory=memory, verbose=verbose)
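
A hedged usage sketch (the FunctionTransformer step and values are illustrative, not from this PR): steps are passed as bare estimators and auto-named by _name_estimators, which is exactly the behavior the review comment above describes, and the resulting pipeline is usable without a prior fit():

import numpy as np
from sklearn.preprocessing import FunctionTransformer

scale = FunctionTransformer(lambda x: x * 1e6)  # e.g. volts to microvolts
pipe = make_fixed_pipeline(scale)
print([name for name, _ in pipe.steps])    # ['functiontransformer'] (auto-named)
print(pipe.transform(np.array([[1e-6]])))  # [[1.]] with no fit() call first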


def _is_none_pipeline(pipeline):
"""Check if a pipeline is the result of make_pipeline(None)"""
return (
@@ -44,6 +81,11 @@ def transform(self, X, y=None):
def fit(self, X, y=None):
for _, t in self.transformers:
t.fit(X)
return self

def __sklearn_is_fitted__(self):
"""Return True to indicate this transformer is always considered fitted."""
return True

def _sk_visual_block_(self):
"""Tell sklearn’s diagrammer to lay us out in parallel."""
@@ -65,7 +107,11 @@ def __init__(self):
# when using the pipeline

def fit(self, X, y=None):
-pass
+return self

def __sklearn_is_fitted__(self):
"""Return True to indicate this transformer is always considered fitted."""
return True
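
The change from pass to return self follows the scikit-learn convention that fit() returns the estimator itself, which keeps chaining such as transformer.fit(raw).transform(raw) working on the subclasses below. A one-line illustration using the class above:

t = FixedTransformer()
assert t.fit(None) is t  # fit is a no-op for fixed transforms but must return self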

def _sk_visual_block_(self):
"""Tell sklearn’s diagrammer to lay us out in parallel."""
@@ -103,6 +149,7 @@ class SetRawAnnotations(FixedTransformer):
"""

def __init__(self, event_id, interval: Tuple[float, float]):
super().__init__()
assert isinstance(event_id, dict) # not None
self.event_id = event_id
values = _get_event_id_values(self.event_id)
@@ -153,6 +200,7 @@ class RawToEvents(FixedTransformer):
"""

def __init__(self, event_id: dict[str, int], interval: Tuple[float, float]):
super().__init__()
assert isinstance(event_id, dict) # not None
self.event_id = event_id
self.interval = interval
@@ -212,6 +260,7 @@ def __init__(
stop_offset,
marker=1,
):
super().__init__()
self.length = length
self.stride = stride
self.start_offset = start_offset
@@ -245,12 +294,16 @@ def transform(self, raw: mne.io.BaseRaw, y=None):


class EpochsToEvents(FixedTransformer):
def __init__(self):
super().__init__()

def transform(self, epochs, y=None):
return epochs.events


class EventsToLabels(FixedTransformer):
def __init__(self, event_id):
super().__init__()
self.event_id = event_id

def transform(self, events, y=None):
@@ -269,6 +322,7 @@ def __init__(
channels: List[str] = None,
interpolate_missing_channels: bool = False,
):
super().__init__()
assert isinstance(event_id, dict) # not None
self.event_id = event_id
self.tmin = tmin
18 changes: 10 additions & 8 deletions moabb/paradigms/base.py
@@ -6,21 +6,23 @@
import mne
import numpy as np
import pandas as pd
-from sklearn.pipeline import Pipeline, make_pipeline
+from sklearn.pipeline import Pipeline
from sklearn.preprocessing import FunctionTransformer

from moabb.datasets.base import BaseDataset
from moabb.datasets.bids_interface import StepType
from moabb.datasets.preprocessing import (
EpochsToEvents,
EventsToLabels,
FixedPipeline,
ForkPipelines,
RawToEpochs,
RawToEvents,
SetRawAnnotations,
get_crop_pipeline,
get_filter_pipeline,
get_resample_pipeline,
make_fixed_pipeline,
)


@@ -203,20 +205,20 @@ def make_process_pipelines(
]
)
steps.append((StepType.ARRAY, array_events_pipeline))
-process_pipelines.append(Pipeline(steps))
+process_pipelines.append(FixedPipeline(steps))
return process_pipelines

def make_labels_pipeline(self, dataset, return_epochs=False, return_raws=False):
"""Returns the pipeline that extracts the labels from the
output of the postprocess_pipeline.
Refer to the arguments of :func:`get_data` for more information."""
if return_epochs:
-labels_pipeline = make_pipeline(
+labels_pipeline = make_fixed_pipeline(
EpochsToEvents(),
EventsToLabels(event_id=self.used_events(dataset)),
)
elif return_raws:
-labels_pipeline = make_pipeline(
+labels_pipeline = make_fixed_pipeline(
self._get_events_pipeline(dataset),
EventsToLabels(event_id=self.used_events(dataset)),
)
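
For intuition, a hedged sketch (stand-in data, not MOABB code) of what the return_epochs=True labels path computes: an MNE events array is reduced to its event codes, which event_id maps back to label strings:

import numpy as np

events = np.array([[0, 0, 1], [100, 0, 2]])  # stand-in (sample, _, code) rows
event_id = {"left_hand": 1, "right_hand": 2}
code_to_label = {v: k for k, v in event_id.items()}
print([code_to_label[c] for c in events[:, -1]])  # ['left_hand', 'right_hand']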
@@ -424,10 +426,10 @@ def _get_epochs_pipeline(self, return_epochs, return_raws, dataset):
steps.append(
(
"epoching",
-make_pipeline(
+make_fixed_pipeline(
ForkPipelines(
[
("raw", make_pipeline(None)),
("raw", make_fixed_pipeline(None)),
("events", self._get_events_pipeline(dataset)),
]
),
@@ -448,7 +450,7 @@ def _get_epochs_pipeline(self, return_epochs, return_raws, dataset):
steps.append(("resample", get_resample_pipeline(self.resample)))
if return_epochs: # needed to concatenate epochs
steps.append(("load_data", FunctionTransformer(methodcaller("load_data"))))
-return Pipeline(steps)
+return FixedPipeline(steps)
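
The "load_data" step above uses FunctionTransformer(methodcaller("load_data")), which simply calls .load_data() on whatever object flows through the pipeline (here, MNE epochs). A stand-in sketch with a hypothetical method name:

from operator import methodcaller
from sklearn.preprocessing import FunctionTransformer

step = FunctionTransformer(methodcaller("upper"))  # "upper" stands in for "load_data"
print(step.transform("epochs"))  # 'EPOCHS': the named method is invoked on the input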

def _get_array_pipeline(
self, return_epochs, return_raws, dataset, processing_pipeline
@@ -466,7 +468,7 @@ def _get_array_pipeline(
steps.append(("postprocess_pipeline", processing_pipeline))
if len(steps) == 0:
return None
-return Pipeline(steps)
+return FixedPipeline(steps)

def match_all(
self,