mne-tools · sappelhoff · Apr 16, 2025 · Apr 3, 2025 · Apr 3, 2025 · Apr 3, 2025
diff --git a/CITATION.cff b/CITATION.cff
@@ -206,6 +206,10 @@ authors:
       family-names: Gerçek
       affiliation: 'University of Geneva, Department of Fundamental Neuroscience'
       orcid: 'https://orcid.org/0000-0003-1063-6769'
+    - given-names: Matthias
+      family-names: Dold
+      affiliation: 'Donders Institute for Brain, Cognition and Behaviour, Radboud University, Nijmegen, Netherlands'
+      orcid: 'https://orcid.org/0009-0003-1477-4912'
     - given-names: Alexandre
       family-names: Gramfort
       affiliation: 'Université Paris-Saclay, Inria, CEA, Palaiseau, France'

diff --git a/doc/authors.rst b/doc/authors.rst
@@ -35,6 +35,7 @@
 .. _Mara Wolter: https://github.com/marakw
 .. _Marijn van Vliet: https://github.com/wmvanvliet
 .. _Mathieu Scheltienne: https://github.com/mscheltienne
+.. _Matthias Dold: https://github.com/matthiasdold
 .. _Matt Sanderson: https://github.com/monkeyman192
 .. _Maximilien Chaumon: https://github.com/dnacombo
 .. _Moritz Gerster: http://moritz-gerster.com

diff --git a/doc/whats_new.rst b/doc/whats_new.rst
@@ -20,6 +20,7 @@ The following authors contributed for the first time. Thank you so much! 🤩
 * `Christian O'Reilly`_
 * `Berk Gerçek`_
 * `Arne Gottwald`_
+* `Matthias Dold`_
 
 The following authors had contributed before. Thank you for sticking around! 🤘
 
@@ -39,6 +40,7 @@ Detailed list of changes
 - Empty-room matching now preferentially finds recordings in the subject directory tagged as `task-noise` before looking in the `sub-emptyroom` directories. This adds support for a part of the BIDS specification for ER recordings, by `Berk Gerçek`_ (:gh:`1364`)
 - Path matching is now implemenented in a more efficient manner within :meth:`mne_bids.BIDSPath.match()` and :func:`mne_bids.find_matching_paths()`, by `Arne Gottwald` (:gh:`1355`)
 - :func:`mne_bids.get_entity_vals()` has a new parameter ``include_match`` to prefilter item matching and ignore non-matched items from begin of directory scan, by `Arne Gottwald` (:gh:`1355`)
+- Data from ``events.tsv`` can now be read into a dictionary (onset, duration, description, and event IDs) using :func:`mne_bids.events_file_to_annotation_kwargs()`, by `Matthias Dold`_ (:gh:`1389`)
 
 
 🧐 API and behavior changes

diff --git a/mne_bids/__init__.py b/mne_bids/__init__.py
@@ -22,7 +22,11 @@
     get_bids_path_from_fname,
     find_matching_paths,
 )
-from mne_bids.read import get_head_mri_trans, read_raw_bids
+from mne_bids.read import (
+    get_head_mri_trans,
+    read_raw_bids,
+    events_file_to_annotation_kwargs,
+)
 from mne_bids.utils import get_anonymization_daysback
 from mne_bids.write import (
     make_dataset_description,

diff --git a/mne_bids/read.py b/mne_bids/read.py
@@ -523,8 +523,70 @@ def _handle_info_reading(sidecar_fname, raw):
     return raw
 
 
-def _handle_events_reading(events_fname, raw):
-    """Read associated events.tsv and convert valid events to annotations on Raw."""
+def events_file_to_annotation_kwargs(events_fname: str | Path) -> dict:
+    r"""
+    Read the `events.tsv` file and extract onset, duration, and description.
+
+    This function reads an events file in TSV format and extracts the onset,
+    duration, and description of events.
+
+    Parameters
+    ----------
+    events_fname : str | Path
+        The file path to the `events.tsv` file.
+
+    Returns
+    -------
+    dict
+        A dictionary containing the following keys:
+        - 'onset' : np.ndarray
+            The onset times of the events in seconds.
+        - 'duration' : np.ndarray
+            The durations of the events in seconds.
+        - 'description' : np.ndarray
+            The descriptions of the events.
+        - 'event_id' : dict
+            A dictionary mapping event descriptions to integer event IDs.
+
+    Notes
+    -----
+    The function handles the following cases:
+    - If the `trial_type` column is available, it uses it for event descriptions.
+    - If the `stim_type` column is available, it uses it for backward compatibility.
+    - If the `value` column is available, it uses it to create the `event_id`.
+    - If none of the above columns are available, it defaults to using 'n/a' for
+      descriptions and 1 for event IDs.
+
+    Examples
+    --------
+    >>> import pandas as pd
+    >>> from pathlib import Path
+    >>> import tempfile
+    >>>
+    >>> # Create a sample DataFrame
+    >>> data = {
+    ...     'onset': [0.1, 0.2, 0.3],
+    ...     'duration': [0.1, 0.1, 0.1],
+    ...     'trial_type': ['event1', 'event2', 'event1'],
+    ...     'value': [1, 2, 1],
+    ...     'sample': [10, 20, 30]
+    ... }
+    >>> df = pd.DataFrame(data)
+    >>>
+    >>> # Write the DataFrame to a temporary file
+    >>> temp_dir = tempfile.gettempdir()
+    >>> events_file = Path(temp_dir) / 'events.tsv'
+    >>> df.to_csv(events_file, sep='\t', index=False)
+    >>>
+    >>> # Read the events file using the function
+    >>> events_dict = events_file_to_annotation_kwargs(events_file)
+    >>> events_dict
+    {'onset': array([0.1, 0.2, 0.3]),
+    'duration': array([0.1, 0.1, 0.1]),
+    'description': array(['event1', 'event2', 'event1'], dtype='<U6'),
+    'event_id': {'event1': 1, 'event2': 2}}
+
+    """
     logger.info(f"Reading events from {events_fname}.")
     events_dict = _from_tsv(events_fname)
 
@@ -601,9 +663,21 @@ def _handle_events_reading(events_fname, raw):
         [0 if du == "n/a" else du for du in events_dict["duration"]], dtype=float
     )
 
+    return {"onset": ons, "duration": durs, "description": descrs, "event_id": event_id}
+
+
+def _handle_events_reading(events_fname, raw):
+    """Read associated events.tsv and convert valid events to annotations on Raw."""
+    annotations_info = events_file_to_annotation_kwargs(events_fname)
+    event_id = annotations_info["event_id"]
+
     # Add events as Annotations, but keep essential Annotations present in raw file
     annot_from_raw = raw.annotations.copy()
-    annot_from_events = mne.Annotations(onset=ons, duration=durs, description=descrs)
+    annot_from_events = mne.Annotations(
+        onset=annotations_info["onset"],
+        duration=annotations_info["duration"],
+        description=annotations_info["description"],
+    )
     raw.set_annotations(annot_from_events)
 
     annot_idx_to_keep = [

diff --git a/mne_bids/tests/test_read.py b/mne_bids/tests/test_read.py
@@ -14,6 +14,7 @@
 
 import mne
 import numpy as np
+import pandas as pd
 import pytest
 from mne.datasets import testing
 from mne.io.constants import FIFF
@@ -32,6 +33,7 @@
     _handle_events_reading,
     _handle_scans_reading,
     _read_raw,
+    events_file_to_annotation_kwargs,
     get_head_mri_trans,
     read_raw_bids,
 )
@@ -1466,3 +1468,76 @@ def test_gsr_and_temp_reading():
     raw = read_raw_bids(bids_path)
     assert raw.get_channel_types(["GSR"]) == ["gsr"]
     assert raw.get_channel_types(["Temperature"]) == ["temperature"]
+
+
+def test_events_file_to_annotation_kwargs(tmp_path):
+    """Test that events file is read correctly."""
+    bids_path = BIDSPath(
+        subject="01", session="eeg", task="rest", datatype="eeg", root=tiny_bids_root
+    )
+    events_fname = _find_matching_sidecar(bids_path, suffix="events", extension=".tsv")
+
+    # ---------------- plain read --------------------------------------------
+    df = pd.read_csv(events_fname, sep="\t")
+    ev_kwargs = events_file_to_annotation_kwargs(events_fname=events_fname)
+    assert (ev_kwargs["onset"] == df["onset"].values).all()
+    assert (ev_kwargs["duration"] == df["duration"].values).all()
+    assert (ev_kwargs["description"] == df["trial_type"].values).all()
+
+    # ---------------- filtering out n/a values ------------------------------
+    tmp_tsv_file = tmp_path / "events.tsv"
+    dext = pd.concat(
+        [df.copy().assign(onset=df.onset + i) for i in range(5)]
+    ).reset_index(drop=True)
+
+    dext = dext.assign(
+        ix=range(len(dext)),
+        value=dext.trial_type.map({"start_experiment": 1, "show_stimulus": 2}),
+        duration=1.0,
+    )
+
+    # nan values for `_drop` must be string values, `_drop` is called on
+    # `onset`, `value` and `trial_type`. `duration` n/a should end up as float 0
+    for c in ["onset", "value", "trial_type", "duration"]:
+        dext[c] = dext[c].astype(str)
+
+    dext.loc[0, "onset"] = "n/a"
+    dext.loc[1, "duration"] = "n/a"
+    dext.loc[4, "trial_type"] = "n/a"
+    dext.loc[4, "value"] = (
+        "n/a"  # to check that filtering is also applied when we drop the `trial_type`
+    )
+    dext.to_csv(tmp_tsv_file, sep="\t", index=False)
+
+    ev_kwargs_filtered = events_file_to_annotation_kwargs(events_fname=tmp_tsv_file)
+
+    dext_f = dext[
+        (dext["onset"] != "n/a")
+        & (dext["trial_type"] != "n/a")
+        & (dext["value"] != "n/a")
+    ]
+
+    assert (ev_kwargs_filtered["onset"] == dext_f["onset"].astype(float).values).all()
+    assert (
+        ev_kwargs_filtered["duration"]
+        == dext_f["duration"].replace("n/a", "0.0").astype(float).values
+    ).all()
+    assert (ev_kwargs_filtered["description"] == dext_f["trial_type"].values).all()
+    assert (
+        ev_kwargs_filtered["duration"][0] == 0.0
+    )  # now idx=0, as first row is filtered out
+
+    # ---------------- default if missing trial_type  ------------------------
+    tmp_tsv_file = tmp_path / "events.tsv"
+    dext.drop(columns="trial_type").to_csv(tmp_tsv_file, sep="\t", index=False)
+
+    ev_kwargs_default = events_file_to_annotation_kwargs(events_fname=tmp_tsv_file)
+    assert (ev_kwargs_default["onset"] == dext_f["onset"].astype(float).values).all()
+    assert (
+        ev_kwargs_default["duration"]
+        == dext_f["duration"].replace("n/a", "0.0").astype(float).values
+    ).all()
+    assert (
+        np.sort(np.unique(ev_kwargs_default["description"]))
+        == np.sort(dext_f["value"].unique())
+    ).all()