added unit tests for events_file_to_annotation_kwargs

matthiasdold · matthiasdold · commit f98ac2bbd924 · 2025-04-07T17:32:46.000+02:00
diff --git a/mne_bids/read.py b/mne_bids/read.py
@@ -523,8 +523,49 @@ def _handle_info_reading(sidecar_fname, raw):
     return raw
 
 
-def _events_file_to_annotation_kwargs(events_fname: str) -> dict:
-    """Read the `events.tsv` file and extract onset, duration, and description."""
+def events_file_to_annotation_kwargs(events_fname: str | Path) -> dict:
+    """
+    Read the `events.tsv` file and extract onset, duration, and description.
+
+    This function reads an events file in TSV format and extracts the onset,
+    duration, and description of events.
+
+    Parameters
+    ----------
+    events_fname : str | Path
+        The file path to the `events.tsv` file.
+
+    Returns
+    -------
+    dict
+        A dictionary containing the following keys:
+        - 'onset' : np.ndarray
+            The onset times of the events in seconds.
+        - 'duration' : np.ndarray
+            The durations of the events in seconds.
+        - 'description' : np.ndarray
+            The descriptions of the events.
+        - 'event_id' : dict
+            A dictionary mapping event descriptions to integer event IDs.
+
+    Notes
+    -----
+    The function handles the following cases:
+    - If the `trial_type` column is available, it uses it for event descriptions.
+    - If the `stim_type` column is available, it uses it for backward compatibility.
+    - If the `value` column is available, it uses it to create the `event_id`.
+    - If none of the above columns are available, it defaults to using 'n/a' for
+      descriptions and 1 for event IDs.
+
+    Examples
+    --------
+    >>> events_dict = events_file_to_annotation_kwargs('path/to/events.tsv')
+    >>> print(events_dict['onset'])
+    [0.1 0.2 0.3]
+    >>> print(events_dict['event_id'])
+    {'event1': 1, 'event2': 2}
+
+    """
     logger.info(f"Reading events from {events_fname}.")
     events_dict = _from_tsv(events_fname)
 
@@ -606,7 +647,7 @@ def _events_file_to_annotation_kwargs(events_fname: str) -> dict:
 
 def _handle_events_reading(events_fname, raw):
     """Read associated events.tsv and convert valid events to annotations on Raw."""
-    annotations_info = _events_file_to_annotation_kwargs(events_fname)
+    annotations_info = events_file_to_annotation_kwargs(events_fname)
     event_id = annotations_info["event_id"]
 
     # Add events as Annotations, but keep essential Annotations present in raw file
diff --git a/mne_bids/tests/test_read.py b/mne_bids/tests/test_read.py
@@ -14,6 +14,7 @@
 
 import mne
 import numpy as np
+import pandas as pd
 import pytest
 from mne.datasets import testing
 from mne.io.constants import FIFF
@@ -32,6 +33,7 @@
     _handle_events_reading,
     _handle_scans_reading,
     _read_raw,
+    events_file_to_annotation_kwargs,
     get_head_mri_trans,
     read_raw_bids,
 )
@@ -855,9 +857,7 @@ def test_handle_chpi_reading(tmp_path):
     meg_json_data_freq_mismatch["HeadCoilFrequency"][0] = 123
     _write_json(meg_json_path, meg_json_data_freq_mismatch, overwrite=True)
 
-    with (
-        pytest.warns(RuntimeWarning, match="Defaulting to .* mne.Raw object"),
-    ):
+    with (pytest.warns(RuntimeWarning, match="Defaulting to .* mne.Raw object"),):
         raw_read = read_raw_bids(bids_path, extra_params=dict(allow_maxshield="yes"))
 
     # cHPI "off" according to sidecar, but present in the data
@@ -1078,9 +1078,7 @@ def test_handle_ieeg_coords_reading(bids_path, tmp_path):
     _to_tsv(electrodes_dict, electrodes_fname)
     # popping off channels should not result in an error
     # however, a warning will be raised through mne-python
-    with (
-        pytest.warns(RuntimeWarning, match="DigMontage is only a subset of info"),
-    ):
+    with (pytest.warns(RuntimeWarning, match="DigMontage is only a subset of info"),):
         read_raw_bids(bids_path=bids_fname, verbose=False)
 
     # make sure montage is set if there are coordinates w/ 'n/a'
@@ -1096,9 +1094,7 @@ def test_handle_ieeg_coords_reading(bids_path, tmp_path):
     # electrode coordinates should be nan
     # when coordinate is 'n/a'
     nan_chs = [electrodes_dict["name"][i] for i in [0, 3]]
-    with (
-        pytest.warns(RuntimeWarning, match="There are channels without locations"),
-    ):
+    with (pytest.warns(RuntimeWarning, match="There are channels without locations"),):
         raw = read_raw_bids(bids_path=bids_fname, verbose=False)
         for idx, ch in enumerate(raw.info["chs"]):
             if ch["ch_name"] in nan_chs:
@@ -1226,9 +1222,7 @@ def test_handle_non_mne_channel_type(tmp_path):
     channels_data["type"][ch_idx] = "FOOBAR"
     _to_tsv(data=channels_data, fname=channels_tsv_path)
 
-    with (
-        pytest.warns(RuntimeWarning, match='will be set to "misc"'),
-    ):
+    with (pytest.warns(RuntimeWarning, match='will be set to "misc"'),):
         raw = read_raw_bids(bids_path)
 
     # Should be a 'misc' channel.
@@ -1466,3 +1460,75 @@ def test_gsr_and_temp_reading():
     raw = read_raw_bids(bids_path)
     assert raw.get_channel_types(["GSR"]) == ["gsr"]
     assert raw.get_channel_types(["Temperature"]) == ["temperature"]
+
+
+def test_events_file_to_annotation_kwargs(tmp_path):
+    bids_path = BIDSPath(
+        subject="01", session="eeg", task="rest", datatype="eeg", root=tiny_bids_root
+    )
+    events_fname = _find_matching_sidecar(bids_path, suffix="events", extension=".tsv")
+
+    # ---------------- plain read --------------------------------------------
+    df = pd.read_csv(events_fname, sep="\t")
+    ev_kwargs = events_file_to_annotation_kwargs(events_fname=events_fname)
+    assert (ev_kwargs["onset"] == df["onset"].values).all()
+    assert (ev_kwargs["duration"] == df["duration"].values).all()
+    assert (ev_kwargs["description"] == df["trial_type"].values).all()
+
+    # ---------------- filtering out n/a values ------------------------------
+    tmp_tsv_file = tmp_path / "events.tsv"
+    dext = pd.concat(
+        [df.copy().assign(onset=df.onset + i) for i in range(5)]
+    ).reset_index(drop=True)
+
+    dext = dext.assign(
+        ix=range(len(dext)),
+        value=dext.trial_type.map({"start_experiment": 1, "show_stimulus": 2}),
+        duration=1.0,
+    )
+
+    # `_drop` expects n/a markers as strings and is applied to `onset`, `value`
+    # and `trial_type`; an n/a `duration` should instead be read back as 0.0
+    for c in ["onset", "value", "trial_type", "duration"]:
+        dext[c] = dext[c].astype(str)
+
+    dext.loc[0, "onset"] = "n/a"
+    dext.loc[1, "duration"] = "n/a"
+    dext.loc[4, "trial_type"] = "n/a"
+    dext.loc[4, "value"] = (
+        "n/a"  # keeps row 4 filtered even after the `trial_type` column is dropped
+    )
+    dext.to_csv(tmp_tsv_file, sep="\t", index=False)
+
+    ev_kwargs_filtered = events_file_to_annotation_kwargs(events_fname=tmp_tsv_file)
+
+    dext_f = dext[
+        (dext["onset"] != "n/a")
+        & (dext["trial_type"] != "n/a")
+        & (dext["value"] != "n/a")
+    ]
+
+    assert (ev_kwargs_filtered["onset"] == dext_f["onset"].astype(float).values).all()
+    assert (
+        ev_kwargs_filtered["duration"]
+        == dext_f["duration"].replace("n/a", "0.0").astype(float).values
+    ).all()
+    assert (ev_kwargs_filtered["description"] == dext_f["trial_type"].values).all()
+    assert (
+        ev_kwargs_filtered["duration"][0] == 0.0
+    )  # row 1 (n/a duration) sits at index 0 because row 0 (n/a onset) is dropped
+
+    # ---------------- missing trial_type: descriptions come from `value` -----
+    tmp_tsv_file = tmp_path / "events.tsv"
+    dext.drop(columns="trial_type").to_csv(tmp_tsv_file, sep="\t", index=False)
+
+    ev_kwargs_default = events_file_to_annotation_kwargs(events_fname=tmp_tsv_file)
+    assert (ev_kwargs_default["onset"] == dext_f["onset"].astype(float).values).all()
+    assert (
+        ev_kwargs_default["duration"]
+        == dext_f["duration"].replace("n/a", "0.0").astype(float).values
+    ).all()
+    assert (
+        np.sort(np.unique(ev_kwargs_default["description"]))
+        == np.sort(dext_f["value"].unique())
+    ).all()