[WIP] Improve check of scans.tsv for BrainVision files (#1034)

teonbrooks · sappelhoff · agramfort · web-flow · commit 46da987caceb · 2022-08-11T10:10:45.000+02:00
* Update read.py

* cleanup

* added test and fix the logic in the file handling

* cleanup

* Update doc/whats_new.rst

Co-authored-by: Stefan Appelhoff <stefan.appelhoff@mailbox.org>

* fix the filepath for windows

* Update read.py

* fix?

* restart ci

* fix

* fix

Co-authored-by: Stefan Appelhoff <stefan.appelhoff@mailbox.org>
Co-authored-by: Alexandre Gramfort <alexandre.gramfort@m4x.org>
diff --git a/doc/whats_new.rst b/doc/whats_new.rst
@@ -92,6 +92,8 @@ Detailed list of changes
 
 - Instead of deleting files and raising cryptic errors, an intentional error message is now sent when calling :func:`~mne_bids.write_raw_bids` with the source file identical to the destination file, unless ``format`` is specified, by `Adam Li`_ and `Stefan Appelhoff`_ (:gh:`889`)
 
+- Fix a bug where an internal helper function of :func:`~mne_bids.read_raw_bids` rejected BrainVision data if ``_scans.tsv`` listed a ``.eeg`` or ``.vmrk`` file instead of the ``.vhdr`` file, by `Teon Brooks`_ (:gh:`1034`)
+
 :doc:`Find out what was new in previous releases <whats_new_previous_releases>`
 
 .. include:: authors.rst
diff --git a/mne_bids/read.py b/mne_bids/read.py
@@ -233,13 +233,23 @@ def _handle_scans_reading(scans_fname, raw, bids_path):
     # get the row corresponding to the file
     # use string concatenation instead of os.path
     # to work nicely with windows
-    data_fname = bids_path.datatype + '/' + fname
+    data_fname = Path(bids_path.datatype) / fname
     fnames = scans_tsv['filename']
+    fnames = [Path(fname) for fname in fnames]
     if 'acq_time' in scans_tsv:
         acq_times = scans_tsv['acq_time']
     else:
         acq_times = ['n/a'] * len(fnames)
 
+    # There are three possible extensions for BrainVision
+    # First gather all the possible extensions
+    acq_suffixes = set(fname.suffix for fname in fnames)
+    # Add the filename extension for the bids folder
+    acq_suffixes.add(Path(data_fname).suffix)
+
+    if all(suffix in ('.vhdr', '.eeg', '.vmrk') for suffix in acq_suffixes):
+        ext = fnames[0].suffix
+        data_fname = Path(data_fname).with_suffix(ext)
     row_ind = fnames.index(data_fname)
 
     # check whether all split files have the same acq_time
@@ -250,7 +260,9 @@ def _handle_scans_reading(scans_fname, raw, bids_path):
                              bids_path.basename[:split_idx] +
                              r'split-\d+_' + bids_path.datatype +
                              bids_path.fpath.suffix)
-        split_fnames = list(filter(pattern.match, fnames))
+        split_fnames = list(filter(
+            lambda x: pattern.match(x.as_posix()), fnames
+        ))
         split_acq_times = []
         for split_f in split_fnames:
             split_acq_times.append(acq_times[fnames.index(split_f)])
diff --git a/mne_bids/tests/data/tiny_bids/code/make_tiny_bids_dataset.py b/mne_bids/tests/data/tiny_bids/code/make_tiny_bids_dataset.py
@@ -3,6 +3,7 @@
 import json
 import os
 import os.path as op
+from pathlib import Path
 
 import mne
 import numpy as np
@@ -14,7 +15,7 @@
 vhdr_fname = op.join(data_path, "montage", "bv_dig_test.vhdr")
 captrak_path = op.join(data_path, "montage", "captrak_coords.bvct")
 
-mne_bids_root = os.sep.join(mne_bids.__file__.split("/")[:-2])
+mne_bids_root = Path(mne_bids.__file__).parent.parent
 tiny_bids = op.join(mne_bids_root, "mne_bids", "tests", "data", "tiny_bids")
 os.makedirs(tiny_bids, exist_ok=True)
 
diff --git a/mne_bids/tests/test_read.py b/mne_bids/tests/test_read.py
@@ -5,8 +5,9 @@
 import json
 import os
 import os.path as op
-import pathlib
+from pathlib import Path
 from datetime import datetime, timezone
+from typing import OrderedDict
 
 import pytest
 import shutil as sh
@@ -23,7 +24,7 @@
 from mne_bids.config import (MNE_STR_TO_FRAME, BIDS_SHARED_COORDINATE_FRAMES,
                              BIDS_TO_MNE_FRAMES)
 from mne_bids.read import (read_raw_bids, _read_raw, get_head_mri_trans,
-                           _handle_events_reading)
+                           _handle_events_reading, _handle_scans_reading)
 from mne_bids.tsv_handler import _to_tsv, _from_tsv
 from mne_bids.utils import (_write_json)
 from mne_bids.sidecar_updates import _update_sidecar
@@ -56,6 +57,10 @@
 # Data with cHPI info
 raw_fname_chpi = op.join(data_path, 'SSS', 'test_move_anon_raw.fif')
 
+# Tiny BIDS testing dataset
+mne_bids_root = Path(mne_bids.__file__).parent.parent
+tiny_bids = op.join(mne_bids_root, "mne_bids", "tests", "data", "tiny_bids")
+
 warning_str = dict(
     channel_unit_changed='ignore:The unit for chann*.:RuntimeWarning:mne',
     meas_date_set_to_none="ignore:.*'meas_date' set to None:RuntimeWarning:"
@@ -567,6 +572,38 @@ def test_handle_scans_reading(tmp_path):
     assert new_acq_time != raw_01.info['meas_date']
 
 
+def test_handle_scans_reading_brainvision(tmp_path):
+    """Test stability of BrainVision's different file extensions"""
+    test_scan_eeg = OrderedDict(
+        [('filename', [Path('eeg/sub-01_ses-eeg_task-rest_eeg.eeg')]),
+         ('acq_time', ['2000-01-01T12:00:00.000000Z'])]
+    )
+    test_scan_vmrk = OrderedDict(
+        [('filename', [Path('eeg/sub-01_ses-eeg_task-rest_eeg.vmrk')]),
+         ('acq_time', ['2000-01-01T12:00:00.000000Z'])]
+    )
+    test_scan_edf = OrderedDict(
+        [('filename', [Path('eeg/sub-01_ses-eeg_task-rest_eeg.edf')]),
+         ('acq_time', ['2000-01-01T12:00:00.000000Z'])]
+    )
+    os.mkdir(tmp_path / 'eeg')
+    for test_scan in [test_scan_eeg, test_scan_vmrk, test_scan_edf]:
+        _to_tsv(test_scan, tmp_path / test_scan['filename'][0])
+
+    bids_path = BIDSPath(subject='01', session='eeg', task='rest',
+                         datatype='eeg', root=tiny_bids)
+    with pytest.warns(RuntimeWarning, match='Not setting positions'):
+        raw = read_raw_bids(bids_path)
+
+    for test_scan in [test_scan_eeg, test_scan_vmrk]:
+        _handle_scans_reading(tmp_path / test_scan['filename'][0],
+                              raw, bids_path)
+
+    with pytest.raises(ValueError, match="is not in list"):
+        _handle_scans_reading(tmp_path / test_scan_edf['filename'][0],
+                              raw, bids_path)
+
+
 @pytest.mark.filterwarnings(warning_str['channel_unit_changed'])
 def test_handle_info_reading(tmp_path):
     """Test reading information from a BIDS sidecar JSON file."""
@@ -587,7 +624,7 @@ def test_handle_info_reading(tmp_path):
     bids_fname.update(datatype=suffix)
     sidecar_fname = _find_matching_sidecar(bids_fname, suffix=suffix,
                                            extension='.json')
-    sidecar_fname = pathlib.Path(sidecar_fname)
+    sidecar_fname = Path(sidecar_fname)
 
     # assert that we get the same line frequency set
     raw = read_raw_bids(bids_path=bids_path)
@@ -1071,7 +1108,7 @@ def test_write_read_fif_split_file(tmp_path, monkeypatch):
     n_times = int(2.5e6 / n_channels)  # enough to produce a 10MB split
     data = np.empty((n_channels, n_times), dtype=np.float32)
     raw = mne.io.RawArray(data, raw.info)
-    big_fif_fname = pathlib.Path(tmp_dir) / 'test_raw.fif'
+    big_fif_fname = Path(tmp_dir) / 'test_raw.fif'
 
     split_size = '10MB'
     raw.save(big_fif_fname, split_size=split_size)
diff --git a/setup.cfg b/setup.cfg
@@ -87,6 +87,8 @@ filterwarnings =
     ignore:MEG ref channel RMSP did not.*:RuntimeWarning
     # Python 3.10+ and NumPy 1.22 (and maybe also newer NumPy versions?)
     ignore:.*distutils\.sysconfig module is deprecated.*:DeprecationWarning
+    # numba with NumPy dev
+    ignore:`np.MachAr` is deprecated.*:DeprecationWarning
 
 [pydocstyle]
 convention = pep257