Skip to content

Commit 517707a

Browse files
authored
Revert "Rework mne bids path match (#1355)"
This reverts commit 7b04694.
1 parent 7b04694 commit 517707a

File tree

4 files changed

+21
-161
lines changed

4 files changed

+21
-161
lines changed

doc/authors.rst

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,10 @@
55
.. _Amaia Benitez: https://github.com/AmaiaBA
66
.. _Anand Saini: https://github.com/anandsaini024
77
.. _Ariel Rokem: https://github.com/arokem
8-
.. _Arne Gottwald: https://github.com/waldie11
98
.. _Austin Hurst: https://github.com/a-hurst
109
.. _Berk Gerçek: https://github.com/berkgercek
1110
.. _Bruno Hebling Vieira: https://github.com/bhvieira
1211
.. _Chris Holdgraf: https://bids.berkeley.edu/people/chris-holdgraf
13-
.. _Christian O'Reilly: https://github.com/christian-oreilly
1412
.. _Clemens Brunner: https://github.com/cbrnr
1513
.. _Daniel McCloy: http://dan.mccloy.info
1614
.. _Denis Engemann: https://github.com/dengemann
@@ -54,3 +52,4 @@
5452
.. _Tom Donoghue: https://github.com/TomDonoghue
5553
.. _William Turner: https://bootstrapbill.github.io/
5654
.. _Yorguin Mantilla: https://github.com/yjmantilla
55+
.. _Christian O'Reilly: https://github.com/christian-oreilly

doc/whats_new.rst

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ The following authors contributed for the first time. Thank you so much! 🤩
1919

2020
* `Christian O'Reilly`_
2121
* `Berk Gerçek`_
22-
* `Arne Gottwald`_
2322

2423
The following authors had contributed before. Thank you for sticking around! 🤘
2524

@@ -36,8 +35,6 @@ Detailed list of changes
3635
- :func:`mne_bids.write_raw_bids()` can now handle mne `Raw` objects with `eyegaze` and `pupil` channels, by `Christian O'Reilly`_ (:gh:`1344`)
3736
- :func:`mne_bids.get_entity_vals()` has a new parameter ``ignore_suffixes`` to easily ignore sidecar files, by `Daniel McCloy`_ (:gh:`1362`)
3837
- Empty-room matching now preferentially finds recordings in the subject directory tagged as `task-noise` before looking in the `sub-emptyroom` directories. This adds support for a part of the BIDS specification for ER recordings, by `Berk Gerçek`_ (:gh:`1364`)
39-
- Path matching is now implemenented in a more efficient manner within `_return_root_paths`, by `Arne Gottwald` (:gh:`1355`)
40-
- :func:`mne_bids.get_entity_vals()` has a new parameter ``include_match`` to prefilter item matching and ignore non-matched items from begin of directory scan, by `Arne Gottwald` (:gh:`1355`)
4138

4239

4340
🧐 API and behavior changes
@@ -56,7 +53,6 @@ Detailed list of changes
5653
- :func:`mne_bids.read_raw_bids` can optionally return an ``event_id`` dictionary suitable for use with :func:`mne.events_from_annotations`, and if a ``values`` column is present in ``events.tsv`` it will be used as the source of the integer event ID codes, by `Daniel McCloy`_ (:gh:`1349`)
5754
- BIDS dictates that the recording entity should be displayed as "_recording-" in the filename. This PR makes :class:`mne_bids.BIDSPath` correctly display "_recording-" (instead of "_rec-") in BIDSPath.fpath. By `Scott Huberty`_ (:gh:`1348`)
5855
- :func:`mne_bids.make_dataset_description` now correctly encodes the dataset description as UTF-8 on disk, by `Scott Huberty`_ (:gh:`1357`)
59-
- Corrects extension match within `_filter_fnames` at end of filename, by `Arne Gottwald` (:gh:`1355`)
6056

6157
⚕️ Code health
6258
^^^^^^^^^^^^^^

mne_bids/path.py

Lines changed: 20 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212
from copy import deepcopy
1313
from datetime import datetime
1414
from io import StringIO
15-
from itertools import chain as iter_chain
1615
from os import path as op
1716
from pathlib import Path
1817
from textwrap import indent
@@ -1463,11 +1462,9 @@ def _truncate_tsv_line(line, lim=10):
14631462
"""Truncate a line to the specified number of characters."""
14641463
return "".join(
14651464
[
1466-
(
1467-
str(val) + (lim - len(val)) * " "
1468-
if len(val) < lim
1469-
else f"{val[: lim - 1]} "
1470-
)
1465+
str(val) + (lim - len(val)) * " "
1466+
if len(val) < lim
1467+
else f"{val[: lim - 1]} "
14711468
for val in line.split("\t")
14721469
]
14731470
)
@@ -1949,7 +1946,6 @@ def get_entity_vals(
19491946
ignore_datatypes=None,
19501947
ignore_dirs=("derivatives", "sourcedata"),
19511948
ignore_suffixes=None,
1952-
include_match=None,
19531949
with_key=False,
19541950
verbose=None,
19551951
):
@@ -2014,9 +2010,6 @@ def get_entity_vals(
20142010
ignore_suffixes : str | array-like of str | None
20152011
Suffixes to ignore. If ``None``, include all suffixes. This can be helpful for
20162012
ignoring non-data sidecars such as `*_scans.tsv` or `*_coordsystem.json`.
2017-
include_match : str | array-like of str | None
2018-
Apply a starting match pragma following Unix style pattern syntax from
2019-
package glob to prefilter search criterion.
20202013
20212014
.. versionadded:: 0.17
20222015
with_key : bool
@@ -2114,13 +2107,7 @@ def get_entity_vals(
21142107

21152108
p = re.compile(rf"{entity_long_abbr_map[entity_key]}-(.*?)_")
21162109
values = list()
2117-
search_str = f"**/*{entity_long_abbr_map[entity_key]}-*_*"
2118-
if include_match is not None:
2119-
include_match = _ensure_tuple(include_match)
2120-
filenames = [root.glob(im + search_str) for im in include_match]
2121-
filenames = iter_chain(*filenames)
2122-
else:
2123-
filenames = root.glob(search_str)
2110+
filenames = root.glob(f"**/*{entity_long_abbr_map[entity_key]}-*_*")
21242111

21252112
for filename in filenames:
21262113
# Skip ignored directories
@@ -2371,7 +2358,7 @@ def _filter_fnames(
23712358
r"_desc-(" + "|".join(description) + ")" if description else r"(|_desc-([^_]+))"
23722359
)
23732360
suffix_str = r"_(" + "|".join(suffix) + ")" if suffix else r"_([^_]+)"
2374-
ext_str = r"(" + "|".join(extension) + ")$" if extension else r".([^_]+)"
2361+
ext_str = r"(" + "|".join(extension) + ")" if extension else r".([^_]+)"
23752362

23762363
regexp = (
23772364
leading_path_str
@@ -2515,7 +2502,7 @@ def find_matching_paths(
25152502

25162503

25172504
def _return_root_paths(root, datatype=None, ignore_json=True, ignore_nosub=False):
2518-
"""Return all file paths + .ds paths in root.
2505+
"""Return all file paths in root.
25192506
25202507
Can be filtered by datatype (which is present in the path but not in
25212508
the BIDSPath basename). Can also be list of datatypes.
@@ -2536,50 +2523,29 @@ def _return_root_paths(root, datatype=None, ignore_json=True, ignore_nosub=False
25362523
Returns
25372524
-------
25382525
paths : list of pathlib.Path
2539-
All files + .ds paths in `root`, filtered according to the function parameters.
2526+
All paths in `root`, filtered according to the function parameters.
25402527
"""
25412528
root = Path(root) # if root is str
25422529

2543-
if datatype is None and not ignore_nosub:
2544-
search_str = "*.*"
2545-
paths = root.rglob(search_str)
2530+
if datatype is not None:
2531+
datatype = _ensure_tuple(datatype)
2532+
search_str = f"*/{'|'.join(datatype)}/*"
25462533
else:
2547-
if datatype is not None:
2548-
datatype = _ensure_tuple(datatype)
2549-
search_str = f"**/{'|'.join(datatype)}/.*"
2550-
else:
2551-
search_str = "**/*.*"
2552-
2553-
# only browse files which are of the form root/sub-*,
2554-
# such that we truely only look in 'sub'-folders:
2555-
2556-
if ignore_nosub:
2557-
search_str = "sub-*/" + search_str
2558-
2559-
paths = [
2560-
Path(root, fn)
2561-
for fn in glob.iglob(search_str, root_dir=root, recursive=True)
2562-
]
2534+
search_str = "*.*"
25632535

2536+
paths = root.rglob(search_str)
25642537
# Only keep files (not directories), ...
25652538
# and omit the JSON sidecars if `ignore_json` is True.
25662539
if ignore_json:
2567-
paths = [
2568-
p
2569-
for p in paths
2570-
if (p.is_file() and p.suffix != ".json")
2571-
# XXX: generalize with a private func that takes
2572-
# a config of which "data format" are to be expected like .ds
2573-
or (p.is_dir() and p.suffix == ".ds")
2574-
]
2540+
paths = [p for p in paths if p.is_file() and p.suffix != ".json"]
25752541
else:
2576-
paths = [
2577-
p
2578-
for p in paths
2579-
if p.is_file()
2580-
# XXX: see above, generalize with private func
2581-
or (p.is_dir() and p.suffix == ".ds")
2582-
]
2542+
paths = [p for p in paths if p.is_file()]
2543+
2544+
# only keep files which are of the form root/sub-*,
2545+
# such that we only look in 'sub'-folders:
2546+
if ignore_nosub:
2547+
root_sub = str(root / "sub-")
2548+
paths = [p for p in paths if str(p).startswith(root_sub)]
25832549

25842550
return paths
25852551

mne_bids/tests/test_path.py

Lines changed: 0 additions & 101 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,10 @@
77
import os.path as op
88
import shutil
99
import shutil as sh
10-
import timeit
1110
from datetime import datetime, timezone
1211
from pathlib import Path
1312

1413
import mne
15-
import numpy as np
1614
import pytest
1715
from mne.datasets import testing
1816
from mne.io import anonymize_info
@@ -168,105 +166,6 @@ def test_get_entity_vals(entity, expected_vals, kwargs, return_bids_test_dir):
168166
shutil.rmtree(deriv_path)
169167

170168

171-
def test_path_benchmark(tmp_path_factory):
172-
"""Benchmark exploring bids tree."""
173-
# This benchmark is to verify the speed-up in function call get_entity_vals with
174-
# `include_match=sub-*/` in face of a bids tree hosting derivatives and sourcedata.
175-
n_subjects = 20
176-
n_sessions = 10
177-
n_derivatives = 17
178-
tmp_bids_root = tmp_path_factory.mktemp("mnebids_utils_test_bids_ds")
179-
180-
derivatives = [
181-
Path("derivatives", "derivatives" + str(i)) for i in range(n_derivatives)
182-
]
183-
184-
bids_subdirectories = ["", "sourcedata", *derivatives]
185-
186-
# Create a BIDS compliant directory tree with high number of branches
187-
for i in range(1, n_subjects):
188-
for j in range(1, n_sessions):
189-
for subdir in bids_subdirectories:
190-
for datatype in ["eeg", "meg"]:
191-
bids_subdir = BIDSPath(
192-
subject=str(i),
193-
session=str(j),
194-
datatype=datatype,
195-
task="audvis",
196-
root=str(tmp_bids_root / subdir),
197-
)
198-
bids_subdir.mkdir(exist_ok=True)
199-
Path(bids_subdir.root / "participants.tsv").touch()
200-
Path(bids_subdir.root / "participants.csv").touch()
201-
Path(bids_subdir.root / "README").touch()
202-
203-
# os.makedirs(bids_subdir.directory, exist_ok=True)
204-
Path(
205-
bids_subdir.directory, bids_subdir.basename + "_events.tsv"
206-
).touch()
207-
Path(
208-
bids_subdir.directory, bids_subdir.basename + "_events.csv"
209-
).touch()
210-
211-
if datatype == "meg":
212-
ctf_path = Path(
213-
bids_subdir.directory, bids_subdir.basename + "_meg.ds"
214-
)
215-
ctf_path.mkdir(exist_ok=True)
216-
Path(ctf_path, bids_subdir.basename + ".meg4").touch()
217-
Path(ctf_path, bids_subdir.basename + ".hc").touch()
218-
Path(ctf_path / "hz.ds").mkdir(exist_ok=True)
219-
Path(ctf_path / "hz.ds" / "hz.meg4").touch()
220-
Path(ctf_path / "hz.ds" / "hz.hc").touch()
221-
222-
# apply nosub on find_matching_matchs with root level bids directory should
223-
# yield a performance boost of order of length from bids_subdirectories.
224-
timed_all, timed_ignored_nosub = (
225-
timeit.timeit(
226-
"mne_bids.find_matching_paths(tmp_bids_root)",
227-
setup="import mne_bids\ntmp_bids_root=r'" + str(tmp_bids_root) + "'",
228-
number=1,
229-
),
230-
timeit.timeit(
231-
"mne_bids.find_matching_paths(tmp_bids_root, ignore_nosub=True)",
232-
setup="import mne_bids\ntmp_bids_root=r'" + str(tmp_bids_root) + "'",
233-
number=10,
234-
)
235-
/ 10,
236-
)
237-
238-
assert (
239-
timed_all / (10 * np.maximum(1, np.floor(len(bids_subdirectories) / 10)))
240-
# while this should be of same order, lets give it some space by a factor of 2
241-
> 0.5 * timed_ignored_nosub
242-
)
243-
244-
# apply include_match on get_entity_vals with root level bids directory should
245-
# yield a performance boost of order of length from bids_subdirectories.
246-
timed_entity, timed_entity_match = (
247-
timeit.timeit(
248-
"mne_bids.get_entity_vals(tmp_bids_root, 'session')",
249-
setup="import mne_bids\ntmp_bids_root=r'" + str(tmp_bids_root) + "'",
250-
number=1,
251-
),
252-
timeit.timeit(
253-
"mne_bids.get_entity_vals(tmp_bids_root, 'session', include_match='sub-*/')", # noqa: E501
254-
setup="import mne_bids\ntmp_bids_root=r'" + str(tmp_bids_root) + "'",
255-
number=10,
256-
)
257-
/ 10,
258-
)
259-
260-
assert (
261-
timed_entity / (10 * np.maximum(1, np.floor(len(bids_subdirectories) / 10)))
262-
# while this should be of same order, lets give it some space by a factor of 2
263-
> 0.5 * timed_entity_match
264-
)
265-
266-
# Clean up
267-
shutil.rmtree(tmp_bids_root)
268-
269-
270169
def test_search_folder_for_text(capsys):
271170
"""Test finding entries."""
272171
with pytest.raises(ValueError, match="is not a directory"):

0 commit comments

Comments
 (0)