From 92d418f0cb3ed8b70bcffd469f67f1c446e5d04b Mon Sep 17 00:00:00 2001 From: waldie11 <86674066+waldie11@users.noreply.github.com> Date: Wed, 18 Dec 2024 16:38:51 +0100 Subject: [PATCH 01/22] enforce extension match on end of filename --- mne_bids/path.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mne_bids/path.py b/mne_bids/path.py index 63d6d1259..98781f7a3 100644 --- a/mne_bids/path.py +++ b/mne_bids/path.py @@ -2286,7 +2286,7 @@ def _filter_fnames( r"_desc-(" + "|".join(description) + ")" if description else r"(|_desc-([^_]+))" ) suffix_str = r"_(" + "|".join(suffix) + ")" if suffix else r"_([^_]+)" - ext_str = r"(" + "|".join(extension) + ")" if extension else r".([^_]+)" + ext_str = r"(" + "|".join(extension) + ")$" if extension else r".([^_]+)" regexp = ( leading_path_str From 0fc5861d8cd92779d9cecb04da6755bffea806da Mon Sep 17 00:00:00 2001 From: waldie11 <86674066+waldie11@users.noreply.github.com> Date: Wed, 18 Dec 2024 16:43:17 +0100 Subject: [PATCH 02/22] rework order in filtering for ignore_nosub including switch to glob.glob --- mne_bids/path.py | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/mne_bids/path.py b/mne_bids/path.py index 98781f7a3..9aec9d3e7 100644 --- a/mne_bids/path.py +++ b/mne_bids/path.py @@ -2455,13 +2455,25 @@ def _return_root_paths(root, datatype=None, ignore_json=True, ignore_nosub=False """ root = Path(root) # if root is str - if datatype is not None: - datatype = _ensure_tuple(datatype) - search_str = f'*/{"|".join(datatype)}/*' - else: + if datatype is None and not ignore_nosub: search_str = "*.*" + else: + + if datatype is not None: + datatype = _ensure_tuple(datatype) + search_str = f'**/{"|".join(datatype)}/*' - paths = root.rglob(search_str) + else: + search_str = "**/*.*" + if ignore_nosub: + search_str = "sub-*/" + search_str + + paths = list( + map( + lambda fn: Path(root, fn), + glob.glob(search_str, root_dir=root, recursive=True), + ) + ) # Only keep files (not directories), ... # and omit the JSON sidecars if `ignore_json` is True. if ignore_json: @@ -2469,12 +2481,6 @@ def _return_root_paths(root, datatype=None, ignore_json=True, ignore_nosub=False else: paths = [p for p in paths if p.is_file()] - # only keep files which are of the form root/sub-*, - # such that we only look in 'sub'-folders: - if ignore_nosub: - root_sub = str(root / "sub-") - paths = [p for p in paths if str(p).startswith(root_sub)] - return paths From 663389846eb372637c91adbbcbc5fac4e7d1c073 Mon Sep 17 00:00:00 2001 From: waldie11 <86674066+waldie11@users.noreply.github.com> Date: Wed, 18 Dec 2024 16:44:06 +0100 Subject: [PATCH 03/22] provide last call from root.rglob --- mne_bids/path.py | 1 + 1 file changed, 1 insertion(+) diff --git a/mne_bids/path.py b/mne_bids/path.py index 9aec9d3e7..85b41aa9a 100644 --- a/mne_bids/path.py +++ b/mne_bids/path.py @@ -2457,6 +2457,7 @@ def _return_root_paths(root, datatype=None, ignore_json=True, ignore_nosub=False if datatype is None and not ignore_nosub: search_str = "*.*" + paths = root.rglob(search_str) else: if datatype is not None: From 11131b395e93e6b88113c1cbbafbee1434aeb3f9 Mon Sep 17 00:00:00 2001 From: waldie11 <86674066+waldie11@users.noreply.github.com> Date: Wed, 18 Dec 2024 16:45:52 +0100 Subject: [PATCH 04/22] in bids meg, it is allowed to have folders from ctf raw meg data which are not files. Are there more folders in different datatypes allowed? --- mne_bids/path.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/mne_bids/path.py b/mne_bids/path.py index 85b41aa9a..3eb0448a9 100644 --- a/mne_bids/path.py +++ b/mne_bids/path.py @@ -2430,7 +2430,7 @@ def find_matching_paths( def _return_root_paths(root, datatype=None, ignore_json=True, ignore_nosub=False): - """Return all file paths in root. + """Return all file paths + .ds paths in root. Can be filtered by datatype (which is present in the path but not in the BIDSPath basename). Can also be list of datatypes. @@ -2451,7 +2451,7 @@ def _return_root_paths(root, datatype=None, ignore_json=True, ignore_nosub=False Returns ------- paths : list of pathlib.Path - All paths in `root`, filtered according to the function parameters. + All files + .ds paths in `root`, filtered according to the function parameters. """ root = Path(root) # if root is str @@ -2478,9 +2478,21 @@ def _return_root_paths(root, datatype=None, ignore_json=True, ignore_nosub=False # Only keep files (not directories), ... # and omit the JSON sidecars if `ignore_json` is True. if ignore_json: - paths = [p for p in paths if p.is_file() and p.suffix != ".json"] + paths = [ + p + for p in paths + if (p.is_file() and p.suffix != ".json") + # do we really want to do this here? + or (p.is_dir() and p.suffix == ".ds") + ] else: - paths = [p for p in paths if p.is_file()] + paths = [ + p + for p in paths + if p.is_file() + # do we really want to do this here + or (p.is_dir() and p.suffix == ".ds") + ] return paths From 41de35ea9c924e7663efefc68b159a23a5aa8c69 Mon Sep 17 00:00:00 2001 From: waldie11 <86674066+waldie11@users.noreply.github.com> Date: Wed, 18 Dec 2024 16:47:21 +0100 Subject: [PATCH 05/22] also filter for existence of file extension in name when datatype is provided --- mne_bids/path.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mne_bids/path.py b/mne_bids/path.py index 3eb0448a9..e51305359 100644 --- a/mne_bids/path.py +++ b/mne_bids/path.py @@ -2464,6 +2464,8 @@ def _return_root_paths(root, datatype=None, ignore_json=True, ignore_nosub=False datatype = _ensure_tuple(datatype) search_str = f'**/{"|".join(datatype)}/*' + # I think this one is more appropriate + search_str = search_str + ".*" else: search_str = "**/*.*" if ignore_nosub: From 163519717633132851cc7032a565bcb63aacc47c Mon Sep 17 00:00:00 2001 From: waldie11 <86674066+waldie11@users.noreply.github.com> Date: Wed, 18 Dec 2024 16:48:33 +0100 Subject: [PATCH 06/22] comment on improved performance through sub-folder filtering --- mne_bids/path.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mne_bids/path.py b/mne_bids/path.py index e51305359..8f999263d 100644 --- a/mne_bids/path.py +++ b/mne_bids/path.py @@ -2468,6 +2468,10 @@ def _return_root_paths(root, datatype=None, ignore_json=True, ignore_nosub=False search_str = search_str + ".*" else: search_str = "**/*.*" + + # only browse files which are of the form root/sub-*, + # such that we truely only look in 'sub'-folders: + if ignore_nosub: search_str = "sub-*/" + search_str From 2ccdfc9c58792a8b89971ebe0e2e0d382db70281 Mon Sep 17 00:00:00 2001 From: waldie11 <86674066+waldie11@users.noreply.github.com> Date: Wed, 18 Dec 2024 16:59:06 +0100 Subject: [PATCH 07/22] prefilter match in get_entity_vals scan --- mne_bids/path.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/mne_bids/path.py b/mne_bids/path.py index 8f999263d..05b636d64 100644 --- a/mne_bids/path.py +++ b/mne_bids/path.py @@ -14,6 +14,7 @@ from os import path as op from pathlib import Path from textwrap import indent +from itertools import chain as iter_chain import numpy as np from mne.utils import _check_fname, _validate_type, logger, verbose @@ -1897,6 +1898,7 @@ def get_entity_vals( ignore_modalities=None, ignore_datatypes=None, ignore_dirs=("derivatives", "sourcedata"), + include_match=None, with_key=False, verbose=None, ): @@ -1957,6 +1959,11 @@ def get_entity_vals( Directories nested directly within ``root`` to ignore. If ``None``, include all directories in the search. + .. versionadded:: 0.17-dev + include_match : str | array-like of str | None + Apply a starting match pragma following Unix style pattern syntax from + package glob to prefilter search criterion. + .. versionadded:: 0.9 with_key : bool If ``True``, returns the full entity with the key and the value. This @@ -2052,7 +2059,13 @@ def get_entity_vals( p = re.compile(rf"{entity_long_abbr_map[entity_key]}-(.*?)_") values = list() - filenames = root.glob(f"**/*{entity_long_abbr_map[entity_key]}-*_*") + search_str = f"**/*{entity_long_abbr_map[entity_key]}-*_*" + if include_match is not None: + include_match = _ensure_tuple(include_match) + filenames = [root.glob(im + search_str) for im in include_match] + filenames = iter_chain(*filenames) + else: + filenames = root.glob(search_str) for filename in filenames: # Skip ignored directories From a4efdeb1c4f4f13188a49c81bfac10023808e669 Mon Sep 17 00:00:00 2001 From: waldie11 <86674066+waldie11@users.noreply.github.com> Date: Thu, 2 Jan 2025 17:52:27 +0100 Subject: [PATCH 08/22] pre-commit changes --- mne_bids/path.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mne_bids/path.py b/mne_bids/path.py index 05b636d64..63b678c46 100644 --- a/mne_bids/path.py +++ b/mne_bids/path.py @@ -11,10 +11,10 @@ from copy import deepcopy from datetime import datetime from io import StringIO +from itertools import chain as iter_chain from os import path as op from pathlib import Path from textwrap import indent -from itertools import chain as iter_chain import numpy as np from mne.utils import _check_fname, _validate_type, logger, verbose @@ -2472,7 +2472,6 @@ def _return_root_paths(root, datatype=None, ignore_json=True, ignore_nosub=False search_str = "*.*" paths = root.rglob(search_str) else: - if datatype is not None: datatype = _ensure_tuple(datatype) search_str = f'**/{"|".join(datatype)}/*' From e9147c74438c0fb1cc2390fbad670a3d2028840c Mon Sep 17 00:00:00 2001 From: waldie11 <86674066+waldie11@users.noreply.github.com> Date: Mon, 23 Dec 2024 16:21:13 +0100 Subject: [PATCH 09/22] use iglob iterator --- mne_bids/path.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/mne_bids/path.py b/mne_bids/path.py index 63b678c46..4d246b0f1 100644 --- a/mne_bids/path.py +++ b/mne_bids/path.py @@ -2487,12 +2487,8 @@ def _return_root_paths(root, datatype=None, ignore_json=True, ignore_nosub=False if ignore_nosub: search_str = "sub-*/" + search_str - paths = list( - map( - lambda fn: Path(root, fn), - glob.glob(search_str, root_dir=root, recursive=True), - ) - ) + paths = [ Path(root, fn) for fn in glob.iglob(search_str, root_dir=root, recursive=True) ] + # Only keep files (not directories), ... # and omit the JSON sidecars if `ignore_json` is True. if ignore_json: From 67578dd799d8b656a4d7a0a0595fd89291c79409 Mon Sep 17 00:00:00 2001 From: waldie11 <86674066+waldie11@users.noreply.github.com> Date: Mon, 3 Feb 2025 14:02:52 +0100 Subject: [PATCH 10/22] pre-commit syntax --- mne_bids/path.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mne_bids/path.py b/mne_bids/path.py index 4d246b0f1..9ab0b80d4 100644 --- a/mne_bids/path.py +++ b/mne_bids/path.py @@ -2474,7 +2474,7 @@ def _return_root_paths(root, datatype=None, ignore_json=True, ignore_nosub=False else: if datatype is not None: datatype = _ensure_tuple(datatype) - search_str = f'**/{"|".join(datatype)}/*' + search_str = f"**/{'|'.join(datatype)}/*" # I think this one is more appropriate search_str = search_str + ".*" From def6ab2886b075053ad5cef700a31d3a37ea9b80 Mon Sep 17 00:00:00 2001 From: waldie11 <86674066+waldie11@users.noreply.github.com> Date: Mon, 3 Feb 2025 14:48:57 +0100 Subject: [PATCH 11/22] Add changelog --- doc/authors.rst | 3 ++- doc/whats_new.rst | 4 ++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/doc/authors.rst b/doc/authors.rst index cc061b1ea..22bcd1e43 100644 --- a/doc/authors.rst +++ b/doc/authors.rst @@ -5,10 +5,12 @@ .. _Amaia Benitez: https://github.com/AmaiaBA .. _Anand Saini: https://github.com/anandsaini024 .. _Ariel Rokem: https://github.com/arokem +.. _Arne Gottwald: https://github.com/waldie11 .. _Austin Hurst: https://github.com/a-hurst .. _Berk Gerçek: https://github.com/berkgercek .. _Bruno Hebling Vieira: https://github.com/bhvieira .. _Chris Holdgraf: https://bids.berkeley.edu/people/chris-holdgraf +.. _Christian O'Reilly: https://github.com/christian-oreilly .. _Clemens Brunner: https://github.com/cbrnr .. _Daniel McCloy: http://dan.mccloy.info .. _Denis Engemann: https://github.com/dengemann @@ -52,4 +54,3 @@ .. _Tom Donoghue: https://github.com/TomDonoghue .. _William Turner: https://bootstrapbill.github.io/ .. _Yorguin Mantilla: https://github.com/yjmantilla -.. _Christian O'Reilly: https://github.com/christian-oreilly diff --git a/doc/whats_new.rst b/doc/whats_new.rst index fcf110636..9b36f5861 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -19,6 +19,7 @@ The following authors contributed for the first time. Thank you so much! 🤩 * `Christian O'Reilly`_ * `Berk Gerçek`_ +* `Arne Gottwald`_ The following authors had contributed before. Thank you for sticking around! 🤘 @@ -35,6 +36,8 @@ Detailed list of changes - :func:`mne_bids.write_raw_bids()` can now handle mne `Raw` objects with `eyegaze` and `pupil` channels, by `Christian O'Reilly`_ (:gh:`1344`) - :func:`mne_bids.get_entity_vals()` has a new parameter ``ignore_suffixes`` to easily ignore sidecar files, by `Daniel McCloy`_ (:gh:`1362`) - Empty-room matching now preferentially finds recordings in the subject directory tagged as `task-noise` before looking in the `sub-emptyroom` directories. This adds support for a part of the BIDS specification for ER recordings, by `Berk Gerçek`_ (:gh:`1364`) +- Path matching is now implemenented in a more efficient manner within `_return_root_paths`, by `Arne Gottwald` (:gh:`1355`) +- :func:`mne_bids.get_entity_vals()` has a new parameter ``include_match`` to prefilter item matching and ignore non-matched items from begin of directory scan, by `Arne Gottwald` (:gh:`1355`) 🧐 API and behavior changes @@ -53,6 +56,7 @@ Detailed list of changes - :func:`mne_bids.read_raw_bids` can optionally return an ``event_id`` dictionary suitable for use with :func:`mne.events_from_annotations`, and if a ``values`` column is present in ``events.tsv`` it will be used as the source of the integer event ID codes, by `Daniel McCloy`_ (:gh:`1349`) - BIDS dictates that the recording entity should be displayed as "_recording-" in the filename. This PR makes :class:`mne_bids.BIDSPath` correctly display "_recording-" (instead of "_rec-") in BIDSPath.fpath. By `Scott Huberty`_ (:gh:`1348`) - :func:`mne_bids.make_dataset_description` now correctly encodes the dataset description as UTF-8 on disk, by `Scott Huberty`_ (:gh:`1357`) +- Corrects extension match within `_filter_fnames` at end of filename, by `Arne Gottwald` (:gh:`1355`) ⚕️ Code health ^^^^^^^^^^^^^^ From 9f2dff8e6bf2258ebf1ee79aa9a71df5db1bb0f5 Mon Sep 17 00:00:00 2001 From: waldie11 <86674066+waldie11@users.noreply.github.com> Date: Mon, 3 Feb 2025 21:24:44 +0100 Subject: [PATCH 12/22] benchmark _return_root_paths --- mne_bids/tests/test_path.py | 99 +++++++++++++++++++++++++++++++++++++ 1 file changed, 99 insertions(+) diff --git a/mne_bids/tests/test_path.py b/mne_bids/tests/test_path.py index c2913d56e..9a4fcb028 100644 --- a/mne_bids/tests/test_path.py +++ b/mne_bids/tests/test_path.py @@ -7,10 +7,12 @@ import os.path as op import shutil import shutil as sh +import timeit from datetime import datetime, timezone from pathlib import Path import mne +import numpy as np import pytest from mne.datasets import testing from mne.io import anonymize_info @@ -166,6 +168,103 @@ def test_get_entity_vals(entity, expected_vals, kwargs, return_bids_test_dir): shutil.rmtree(deriv_path) +def test_path_benchmark(tmp_path_factory): + """Benchmark exploring bids tree.""" + tmp_bids_root = tmp_path_factory.mktemp("abc") + + derivatives = ["derivatives/derivatives" + str(i) for i in range(17)] + + bids_subdirectories = ["", "sourcedata", *derivatives] + + def equal_length_subj_ids(v): + assert all(v > 0) + n_digits = len(str(np.max(v))) + 1 + return list(map(lambda i: "0" * (n_digits - len(str(i))) + str(i), v)) + + # Create a BIDS compliant directory tree with high number of branches + for i in equal_length_subj_ids(np.arange(1, 20, dtype=int)): + for j in range(1, 9): + for subdir in bids_subdirectories: + for datatype in ["eeg", "meg"]: + bids_subdir = BIDSPath( + subject=i, + session="0" + str(j), + datatype=datatype, + task="audvis", + root=str(tmp_bids_root / subdir), + ) + bids_subdir.mkdir(exist_ok=True) + Path(bids_subdir.root / "participants.tsv").touch() + Path(bids_subdir.root / "participants.csv").touch() + Path(bids_subdir.root / "README").touch() + + # os.makedirs(bids_subdir.directory, exist_ok=True) + Path( + bids_subdir.directory, bids_subdir.basename + "_events.tsv" + ).touch() + Path( + bids_subdir.directory, bids_subdir.basename + "_events.csv" + ).touch() + + if datatype == "meg": + ctf_path = Path( + bids_subdir.directory, bids_subdir.basename + "_meg.ds" + ) + ctf_path.mkdir(exist_ok=True) + Path(ctf_path, bids_subdir.basename + ".meg4").touch() + Path(ctf_path, bids_subdir.basename + ".hc").touch() + Path(ctf_path / "hz.ds").mkdir(exist_ok=True) + Path(ctf_path / "hz.ds" / "hz.meg4").touch() + Path(ctf_path / "hz.ds" / "hz.hc").touch() + + # apply nosub on find_matching_matchs with root level bids directory should + # yield a performance boost of order of length from bids_subdirectories. + timed_all, timed_ignored_nosub = ( + timeit.timeit( + "mne_bids.find_matching_paths(tmp_bids_root)", + setup="import mne_bids\ntmp_bids_root='" + str(tmp_bids_root) + "'", + number=1, + ), + timeit.timeit( + "mne_bids.find_matching_paths(tmp_bids_root, ignore_nosub=True)", + setup="import mne_bids\ntmp_bids_root='" + str(tmp_bids_root) + "'", + number=10, + ) + / 10, + ) + + assert ( + timed_all / (10 * np.maximum(1, np.floor(len(bids_subdirectories) / 10))) + # while this should be of same order, lets give it some space by a factor of 2 + > 0.5 * timed_ignored_nosub + ) + + # apply include_match on get_entity_vals with root level bids directory should + # yield a performance boost of order of length from bids_subdirectories. + timed_entity, timed_entity_match = ( + timeit.timeit( + "mne_bids.get_entity_vals(tmp_bids_root, 'session')", + setup="import mne_bids\ntmp_bids_root='" + str(tmp_bids_root) + "'", + number=1, + ), + timeit.timeit( + "mne_bids.get_entity_vals(tmp_bids_root, 'session', include_match='sub-*/')", # noqa: E501 + setup="import mne_bids\ntmp_bids_root='" + str(tmp_bids_root) + "'", + number=10, + ) + / 10, + ) + + assert ( + timed_entity / (10 * np.maximum(1, np.floor(len(bids_subdirectories) / 10))) + # while this should be of same order, lets give it some space by a factor of 2 + > 0.5 * timed_entity_match + ) + + # Clean up + shutil.rmtree(tmp_bids_root) + + def test_search_folder_for_text(capsys): """Test finding entries.""" with pytest.raises(ValueError, match="is not a directory"): From 3243c053bdb2ca8ac024dbb5df1d6b4a1c61b7c1 Mon Sep 17 00:00:00 2001 From: waldie11 <86674066+waldie11@users.noreply.github.com> Date: Mon, 3 Feb 2025 22:16:01 +0100 Subject: [PATCH 13/22] Always use Path --- mne_bids/tests/test_path.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mne_bids/tests/test_path.py b/mne_bids/tests/test_path.py index 9a4fcb028..783875c45 100644 --- a/mne_bids/tests/test_path.py +++ b/mne_bids/tests/test_path.py @@ -172,7 +172,7 @@ def test_path_benchmark(tmp_path_factory): """Benchmark exploring bids tree.""" tmp_bids_root = tmp_path_factory.mktemp("abc") - derivatives = ["derivatives/derivatives" + str(i) for i in range(17)] + derivatives = [Path("derivatives", "derivatives" + str(i)) for i in range(17)] bids_subdirectories = ["", "sourcedata", *derivatives] From 860e36f99752023788ec6764ceeb38ef56bc6e72 Mon Sep 17 00:00:00 2001 From: waldie11 <86674066+waldie11@users.noreply.github.com> Date: Mon, 3 Feb 2025 22:52:24 +0100 Subject: [PATCH 14/22] raw strings to allow windows path --- mne_bids/tests/test_path.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mne_bids/tests/test_path.py b/mne_bids/tests/test_path.py index 783875c45..214121f11 100644 --- a/mne_bids/tests/test_path.py +++ b/mne_bids/tests/test_path.py @@ -222,12 +222,12 @@ def equal_length_subj_ids(v): timed_all, timed_ignored_nosub = ( timeit.timeit( "mne_bids.find_matching_paths(tmp_bids_root)", - setup="import mne_bids\ntmp_bids_root='" + str(tmp_bids_root) + "'", + setup="import mne_bids\ntmp_bids_root=r'" + str(tmp_bids_root) + "'", number=1, ), timeit.timeit( "mne_bids.find_matching_paths(tmp_bids_root, ignore_nosub=True)", - setup="import mne_bids\ntmp_bids_root='" + str(tmp_bids_root) + "'", + setup="import mne_bids\ntmp_bids_root=r'" + str(tmp_bids_root) + "'", number=10, ) / 10, @@ -244,12 +244,12 @@ def equal_length_subj_ids(v): timed_entity, timed_entity_match = ( timeit.timeit( "mne_bids.get_entity_vals(tmp_bids_root, 'session')", - setup="import mne_bids\ntmp_bids_root='" + str(tmp_bids_root) + "'", + setup="import mne_bids\ntmp_bids_root=r'" + str(tmp_bids_root) + "'", number=1, ), timeit.timeit( "mne_bids.get_entity_vals(tmp_bids_root, 'session', include_match='sub-*/')", # noqa: E501 - setup="import mne_bids\ntmp_bids_root='" + str(tmp_bids_root) + "'", + setup="import mne_bids\ntmp_bids_root=r'" + str(tmp_bids_root) + "'", number=10, ) / 10, From 67a6717b884e63788b4ca54e526d8802e59b81b5 Mon Sep 17 00:00:00 2001 From: waldie11 <86674066+waldie11@users.noreply.github.com> Date: Thu, 20 Feb 2025 18:19:37 +0100 Subject: [PATCH 15/22] spam f-string formatting --- mne_bids/tests/test_path.py | 43 ++++++++++++++++--------------------- 1 file changed, 19 insertions(+), 24 deletions(-) diff --git a/mne_bids/tests/test_path.py b/mne_bids/tests/test_path.py index 214121f11..6530e862d 100644 --- a/mne_bids/tests/test_path.py +++ b/mne_bids/tests/test_path.py @@ -172,50 +172,45 @@ def test_path_benchmark(tmp_path_factory): """Benchmark exploring bids tree.""" tmp_bids_root = tmp_path_factory.mktemp("abc") - derivatives = [Path("derivatives", "derivatives" + str(i)) for i in range(17)] + derivatives = [Path(f"derivatives", f"derivatives" + str(i)) for i in range(17)] - bids_subdirectories = ["", "sourcedata", *derivatives] - - def equal_length_subj_ids(v): - assert all(v > 0) - n_digits = len(str(np.max(v))) + 1 - return list(map(lambda i: "0" * (n_digits - len(str(i))) + str(i), v)) + bids_subdirectories = [f"", f"sourcedata", *derivatives] # Create a BIDS compliant directory tree with high number of branches - for i in equal_length_subj_ids(np.arange(1, 20, dtype=int)): + for i in range(1, 20): for j in range(1, 9): for subdir in bids_subdirectories: - for datatype in ["eeg", "meg"]: + for datatype in [f"eeg", f"meg"]: bids_subdir = BIDSPath( - subject=i, - session="0" + str(j), + subject=str(i), + session=str(j), datatype=datatype, - task="audvis", + task=f"audvis", root=str(tmp_bids_root / subdir), ) bids_subdir.mkdir(exist_ok=True) - Path(bids_subdir.root / "participants.tsv").touch() - Path(bids_subdir.root / "participants.csv").touch() - Path(bids_subdir.root / "README").touch() + Path(bids_subdir.root / f"participants.tsv").touch() + Path(bids_subdir.root / f"participants.csv").touch() + Path(bids_subdir.root / f"README").touch() # os.makedirs(bids_subdir.directory, exist_ok=True) Path( - bids_subdir.directory, bids_subdir.basename + "_events.tsv" + bids_subdir.directory, bids_subdir.basename + f"_events.tsv" ).touch() Path( - bids_subdir.directory, bids_subdir.basename + "_events.csv" + bids_subdir.directory, bids_subdir.basename + f"_events.csv" ).touch() - if datatype == "meg": + if datatype == f"meg": ctf_path = Path( - bids_subdir.directory, bids_subdir.basename + "_meg.ds" + bids_subdir.directory, bids_subdir.basename + f"_meg.ds" ) ctf_path.mkdir(exist_ok=True) - Path(ctf_path, bids_subdir.basename + ".meg4").touch() - Path(ctf_path, bids_subdir.basename + ".hc").touch() - Path(ctf_path / "hz.ds").mkdir(exist_ok=True) - Path(ctf_path / "hz.ds" / "hz.meg4").touch() - Path(ctf_path / "hz.ds" / "hz.hc").touch() + Path(ctf_path, bids_subdir.basename + f".meg4").touch() + Path(ctf_path, bids_subdir.basename + f".hc").touch() + Path(ctf_path / f"hz.ds").mkdir(exist_ok=True) + Path(ctf_path / f"hz.ds" / f"hz.meg4").touch() + Path(ctf_path / f"hz.ds" / f"hz.hc").touch() # apply nosub on find_matching_matchs with root level bids directory should # yield a performance boost of order of length from bids_subdirectories. From 7399903e7c84ef6f8cc6bf904d983a91ff5899cf Mon Sep 17 00:00:00 2001 From: waldie11 <86674066+waldie11@users.noreply.github.com> Date: Thu, 20 Feb 2025 18:20:13 +0100 Subject: [PATCH 16/22] improve readability test_path_benchmark --- mne_bids/tests/test_path.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/mne_bids/tests/test_path.py b/mne_bids/tests/test_path.py index 6530e862d..d2fdaaea6 100644 --- a/mne_bids/tests/test_path.py +++ b/mne_bids/tests/test_path.py @@ -170,15 +170,20 @@ def test_get_entity_vals(entity, expected_vals, kwargs, return_bids_test_dir): def test_path_benchmark(tmp_path_factory): """Benchmark exploring bids tree.""" - tmp_bids_root = tmp_path_factory.mktemp("abc") + # This benchmark is to verify the speed-up in function call get_entity_vals with + # `include_match=sub-*/` in face of a bids tree which hosts derivatives and sourcedata. + n_subjects = 20 + n_sessions = 10 + n_derivatives = 17 + tmp_bids_root = tmp_path_factory.mktemp("mnebids_utils_test_bids_ds") - derivatives = [Path(f"derivatives", f"derivatives" + str(i)) for i in range(17)] + derivatives = [Path(f"derivatives", f"derivatives" + str(i)) for i in range(n_derivatives)] bids_subdirectories = [f"", f"sourcedata", *derivatives] # Create a BIDS compliant directory tree with high number of branches - for i in range(1, 20): - for j in range(1, 9): + for i in range(1, n_subjects): + for j in range(1, n_sessions): for subdir in bids_subdirectories: for datatype in [f"eeg", f"meg"]: bids_subdir = BIDSPath( From e3c055af4907b1f4dac2ed29ebdc7f6050e1e53d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 20 Feb 2025 17:29:59 +0000 Subject: [PATCH 17/22] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mne_bids/tests/test_path.py | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/mne_bids/tests/test_path.py b/mne_bids/tests/test_path.py index d2fdaaea6..72ef5b5ad 100644 --- a/mne_bids/tests/test_path.py +++ b/mne_bids/tests/test_path.py @@ -177,45 +177,47 @@ def test_path_benchmark(tmp_path_factory): n_derivatives = 17 tmp_bids_root = tmp_path_factory.mktemp("mnebids_utils_test_bids_ds") - derivatives = [Path(f"derivatives", f"derivatives" + str(i)) for i in range(n_derivatives)] + derivatives = [ + Path("derivatives", "derivatives" + str(i)) for i in range(n_derivatives) + ] - bids_subdirectories = [f"", f"sourcedata", *derivatives] + bids_subdirectories = ["", "sourcedata", *derivatives] # Create a BIDS compliant directory tree with high number of branches for i in range(1, n_subjects): for j in range(1, n_sessions): for subdir in bids_subdirectories: - for datatype in [f"eeg", f"meg"]: + for datatype in ["eeg", "meg"]: bids_subdir = BIDSPath( subject=str(i), session=str(j), datatype=datatype, - task=f"audvis", + task="audvis", root=str(tmp_bids_root / subdir), ) bids_subdir.mkdir(exist_ok=True) - Path(bids_subdir.root / f"participants.tsv").touch() - Path(bids_subdir.root / f"participants.csv").touch() - Path(bids_subdir.root / f"README").touch() + Path(bids_subdir.root / "participants.tsv").touch() + Path(bids_subdir.root / "participants.csv").touch() + Path(bids_subdir.root / "README").touch() # os.makedirs(bids_subdir.directory, exist_ok=True) Path( - bids_subdir.directory, bids_subdir.basename + f"_events.tsv" + bids_subdir.directory, bids_subdir.basename + "_events.tsv" ).touch() Path( - bids_subdir.directory, bids_subdir.basename + f"_events.csv" + bids_subdir.directory, bids_subdir.basename + "_events.csv" ).touch() - if datatype == f"meg": + if datatype == "meg": ctf_path = Path( - bids_subdir.directory, bids_subdir.basename + f"_meg.ds" + bids_subdir.directory, bids_subdir.basename + "_meg.ds" ) ctf_path.mkdir(exist_ok=True) - Path(ctf_path, bids_subdir.basename + f".meg4").touch() - Path(ctf_path, bids_subdir.basename + f".hc").touch() - Path(ctf_path / f"hz.ds").mkdir(exist_ok=True) - Path(ctf_path / f"hz.ds" / f"hz.meg4").touch() - Path(ctf_path / f"hz.ds" / f"hz.hc").touch() + Path(ctf_path, bids_subdir.basename + ".meg4").touch() + Path(ctf_path, bids_subdir.basename + ".hc").touch() + Path(ctf_path / "hz.ds").mkdir(exist_ok=True) + Path(ctf_path / "hz.ds" / "hz.meg4").touch() + Path(ctf_path / "hz.ds" / "hz.hc").touch() # apply nosub on find_matching_matchs with root level bids directory should # yield a performance boost of order of length from bids_subdirectories. From d659a3601e82461438c9df6f4b76a61ce1283c20 Mon Sep 17 00:00:00 2001 From: waldie11 <86674066+waldie11@users.noreply.github.com> Date: Thu, 20 Feb 2025 18:32:34 +0100 Subject: [PATCH 18/22] Suggest path match with private func generalization Co-authored-by: Stefan Appelhoff --- mne_bids/path.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mne_bids/path.py b/mne_bids/path.py index e0f93c5b5..575405bfe 100644 --- a/mne_bids/path.py +++ b/mne_bids/path.py @@ -2571,7 +2571,8 @@ def _return_root_paths(root, datatype=None, ignore_json=True, ignore_nosub=False p for p in paths if (p.is_file() and p.suffix != ".json") - # do we really want to do this here? + # XXX: generalize with a private func that takes + # a config of which "data format" are to be expected like .ds or (p.is_dir() and p.suffix == ".ds") ] else: From 97072cb37002b8723e9c7167cd398f276dbb160a Mon Sep 17 00:00:00 2001 From: waldie11 <86674066+waldie11@users.noreply.github.com> Date: Thu, 20 Feb 2025 18:35:04 +0100 Subject: [PATCH 19/22] Suggest path match with private func generalization Co-authored-by: Stefan Appelhoff --- mne_bids/path.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mne_bids/path.py b/mne_bids/path.py index 575405bfe..32a944c32 100644 --- a/mne_bids/path.py +++ b/mne_bids/path.py @@ -2580,7 +2580,7 @@ def _return_root_paths(root, datatype=None, ignore_json=True, ignore_nosub=False p for p in paths if p.is_file() - # do we really want to do this here + # XXX: see above, generalize with private func or (p.is_dir() and p.suffix == ".ds") ] From dbd486738633d082c7ad86089abaad5a6d973a16 Mon Sep 17 00:00:00 2001 From: waldie11 <86674066+waldie11@users.noreply.github.com> Date: Thu, 20 Feb 2025 18:48:14 +0100 Subject: [PATCH 20/22] Correct verionadded items Co-authored-by: Stefan Appelhoff --- mne_bids/path.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mne_bids/path.py b/mne_bids/path.py index 32a944c32..08d7a352f 100644 --- a/mne_bids/path.py +++ b/mne_bids/path.py @@ -2010,7 +2010,7 @@ def get_entity_vals( Directories nested directly within ``root`` to ignore. If ``None``, include all directories in the search. - .. versionadded:: 0.17 + .. versionadded:: 0.9 ignore_suffixes : str | array-like of str | None Suffixes to ignore. If ``None``, include all suffixes. This can be helpful for ignoring non-data sidecars such as `*_scans.tsv` or `*_coordsystem.json`. @@ -2018,7 +2018,7 @@ def get_entity_vals( Apply a starting match pragma following Unix style pattern syntax from package glob to prefilter search criterion. - .. versionadded:: 0.9 + .. versionadded:: 0.17 with_key : bool If ``True``, returns the full entity with the key and the value. This will for example look like ``['sub-001', 'sub-002']``. From df82998af806024cbdfc9631b010b5c95f051686 Mon Sep 17 00:00:00 2001 From: waldie11 <86674066+waldie11@users.noreply.github.com> Date: Thu, 20 Feb 2025 19:38:10 +0100 Subject: [PATCH 21/22] make pre-commit happy --- mne_bids/tests/test_path.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mne_bids/tests/test_path.py b/mne_bids/tests/test_path.py index 72ef5b5ad..43fd2a37d 100644 --- a/mne_bids/tests/test_path.py +++ b/mne_bids/tests/test_path.py @@ -171,7 +171,7 @@ def test_get_entity_vals(entity, expected_vals, kwargs, return_bids_test_dir): def test_path_benchmark(tmp_path_factory): """Benchmark exploring bids tree.""" # This benchmark is to verify the speed-up in function call get_entity_vals with - # `include_match=sub-*/` in face of a bids tree which hosts derivatives and sourcedata. + # `include_match=sub-*/` in face of a bids tree hosting derivatives and sourcedata. n_subjects = 20 n_sessions = 10 n_derivatives = 17 From 590f14e8dd503196db50f681b17803980331c3d9 Mon Sep 17 00:00:00 2001 From: Stefan Appelhoff Date: Thu, 20 Feb 2025 21:15:14 +0100 Subject: [PATCH 22/22] Apply suggestions from code review --- mne_bids/path.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/mne_bids/path.py b/mne_bids/path.py index 08d7a352f..05ce32a5f 100644 --- a/mne_bids/path.py +++ b/mne_bids/path.py @@ -2546,10 +2546,7 @@ def _return_root_paths(root, datatype=None, ignore_json=True, ignore_nosub=False else: if datatype is not None: datatype = _ensure_tuple(datatype) - search_str = f"**/{'|'.join(datatype)}/*" - - # I think this one is more appropriate - search_str = search_str + ".*" + search_str = f"**/{'|'.join(datatype)}/.*" else: search_str = "**/*.*"