Skip to content

Commit 1458d15

Browse files
author
Maximilien Chaumon
committed
optimize rootpath scan when entities are known
1 parent 941a12c commit 1458d15

File tree

1 file changed

+67
-19
lines changed

1 file changed

+67
-19
lines changed

mne_bids/path.py

Lines changed: 67 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -1105,7 +1105,8 @@ def match(self, *, ignore_json=True, ignore_nosub=False, check=False):
11051105
datatype=self.datatype,
11061106
ignore_json=ignore_json,
11071107
ignore_nosub=ignore_nosub,
1108-
)
1108+
entities=self.entities
1109+
)
11091110

11101111
fnames = _filter_fnames(
11111112
paths, suffix=self.suffix, extension=self.extension, **self.entities
@@ -2542,8 +2543,8 @@ def find_matching_paths(
25422543
return bids_paths
25432544

25442545

2545-
def _return_root_paths(root, datatype=None, ignore_json=True, ignore_nosub=False):
2546-
"""Return all file paths + .ds paths in root.
2546+
def _return_root_paths(root, datatype=None, ignore_json=True, ignore_nosub=False, entities=None):
2547+
"""Return all file paths + .ds paths in root with entity-aware optimization.
25472548
25482549
Can be filtered by datatype (which is present in the path but not in
25492550
the BIDSPath basename). Can also be list of datatypes.
@@ -2560,6 +2561,9 @@ def _return_root_paths(root, datatype=None, ignore_json=True, ignore_nosub=False
25602561
ignore_nosub : bool
25612562
If ``True``, return only files of the form ``root/sub-*``. Defaults to
25622563
``False``.
2564+
entities : dict | None
2565+
Dictionary of BIDS entities to enable targeted directory scanning.
2566+
If provided with 'subject', will scan only that subject's directory.
25632567
25642568
Returns
25652569
-------
@@ -2568,30 +2572,75 @@ def _return_root_paths(root, datatype=None, ignore_json=True, ignore_nosub=False
25682572
"""
25692573
root = Path(root) # if root is str
25702574

2571-
if datatype is None and not ignore_nosub:
2572-
paths = root.rglob("*.*")
2573-
else:
2575+
# OPTIMIZATION: Use entity-aware path construction when entities available
2576+
if entities and entities.get('subject'):
2577+
# Build targeted search path starting from subject directory
2578+
search_parts = [f"sub-{entities['subject']}"]
2579+
2580+
# Add session if available
2581+
if entities.get('session'):
2582+
search_parts.append(f"ses-{entities['session']}")
2583+
2584+
# Add datatype-specific path
25742585
if datatype is not None:
25752586
datatype = _ensure_tuple(datatype)
2576-
search_str = f"**/{'|'.join(datatype)}/*.*"
2587+
if len(datatype) == 1:
2588+
# Single datatype - construct direct path
2589+
search_parts.extend(["**", datatype[0]])
2590+
search_str = "/".join(search_parts) + "/*.*"
2591+
else:
2592+
# Multiple datatypes - search each separately
2593+
paths = []
2594+
for dt in datatype:
2595+
dt_search_parts = search_parts + ["**", dt]
2596+
dt_search_str = "/".join(dt_search_parts) + "/*.*"
2597+
paths.extend([
2598+
Path(root, fn)
2599+
for fn in glob.iglob(dt_search_str, root_dir=root, recursive=True)
2600+
])
2601+
return _filter_paths_optimized(paths, ignore_json)
25772602
else:
2578-
search_str = "**/*.*"
2579-
2580-
# only browse files which are of the form root/sub-*,
2581-
# such that we truly only look in 'sub'-folders:
2582-
if ignore_nosub:
2583-
search_str = f"sub-*/{search_str}"
2584-
# TODO: Why is this not equivalent to list(root.rglob(search_str)) ?
2585-
# Most of the speedup is from using glob.iglob here.
2603+
# No datatype specified - search all datatypes under subject
2604+
search_parts.append("**")
2605+
search_str = "/".join(search_parts) + "/*.*"
2606+
2607+
# Single search with optimized path
25862608
paths = [
25872609
Path(root, fn)
25882610
for fn in glob.iglob(search_str, root_dir=root, recursive=True)
25892611
]
2612+
2613+
else:
2614+
# FALLBACK: Original implementation when entities not available or subject unknown
2615+
if datatype is None and not ignore_nosub:
2616+
paths = root.rglob("*.*")
2617+
else:
2618+
if datatype is not None:
2619+
datatype = _ensure_tuple(datatype)
2620+
search_str = f"**/{'|'.join(datatype)}/*.*"
2621+
else:
2622+
search_str = "**/*.*"
2623+
2624+
# only browse files which are of the form root/sub-*,
2625+
# such that we truly only look in 'sub'-folders:
2626+
if ignore_nosub:
2627+
search_str = f"sub-*/{search_str}"
2628+
# TODO: Why is this not equivalent to list(root.rglob(search_str)) ?
2629+
# Most of the speedup is from using glob.iglob here.
2630+
paths = [
2631+
Path(root, fn)
2632+
for fn in glob.iglob(search_str, root_dir=root, recursive=True)
2633+
]
25902634

2635+
return _filter_paths_optimized(paths, ignore_json)
2636+
2637+
2638+
def _filter_paths_optimized(paths, ignore_json):
2639+
"""Filter paths based on file type criteria - extracted for reuse."""
25912640
# Only keep files (not directories), ...
25922641
# and omit the JSON sidecars if `ignore_json` is True.
25932642
if ignore_json:
2594-
paths = [
2643+
return [
25952644
p
25962645
for p in paths
25972646
if (p.is_file() and p.suffix != ".json")
@@ -2600,16 +2649,14 @@ def _return_root_paths(root, datatype=None, ignore_json=True, ignore_nosub=False
26002649
or (p.is_dir() and p.suffix == ".ds")
26012650
]
26022651
else:
2603-
paths = [
2652+
return [
26042653
p
26052654
for p in paths
26062655
if p.is_file()
26072656
# XXX: see above, generalize with private func
26082657
or (p.is_dir() and p.suffix == ".ds")
26092658
]
26102659

2611-
return paths
2612-
26132660

26142661
def _fnames_to_bidspaths(fnames, root, check=False):
26152662
"""Make BIDSPaths from file names.
@@ -2655,3 +2702,4 @@ def _fnames_to_bidspaths(fnames, root, check=False):
26552702

26562703
bids_paths.append(bids_path)
26572704
return bids_paths
2705+

0 commit comments

Comments
 (0)