@@ -1105,7 +1105,8 @@ def match(self, *, ignore_json=True, ignore_nosub=False, check=False):
11051105 datatype = self .datatype ,
11061106 ignore_json = ignore_json ,
11071107 ignore_nosub = ignore_nosub ,
1108- )
1108+ entities = self .entities
1109+ )
11091110
11101111 fnames = _filter_fnames (
11111112 paths , suffix = self .suffix , extension = self .extension , ** self .entities
@@ -2542,8 +2543,8 @@ def find_matching_paths(
25422543 return bids_paths
25432544
25442545
2545- def _return_root_paths (root , datatype = None , ignore_json = True , ignore_nosub = False ):
2546- """Return all file paths + .ds paths in root.
2546+ def _return_root_paths (root , datatype = None , ignore_json = True , ignore_nosub = False , entities = None ):
2547+ """Return all file paths + .ds paths in root with entity-aware optimization .
25472548
25482549 Can be filtered by datatype (which is present in the path but not in
25492550 the BIDSPath basename). Can also be list of datatypes.
@@ -2560,6 +2561,9 @@ def _return_root_paths(root, datatype=None, ignore_json=True, ignore_nosub=False
25602561 ignore_nosub : bool
25612562 If ``True``, return only files of the form ``root/sub-*``. Defaults to
25622563 ``False``.
2564+ entities : dict | None
2565+ Dictionary of BIDS entities to enable targeted directory scanning.
2566+ If provided with 'subject', will scan only that subject's directory.
25632567
25642568 Returns
25652569 -------
@@ -2568,30 +2572,75 @@ def _return_root_paths(root, datatype=None, ignore_json=True, ignore_nosub=False
25682572 """
25692573 root = Path (root ) # if root is str
25702574
2571- if datatype is None and not ignore_nosub :
2572- paths = root .rglob ("*.*" )
2573- else :
2575+ # OPTIMIZATION: Use entity-aware path construction when entities available
2576+ if entities and entities .get ('subject' ):
2577+ # Build targeted search path starting from subject directory
2578+ search_parts = [f"sub-{ entities ['subject' ]} " ]
2579+
2580+ # Add session if available
2581+ if entities .get ('session' ):
2582+ search_parts .append (f"ses-{ entities ['session' ]} " )
2583+
2584+ # Add datatype-specific path
25742585 if datatype is not None :
25752586 datatype = _ensure_tuple (datatype )
2576- search_str = f"**/{ '|' .join (datatype )} /*.*"
2587+ if len (datatype ) == 1 :
2588+ # Single datatype - construct direct path
2589+ search_parts .extend (["**" , datatype [0 ]])
2590+ search_str = "/" .join (search_parts ) + "/*.*"
2591+ else :
2592+ # Multiple datatypes - search each separately
2593+ paths = []
2594+ for dt in datatype :
2595+ dt_search_parts = search_parts + ["**" , dt ]
2596+ dt_search_str = "/" .join (dt_search_parts ) + "/*.*"
2597+ paths .extend ([
2598+ Path (root , fn )
2599+ for fn in glob .iglob (dt_search_str , root_dir = root , recursive = True )
2600+ ])
2601+ return _filter_paths_optimized (paths , ignore_json )
25772602 else :
2578- search_str = "**/*.*"
2579-
2580- # only browse files which are of the form root/sub-*,
2581- # such that we truely only look in 'sub'-folders:
2582- if ignore_nosub :
2583- search_str = f"sub-*/{ search_str } "
2584- # TODO: Why is this not equivalent to list(root.rglob(search_str)) ?
2585- # Most of the speedup is from using glob.iglob here.
2603+ # No datatype specified - search all datatypes under subject
2604+ search_parts .append ("**" )
2605+ search_str = "/" .join (search_parts ) + "/*.*"
2606+
2607+ # Single search with optimized path
25862608 paths = [
25872609 Path (root , fn )
25882610 for fn in glob .iglob (search_str , root_dir = root , recursive = True )
25892611 ]
2612+
2613+ else :
2614+ # FALLBACK: Original implementation when entities not available or subject unknown
2615+ if datatype is None and not ignore_nosub :
2616+ paths = root .rglob ("*.*" )
2617+ else :
2618+ if datatype is not None :
2619+ datatype = _ensure_tuple (datatype )
2620+ search_str = f"**/{ '|' .join (datatype )} /*.*"
2621+ else :
2622+ search_str = "**/*.*"
2623+
2624+ # only browse files which are of the form root/sub-*,
2625+ # such that we truly only look in 'sub'-folders:
2626+ if ignore_nosub :
2627+ search_str = f"sub-*/{ search_str } "
2628+ # TODO: Why is this not equivalent to list(root.rglob(search_str)) ?
2629+ # Most of the speedup is from using glob.iglob here.
2630+ paths = [
2631+ Path (root , fn )
2632+ for fn in glob .iglob (search_str , root_dir = root , recursive = True )
2633+ ]
25902634
2635+ return _filter_paths_optimized (paths , ignore_json )
2636+
2637+
2638+ def _filter_paths_optimized (paths , ignore_json ):
2639+ """Filter paths based on file type criteria - extracted for reuse."""
25912640 # Only keep files (not directories), ...
25922641 # and omit the JSON sidecars if `ignore_json` is True.
25932642 if ignore_json :
2594- paths = [
2643+ return [
25952644 p
25962645 for p in paths
25972646 if (p .is_file () and p .suffix != ".json" )
@@ -2600,16 +2649,14 @@ def _return_root_paths(root, datatype=None, ignore_json=True, ignore_nosub=False
26002649 or (p .is_dir () and p .suffix == ".ds" )
26012650 ]
26022651 else :
2603- paths = [
2652+ return [
26042653 p
26052654 for p in paths
26062655 if p .is_file ()
26072656 # XXX: see above, generalize with private func
26082657 or (p .is_dir () and p .suffix == ".ds" )
26092658 ]
26102659
2611- return paths
2612-
26132660
26142661def _fnames_to_bidspaths (fnames , root , check = False ):
26152662 """Make BIDSPaths from file names.
@@ -2655,3 +2702,4 @@ def _fnames_to_bidspaths(fnames, root, check=False):
26552702
26562703 bids_paths .append (bids_path )
26572704 return bids_paths
2705+
0 commit comments