Skip to content

Commit 3380af7

Browse files
committed
Switch custom rglob to glob library
- Use the built-in `rglob` method for `CloudPath` objects, since `glob.glob` does not properly recurse through their sub-directories. If it is a local path, just fall back to `glob.glob`. Alternatively, we could just check the Python version and only run this if <py3.13.
1 parent 8f48a68 commit 3380af7

2 files changed

Lines changed: 14 additions & 36 deletions

File tree

bids2table/_indexing.py

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,11 @@
1010
import re
1111
from concurrent.futures import Executor, ProcessPoolExecutor
1212
from functools import lru_cache, partial
13+
from glob import glob
1314
from typing import Any, Callable, Generator, Iterable, Sequence
1415

1516
import pyarrow as pa
17+
from cloudpathlib import CloudPath
1618
from tqdm import tqdm
1719

1820
from ._entities import (
@@ -21,7 +23,7 @@
2123
validate_bids_entities,
2224
)
2325
from ._logging import setup_logger
24-
from ._pathlib import PathT, as_path, rglob
26+
from ._pathlib import PathT, as_path
2527

2628
_BIDS_SUBJECT_DIR_PATTERN = re.compile(r"sub-[a-zA-Z0-9]+")
2729

@@ -353,7 +355,7 @@ def _is_bids_dataset(path: PathT) -> bool:
353355
def _contains_bids_subject_dirs(root: PathT) -> bool:
354356
"""Check if a path contains one or more BIDS subject dirs."""
355357
# Nb, this will return on the first matching path thanks to the generator.
356-
return any(_is_bids_subject_dir(path) for path in root.glob(pattern="sub-*"))
358+
return any(_is_bids_subject_dir(path) for path in root.glob("sub-*"))
357359

358360

359361
def _find_bids_subject_dirs(
@@ -365,7 +367,7 @@ def _find_bids_subject_dirs(
365367
Note, only looks one level down. Does not find nested subject directories, e.g. in
366368
derivatives datasets.
367369
"""
368-
paths = [path for path in root.glob(pattern="sub-*") if _is_bids_subject_dir(path)]
370+
paths = [path for path in root.glob("sub-*") if _is_bids_subject_dir(path)]
369371

370372
if include_subjects:
371373
filtered_names = _filter_include(
@@ -400,7 +402,15 @@ def _index_bids_subject_dir(
400402
_, subject = path.name.split("-", maxsplit=1)
401403

402404
records = []
403-
for p in rglob(path=path, pattern="sub-*"):
405+
if isinstance(path, CloudPath):
406+
# glob fails to properly recurse CloudPaths, so use built-in method
407+
paths = path.rglob("sub-*")
408+
else:
409+
# Fall back to glob for any version of Python
410+
paths = glob(f"{path}/**/sub-*", recursive=True)
411+
412+
for p in paths:
413+
p = as_path(p)
404414
if _is_bids_file(p):
405415
entities = _cache_parse_bids_entities(p)
406416
valid_entities, extra_entities = validate_bids_entities(entities)

bids2table/_pathlib.py

Lines changed: 0 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,3 @@
1-
import fnmatch
2-
import os
3-
from collections.abc import Iterator
41
from pathlib import Path
52

63
try:
@@ -31,32 +28,3 @@ def as_path(path: str | PathT) -> PathT:
3128
def cloudpathlib_is_available() -> bool:
3229
"""Check if cloudpathlib is available."""
3330
return _CLOUDPATHLIB_AVAILABLE
34-
35-
36-
def rglob(path: PathT, pattern: str, follow_symlinks: bool = True) -> Iterator[PathT]:
37-
"""Safely glob paths, recursing symlinks.
38-
39-
NOTE: Only needed to support recursive globbing in <py3.13
40-
"""
41-
# Not expecting symlinks for cloudpaths so just use their glob method.
42-
if isinstance(path, CloudPath):
43-
yield from path.rglob(f"{pattern}")
44-
return
45-
46-
visited = set()
47-
for dirpath, dirnames, filenames in os.walk(path, followlinks=follow_symlinks):
48-
dirpath = as_path(dirpath)
49-
try:
50-
stat = os.stat(dirpath, follow_symlinks=False)
51-
except FileNotFoundError:
52-
continue # Broken symlink or race condition
53-
54-
inode = (stat.st_dev, stat.st_ino)
55-
if inode in visited:
56-
continue
57-
visited.add(inode)
58-
59-
all_names = [name for name in dirnames] + [name for name in filenames]
60-
for name in all_names:
61-
if fnmatch.fnmatch(name, pattern):
62-
yield dirpath / name

0 commit comments

Comments
 (0)