Skip to content

Commit 107de32

Browse files
authored
Merge pull request #859 from oesteban/fix/matching-paths
FIX: Match only within relative path when indexer is validating
2 parents 9f01802 + ba43619 commit 107de32

File tree

3 files changed

+171
-52
lines changed

3 files changed

+171
-52
lines changed

bids/layout/index.py

Lines changed: 63 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -26,20 +26,62 @@ def _extract_entities(bidsfile, entities):
2626
return match_vals
2727

2828

29-
def _check_path_matches_patterns(path, patterns):
29+
def _check_path_matches_patterns(path, patterns, root=None):
3030
"""Check if the path matches at least one of the provided patterns. """
3131
if not patterns:
3232
return False
33+
3334
path = path.absolute()
35+
if root is not None:
36+
path = Path("/") / path.relative_to(root)
37+
38+
# Path now can be downcast to str
39+
path = str(path)
3440
for patt in patterns:
35-
if isinstance(patt, Path):
36-
if path == patt:
41+
if hasattr(patt, "search"):
42+
if patt.search(path):
3743
return True
38-
elif patt.search(str(path)):
39-
return True
44+
else:
45+
continue
46+
47+
if isinstance(patt, Path):
48+
patt = str(
49+
patt if root is None
50+
else Path("/") / patt.relative_to(root)
51+
)
52+
if str(patt) in path:
53+
return str(patt)
54+
4055
return False
4156

4257

58+
def _validate_path(path, incl_patt=None, excl_patt=None, root=None, default=None):
59+
incl_matched = _check_path_matches_patterns(path, incl_patt, root=root)
60+
excl_matched = _check_path_matches_patterns(path, excl_patt, root=root)
61+
62+
if incl_matched is False and excl_matched is False:
63+
return default
64+
65+
# Include: if a inclusion regex pattern matched or a nonregex inclusion pattern matched
66+
if incl_matched is True or (incl_matched and not excl_matched):
67+
return True
68+
69+
if excl_matched is True:
70+
return False
71+
72+
if not incl_matched and excl_matched:
73+
return False
74+
75+
# Both matched: nested pattern
76+
if incl_matched not in excl_matched:
77+
return True
78+
79+
if excl_matched not in incl_matched:
80+
return False
81+
82+
return default
83+
84+
4385
class BIDSLayoutIndexer:
4486
""" Indexer class for BIDSLayout.
4587
@@ -114,19 +156,16 @@ def __call__(self, layout):
114156
def session(self):
115157
return self._layout.connection_manager.session
116158

117-
def _validate_dir(self, d, default=None):
118-
if _check_path_matches_patterns(d, self._include_patterns):
119-
return True
120-
if _check_path_matches_patterns(d, self._exclude_patterns):
121-
return False
122-
return default
123-
124159
def _validate_file(self, f, default=None):
125-
# Inclusion takes priority over exclusion
126-
if _check_path_matches_patterns(f, self._include_patterns):
127-
return True
128-
if _check_path_matches_patterns(f, self._exclude_patterns):
129-
return False
160+
matched_patt = _validate_path(
161+
f,
162+
incl_patt=self._include_patterns,
163+
excl_patt=self._exclude_patterns,
164+
default=default,
165+
root=self._layout._root
166+
)
167+
if matched_patt is not None:
168+
return matched_patt
130169

131170
# If inclusion/exclusion is inherited from a parent directory, that
132171
# takes precedence over the remaining file-level rules
@@ -173,7 +212,13 @@ def _index_dir(self, path, config, default_action=None):
173212
_, dirnames, filenames = next(os.walk(path))
174213

175214
# Set the default inclusion/exclusion directive
176-
default = self._validate_dir(path, default=default_action)
215+
default = _validate_path(
216+
path,
217+
incl_patt=self._include_patterns,
218+
excl_patt=self._exclude_patterns,
219+
default=default_action,
220+
root=self._layout._root,
221+
)
177222

178223
# If layout configuration file exists, delete it
179224
if self.config_filename in filenames:
@@ -192,7 +237,6 @@ def _index_dir(self, path, config, default_action=None):
192237
d = path / d
193238
self._index_dir(d, list(config), default_action=default)
194239

195-
196240
def _index_file(self, f, dirpath, entities, default_action=None):
197241
"""Create DB record for file and its tags. """
198242
abs_fn = dirpath / f

bids/layout/tests/test_layout.py

Lines changed: 72 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313

1414
from bids.layout import BIDSLayout, Query
1515
from bids.layout.models import Config
16-
from bids.layout.index import BIDSLayoutIndexer
16+
from bids.layout.index import BIDSLayoutIndexer, _check_path_matches_patterns
1717
from bids.layout.utils import PaddedInt
1818
from bids.tests import get_test_data_path
1919
from bids.utils import natural_sort
@@ -43,7 +43,11 @@ def test_layout_init(layout_7t_trt):
4343
])
4444
def test_index_metadata(index_metadata, query, result, mock_config):
4545
data_dir = join(get_test_data_path(), '7t_trt')
46-
layout = BIDSLayout(data_dir, index_metadata=index_metadata, **query)
46+
layout = BIDSLayout(
47+
data_dir,
48+
indexer=BIDSLayoutIndexer(index_metadata=index_metadata),
49+
**query
50+
)
4751
sample_file = layout.get(task='rest', extension='.nii.gz',
4852
acquisition='fullbrain')[0]
4953
metadata = sample_file.get_metadata()
@@ -425,22 +429,39 @@ def test_nested_include_exclude():
425429
target1 = join(data_dir, 'models', 'ds-005_type-test_model.json')
426430
target2 = join(data_dir, 'models', 'extras', 'ds-005_type-test_model.json')
427431

428-
# Nest a directory exclusion within an inclusion
429-
layout = BIDSLayout(data_dir, validate=True, force_index=['models'],
430-
ignore=[os.path.join('models', 'extras')])
431-
assert layout.get_file(target1)
432-
assert not layout.get_file(target2)
433-
434432
# Nest a directory inclusion within an exclusion
435-
layout = BIDSLayout(data_dir, validate=True, ignore=['models'],
436-
force_index=[os.path.join('models', 'extras')])
433+
layout = BIDSLayout(
434+
data_dir,
435+
indexer=BIDSLayoutIndexer(
436+
validate=True,
437+
force_index=[os.path.join('models', 'extras')],
438+
ignore=['models'],
439+
),
440+
)
437441
assert not layout.get_file(target1)
438442
assert layout.get_file(target2)
439443

444+
# Nest a directory exclusion within an inclusion
445+
layout = BIDSLayout(
446+
data_dir,
447+
indexer=BIDSLayoutIndexer(
448+
validate=True,
449+
force_index=['models'],
450+
ignore=[os.path.join('models', 'extras')],
451+
),
452+
)
453+
assert layout.get_file(target1)
454+
assert not layout.get_file(target2)
455+
440456
# Force file inclusion despite directory-level exclusion
441-
models = ['models', target2]
442-
layout = BIDSLayout(data_dir, validate=True, force_index=models,
443-
ignore=[os.path.join('models', 'extras')])
457+
layout = BIDSLayout(
458+
data_dir,
459+
indexer=BIDSLayoutIndexer(
460+
validate=True,
461+
force_index=['models', target2],
462+
ignore=[os.path.join('models', 'extras')],
463+
),
464+
)
444465
assert layout.get_file(target1)
445466
assert layout.get_file(target2)
446467

@@ -453,11 +474,17 @@ def test_nested_include_exclude_with_regex():
453474
target1 = join(data_dir, 'models', 'ds-005_type-test_model.json')
454475
target2 = join(data_dir, 'models', 'extras', 'ds-005_type-test_model.json')
455476

456-
layout = BIDSLayout(data_dir, ignore=[patt2], force_index=[patt1])
477+
layout = BIDSLayout(
478+
data_dir,
479+
indexer=BIDSLayoutIndexer(ignore=[patt2], force_index=[patt1])
480+
)
457481
assert layout.get_file(target1)
458482
assert not layout.get_file(target2)
459483

460-
layout = BIDSLayout(data_dir, ignore=[patt1], force_index=[patt2])
484+
layout = BIDSLayout(
485+
data_dir,
486+
indexer=BIDSLayoutIndexer(ignore=[patt1], force_index=[patt2])
487+
)
461488
assert not layout.get_file(target1)
462489
assert layout.get_file(target2)
463490

@@ -835,3 +862,33 @@ def test_padded_run_roundtrip(layout_ds005):
835862
assert ents["run"] == 1
836863
newpath = layout_ds005.build_path(ents, absolute_paths=False)
837864
assert newpath == "sub-01/func/sub-01_task-mixedgamblestask_run-01_bold.nii.gz"
865+
866+
@pytest.mark.parametrize(
867+
"fname", [
868+
"sub-01/anat/sub-01_T1w.nii.gz",
869+
".datalad",
870+
"code",
871+
"sub-01/.datalad",
872+
],
873+
)
874+
def test_indexer_patterns(fname):
875+
root = Path("/home/user/.cache/data/")
876+
path = root / fname
877+
878+
assert bool(_check_path_matches_patterns(
879+
path,
880+
["code"],
881+
root=None,
882+
)) is (fname == "code")
883+
884+
assert _check_path_matches_patterns(
885+
path,
886+
[re.compile(r"/\.")],
887+
root=None,
888+
) is True
889+
890+
assert _check_path_matches_patterns(
891+
path,
892+
[re.compile(r"/\.")],
893+
root=root,
894+
) is (".datalad" in fname)

bids/layout/validation.py

Lines changed: 36 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,13 @@
3030
k: val[k] for val in MANDATORY_DERIVATIVES_FIELDS.values() for k in val}
3131

3232

33-
DEFAULT_LOCATIONS_TO_IGNORE = ("code", "stimuli", "sourcedata", "models",
34-
re.compile(r'^\.'))
33+
DEFAULT_LOCATIONS_TO_IGNORE = {
34+
"code",
35+
"stimuli",
36+
"sourcedata",
37+
"models",
38+
re.compile(r'^\.'),
39+
}
3540

3641
def absolute_path_deprecation_warning():
3742
warnings.warn("The absolute_paths argument will be removed from PyBIDS "
@@ -172,27 +177,40 @@ def check_for_description(bids_dir):
172177
return paths
173178

174179

180+
def _sort_patterns(patterns, root):
181+
"""Return sorted patterns, from more specific to more general."""
182+
regexes = [patt for patt in patterns if hasattr(patt, "search")]
183+
184+
paths = [
185+
str((root / patt).absolute())
186+
for patt in listify(patterns)
187+
if not hasattr(patt, "search")
188+
]
189+
# Sort patterns from general to specific
190+
paths.sort(key=len)
191+
192+
# Combine and return (note path patterns are reversed, specific first)
193+
return [Path(p) for p in reversed(paths)] + regexes
194+
195+
175196
def validate_indexing_args(ignore, force_index, root):
176197
if ignore is None:
177-
ignore = DEFAULT_LOCATIONS_TO_IGNORE
198+
ignore = list(
199+
DEFAULT_LOCATIONS_TO_IGNORE - set(force_index or [])
200+
)
178201

179202
# root has already been validated to be a directory
180-
ignore = [(root / patt).absolute()
181-
if isinstance(patt, str) else patt
182-
for patt in listify(ignore or [])]
183-
force_index = [(root / patt).absolute()
184-
if isinstance(patt, str) else patt
185-
for patt in listify(force_index or [])]
203+
ignore = _sort_patterns(ignore, root)
204+
force_index = _sort_patterns(force_index or [], root)
186205

187206
# Derivatives get special handling; they shouldn't be indexed normally
188-
if force_index is not None:
189-
for entry in force_index:
190-
condi = (isinstance(entry, str) and
191-
str(entry.resolve()).startswith('derivatives'))
192-
if condi:
193-
msg = ("Do not pass 'derivatives' in the force_index "
194-
"list. To index derivatives, either set "
195-
"derivatives=True, or use add_derivatives().")
196-
raise ValueError(msg)
207+
for entry in force_index:
208+
condi = (isinstance(entry, str) and
209+
str(entry.resolve()).startswith('derivatives'))
210+
if condi:
211+
msg = ("Do not pass 'derivatives' in the force_index "
212+
"list. To index derivatives, either set "
213+
"derivatives=True, or use add_derivatives().")
214+
raise ValueError(msg)
197215

198216
return ignore, force_index

0 commit comments

Comments
 (0)