Skip to content

Commit d8bb7a2

Browse files
authored
Add utility to load JSON sidecar (#54)
Note: copied with some modifications from bids2table v0.2
1 parent 28ee47e commit d8bb7a2

4 files changed

Lines changed: 136 additions & 1 deletion

File tree

bids2table/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,5 +159,6 @@
159159
get_bids_entity_arrow_schema,
160160
format_bids_path,
161161
)
162+
from ._metadata import load_bids_metadata
162163
from ._pathlib import cloudpathlib_is_available
163164
from ._version import *

bids2table/_indexing.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
import importlib.metadata
1010
import re
1111
from concurrent.futures import Executor, ProcessPoolExecutor
12-
from functools import partial
12+
from functools import lru_cache, partial
1313
from typing import Any, Callable, Generator, Iterable, Sequence
1414

1515
import pyarrow as pa
@@ -294,6 +294,7 @@ def _batch_index_func(root: str | PathT) -> tuple[str | None, pa.Table]:
294294
return dataset, table
295295

296296

297+
@lru_cache()
297298
def _get_bids_dataset(path: str | PathT) -> tuple[str | None, PathT | None]:
298299
"""Get the BIDS dataset that the path belongs to, if any.
299300
@@ -329,6 +330,7 @@ def _get_bids_dataset(path: str | PathT) -> tuple[str | None, PathT | None]:
329330
return dataset, root
330331

331332

333+
@lru_cache()
332334
def _is_bids_dataset(path: PathT) -> bool:
333335
"""Test if path is a BIDS dataset root directory."""
334336
# Quick heuristic checks.

bids2table/_metadata.py

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
import json
2+
from functools import lru_cache
3+
from typing import Any, Generator
4+
5+
from ._entities import _cache_parse_bids_entities
6+
from ._indexing import _is_bids_dataset
7+
from ._pathlib import PathT, as_path
8+
9+
10+
def load_bids_metadata(path: str | PathT, inherit: bool = True) -> dict[str, Any]:
    """Load the full JSON sidecar metadata for a BIDS file.

    Sidecar files are loaded according to the inheritance principle in top-down
    order, so values from sidecars closer to the file override values from
    sidecars higher up in the directory tree.

    Args:
        path: BIDS file path
        inherit: Load the full metadata according to inheritance. Otherwise, load only
            the first JSON sidecar found in the bottom-up search.

    Returns:
        A sidecar metadata dictionary. The dictionary is empty when no matching
        sidecar is found, or when every matching sidecar is unreadable as a
        JSON object.
    """
    path = as_path(path)
    entities = _cache_parse_bids_entities(path)
    # A sidecar shares (a subset of) the file's entities but has a ".json" extension.
    query = dict(entities, ext=".json")

    # The search yields candidates bottom-up, i.e. closest to the file first.
    candidates = _find_bids_parents(path.parent, query)
    if inherit:
        # Reverse to top-down so that more specific sidecars are applied last.
        sidecars = list(candidates)
        sidecars.reverse()
    else:
        # Pass a default so that a file without any sidecar yields empty
        # metadata rather than leaking StopIteration to the caller.
        nearest = next(candidates, None)
        sidecars = [] if nearest is None else [nearest]

    metadata: dict[str, Any] = {}
    for sidecar in sidecars:
        try:
            data = _load_json(sidecar)
        except (json.JSONDecodeError, TypeError):
            # Best effort: skip sidecars that cannot be parsed.
            continue
        # Only JSON objects are valid sidecars. Other top-level values (lists,
        # strings, numbers) must not be passed to dict.update, which would
        # either raise or silently merge a list of pairs.
        if not isinstance(data, dict):
            continue
        metadata.update(data)
    return metadata
42+
43+
44+
@lru_cache
def _load_json(path: PathT) -> Any:
    """Read and parse the JSON file at `path`.

    The raw bytes are handed to `json.loads`, which detects the UTF-8, UTF-16
    or UTF-32 encoding itself. This avoids decoding with the platform's locale
    encoding, which would corrupt or reject UTF-8 sidecars on systems whose
    default encoding is not UTF-8.

    Results are memoized per path by `lru_cache`: repeated calls return the
    same object, so callers should not mutate it, and later changes to the
    file are not picked up while the entry is cached.

    Args:
        path: Path to a JSON file.

    Returns:
        The decoded JSON value.

    Raises:
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    return json.loads(path.read_bytes())
47+
48+
49+
def _find_bids_parents(
    start: PathT,
    query: dict[str, str],
) -> Generator[PathT, None, None]:
    """Find all BIDS files satisfying the inheritance principle for `query`.

    Args:
        start: Starting directory to begin the bottom up search. If it is not a
            directory, the search begins in its parent.
        query: Dictionary of key-value entity pairs. The entities for valid parent files
            are sub-dictionaries of the query.

    Yields:
        Matching paths in bottom-up order.

    Raises:
        ValueError: If the query contains neither a 'suffix' nor an 'ext'. Since
            this is a generator, the error surfaces on first iteration.
    """
    suffix = query.get("suffix")
    ext = query.get("ext")
    if not (suffix or ext):
        raise ValueError("At least one of 'suffix' or 'ext' are required in query.")
    # Build the glob from the parts that are actually present, so a missing
    # part is never interpolated as the literal text "None".
    if suffix and ext:
        pattern = f"*{suffix}{ext}"
    elif ext:
        pattern = f"*{ext}"
    else:
        pattern = f"*{suffix}*"

    directory = start.resolve()
    if not directory.is_dir():
        directory = directory.parent

    # Climb until the path has no final component left (an empty name).
    while directory.name:
        for candidate in _glob(directory, pattern):
            # The glob is only a coarse filter; the entity comparison decides.
            if _test_bids_inheritance(query, _cache_parse_bids_entities(candidate)):
                yield candidate
        # Stop climbing if we find a BIDS dataset root.
        # NOTE: This will also stop at a nested dataset. Are there cases where we need
        # to load metadata from the parent dataset?
        if _is_bids_dataset(directory):
            break
        directory = directory.parent
84+
85+
86+
@lru_cache()
def _glob(path: PathT, pattern: str) -> list[PathT]:
    """Return the entries of `path` matching the glob `pattern` as a list.

    Results are memoized per `(path, pattern)` by `lru_cache`, so repeated
    lookups return the same list object without globbing again.
    """
    matches: list[PathT] = []
    matches.extend(path.glob(pattern))
    return matches
89+
90+
91+
def _test_bids_inheritance(query: dict[str, str], entities: dict[str, str]) -> bool:
    """Check whether `entities` satisfies the inheritance principle for `query`.

    Every entity other than "datatype" must be present in `query` with an
    equal value; the "datatype" entry is ignored.
    """
    for key, value in entities.items():
        if key == "datatype":
            continue
        if key not in query:
            return False
        if query[key] != value:
            return False
    return True

tests/test_metadata.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
from pathlib import Path
2+
3+
import pytest
4+
5+
from bids2table._metadata import load_bids_metadata
6+
from bids2table._pathlib import cloudpathlib_is_available
7+
8+
BIDS_EXAMPLES = Path(__file__).parents[1] / "bids-examples"
9+
10+
11+
@pytest.mark.parametrize("inherit", [True, False])
def test_load_bids_metadata(inherit: bool):
    """Check the sidecar metadata loaded for a synthetic fmriprep BOLD file.

    The same metadata is expected with and without inheritance.
    """
    func_dir = BIDS_EXAMPLES / "synthetic/derivatives/fmriprep/sub-01/ses-01/func"
    bold_path = func_dir / "sub-01_ses-01_task-rest_space-T1w_desc-preproc_bold.nii"

    expected_metadata = {
        "TaskName": "Rest",
        "RepetitionTime": 2.5,
        "Sources": ["bids:raw:sub-01/ses-01/sub-01_ses-01_task-rest_bold.nii"],
    }

    loaded = load_bids_metadata(bold_path, inherit=inherit)
    assert loaded == expected_metadata
25+
26+
27+
@pytest.mark.skipif(
    not cloudpathlib_is_available(), reason="cloudpathlib not installed"
)
def test_load_bids_metadata_s3():
    """Check metadata loading for a file addressed by an S3 URI.

    Skipped when cloudpathlib is not available.
    """
    s3_uri = (
        "s3://openneuro.org/ds000102/sub-01/func/sub-01_task-flanker_run-1_bold.nii.gz"
    )
    sidecar = load_bids_metadata(s3_uri)
    assert sidecar["RepetitionTime"] == 2.0
    assert sidecar["TaskName"] == "Flanker"

0 commit comments

Comments
 (0)