|
| 1 | +import json |
| 2 | +from functools import lru_cache |
| 3 | +from typing import Any, Generator |
| 4 | + |
| 5 | +from ._entities import _cache_parse_bids_entities |
| 6 | +from ._indexing import _is_bids_dataset |
| 7 | +from ._pathlib import PathT, as_path |
| 8 | + |
| 9 | + |
def load_bids_metadata(path: str | PathT, inherit: bool = True) -> dict[str, Any]:
    """Load the full JSON sidecar metadata for a BIDS file.

    Sidecar files are located with a bottom-up search starting at the directory of
    `path` and merged in top-down order, so keys from sidecars found earlier in the
    bottom-up search override keys from sidecars found later.

    Args:
        path: BIDS file path
        inherit: Load the full metadata according to inheritance. Otherwise, load only
            the first JSON sidecar found in the bottom-up search.

    Returns:
        A sidecar metadata dictionary. It is empty when no applicable sidecar is
        found, or when none of the sidecars found holds a parseable JSON object.
    """
    path = as_path(path)
    entities = _cache_parse_bids_entities(path)
    query = dict(entities, ext=".json")

    candidates = _find_bids_parents(path.parent, query)
    if inherit:
        # The search yields bottom-up; reverse it so the sidecars found first are
        # merged last and therefore take precedence.
        sidecars = list(candidates)[::-1]
    else:
        # Give `next` a default so a file without any sidecar produces empty
        # metadata instead of leaking StopIteration to the caller.
        nearest = next(candidates, None)
        sidecars = [] if nearest is None else [nearest]

    metadata: dict[str, Any] = {}
    for sidecar in sidecars:
        try:
            data = _load_json(sidecar)
        except (json.JSONDecodeError, TypeError):
            # Best effort: a sidecar that cannot be parsed is skipped.
            continue
        # Only a JSON object can be merged as metadata; arrays and scalars are
        # skipped rather than fed to `dict.update`.
        if isinstance(data, dict):
            metadata.update(data)
    return metadata
| 42 | + |
| 43 | + |
@lru_cache
def _load_json(path: PathT) -> Any:
    """Read and parse the JSON document stored at `path`.

    The raw bytes are passed to `json.loads`, which detects a UTF-8, UTF-16 or
    UTF-32 encoding on its own, so the result does not depend on the platform's
    locale encoding.

    Results are memoized per path by `lru_cache`; the returned object is shared
    between calls and should not be modified in place.

    Args:
        path: Path of the JSON file to read.

    Returns:
        The decoded JSON value.

    Raises:
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    return json.loads(path.read_bytes())
| 47 | + |
| 48 | + |
def _find_bids_parents(
    start: PathT,
    query: dict[str, str],
) -> Generator[PathT, None, None]:
    """Find all BIDS files satisfying the inheritance principle for `query`.

    Each directory from `start` upwards is globbed for files whose name ends with
    the query's suffix and/or extension; every match whose parsed entities pass
    `_test_bids_inheritance` is yielded. The climb ends after the first directory
    recognized as a BIDS dataset root, or once the path has no final component left.

    Args:
        start: Starting directory to begin the bottom up search. If it is not a
            directory, the search starts from its parent.
        query: Dictionary of key-value entity pairs. The entities for valid parent files
            are sub-dictionaries of the query.

    Yields:
        Matching paths in bottom-up order.

    Raises:
        ValueError: If the query has neither a 'suffix' nor an 'ext'. Because this is
            a generator, the error surfaces on first iteration, not at call time.
    """
    suffix = query.get("suffix")
    ext = query.get("ext")
    if not (suffix or ext):
        raise ValueError("At least one of 'suffix' or 'ext' are required in query.")
    # Substitute "" for whichever part is missing so that a literal "None" never
    # ends up in the glob pattern.
    pattern = f"*{suffix or ''}{ext or ''}"

    parent = start.resolve()
    if not parent.is_dir():
        parent = parent.parent

    while parent.name:
        for path in _glob(parent, pattern):
            entities = _cache_parse_bids_entities(path)
            if _test_bids_inheritance(query, entities):
                yield path
        # Stop climbing if we find a BIDS dataset root.
        # NOTE: This will also stop at a nested dataset. Are there cases where we need
        # to load metadata from the parent dataset?
        if _is_bids_dataset(parent):
            break
        parent = parent.parent
| 84 | + |
| 85 | + |
@lru_cache
def _glob(path: PathT, pattern: str) -> list[PathT]:
    """Return the entries of `path` matching the glob `pattern` as a list.

    Results are memoized per (path, pattern) pair by `lru_cache`, so repeated
    lookups return the same list object without globbing again.
    """
    matches = path.glob(pattern)
    return [*matches]
| 89 | + |
| 90 | + |
def _test_bids_inheritance(query: dict[str, str], entities: dict[str, str]) -> bool:
    """Check whether `entities` is inherited by `query`.

    Ignoring the "datatype" key, every key-value pair of `entities` must also be
    present in `query`.
    """
    inherited = {key: value for key, value in entities.items() if key != "datatype"}
    # Item views compare as sets of (key, value) pairs: `<=` is the subset test.
    return inherited.items() <= query.items()
0 commit comments