Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions bids2table/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,5 +159,6 @@
get_bids_entity_arrow_schema,
format_bids_path,
)
from ._metadata import load_bids_metadata
from ._pathlib import cloudpathlib_is_available
from ._version import *
4 changes: 3 additions & 1 deletion bids2table/_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import importlib.metadata
import re
from concurrent.futures import Executor, ProcessPoolExecutor
from functools import partial
from functools import lru_cache, partial
from typing import Any, Callable, Generator, Iterable, Sequence

import pyarrow as pa
Expand Down Expand Up @@ -279,6 +279,7 @@ def _batch_index_func(root: str | PathT) -> tuple[str | None, pa.Table]:
return dataset, table


@lru_cache()
def _get_bids_dataset(path: str | PathT) -> tuple[str | None, PathT | None]:
"""Get the BIDS dataset that the path belongs to, if any.

Expand Down Expand Up @@ -314,6 +315,7 @@ def _get_bids_dataset(path: str | PathT) -> tuple[str | None, PathT | None]:
return dataset, root


@lru_cache()
def _is_bids_dataset(path: PathT) -> bool:
"""Test if path is a BIDS dataset root directory."""
# Check if contains a dataset_description.json or any subject directories. Note,
Expand Down
96 changes: 96 additions & 0 deletions bids2table/_metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
import json
from functools import lru_cache
from typing import Any, Generator

from ._entities import _cache_parse_bids_entities
from ._indexing import _is_bids_dataset
from ._pathlib import PathT, as_path


def load_bids_metadata(path: str | PathT, inherit: bool = True) -> dict[str, Any]:
    """Load the full JSON sidecar metadata for a BIDS file.

    Sidecar files are loaded according to the inheritance principle in top-down order,
    so values from sidecars found closer to `path` override inherited ones.

    Args:
        path: BIDS file path
        inherit: Load the full metadata according to inheritance. Otherwise, load only
            the first JSON sidecar found in the bottom-up search.

    Returns:
        A sidecar metadata dictionary. The dictionary is empty when no applicable
        sidecar is found, or when none of the sidecars holds a valid JSON object.
    """
    path = as_path(path)
    entities = _cache_parse_bids_entities(path)
    # A sidecar shares the data file's entities but always has the `.json` extension.
    query = dict(entities, ext=".json")

    found = _find_bids_parents(path.parent, query)
    if inherit:
        # The search yields sidecars bottom-up; reverse it so that the sidecars
        # found first (closest to the file) are applied last.
        sidecars = list(found)[::-1]
    else:
        # Supplying a default keeps StopIteration from escaping when nothing matches.
        first = next(found, None)
        sidecars = [] if first is None else [first]

    metadata: dict[str, Any] = {}
    for sidecar in sidecars:
        try:
            data = _load_json(sidecar)
        except (json.JSONDecodeError, TypeError):
            # Best effort: skip sidecars that cannot be parsed.
            continue
        # Only JSON objects can contribute key-value metadata; skip anything else
        # (arrays, strings, numbers) instead of failing inside `dict.update`.
        if isinstance(data, dict):
            metadata.update(data)
    return metadata


@lru_cache
def _load_json(path: PathT) -> Any:
    """Read and parse the JSON file at `path`.

    Results are memoized per path by `lru_cache`, so the same parsed object is
    returned on repeated calls; callers should treat it as read-only.

    Args:
        path: Path of the JSON file to read.

    Returns:
        The decoded JSON value.

    Raises:
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    # JSON text is UTF-8 encoded; do not depend on the platform's locale encoding.
    return json.loads(path.read_text(encoding="utf-8"))


def _find_bids_parents(
    start: PathT,
    query: dict[str, str],
) -> Generator[PathT, None, None]:
    """Find all BIDS files satisfying the inheritance principle for `query`.

    Args:
        start: Starting directory to begin the bottom up search. If it is not a
            directory, the search starts from its parent.
        query: Dictionary of key-value entity pairs. The entities for valid parent files
            are sub-dictionaries of the query.

    Yields:
        Matching paths in bottom-up order.

    Raises:
        ValueError: If the query has neither a 'suffix' nor an 'ext'. Because this is
            a generator, the error is raised when iteration starts.
    """
    suffix = query.get("suffix")
    ext = query.get("ext")
    if not (suffix or ext):
        raise ValueError("At least one of 'suffix' or 'ext' are required in query.")
    # Treat a missing suffix or ext as empty so the pattern never embeds "None".
    pattern = f"*{suffix or ''}{ext or ''}"

    parent = start.resolve()
    if not parent.is_dir():
        parent = parent.parent

    # Climb until the path has no final component left.
    while parent.name:
        for path in _glob(parent, pattern):
            entities = _cache_parse_bids_entities(path)
            if _test_bids_inheritance(query, entities):
                yield path
        # Stop climbing if we find a BIDS dataset root.
        # NOTE: This will also stop at a nested dataset. Are there cases where we need
        # to load metadata from the parent dataset?
        if _is_bids_dataset(parent):
            break
        parent = parent.parent


@lru_cache()
def _glob(path: PathT, pattern: str) -> list[PathT]:
    """Return the entries under `path` that match the glob `pattern`, as a list.

    The result is memoized per `(path, pattern)` pair by `lru_cache`.
    """
    matches = path.glob(pattern)
    return [*matches]


def _test_bids_inheritance(query: dict[str, str], entities: dict[str, str]) -> bool:
    """Check whether `entities` may be inherited by a file described by `query`.

    Ignoring the "datatype" key, every key in `entities` must be present in `query`
    with an equal value.
    """
    for key, value in entities.items():
        if key == "datatype":
            continue
        if key not in query or query[key] != value:
            return False
    return True
36 changes: 36 additions & 0 deletions tests/test_metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
from pathlib import Path

import pytest

from bids2table._metadata import load_bids_metadata
from bids2table._pathlib import cloudpathlib_is_available

# Path to the "bids-examples" directory, resolved relative to this test file:
# it sits one level above the directory that contains this file.
BIDS_EXAMPLES = Path(__file__).parents[1] / "bids-examples"


@pytest.mark.parametrize("inherit", [True, False])
def test_load_bids_metadata(inherit: bool):
    """Load sidecar metadata for a local example file and compare it to known values.

    The same expected metadata is asserted for both values of `inherit`.
    """
    func_dir = BIDS_EXAMPLES / "synthetic/derivatives/fmriprep/sub-01/ses-01/func"
    bold_path = func_dir / "sub-01_ses-01_task-rest_space-T1w_desc-preproc_bold.nii"

    expected = {
        "TaskName": "Rest",
        "RepetitionTime": 2.5,
        "Sources": ["bids:raw:sub-01/ses-01/sub-01_ses-01_task-rest_bold.nii"],
    }
    assert load_bids_metadata(bold_path, inherit=inherit) == expected


@pytest.mark.skipif(
    not cloudpathlib_is_available(), reason="cloudpathlib not installed"
)
def test_load_bids_metadata_s3():
    """Load sidecar metadata for a file addressed by an `s3://` URI.

    Skipped when cloudpathlib is not available.
    """
    uri = (
        "s3://openneuro.org/ds000102/sub-01/func/sub-01_task-flanker_run-1_bold.nii.gz"
    )
    loaded = load_bids_metadata(uri)
    for key, value in (("RepetitionTime", 2.0), ("TaskName", "Flanker")):
        assert loaded[key] == value
Loading