Skip to content

Commit 10182d3

Browse files
committed
Add ds000114 fixture infrastructure for longitudinal integration tests
Adds the download script, conftest fixtures, and CI cache that the Stage 2 longitudinal integration test (deferred from PR #306) and the upcoming Stages 3-6 tier-2 tests need. Pins OpenNeuro snapshot 1.0.2 via sha256 of task-fingerfootlips_bold.json so silent upstream drift (including the old missing-TaskName regression) is caught at fetch time. Also lands the deferred rbc longitudinal template integration test asserting the ses-longitudinal BIDS tree shape (template T1w plus from-test / from-retest ITK xfms). Refs #301, follow-up to #306.
1 parent eff2939 commit 10182d3

6 files changed

Lines changed: 349 additions & 0 deletions

File tree

.github/workflows/test_full.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,12 @@ jobs:
3131
enable-cache: true # not automatic on self-hosted runners
3232
- run: uv sync
3333

34+
- name: Cache ds000114 longitudinal fixture
35+
uses: actions/cache@v5
36+
with:
37+
path: tests/data/ds000114
38+
key: ds000114-${{ hashFiles('scripts/download_ds000114.py') }}
39+
3440
- name: Run all tests
3541
shell: bash
3642
run: |

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,5 +172,6 @@ cython_debug/
172172
# Full-pipeline test artifacts
173173
tests/full_pipeline/.last_run.json
174174
tests/data/rbc_run/
175+
tests/data/ds000114/
175176
CLAUDE.md
176177
reports

scripts/download_ds000114.py

Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
# /// script
2+
# dependencies = []
3+
# requires-python = ">=3.12"
4+
# ///
5+
"""Download a sub-01 subset of OpenNeuro ds000114 for longitudinal tests.
6+
7+
ds000114 ("Test-Retest Reliability") is the canonical multi-session BIDS
8+
demo dataset. We pull sub-01 only (both ses-test and ses-retest) plus the
9+
top-level sidecars, because BIDS inheritance places RepetitionTime etc.
10+
in the dataset-level task JSON, not per-subject.
11+
12+
OpenNeuro snapshot: 1.0.2 (tagged 2022-08-24, verified to contain
13+
``TaskName`` in ``task-fingerfootlips_bold.json`` so bids-validator is
14+
happy). The public S3 mirror only exposes the latest state at top-level
15+
(``s3://openneuro.org/ds000114/...``), so we can't pin via a snapshot
16+
path. Instead we verify the sha256 of ``task-fingerfootlips_bold.json``
17+
after download and refuse to proceed if the content has drifted.
18+
19+
Runs against the public HTTP S3 endpoint with no credentials and no
20+
third-party dependencies, so it works on any environment where
21+
``uv run`` does.
22+
23+
Usage::
24+
25+
uv run scripts/download_ds000114.py [TARGET_DIR]
26+
27+
Default TARGET_DIR is ``tests/data/ds000114`` (under the repo root).
28+
Idempotent: exits early if the sentinel T1w file already exists.
29+
"""
30+
31+
from __future__ import annotations
32+
33+
import argparse
34+
import hashlib
35+
import logging
36+
import sys
37+
import urllib.error
38+
import urllib.parse
39+
import urllib.request
40+
import xml.etree.ElementTree as ET
41+
from pathlib import Path
42+
43+
SNAPSHOT_TAG = "1.0.2"
44+
S3_BASE = "https://s3.amazonaws.com/openneuro.org"
45+
DATASET = "ds000114"
46+
SUBJECT = "sub-01"
47+
SIDECARS = (
48+
"dataset_description.json",
49+
"participants.tsv",
50+
"task-fingerfootlips_bold.json",
51+
)
52+
TASK_JSON_SHA256 = "9fd44f65a772e05282c20bdfa2a9775e02f9a7f562c5c96bbf4fd30632540355"
53+
S3_XMLNS = "{http://s3.amazonaws.com/doc/2006-03-01/}"
54+
55+
_DEFAULT_TARGET = Path(__file__).resolve().parent.parent / "tests" / "data" / DATASET
56+
_SENTINEL_REL = Path(SUBJECT) / "ses-test" / "anat" / f"{SUBJECT}_ses-test_T1w.nii.gz"
57+
58+
_logger = logging.getLogger("download_ds000114")
59+
60+
61+
def _list_keys(prefix: str, *, timeout: float = 60.0) -> list[str]:
    """Enumerate all S3 object keys beneath ``prefix`` (paginated).

    Sends ``list-type=2`` listing requests to ``S3_BASE`` and keeps following
    ``NextContinuationToken`` for as long as the response reports
    ``IsTruncated`` as ``"true"``.

    Args:
        prefix: Key prefix to list, e.g. ``"ds000114/sub-01/"``.
        timeout: Socket timeout in seconds for each listing request, so a
            stalled connection cannot block the caller forever.

    Returns:
        Every non-empty ``Key`` value found in the responses, in response
        order.

    Raises:
        urllib.error.URLError: Propagated from ``urlopen`` when a request
            cannot be completed.
    """
    keys: list[str] = []
    continuation: str | None = None
    while True:
        params = {"list-type": "2", "prefix": prefix}
        if continuation is not None:
            params["continuation-token"] = continuation
        # Percent-encode every value (prefix included) rather than splicing
        # raw text into the query string.
        query = urllib.parse.urlencode(params, quote_via=urllib.parse.quote)
        with urllib.request.urlopen(  # noqa: S310
            f"{S3_BASE}/?{query}", timeout=timeout
        ) as resp:
            root = ET.parse(resp).getroot()  # noqa: S314 (trusted S3 response)
        keys.extend(
            key
            for contents in root.findall(f"{S3_XMLNS}Contents")
            if (key := contents.findtext(f"{S3_XMLNS}Key"))
        )
        if root.findtext(f"{S3_XMLNS}IsTruncated") != "true":
            return keys
        continuation = root.findtext(f"{S3_XMLNS}NextContinuationToken")
        if not continuation:
            # Truncated but no token to continue with: stop instead of
            # re-requesting the same page forever.
            return keys
80+
81+
82+
def _download(key: str, dest: Path, *, timeout: float = 60.0) -> None:
    """Fetch ``key`` from the public openneuro S3 mirror to ``dest``.

    The response body is streamed in 1 MiB chunks into a sibling
    ``<name>.part`` file, which is moved onto ``dest`` only once the whole
    transfer has succeeded. An interrupted download therefore never leaves a
    truncated file at the final path.

    Args:
        key: Object key relative to ``S3_BASE``.
        dest: Final path of the downloaded file; parent directories are
            created as needed.
        timeout: Socket timeout in seconds for the request.

    Raises:
        urllib.error.ContentTooShortError: If the server announced a
            ``Content-Length`` and a different number of bytes arrived.
        urllib.error.URLError: Propagated from ``urlopen``.
    """
    dest.parent.mkdir(parents=True, exist_ok=True)
    url = f"{S3_BASE}/{key}"
    partial = dest.with_name(f"{dest.name}.part")
    _logger.info("GET %s", key)
    try:
        with (
            urllib.request.urlopen(url, timeout=timeout) as resp,  # noqa: S310
            partial.open("wb") as out,
        ):
            announced = resp.headers.get("Content-Length")
            received = 0
            while chunk := resp.read(1 << 20):
                received += out.write(chunk)
            # Only checkable when the server sent a plain numeric length.
            if announced is not None and announced.strip().isdigit():
                if received != int(announced):
                    raise urllib.error.ContentTooShortError(
                        f"{key}: received {received} of {announced} bytes", b""
                    )
        # Same-directory rename, so the final path only ever holds a
        # complete file.
        partial.replace(dest)
    except BaseException:
        # Also covers KeyboardInterrupt: drop the partial file, then re-raise.
        partial.unlink(missing_ok=True)
        raise
93+
94+
95+
def _sha256(path: Path) -> str:
    """Return the hexadecimal SHA-256 digest of the file at ``path``.

    The file is read in 1 MiB slices, so it is never loaded into memory in
    one piece.
    """
    block_size = 1 << 20
    hasher = hashlib.sha256()
    with path.open("rb") as stream:
        # iter() with a sentinel stops at the first empty read (end of file).
        for piece in iter(lambda: stream.read(block_size), b""):
            hasher.update(piece)
    return hasher.hexdigest()
101+
102+
103+
def download(target_dir: Path) -> None:
    """Download the ds000114 sub-01 subset into ``target_dir`` if missing.

    The presence of the sentinel T1w file is what marks the dataset as
    complete, so the work is ordered to make that true:

    1. Fetch the top-level sidecars and verify the sha256 of
       ``task-fingerfootlips_bold.json`` before any subject data is
       transferred.
    2. Fetch every object under the subject prefix, with the sentinel file
       requested last.

    A hash mismatch or a failure partway through the subject files therefore
    leaves no sentinel behind, and the next invocation retries instead of
    reporting the dataset as already present.

    Args:
        target_dir: Directory that receives the BIDS tree.

    Raises:
        RuntimeError: If the task JSON hash differs from ``TASK_JSON_SHA256``,
            if nothing is listed under the subject prefix, or if the sentinel
            file is absent from the listing.
    """
    sentinel = target_dir / _SENTINEL_REL
    if sentinel.exists():
        _logger.info("%s: already present at %s", DATASET, target_dir)
        return

    _logger.info(
        "%s: downloading %s (both sessions) + sidecars to %s (snapshot %s)",
        DATASET,
        SUBJECT,
        target_dir,
        SNAPSHOT_TAG,
    )

    # Sidecars first: they are small, and they let us reject a drifted
    # upstream before transferring any imaging data.
    for name in SIDECARS:
        _download(f"{DATASET}/{name}", target_dir / name)

    actual = _sha256(target_dir / "task-fingerfootlips_bold.json")
    if actual != TASK_JSON_SHA256:
        raise RuntimeError(
            "task-fingerfootlips_bold.json sha256 mismatch; upstream may have "
            f"changed.\n expected: {TASK_JSON_SHA256}\n actual: {actual}\n"
            f" verify and update SNAPSHOT_TAG + TASK_JSON_SHA256."
        )

    subject_keys = _list_keys(f"{DATASET}/{SUBJECT}/")
    if not subject_keys:
        raise RuntimeError(
            f"No objects found under {DATASET}/{SUBJECT}/; bucket layout may "
            "have changed."
        )
    sentinel_key = f"{DATASET}/{_SENTINEL_REL.as_posix()}"
    if sentinel_key not in subject_keys:
        raise RuntimeError(
            f"Sentinel {sentinel_key} missing from listing; bucket layout may "
            "have changed."
        )
    # Stable sort on a boolean key: listing order is kept, except that the
    # sentinel (key True) moves to the end and is written only after
    # everything else has succeeded.
    for key in sorted(subject_keys, key=lambda k: k == sentinel_key):
        rel = Path(key).relative_to(DATASET)
        _download(key, target_dir / rel)

    _logger.info("%s: download complete", DATASET)
139+
140+
141+
def main(argv: list[str] | None = None) -> int:
    """Entry point. Returns a shell-style exit code.

    Args:
        argv: Command-line arguments without the program name; ``None`` lets
            argparse read ``sys.argv``.

    Returns:
        ``0`` when ``download`` returns normally, ``2`` on a network-level
        failure, ``3`` when ``download`` raises ``RuntimeError``.
    """
    # Imported locally: the module's top-level imports do not include it.
    import http.client

    # __doc__ is None when docstrings are stripped (python -OO).
    doc_lines = (__doc__ or "").strip().splitlines()
    parser = argparse.ArgumentParser(
        description=doc_lines[0] if doc_lines else None
    )
    parser.add_argument(
        "target_dir",
        nargs="?",
        type=Path,
        default=_DEFAULT_TARGET,
        help=f"Destination directory (default: {_DEFAULT_TARGET}).",
    )
    args = parser.parse_args(argv)

    logging.basicConfig(level=logging.INFO, format="%(message)s", stream=sys.stderr)
    try:
        download(args.target_dir)
    except (
        urllib.error.URLError,
        # Failures while the body is being read surface as these raw
        # exceptions rather than URLError.
        ConnectionError,
        TimeoutError,
        http.client.HTTPException,
    ) as exc:
        _logger.error("%s: network error: %s", DATASET, exc)
        return 2
    except RuntimeError as exc:
        _logger.error("%s: %s", DATASET, exc)
        return 3
    return 0
163+
164+
165+
if __name__ == "__main__":
166+
raise SystemExit(main())
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
"""Longitudinal processing integration tests."""
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
"""Fixtures for longitudinal integration tests.
2+
3+
These fixtures own the ds000114 test dataset lifecycle (download + reuse)
4+
and run the cross-sectional anatomical stage once so that each
5+
longitudinal test can build on ``desc-brain`` T1w derivatives without
6+
paying the preprocessing cost per test.
7+
"""
8+
9+
from __future__ import annotations
10+
11+
import shutil
12+
import subprocess
13+
from pathlib import Path
14+
15+
import pytest
16+
17+
_REPO_ROOT = Path(__file__).resolve().parents[3]
18+
_DOWNLOAD_SCRIPT = _REPO_ROOT / "scripts" / "download_ds000114.py"
19+
_DATASET_DIR = _REPO_ROOT / "tests" / "data" / "ds000114"
20+
_DATASET_SENTINEL = (
21+
_DATASET_DIR / "sub-01" / "ses-test" / "anat" / "sub-01_ses-test_T1w.nii.gz"
22+
)
23+
24+
25+
@pytest.fixture(scope="session")
def ds000114_dataset() -> Path:
    """Provide the ds000114 BIDS root, fetching the data when it is absent.

    If the sentinel T1w file is already on disk the directory is returned
    immediately. Otherwise the download script is run through ``uv run``.
    The requesting test is skipped, not failed, when the script is missing,
    ``uv`` is not on ``PATH``, the script exits non-zero, or the sentinel is
    still absent afterwards.
    """
    if not _DATASET_SENTINEL.exists():
        if not _DOWNLOAD_SCRIPT.exists():
            pytest.skip(f"download script missing: {_DOWNLOAD_SCRIPT}")

        uv_exe = shutil.which("uv")
        if uv_exe is None:
            pytest.skip("uv not found on PATH; cannot run download script")

        command = [uv_exe, "run", str(_DOWNLOAD_SCRIPT), str(_DATASET_DIR)]
        proc = subprocess.run(  # noqa: S603
            command,
            capture_output=True,
            text=True,
            check=False,
        )
        fetched = proc.returncode == 0 and _DATASET_SENTINEL.exists()
        if not fetched:
            # Only the tail of each stream is shown to keep the skip reason short.
            pytest.skip(
                "ds000114 download failed; skipping longitudinal tests.\n"
                f"--- stdout ---\n{proc.stdout[-1000:]}\n"
                f"--- stderr ---\n{proc.stderr[-1000:]}"
            )
    return _DATASET_DIR
56+
57+
58+
@pytest.fixture(scope="session")
def runner_backend(request: pytest.FixtureRequest) -> str:
    """Return the Styx runner backend given via pytest's ``--runner`` option."""
    config = request.config
    return config.getoption("--runner")
62+
63+
64+
@pytest.fixture(scope="session")
def ds000114_anat_derivatives(
    ds000114_dataset: Path,
    tmp_path_factory: pytest.TempPathFactory,
    runner_backend: str,
) -> Path:
    """Produce cross-sectional anatomical derivatives for ds000114 sub-01.

    Invokes ``rbc anatomical`` once per test session, writing into a fresh
    temporary directory, and returns that directory. The longitudinal tests
    reuse its output instead of re-running the stage themselves. The
    requesting tests are skipped when the ``rbc`` executable is not on
    ``PATH``; a non-zero exit from the command fails the fixture.
    """
    rbc_exe = shutil.which("rbc")
    if rbc_exe is None:
        pytest.skip("rbc CLI not found on PATH")

    derivatives_dir = tmp_path_factory.mktemp("ds000114_derivatives")
    command = [
        rbc_exe,
        "anatomical",
        str(ds000114_dataset),
        "-o",
        str(derivatives_dir),
        "--runner",
        runner_backend,
        "--participant-label",
        "01",
    ]
    # One-hour ceiling on the run.
    proc = subprocess.run(  # noqa: S603
        command,
        capture_output=True,
        text=True,
        timeout=3600,
    )
    assert proc.returncode == 0, (
        f"rbc anatomical exited with code {proc.returncode}\n"
        f"--- stdout ---\n{proc.stdout[-2000:]}\n"
        f"--- stderr ---\n{proc.stderr[-2000:]}"
    )
    return derivatives_dir
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
"""Integration test for ``rbc longitudinal template``.
2+
3+
Deferred from Stage 2 of the longitudinal refactor (tracker #301,
4+
Stage 2 landed in PR #306). Depends on the ds000114 multi-session test
5+
fixture; cross-sectional anatomical derivatives are produced by the
6+
session-scoped ``ds000114_anat_derivatives`` fixture so the template
7+
stage has ``desc-brain`` T1w volumes to consume.
8+
"""
9+
10+
from __future__ import annotations
11+
12+
import shutil
13+
import subprocess
14+
from typing import TYPE_CHECKING
15+
16+
import pytest
17+
18+
if TYPE_CHECKING:
19+
from pathlib import Path
20+
21+
22+
@pytest.mark.slow
def test_rbc_longitudinal_template_builds_bids_tree(
    ds000114_anat_derivatives: Path,
    runner_backend: str,
) -> None:
    """Check that ``rbc longitudinal template`` emits the expected files.

    The command reads from and writes to the derivatives directory supplied
    by the fixture. On success, ``sub-01/ses-longitudinal/anat`` must hold
    the template T1w and one transform file per source session.
    """
    rbc_exe = shutil.which("rbc")
    assert rbc_exe is not None, "rbc CLI not found on PATH"

    root = ds000114_anat_derivatives
    command = [
        rbc_exe,
        "longitudinal",
        "template",
        str(root),
        "-o",
        str(root),
        "--runner",
        runner_backend,
        "--participant-label",
        "01",
    ]
    proc = subprocess.run(  # noqa: S603
        command,
        capture_output=True,
        text=True,
        timeout=3600,
    )
    assert proc.returncode == 0, (
        f"rbc longitudinal template exited with code {proc.returncode}\n"
        f"--- stdout ---\n{proc.stdout[-2000:]}\n"
        f"--- stderr ---\n{proc.stderr[-2000:]}"
    )

    anat_dir = root / "sub-01" / "ses-longitudinal" / "anat"
    wanted = (
        "sub-01_ses-longitudinal_T1w.nii.gz",
        "sub-01_ses-longitudinal_from-test_to-longitudinal_mode-image_xfm.txt",
        "sub-01_ses-longitudinal_from-retest_to-longitudinal_mode-image_xfm.txt",
    )
    absent = [name for name in wanted if not (anat_dir / name).is_file()]
    if not absent:
        return

    # List every file that was produced, to make the failure diagnosable.
    produced = sorted(
        str(entry.relative_to(root)) for entry in root.rglob("*") if entry.is_file()
    )
    pytest.fail(
        "Missing expected longitudinal derivatives:\n "
        + "\n ".join(absent)
        + "\n--- file tree ---\n"
        + "\n".join(produced)
    )

0 commit comments

Comments
 (0)