From 942c4438f9f14668b13cfb10cbc154d59ad672b4 Mon Sep 17 00:00:00 2001 From: Florian Rupprecht Date: Thu, 9 Apr 2026 11:00:45 -0400 Subject: [PATCH 01/12] Fix session filter dropping sessionless datasets and add rbc-all integration test (#289) `pl.col("ses") != "longitudinal"` evaluates to null (falsy) when ses is None, silently skipping every row in datasets without session labels. Replace with `ne_missing()` in all three cross-sectional orchestration modules so null sessions are preserved. Add an integration test that runs `rbc all` on tests/data/ds000001 with Docker and verifies key derivative files are produced. --- src/rbc/orchestration/all.py | 2 +- src/rbc/orchestration/anatomical.py | 2 +- src/rbc/orchestration/functional.py | 2 +- tests/integration/test_all.py | 97 +++++++++++++++++++++++++++++ 4 files changed, 100 insertions(+), 3 deletions(-) create mode 100644 tests/integration/test_all.py diff --git a/src/rbc/orchestration/all.py b/src/rbc/orchestration/all.py index 2b920778..2a5454fa 100644 --- a/src/rbc/orchestration/all.py +++ b/src/rbc/orchestration/all.py @@ -84,7 +84,7 @@ def run( df = filters.apply( df, - pl.col("ses") != "longitudinal", + pl.col("ses").ne_missing("longitudinal"), pl.col("space").is_null(), pl.col("desc").is_null(), ) diff --git a/src/rbc/orchestration/anatomical.py b/src/rbc/orchestration/anatomical.py index 1c229a02..db377455 100644 --- a/src/rbc/orchestration/anatomical.py +++ b/src/rbc/orchestration/anatomical.py @@ -93,7 +93,7 @@ def run( df = filters.apply( df, - pl.col("ses") != "longitudinal", + pl.col("ses").ne_missing("longitudinal"), pl.col("space").is_null(), pl.col("desc").is_null(), ) diff --git a/src/rbc/orchestration/functional.py b/src/rbc/orchestration/functional.py index 084d5199..d6058814 100644 --- a/src/rbc/orchestration/functional.py +++ b/src/rbc/orchestration/functional.py @@ -132,7 +132,7 @@ def run( df = filters.apply( df, - pl.col("ses") != "longitudinal", + pl.col("ses").ne_missing("longitudinal"), pl.col("space").is_null(), ) diff --git a/tests/integration/test_all.py b/tests/integration/test_all.py new file mode 100644 index 00000000..db849cfb --- /dev/null +++ b/tests/integration/test_all.py @@ -0,0 +1,97 @@ +"""Integration test for ``rbc all`` pipeline stage handoff. + +Runs the full pipeline on the ds000001 test dataset with Docker and verifies +that it completes without errors and that key derivative files exist. +""" + +from __future__ import annotations + +import shutil +import subprocess +from pathlib import Path + +import pytest + +_TEST_DATASET = Path(__file__).parents[1] / "data" / "ds000001" + +# Subject with no session in ds000001. +_SUB = "01" +_TASK = "balloonanalogrisktask" +_RUN = "01" + + +@pytest.mark.slow +def test_rbc_all_completes_and_produces_derivatives( + request: pytest.FixtureRequest, + tmp_path: Path, +) -> None: + """``rbc all`` runs end-to-end and writes expected derivative files.""" + runner = request.config.getoption("--runner") + output_dir = tmp_path / "derivatives" + output_dir.mkdir() + + rbc_exe = shutil.which("rbc") + assert rbc_exe is not None, "rbc CLI not found on PATH" + + result = subprocess.run( # noqa: S603 + [ + rbc_exe, + "all", + str(_TEST_DATASET), + "-o", + str(output_dir), + "--participant-label", + _SUB, + "--runner", + runner, + ], + capture_output=True, + text=True, + timeout=7200, # 2 h ceiling + ) + assert result.returncode == 0, ( + f"rbc all exited with code {result.returncode}\n" + f"--- stdout ---\n{result.stdout[-2000:]}\n" + f"--- stderr ---\n{result.stderr[-2000:]}" + ) + + sub_dir = output_dir / f"sub-{_SUB}" + + # -- Dataset-level metadata -- + assert (output_dir / "dataset_description.json").is_file() + + # -- Anatomical derivatives -- + anat = sub_dir / "anat" + anat_files = [ + f"sub-{_SUB}_desc-brain_T1w.nii.gz", + f"sub-{_SUB}_desc-T1w_mask.nii.gz", + f"sub-{_SUB}_desc-csf_mask.nii.gz", + f"sub-{_SUB}_desc-gm_mask.nii.gz", + f"sub-{_SUB}_desc-wm_mask.nii.gz", + f"sub-{_SUB}_desc-wmBBR_mask.nii.gz", + ] + for name in anat_files: + assert (anat / name).is_file(), f"Missing anatomical file: {name}" + + # -- Functional derivatives -- + func = sub_dir / "func" + bold_stem = f"sub-{_SUB}_task-{_TASK}_run-{_RUN}" + func_files = [ + f"{bold_stem}_sbref.nii.gz", + f"{bold_stem}_desc-preproc_bold.nii.gz", + f"{bold_stem}_desc-motionParams_motion.1D", + f"{bold_stem}_desc-brain_mask.nii.gz", + ] + for name in func_files: + assert (func / name).is_file(), f"Missing functional file: {name}" + + # -- QC derivative (at least one regressor) -- + qc_files = list(func.glob(f"{bold_stem}_space-*_desc-xcp_*_quality.tsv")) + assert qc_files, "No QC quality TSV files found" + + # -- Metrics derivatives -- + timeseries = list(func.glob(f"{bold_stem}_space-*_*_timeseries.tsv")) + assert timeseries, "No timeseries TSV files found" + + correlations = list(func.glob(f"{bold_stem}_space-*_*_correlations.tsv")) + assert correlations, "No correlation matrix TSV files found" From 0ded0cdd1abe64eca0552492ce896ab2b80ec0f1 Mon Sep 17 00:00:00 2001 From: Florian Rupprecht Date: Thu, 9 Apr 2026 11:04:26 -0400 Subject: [PATCH 02/12] Add sequential-run test and output comparison for rbc all vs individual stages Tests both invocation styles (rbc all vs anatomical/functional/metrics/qc in sequence) and verifies they produce the same set of derivative files. Uses session-scoped fixtures so each pipeline variant runs only once. --- tests/integration/test_all.py | 171 +++++++++++++++++++++++++++------- 1 file changed, 139 insertions(+), 32 deletions(-) diff --git a/tests/integration/test_all.py b/tests/integration/test_all.py index db849cfb..a6cf3f7b 100644 --- a/tests/integration/test_all.py +++ b/tests/integration/test_all.py @@ -1,7 +1,9 @@ """Integration test for ``rbc all`` pipeline stage handoff. -Runs the full pipeline on the ds000001 test dataset with Docker and verifies -that it completes without errors and that key derivative files exist. +Runs the full pipeline via ``rbc all`` (in-memory handoff) and via the four +individual subcommands in sequence (disk round-trip), then verifies that both +approaches complete without errors, produce key derivative files, and yield +identical outputs. """ from __future__ import annotations @@ -9,9 +11,13 @@ import shutil import subprocess from pathlib import Path +from typing import TYPE_CHECKING import pytest +if TYPE_CHECKING: + from collections.abc import Sequence + _TEST_DATASET = Path(__file__).parents[1] / "data" / "ds000001" # Subject with no session in ds000001. @@ -20,41 +26,102 @@ _RUN = "01" -@pytest.mark.slow -def test_rbc_all_completes_and_produces_derivatives( - request: pytest.FixtureRequest, - tmp_path: Path, -) -> None: - """``rbc all`` runs end-to-end and writes expected derivative files.""" - runner = request.config.getoption("--runner") - output_dir = tmp_path / "derivatives" - output_dir.mkdir() +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- - rbc_exe = shutil.which("rbc") - assert rbc_exe is not None, "rbc CLI not found on PATH" +def _rbc_exe() -> str: + exe = shutil.which("rbc") + assert exe is not None, "rbc CLI not found on PATH" + return exe + + +def _run_rbc( + args: Sequence[str], *, timeout: int = 7200 +) -> subprocess.CompletedProcess[str]: result = subprocess.run( # noqa: S603 - [ - rbc_exe, - "all", - str(_TEST_DATASET), - "-o", - str(output_dir), - "--participant-label", - _SUB, - "--runner", - runner, - ], + [_rbc_exe(), *args], capture_output=True, text=True, - timeout=7200, # 2 h ceiling + timeout=timeout, ) assert result.returncode == 0, ( - f"rbc all exited with code {result.returncode}\n" + f"rbc {args[0]} exited with code {result.returncode}\n" f"--- stdout ---\n{result.stdout[-2000:]}\n" f"--- stderr ---\n{result.stderr[-2000:]}" ) + return result + + +_COMMON_ARGS: list[str] = ["--participant-label", _SUB] + + +def _relative_files(root: Path) -> set[str]: + """Return the set of file paths relative to *root*.""" + return {str(p.relative_to(root)) for p in root.rglob("*") if p.is_file()} + + +# --------------------------------------------------------------------------- +# Fixtures — each pipeline variant runs once per session +# --------------------------------------------------------------------------- + + +@pytest.fixture(scope="session") +def _runner(request: pytest.FixtureRequest) -> str: + return request.config.getoption("--runner") + + +@pytest.fixture(scope="session") +def all_output(tmp_path_factory: pytest.TempPathFactory, _runner: str) -> Path: + """Run ``rbc all`` once and return the output directory.""" + out = tmp_path_factory.mktemp("all") / "derivatives" + out.mkdir() + _run_rbc( + [ + "all", + str(_TEST_DATASET), + "-o", + str(out), + "--runner", + _runner, + *_COMMON_ARGS, + ] + ) + return out + +@pytest.fixture(scope="session") +def sequential_output(tmp_path_factory: pytest.TempPathFactory, _runner: str) -> Path: + """Run the four individual subcommands in order and return the output dir.""" + out = tmp_path_factory.mktemp("sequential") / "derivatives" + out.mkdir() + + runner_args = ["--runner", _runner, *_COMMON_ARGS] + input_args = [str(_TEST_DATASET), "-o", str(out)] + + # 1. anatomical + _run_rbc(["anatomical", *input_args, *runner_args]) + + # 2. functional (reads anat derivatives from output_dir) + _run_rbc(["functional", *input_args, *runner_args]) + + # 3. metrics (reads func derivatives from output_dir) + _run_rbc(["metrics", *input_args, *runner_args]) + + # 4. qc (reads func derivatives from output_dir) + _run_rbc(["qc", *input_args, *runner_args]) + + return out + + +# --------------------------------------------------------------------------- +# Assertion helpers +# --------------------------------------------------------------------------- + + +def _assert_derivatives_exist(output_dir: Path) -> None: + """Check that all expected derivative files are present.""" sub_dir = output_dir / f"sub-{_SUB}" # -- Dataset-level metadata -- @@ -85,13 +152,53 @@ def test_rbc_all_completes_and_produces_derivatives( for name in func_files: assert (func / name).is_file(), f"Missing functional file: {name}" - # -- QC derivative (at least one regressor) -- + # -- QC -- qc_files = list(func.glob(f"{bold_stem}_space-*_desc-xcp_*_quality.tsv")) assert qc_files, "No QC quality TSV files found" - # -- Metrics derivatives -- - timeseries = list(func.glob(f"{bold_stem}_space-*_*_timeseries.tsv")) - assert timeseries, "No timeseries TSV files found" + # -- Metrics -- + assert list(func.glob(f"{bold_stem}_space-*_*_timeseries.tsv")), ( + "No timeseries TSV files found" + ) + assert list(func.glob(f"{bold_stem}_space-*_*_correlations.tsv")), ( + "No correlation matrix TSV files found" + ) + + +# --------------------------------------------------------------------------- +# Tests +# --------------------------------------------------------------------------- - correlations = list(func.glob(f"{bold_stem}_space-*_*_correlations.tsv")) - assert correlations, "No correlation matrix TSV files found" + +@pytest.mark.slow +def test_rbc_all_produces_derivatives(all_output: Path) -> None: + """``rbc all`` runs end-to-end and writes expected derivative files.""" + _assert_derivatives_exist(all_output) + + +@pytest.mark.slow +def test_sequential_produces_derivatives(sequential_output: Path) -> None: + """Running anatomical/functional/metrics/qc individually produces the same files.""" + _assert_derivatives_exist(sequential_output) + + +@pytest.mark.slow +def test_all_vs_sequential_outputs_match( + all_output: Path, + sequential_output: Path, +) -> None: + """Both invocation styles must produce the same set of derivative files.""" + all_files = _relative_files(all_output) + seq_files = _relative_files(sequential_output) + + missing_from_seq = all_files - seq_files + extra_in_seq = seq_files - all_files + + assert not missing_from_seq, ( + "Files produced by 'rbc all' but missing from sequential run:\n" + + "\n".join(sorted(missing_from_seq)) + ) + assert not extra_in_seq, ( + "Files produced by sequential run but missing from 'rbc all':\n" + + "\n".join(sorted(extra_in_seq)) + ) From 6ed9b5405461c5619a88229a91bc862c6c1fea20 Mon Sep 17 00:00:00 2001 From: Florian Rupprecht Date: Thu, 9 Apr 2026 11:17:01 -0400 Subject: [PATCH 03/12] Pass derivatives dir as input to sequential stages The functional/metrics/qc stages need to discover derivatives from previous stages via load_table, so the output dir must be included in the input dirs alongside the raw BIDS dataset. --- tests/integration/test_all.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/tests/integration/test_all.py b/tests/integration/test_all.py index a6cf3f7b..28cf0f01 100644 --- a/tests/integration/test_all.py +++ b/tests/integration/test_all.py @@ -98,19 +98,20 @@ def sequential_output(tmp_path_factory: pytest.TempPathFactory, _runner: str) -> out.mkdir() runner_args = ["--runner", _runner, *_COMMON_ARGS] - input_args = [str(_TEST_DATASET), "-o", str(out)] + raw = str(_TEST_DATASET) + deriv = str(out) - # 1. anatomical - _run_rbc(["anatomical", *input_args, *runner_args]) + # 1. anatomical (raw BIDS only) + _run_rbc(["anatomical", raw, "-o", deriv, *runner_args]) - # 2. functional (reads anat derivatives from output_dir) - _run_rbc(["functional", *input_args, *runner_args]) + # 2. functional (raw BIDS + anat derivatives) + _run_rbc(["functional", raw, deriv, "-o", deriv, *runner_args]) - # 3. metrics (reads func derivatives from output_dir) - _run_rbc(["metrics", *input_args, *runner_args]) + # 3. metrics (raw BIDS + derivatives from previous stages) + _run_rbc(["metrics", raw, deriv, "-o", deriv, *runner_args]) - # 4. qc (reads func derivatives from output_dir) - _run_rbc(["qc", *input_args, *runner_args]) + # 4. qc (raw BIDS + derivatives from previous stages) + _run_rbc(["qc", raw, deriv, "-o", deriv, *runner_args]) return out From d0e5d0366443aeb4661d5065989326815e59701b Mon Sep 17 00:00:00 2001 From: Florian Rupprecht Date: Thu, 9 Apr 2026 14:22:35 -0400 Subject: [PATCH 04/12] Apply bids_safe_label in metrics and QC resolve functions resolve_metrics and resolve_qc queried extra entities with raw regressor names (e.g. "36-parameter") but export_functional saves them through bids_safe_label (e.g. "36parameter"). This mismatch caused FileNotFoundError when running rbc metrics or rbc qc standalone. --- src/rbc/bids/metrics.py | 4 ++-- src/rbc/bids/qc.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/rbc/bids/metrics.py b/src/rbc/bids/metrics.py index b2bda843..422a9918 100644 --- a/src/rbc/bids/metrics.py +++ b/src/rbc/bids/metrics.py @@ -47,13 +47,13 @@ def resolve_metrics( deriv_df, suffix=Suffix.BOLD, desc="regressed", - extra={"reg": regressor}, + extra={"reg": bids_safe_label(regressor)}, ), "cleaned_bold": mni_q.expect( deriv_df, suffix=Suffix.BOLD, desc="preproc", - extra={"reg": regressor}, + extra={"reg": bids_safe_label(regressor)}, ), } diff --git a/src/rbc/bids/qc.py b/src/rbc/bids/qc.py index f249d619..e33154a7 100644 --- a/src/rbc/bids/qc.py +++ b/src/rbc/bids/qc.py @@ -57,7 +57,7 @@ def resolve_qc( deriv_df, suffix=Suffix.BOLD, desc="preproc", - extra={"reg": reg}, + extra={"reg": bids_safe_label(reg)}, ) for reg in regressors }, From dc372e3137fdff554e52cf07d631525cdf8afa56 Mon Sep 17 00:00:00 2001 From: Florian Rupprecht Date: Thu, 9 Apr 2026 14:26:14 -0400 Subject: [PATCH 05/12] Truncate BOLD to 25 volumes in integration test fixture Copies ds000001 and truncates the BOLD timeseries before running the pipeline, cutting functional processing time significantly while still exercising the full code path (motion correction, nuisance regression, metrics, QC). --- tests/integration/test_all.py | 45 +++++++++++++++++++++++++++++++---- 1 file changed, 40 insertions(+), 5 deletions(-) diff --git a/tests/integration/test_all.py b/tests/integration/test_all.py index 28cf0f01..e365bd85 100644 --- a/tests/integration/test_all.py +++ b/tests/integration/test_all.py @@ -4,6 +4,9 @@ individual subcommands in sequence (disk round-trip), then verifies that both approaches complete without errors, produce key derivative files, and yield identical outputs. + +The BOLD timeseries is truncated to a small number of volumes so that +functional processing finishes in minutes rather than tens of minutes. """ from __future__ import annotations @@ -13,18 +16,24 @@ from pathlib import Path from typing import TYPE_CHECKING +import nibabel as nib +import numpy as np import pytest if TYPE_CHECKING: from collections.abc import Sequence -_TEST_DATASET = Path(__file__).parents[1] / "data" / "ds000001" +_ORIG_DATASET = Path(__file__).parents[1] / "data" / "ds000001" # Subject with no session in ds000001. _SUB = "01" _TASK = "balloonanalogrisktask" _RUN = "01" +# Number of BOLD volumes to keep. Must be enough for nuisance regression +# (after discarding --start-tr 2) but small enough to be fast. +_BOLD_VOLUMES = 25 + # --------------------------------------------------------------------------- # Helpers @@ -73,14 +82,36 @@ def _runner(request: pytest.FixtureRequest) -> str: @pytest.fixture(scope="session") -def all_output(tmp_path_factory: pytest.TempPathFactory, _runner: str) -> Path: +def truncated_dataset(tmp_path_factory: pytest.TempPathFactory) -> Path: + """Copy ds000001 and truncate the BOLD to *_BOLD_VOLUMES* volumes.""" + dest = tmp_path_factory.mktemp("ds") / "ds000001" + shutil.copytree(_ORIG_DATASET, dest) + + bold = ( + dest + / f"sub-{_SUB}" + / "func" + / f"sub-{_SUB}_task-{_TASK}_run-{_RUN}_bold.nii.gz" + ) + img = nib.nifti1.load(bold) + data = np.asarray(img.dataobj[:, :, :, :_BOLD_VOLUMES]) + nib.nifti1.save(nib.Nifti1Image(data, img.affine, img.header), bold) + return dest + + +@pytest.fixture(scope="session") +def all_output( + tmp_path_factory: pytest.TempPathFactory, + _runner: str, + truncated_dataset: Path, +) -> Path: """Run ``rbc all`` once and return the output directory.""" out = tmp_path_factory.mktemp("all") / "derivatives" out.mkdir() _run_rbc( [ "all", - str(_TEST_DATASET), + str(truncated_dataset), "-o", str(out), "--runner", @@ -92,13 +123,17 @@ def all_output(tmp_path_factory: pytest.TempPathFactory, _runner: str) -> Path: @pytest.fixture(scope="session") -def sequential_output(tmp_path_factory: pytest.TempPathFactory, _runner: str) -> Path: +def sequential_output( + tmp_path_factory: pytest.TempPathFactory, + _runner: str, + truncated_dataset: Path, +) -> Path: """Run the four individual subcommands in order and return the output dir.""" out = tmp_path_factory.mktemp("sequential") / "derivatives" out.mkdir() runner_args = ["--runner", _runner, *_COMMON_ARGS] - raw = str(_TEST_DATASET) + raw = str(truncated_dataset) deriv = str(out) # 1. anatomical (raw BIDS only) From 8574005aca9396fdf430fc4c1e5c01a5500f3d3d Mon Sep 17 00:00:00 2001 From: Florian Rupprecht Date: Thu, 9 Apr 2026 16:44:19 -0400 Subject: [PATCH 06/12] Bump BOLD truncation to 50 volumes for 36-parameter regression 25 volumes was too few: after discarding 2 TRs, only 23 timepoints remained, which is fewer than the ~36 columns in the 36-parameter nuisance regressor model, causing 3dTproject to fail. --- tests/integration/test_all.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/integration/test_all.py b/tests/integration/test_all.py index e365bd85..b1d09d2f 100644 --- a/tests/integration/test_all.py +++ b/tests/integration/test_all.py @@ -30,9 +30,10 @@ _TASK = "balloonanalogrisktask" _RUN = "01" -# Number of BOLD volumes to keep. Must be enough for nuisance regression -# (after discarding --start-tr 2) but small enough to be fast. -_BOLD_VOLUMES = 25 +# Number of BOLD volumes to keep. The 36-parameter regressor model needs +# more timepoints than regressors (~36 columns + derivatives). After +# discarding --start-tr 2, we need at least ~45 usable volumes. +_BOLD_VOLUMES = 50 # --------------------------------------------------------------------------- From 5167f70e62b4f61f0451381f9fbae203fc3668f6 Mon Sep 17 00:00:00 2001 From: Florian Rupprecht Date: Thu, 9 Apr 2026 16:46:35 -0400 Subject: [PATCH 07/12] Revert BOLD truncation, use full test dataset for now Remove the volume truncation optimization so we can shake out all bugs with the real data first. Can revisit truncation once the pipeline passes end-to-end. --- tests/integration/test_all.py | 46 ++++------------------------------- 1 file changed, 5 insertions(+), 41 deletions(-) diff --git a/tests/integration/test_all.py b/tests/integration/test_all.py index b1d09d2f..28cf0f01 100644 --- a/tests/integration/test_all.py +++ b/tests/integration/test_all.py @@ -4,9 +4,6 @@ individual subcommands in sequence (disk round-trip), then verifies that both approaches complete without errors, produce key derivative files, and yield identical outputs. - -The BOLD timeseries is truncated to a small number of volumes so that -functional processing finishes in minutes rather than tens of minutes. """ from __future__ import annotations @@ -16,25 +13,18 @@ from pathlib import Path from typing import TYPE_CHECKING -import nibabel as nib -import numpy as np import pytest if TYPE_CHECKING: from collections.abc import Sequence -_ORIG_DATASET = Path(__file__).parents[1] / "data" / "ds000001" +_TEST_DATASET = Path(__file__).parents[1] / "data" / "ds000001" # Subject with no session in ds000001. _SUB = "01" _TASK = "balloonanalogrisktask" _RUN = "01" -# Number of BOLD volumes to keep. The 36-parameter regressor model needs -# more timepoints than regressors (~36 columns + derivatives). After -# discarding --start-tr 2, we need at least ~45 usable volumes. -_BOLD_VOLUMES = 50 - # --------------------------------------------------------------------------- # Helpers @@ -83,36 +73,14 @@ def _runner(request: pytest.FixtureRequest) -> str: @pytest.fixture(scope="session") -def truncated_dataset(tmp_path_factory: pytest.TempPathFactory) -> Path: - """Copy ds000001 and truncate the BOLD to *_BOLD_VOLUMES* volumes.""" - dest = tmp_path_factory.mktemp("ds") / "ds000001" - shutil.copytree(_ORIG_DATASET, dest) - - bold = ( - dest - / f"sub-{_SUB}" - / "func" - / f"sub-{_SUB}_task-{_TASK}_run-{_RUN}_bold.nii.gz" - ) - img = nib.nifti1.load(bold) - data = np.asarray(img.dataobj[:, :, :, :_BOLD_VOLUMES]) - nib.nifti1.save(nib.Nifti1Image(data, img.affine, img.header), bold) - return dest - - -@pytest.fixture(scope="session") -def all_output( - tmp_path_factory: pytest.TempPathFactory, - _runner: str, - truncated_dataset: Path, -) -> Path: +def all_output(tmp_path_factory: pytest.TempPathFactory, _runner: str) -> Path: """Run ``rbc all`` once and return the output directory.""" out = tmp_path_factory.mktemp("all") / "derivatives" out.mkdir() _run_rbc( [ "all", - str(truncated_dataset), + str(_TEST_DATASET), "-o", str(out), "--runner", @@ -124,17 +92,13 @@ def all_output( @pytest.fixture(scope="session") -def sequential_output( - tmp_path_factory: pytest.TempPathFactory, - _runner: str, - truncated_dataset: Path, -) -> Path: +def sequential_output(tmp_path_factory: pytest.TempPathFactory, _runner: str) -> Path: """Run the four individual subcommands in order and return the output dir.""" out = tmp_path_factory.mktemp("sequential") / "derivatives" out.mkdir() runner_args = ["--runner", _runner, *_COMMON_ARGS] - raw = str(truncated_dataset) + raw = str(_TEST_DATASET) deriv = str(out) # 1. anatomical (raw BIDS only) From 4404b9a1e7579bdcc1b255a17b72b020f78d1a94 Mon Sep 17 00:00:00 2001 From: Florian Rupprecht Date: Thu, 9 Apr 2026 20:54:14 -0400 Subject: [PATCH 08/12] Reuse anatomical outputs from rbc-all in sequential test The sequential test now copies anat derivatives from the rbc-all run instead of re-running brain extraction and registration, saving ~30-40 min of CI time. The disk round-trip handoff between functional, metrics, and QC stages is still fully tested. --- tests/integration/test_all.py | 44 ++++++++++++++++++++++++----------- 1 file changed, 31 insertions(+), 13 deletions(-) diff --git a/tests/integration/test_all.py b/tests/integration/test_all.py index 28cf0f01..a7a316a0 100644 --- a/tests/integration/test_all.py +++ b/tests/integration/test_all.py @@ -1,9 +1,12 @@ """Integration test for ``rbc all`` pipeline stage handoff. -Runs the full pipeline via ``rbc all`` (in-memory handoff) and via the four -individual subcommands in sequence (disk round-trip), then verifies that both -approaches complete without errors, produce key derivative files, and yield -identical outputs. +Runs the full pipeline via ``rbc all`` (in-memory handoff) and via the +individual subcommands in sequence (disk round-trip), then verifies that +both approaches complete without errors, produce key derivative files, +and yield identical outputs. + +The sequential run reuses anatomical outputs from the ``rbc all`` run to +avoid running brain extraction and registration twice. """ from __future__ import annotations @@ -92,25 +95,40 @@ def all_output(tmp_path_factory: pytest.TempPathFactory, _runner: str) -> Path: @pytest.fixture(scope="session") -def sequential_output(tmp_path_factory: pytest.TempPathFactory, _runner: str) -> Path: - """Run the four individual subcommands in order and return the output dir.""" +def sequential_output( + tmp_path_factory: pytest.TempPathFactory, + _runner: str, + all_output: Path, +) -> Path: + """Run functional/metrics/qc using anat outputs from ``rbc all``. + + Copies the anatomical derivatives produced by the ``all_output`` + fixture so that only the functional, metrics, and QC stages run, + saving ~30-40 min of redundant brain extraction and registration. + """ out = tmp_path_factory.mktemp("sequential") / "derivatives" out.mkdir() + # Seed with anatomical outputs + dataset_description from rbc all + anat_src = all_output / f"sub-{_SUB}" / "anat" + anat_dst = out / f"sub-{_SUB}" / "anat" + shutil.copytree(anat_src, anat_dst) + shutil.copy2( + all_output / "dataset_description.json", + out / "dataset_description.json", + ) + runner_args = ["--runner", _runner, *_COMMON_ARGS] raw = str(_TEST_DATASET) deriv = str(out) - # 1. anatomical (raw BIDS only) - _run_rbc(["anatomical", raw, "-o", deriv, *runner_args]) - - # 2. functional (raw BIDS + anat derivatives) + # functional (raw BIDS + anat derivatives) _run_rbc(["functional", raw, deriv, "-o", deriv, *runner_args]) - # 3. metrics (raw BIDS + derivatives from previous stages) + # metrics (derivatives from previous stages) _run_rbc(["metrics", raw, deriv, "-o", deriv, *runner_args]) - # 4. qc (raw BIDS + derivatives from previous stages) + # qc (derivatives from previous stages) _run_rbc(["qc", raw, deriv, "-o", deriv, *runner_args]) return out @@ -179,7 +197,7 @@ def test_rbc_all_produces_derivatives(all_output: Path) -> None: @pytest.mark.slow def test_sequential_produces_derivatives(sequential_output: Path) -> None: - """Running anatomical/functional/metrics/qc individually produces the same files.""" + """Running functional/metrics/qc individually produces the same files.""" _assert_derivatives_exist(sequential_output) From d064f8bd5312d535cf8306b33648728829dde234 Mon Sep 17 00:00:00 2001 From: Florian Rupprecht Date: Thu, 9 Apr 2026 21:01:14 -0400 Subject: [PATCH 09/12] Print derivative file tree on assertion failure for CI debugging --- tests/integration/test_all.py | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/tests/integration/test_all.py b/tests/integration/test_all.py index a7a316a0..b5930c0d 100644 --- a/tests/integration/test_all.py +++ b/tests/integration/test_all.py @@ -65,6 +65,12 @@ def _relative_files(root: Path) -> set[str]: return {str(p.relative_to(root)) for p in root.rglob("*") if p.is_file()} +def _file_tree(root: Path) -> str: + """Return a newline-separated listing of all files under *root*.""" + files = sorted(p.relative_to(root) for p in root.rglob("*") if p.is_file()) + return "\n".join(str(f) for f in files) if files else "(empty)" + + # --------------------------------------------------------------------------- # Fixtures — each pipeline variant runs once per session # --------------------------------------------------------------------------- @@ -141,10 +147,13 @@ def sequential_output( def _assert_derivatives_exist(output_dir: Path) -> None: """Check that all expected derivative files are present.""" + tree = _file_tree(output_dir) sub_dir = output_dir / f"sub-{_SUB}" # -- Dataset-level metadata -- - assert (output_dir / "dataset_description.json").is_file() + assert (output_dir / "dataset_description.json").is_file(), ( + f"Missing dataset_description.json\n--- file tree ---\n{tree}" + ) # -- Anatomical derivatives -- anat = sub_dir / "anat" @@ -157,7 +166,9 @@ def _assert_derivatives_exist(output_dir: Path) -> None: f"sub-{_SUB}_desc-wmBBR_mask.nii.gz", ] for name in anat_files: - assert (anat / name).is_file(), f"Missing anatomical file: {name}" + assert (anat / name).is_file(), ( + f"Missing anatomical file: {name}\n--- file tree ---\n{tree}" + ) # -- Functional derivatives -- func = sub_dir / "func" @@ -169,18 +180,20 @@ def _assert_derivatives_exist(output_dir: Path) -> None: f"{bold_stem}_desc-brain_mask.nii.gz", ] for name in func_files: - assert (func / name).is_file(), f"Missing functional file: {name}" + assert (func / name).is_file(), ( + f"Missing functional file: {name}\n--- file tree ---\n{tree}" + ) # -- QC -- qc_files = list(func.glob(f"{bold_stem}_space-*_desc-xcp_*_quality.tsv")) - assert qc_files, "No QC quality TSV files found" + assert qc_files, f"No QC quality TSV files found\n--- file tree ---\n{tree}" # -- Metrics -- assert list(func.glob(f"{bold_stem}_space-*_*_timeseries.tsv")), ( - "No timeseries TSV files found" + f"No timeseries TSV files found\n--- file tree ---\n{tree}" ) assert list(func.glob(f"{bold_stem}_space-*_*_correlations.tsv")), ( - "No correlation matrix TSV files found" + f"No correlation matrix TSV files found\n--- file tree ---\n{tree}" ) From 554e3b351df5d4e68c5ef3468b0d6037aa2f1864 Mon Sep 17 00:00:00 2001 From: Florian Rupprecht Date: Thu, 9 Apr 2026 21:05:05 -0400 Subject: [PATCH 10/12] Fix run entity format and QC duplicate-match bug The BIDS path builder formats integer run entities as "run-1" (not "run-01"), so update the test assertion accordingly. resolve_qc's template_bold query matched both the untagged preproc bold and per-regressor preproc bolds. Add without=["reg"] to disambiguate. --- src/rbc/bids/qc.py | 4 +++- tests/integration/test_all.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/rbc/bids/qc.py b/src/rbc/bids/qc.py index e33154a7..e449a696 100644 --- a/src/rbc/bids/qc.py +++ b/src/rbc/bids/qc.py @@ -51,7 +51,9 @@ def resolve_qc( Dict with keys matching ``single_session_qc`` parameters. """ return { - "template_bold": func_mni.expect(deriv_df, suffix=Suffix.BOLD, desc="preproc"), + "template_bold": func_mni.expect( + deriv_df, suffix=Suffix.BOLD, desc="preproc", without=["reg"] + ), "cleaned_bold": { reg: func_mni.expect( deriv_df, diff --git a/tests/integration/test_all.py b/tests/integration/test_all.py index b5930c0d..511402fd 100644 --- a/tests/integration/test_all.py +++ b/tests/integration/test_all.py @@ -26,7 +26,7 @@ # Subject with no session in ds000001. _SUB = "01" _TASK = "balloonanalogrisktask" -_RUN = "01" +_RUN = "1" # --------------------------------------------------------------------------- From e91466a6eec1dd56e39b16982ca191ffaf0eb29e Mon Sep 17 00:00:00 2001 From: Florian Rupprecht Date: Thu, 9 Apr 2026 22:55:01 -0400 Subject: [PATCH 11/12] Fix QC resolve to exclude reg extra entity and relax QC glob pattern Use extra={"reg": False} instead of without=["reg"] since reg is an extra entity (not a standard BIDS column). Also relax the QC glob pattern since entity ordering puts reg before desc in the filename. --- src/rbc/bids/qc.py | 2 +- tests/integration/test_all.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rbc/bids/qc.py b/src/rbc/bids/qc.py index e449a696..d036c05a 100644 --- a/src/rbc/bids/qc.py +++ b/src/rbc/bids/qc.py @@ -52,7 +52,7 @@ def resolve_qc( """ return { "template_bold": func_mni.expect( - deriv_df, suffix=Suffix.BOLD, desc="preproc", without=["reg"] + deriv_df, suffix=Suffix.BOLD, desc="preproc", extra={"reg": False} ), "cleaned_bold": { reg: func_mni.expect( diff --git a/tests/integration/test_all.py b/tests/integration/test_all.py index 511402fd..fc6159fd 100644 --- a/tests/integration/test_all.py +++ b/tests/integration/test_all.py @@ -185,7 +185,7 @@ def _assert_derivatives_exist(output_dir: Path) -> None: ) # -- QC -- - qc_files = list(func.glob(f"{bold_stem}_space-*_desc-xcp_*_quality.tsv")) + qc_files = list(func.glob(f"{bold_stem}_space-*_*_quality.tsv")) assert qc_files, f"No QC quality TSV files found\n--- file tree ---\n{tree}" # -- Metrics -- From fb9ec6d208eb5627f0809979011b9e5aaf8dd763 Mon Sep 17 00:00:00 2001 From: Florian Rupprecht Date: Fri, 10 Apr 2026 00:50:37 -0400 Subject: [PATCH 12/12] Route metrics and QC intermediates to niwrap work dir compute_alff, compute_reho, and compute_timeseries default to writing outputs next to their input files. When the standalone metrics/QC orchestration resolves inputs from the derivatives dir, these intermediates leak into the output alongside the BIDS-named exports. Use generate_exec_folder() to create a dedicated work directory under the niwrap runner's data_dir, and pass explicit out_file/out_dir paths so intermediates stay out of the derivatives tree. --- src/rbc/workflows/metrics.py | 17 ++++++++++++++--- src/rbc/workflows/qc.py | 6 ++++-- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/src/rbc/workflows/metrics.py b/src/rbc/workflows/metrics.py index 06e4da0f..929f2d9d 100644 --- a/src/rbc/workflows/metrics.py +++ b/src/rbc/workflows/metrics.py @@ -17,6 +17,7 @@ from rbc.core.metrics.smoothing import smooth from rbc.core.metrics.standardization import compute_zscore from rbc.core.metrics.timeseries import compute_timeseries +from rbc.core.niwrap import generate_exec_folder if TYPE_CHECKING: from collections.abc import Mapping @@ -76,15 +77,23 @@ def single_session_metrics( Returns: All metric outputs bundled in a :class:`MetricsOutputs` tuple. """ + work_dir = generate_exec_folder("metrics") + # 1. ALFF / fALFF on regressed BOLD (non-bandpassed) _logger.info("Computing ALFF/fALFF") alff_path, falff_path = compute_alff( - regressed_bold, template_brain_mask, tr=tr, method="qm" + regressed_bold, + template_brain_mask, + tr=tr, + method="qm", + out_file=work_dir / "alff.nii.gz", ) # 2. ReHo on bandpass-filtered cleaned BOLD _logger.info("Computing ReHo") - reho_path = compute_reho(cleaned_bold, template_brain_mask) + reho_path = compute_reho( + cleaned_bold, template_brain_mask, out_file=work_dir / "reho.nii.gz" + ) # 3. Smooth raw maps _logger.info("Smoothing maps (FWHM=%.1f mm)", fwhm) @@ -103,7 +112,9 @@ def single_session_metrics( ts_outputs = {} for label, atlas_path in atlas_files.items(): _logger.info("Extracting atlas timeseries (%s)", label) - ts_outputs[label] = compute_timeseries(cleaned_bold, atlas_path) + ts_outputs[label] = compute_timeseries( + cleaned_bold, atlas_path, out_dir=work_dir + ) return MetricsOutputs( alff=alff_path, diff --git a/src/rbc/workflows/qc.py b/src/rbc/workflows/qc.py index 096cb14c..d2558428 100644 --- a/src/rbc/workflows/qc.py +++ b/src/rbc/workflows/qc.py @@ -17,6 +17,7 @@ from niwrap import fsl from rbc.bids import TemplateSpace +from rbc.core.niwrap import generate_exec_folder from rbc.core.qc.dvars import dvars_qc_metrics from rbc.core.qc.motion import framewise_displacement_jenkinson, motion_qc_metrics from rbc.core.qc.registration import registration_qc_metrics @@ -85,6 +86,8 @@ def single_session_qc( All QC outputs bundled in a :class:`QCOutputs` tuple. """ _logger.info("Computing QC metrics") + work_dir = generate_exec_folder("qc") + # 1. Load motion data rms_values = np.loadtxt(rms_rel) motion_data = np.loadtxt(motion_params) @@ -157,8 +160,7 @@ def single_session_qc( # 9. Write QC TSV qc_outputs.qc_file[regressor] = write_xcp_qc( qc_outputs.metrics[regressor], - template_bold.parent - / f"sub-{sub}_ses-{ses}_task-{task}_run-{run}_reg-{regressor}_qc.tsv", + work_dir / f"reg-{regressor}_qc.tsv", ) # 10. RBC pass/fail