From 942c4438f9f14668b13cfb10cbc154d59ad672b4 Mon Sep 17 00:00:00 2001
From: Florian Rupprecht <floruppr@gmail.com>
Date: Thu, 9 Apr 2026 11:00:45 -0400
Subject: [PATCH 01/12] Fix session filter dropping sessionless datasets and
 add rbc-all integration test (#289)

`pl.col("ses") != "longitudinal"` evaluates to null (falsy) when ses is
None, silently skipping every row in datasets without session labels.
Replace with `ne_missing()` in all three cross-sectional orchestration
modules so null sessions are preserved.

Add an integration test that runs `rbc all` on tests/data/ds000001 with
Docker and verifies key derivative files are produced.
---
 src/rbc/orchestration/all.py        |  2 +-
 src/rbc/orchestration/anatomical.py |  2 +-
 src/rbc/orchestration/functional.py |  2 +-
 tests/integration/test_all.py       | 97 +++++++++++++++++++++++++++++
 4 files changed, 100 insertions(+), 3 deletions(-)
 create mode 100644 tests/integration/test_all.py

diff --git a/src/rbc/orchestration/all.py b/src/rbc/orchestration/all.py
index 2b920778..2a5454fa 100644
--- a/src/rbc/orchestration/all.py
+++ b/src/rbc/orchestration/all.py
@@ -84,7 +84,7 @@ def run(
 
     df = filters.apply(
         df,
-        pl.col("ses") != "longitudinal",
+        pl.col("ses").ne_missing("longitudinal"),
         pl.col("space").is_null(),
         pl.col("desc").is_null(),
     )
diff --git a/src/rbc/orchestration/anatomical.py b/src/rbc/orchestration/anatomical.py
index 1c229a02..db377455 100644
--- a/src/rbc/orchestration/anatomical.py
+++ b/src/rbc/orchestration/anatomical.py
@@ -93,7 +93,7 @@ def run(
 
     df = filters.apply(
         df,
-        pl.col("ses") != "longitudinal",
+        pl.col("ses").ne_missing("longitudinal"),
         pl.col("space").is_null(),
         pl.col("desc").is_null(),
     )
diff --git a/src/rbc/orchestration/functional.py b/src/rbc/orchestration/functional.py
index 084d5199..d6058814 100644
--- a/src/rbc/orchestration/functional.py
+++ b/src/rbc/orchestration/functional.py
@@ -132,7 +132,7 @@ def run(
 
     df = filters.apply(
         df,
-        pl.col("ses") != "longitudinal",
+        pl.col("ses").ne_missing("longitudinal"),
         pl.col("space").is_null(),
     )
 
diff --git a/tests/integration/test_all.py b/tests/integration/test_all.py
new file mode 100644
index 00000000..db849cfb
--- /dev/null
+++ b/tests/integration/test_all.py
@@ -0,0 +1,97 @@
+"""Integration test for ``rbc all`` pipeline stage handoff.
+
+Runs the full pipeline on the ds000001 test dataset with Docker and verifies
+that it completes without errors and that key derivative files exist.
+"""
+
+from __future__ import annotations
+
+import shutil
+import subprocess
+from pathlib import Path
+
+import pytest
+
+_TEST_DATASET = Path(__file__).parents[1] / "data" / "ds000001"
+
+# Subject with no session in ds000001.
+_SUB = "01"
+_TASK = "balloonanalogrisktask"
+_RUN = "01"
+
+
+@pytest.mark.slow
+def test_rbc_all_completes_and_produces_derivatives(
+    request: pytest.FixtureRequest,
+    tmp_path: Path,
+) -> None:
+    """``rbc all`` runs end-to-end and writes expected derivative files."""
+    runner = request.config.getoption("--runner")
+    output_dir = tmp_path / "derivatives"
+    output_dir.mkdir()
+
+    rbc_exe = shutil.which("rbc")
+    assert rbc_exe is not None, "rbc CLI not found on PATH"
+
+    result = subprocess.run(  # noqa: S603
+        [
+            rbc_exe,
+            "all",
+            str(_TEST_DATASET),
+            "-o",
+            str(output_dir),
+            "--participant-label",
+            _SUB,
+            "--runner",
+            runner,
+        ],
+        capture_output=True,
+        text=True,
+        timeout=7200,  # 2 h ceiling
+    )
+    assert result.returncode == 0, (
+        f"rbc all exited with code {result.returncode}\n"
+        f"--- stdout ---\n{result.stdout[-2000:]}\n"
+        f"--- stderr ---\n{result.stderr[-2000:]}"
+    )
+
+    sub_dir = output_dir / f"sub-{_SUB}"
+
+    # -- Dataset-level metadata --
+    assert (output_dir / "dataset_description.json").is_file()
+
+    # -- Anatomical derivatives --
+    anat = sub_dir / "anat"
+    anat_files = [
+        f"sub-{_SUB}_desc-brain_T1w.nii.gz",
+        f"sub-{_SUB}_desc-T1w_mask.nii.gz",
+        f"sub-{_SUB}_desc-csf_mask.nii.gz",
+        f"sub-{_SUB}_desc-gm_mask.nii.gz",
+        f"sub-{_SUB}_desc-wm_mask.nii.gz",
+        f"sub-{_SUB}_desc-wmBBR_mask.nii.gz",
+    ]
+    for name in anat_files:
+        assert (anat / name).is_file(), f"Missing anatomical file: {name}"
+
+    # -- Functional derivatives --
+    func = sub_dir / "func"
+    bold_stem = f"sub-{_SUB}_task-{_TASK}_run-{_RUN}"
+    func_files = [
+        f"{bold_stem}_sbref.nii.gz",
+        f"{bold_stem}_desc-preproc_bold.nii.gz",
+        f"{bold_stem}_desc-motionParams_motion.1D",
+        f"{bold_stem}_desc-brain_mask.nii.gz",
+    ]
+    for name in func_files:
+        assert (func / name).is_file(), f"Missing functional file: {name}"
+
+    # -- QC derivative (at least one regressor) --
+    qc_files = list(func.glob(f"{bold_stem}_space-*_desc-xcp_*_quality.tsv"))
+    assert qc_files, "No QC quality TSV files found"
+
+    # -- Metrics derivatives --
+    timeseries = list(func.glob(f"{bold_stem}_space-*_*_timeseries.tsv"))
+    assert timeseries, "No timeseries TSV files found"
+
+    correlations = list(func.glob(f"{bold_stem}_space-*_*_correlations.tsv"))
+    assert correlations, "No correlation matrix TSV files found"

From 0ded0cdd1abe64eca0552492ce896ab2b80ec0f1 Mon Sep 17 00:00:00 2001
From: Florian Rupprecht <floruppr@gmail.com>
Date: Thu, 9 Apr 2026 11:04:26 -0400
Subject: [PATCH 02/12] Add sequential-run test and output comparison for rbc
 all vs individual stages

Tests both invocation styles (rbc all vs anatomical/functional/metrics/qc
in sequence) and verifies they produce the same set of derivative files.
Uses session-scoped fixtures so each pipeline variant runs only once.
---
 tests/integration/test_all.py | 171 +++++++++++++++++++++++++++-------
 1 file changed, 139 insertions(+), 32 deletions(-)

diff --git a/tests/integration/test_all.py b/tests/integration/test_all.py
index db849cfb..a6cf3f7b 100644
--- a/tests/integration/test_all.py
+++ b/tests/integration/test_all.py
@@ -1,7 +1,9 @@
 """Integration test for ``rbc all`` pipeline stage handoff.
 
-Runs the full pipeline on the ds000001 test dataset with Docker and verifies
-that it completes without errors and that key derivative files exist.
+Runs the full pipeline via ``rbc all`` (in-memory handoff) and via the four
+individual subcommands in sequence (disk round-trip), then verifies that both
+approaches complete without errors, produce key derivative files, and yield
+identical outputs.
 """
 
 from __future__ import annotations
@@ -9,9 +11,13 @@
 import shutil
 import subprocess
 from pathlib import Path
+from typing import TYPE_CHECKING
 
 import pytest
 
+if TYPE_CHECKING:
+    from collections.abc import Sequence
+
 _TEST_DATASET = Path(__file__).parents[1] / "data" / "ds000001"
 
 # Subject with no session in ds000001.
@@ -20,41 +26,102 @@
 _RUN = "01"
 
 
-@pytest.mark.slow
-def test_rbc_all_completes_and_produces_derivatives(
-    request: pytest.FixtureRequest,
-    tmp_path: Path,
-) -> None:
-    """``rbc all`` runs end-to-end and writes expected derivative files."""
-    runner = request.config.getoption("--runner")
-    output_dir = tmp_path / "derivatives"
-    output_dir.mkdir()
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
 
-    rbc_exe = shutil.which("rbc")
-    assert rbc_exe is not None, "rbc CLI not found on PATH"
 
+def _rbc_exe() -> str:
+    exe = shutil.which("rbc")
+    assert exe is not None, "rbc CLI not found on PATH"
+    return exe
+
+
+def _run_rbc(
+    args: Sequence[str], *, timeout: int = 7200
+) -> subprocess.CompletedProcess[str]:
     result = subprocess.run(  # noqa: S603
-        [
-            rbc_exe,
-            "all",
-            str(_TEST_DATASET),
-            "-o",
-            str(output_dir),
-            "--participant-label",
-            _SUB,
-            "--runner",
-            runner,
-        ],
+        [_rbc_exe(), *args],
         capture_output=True,
         text=True,
-        timeout=7200,  # 2 h ceiling
+        timeout=timeout,
     )
     assert result.returncode == 0, (
-        f"rbc all exited with code {result.returncode}\n"
+        f"rbc {args[0]} exited with code {result.returncode}\n"
         f"--- stdout ---\n{result.stdout[-2000:]}\n"
         f"--- stderr ---\n{result.stderr[-2000:]}"
     )
+    return result
+
+
+_COMMON_ARGS: list[str] = ["--participant-label", _SUB]
+
+
+def _relative_files(root: Path) -> set[str]:
+    """Return the set of file paths relative to *root*."""
+    return {str(p.relative_to(root)) for p in root.rglob("*") if p.is_file()}
+
+
+# ---------------------------------------------------------------------------
+# Fixtures — each pipeline variant runs once per session
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture(scope="session")
+def _runner(request: pytest.FixtureRequest) -> str:
+    return request.config.getoption("--runner")
+
+
+@pytest.fixture(scope="session")
+def all_output(tmp_path_factory: pytest.TempPathFactory, _runner: str) -> Path:
+    """Run ``rbc all`` once and return the output directory."""
+    out = tmp_path_factory.mktemp("all") / "derivatives"
+    out.mkdir()
+    _run_rbc(
+        [
+            "all",
+            str(_TEST_DATASET),
+            "-o",
+            str(out),
+            "--runner",
+            _runner,
+            *_COMMON_ARGS,
+        ]
+    )
+    return out
+
 
+@pytest.fixture(scope="session")
+def sequential_output(tmp_path_factory: pytest.TempPathFactory, _runner: str) -> Path:
+    """Run the four individual subcommands in order and return the output dir."""
+    out = tmp_path_factory.mktemp("sequential") / "derivatives"
+    out.mkdir()
+
+    runner_args = ["--runner", _runner, *_COMMON_ARGS]
+    input_args = [str(_TEST_DATASET), "-o", str(out)]
+
+    # 1. anatomical
+    _run_rbc(["anatomical", *input_args, *runner_args])
+
+    # 2. functional (reads anat derivatives from output_dir)
+    _run_rbc(["functional", *input_args, *runner_args])
+
+    # 3. metrics (reads func derivatives from output_dir)
+    _run_rbc(["metrics", *input_args, *runner_args])
+
+    # 4. qc (reads func derivatives from output_dir)
+    _run_rbc(["qc", *input_args, *runner_args])
+
+    return out
+
+
+# ---------------------------------------------------------------------------
+# Assertion helpers
+# ---------------------------------------------------------------------------
+
+
+def _assert_derivatives_exist(output_dir: Path) -> None:
+    """Check that all expected derivative files are present."""
     sub_dir = output_dir / f"sub-{_SUB}"
 
     # -- Dataset-level metadata --
@@ -85,13 +152,53 @@ def test_rbc_all_completes_and_produces_derivatives(
     for name in func_files:
         assert (func / name).is_file(), f"Missing functional file: {name}"
 
-    # -- QC derivative (at least one regressor) --
+    # -- QC --
     qc_files = list(func.glob(f"{bold_stem}_space-*_desc-xcp_*_quality.tsv"))
     assert qc_files, "No QC quality TSV files found"
 
-    # -- Metrics derivatives --
-    timeseries = list(func.glob(f"{bold_stem}_space-*_*_timeseries.tsv"))
-    assert timeseries, "No timeseries TSV files found"
+    # -- Metrics --
+    assert list(func.glob(f"{bold_stem}_space-*_*_timeseries.tsv")), (
+        "No timeseries TSV files found"
+    )
+    assert list(func.glob(f"{bold_stem}_space-*_*_correlations.tsv")), (
+        "No correlation matrix TSV files found"
+    )
+
+
+# ---------------------------------------------------------------------------
+# Tests
+# ---------------------------------------------------------------------------
 
-    correlations = list(func.glob(f"{bold_stem}_space-*_*_correlations.tsv"))
-    assert correlations, "No correlation matrix TSV files found"
+
+@pytest.mark.slow
+def test_rbc_all_produces_derivatives(all_output: Path) -> None:
+    """``rbc all`` runs end-to-end and writes expected derivative files."""
+    _assert_derivatives_exist(all_output)
+
+
+@pytest.mark.slow
+def test_sequential_produces_derivatives(sequential_output: Path) -> None:
+    """Running anatomical/functional/metrics/qc individually produces the same files."""
+    _assert_derivatives_exist(sequential_output)
+
+
+@pytest.mark.slow
+def test_all_vs_sequential_outputs_match(
+    all_output: Path,
+    sequential_output: Path,
+) -> None:
+    """Both invocation styles must produce the same set of derivative files."""
+    all_files = _relative_files(all_output)
+    seq_files = _relative_files(sequential_output)
+
+    missing_from_seq = all_files - seq_files
+    extra_in_seq = seq_files - all_files
+
+    assert not missing_from_seq, (
+        "Files produced by 'rbc all' but missing from sequential run:\n"
+        + "\n".join(sorted(missing_from_seq))
+    )
+    assert not extra_in_seq, (
+        "Files produced by sequential run but missing from 'rbc all':\n"
+        + "\n".join(sorted(extra_in_seq))
+    )

From 6ed9b5405461c5619a88229a91bc862c6c1fea20 Mon Sep 17 00:00:00 2001
From: Florian Rupprecht <floruppr@gmail.com>
Date: Thu, 9 Apr 2026 11:17:01 -0400
Subject: [PATCH 03/12] Pass derivatives dir as input to sequential stages

The functional/metrics/qc stages need to discover derivatives from
previous stages via load_table, so the output dir must be included
in the input dirs alongside the raw BIDS dataset.
---
 tests/integration/test_all.py | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/tests/integration/test_all.py b/tests/integration/test_all.py
index a6cf3f7b..28cf0f01 100644
--- a/tests/integration/test_all.py
+++ b/tests/integration/test_all.py
@@ -98,19 +98,20 @@ def sequential_output(tmp_path_factory: pytest.TempPathFactory, _runner: str) ->
     out.mkdir()
 
     runner_args = ["--runner", _runner, *_COMMON_ARGS]
-    input_args = [str(_TEST_DATASET), "-o", str(out)]
+    raw = str(_TEST_DATASET)
+    deriv = str(out)
 
-    # 1. anatomical
-    _run_rbc(["anatomical", *input_args, *runner_args])
+    # 1. anatomical (raw BIDS only)
+    _run_rbc(["anatomical", raw, "-o", deriv, *runner_args])
 
-    # 2. functional (reads anat derivatives from output_dir)
-    _run_rbc(["functional", *input_args, *runner_args])
+    # 2. functional (raw BIDS + anat derivatives)
+    _run_rbc(["functional", raw, deriv, "-o", deriv, *runner_args])
 
-    # 3. metrics (reads func derivatives from output_dir)
-    _run_rbc(["metrics", *input_args, *runner_args])
+    # 3. metrics (raw BIDS + derivatives from previous stages)
+    _run_rbc(["metrics", raw, deriv, "-o", deriv, *runner_args])
 
-    # 4. qc (reads func derivatives from output_dir)
-    _run_rbc(["qc", *input_args, *runner_args])
+    # 4. qc (raw BIDS + derivatives from previous stages)
+    _run_rbc(["qc", raw, deriv, "-o", deriv, *runner_args])
 
     return out
 

From d0e5d0366443aeb4661d5065989326815e59701b Mon Sep 17 00:00:00 2001
From: Florian Rupprecht <floruppr@gmail.com>
Date: Thu, 9 Apr 2026 14:22:35 -0400
Subject: [PATCH 04/12] Apply bids_safe_label in metrics and QC resolve
 functions

resolve_metrics and resolve_qc queried extra entities with raw regressor
names (e.g. "36-parameter") but export_functional saves them through
bids_safe_label (e.g. "36parameter"). This mismatch caused
FileNotFoundError when running rbc metrics or rbc qc standalone.
---
 src/rbc/bids/metrics.py | 4 ++--
 src/rbc/bids/qc.py      | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/rbc/bids/metrics.py b/src/rbc/bids/metrics.py
index b2bda843..422a9918 100644
--- a/src/rbc/bids/metrics.py
+++ b/src/rbc/bids/metrics.py
@@ -47,13 +47,13 @@ def resolve_metrics(
             deriv_df,
             suffix=Suffix.BOLD,
             desc="regressed",
-            extra={"reg": regressor},
+            extra={"reg": bids_safe_label(regressor)},
         ),
         "cleaned_bold": mni_q.expect(
             deriv_df,
             suffix=Suffix.BOLD,
             desc="preproc",
-            extra={"reg": regressor},
+            extra={"reg": bids_safe_label(regressor)},
         ),
     }
 
diff --git a/src/rbc/bids/qc.py b/src/rbc/bids/qc.py
index f249d619..e33154a7 100644
--- a/src/rbc/bids/qc.py
+++ b/src/rbc/bids/qc.py
@@ -57,7 +57,7 @@ def resolve_qc(
                 deriv_df,
                 suffix=Suffix.BOLD,
                 desc="preproc",
-                extra={"reg": reg},
+                extra={"reg": bids_safe_label(reg)},
             )
             for reg in regressors
         },

From dc372e3137fdff554e52cf07d631525cdf8afa56 Mon Sep 17 00:00:00 2001
From: Florian Rupprecht <floruppr@gmail.com>
Date: Thu, 9 Apr 2026 14:26:14 -0400
Subject: [PATCH 05/12] Truncate BOLD to 25 volumes in integration test fixture

Copies ds000001 and truncates the BOLD timeseries before running the
pipeline, cutting functional processing time significantly while still
exercising the full code path (motion correction, nuisance regression,
metrics, QC).
---
 tests/integration/test_all.py | 45 +++++++++++++++++++++++++++++++----
 1 file changed, 40 insertions(+), 5 deletions(-)

diff --git a/tests/integration/test_all.py b/tests/integration/test_all.py
index 28cf0f01..e365bd85 100644
--- a/tests/integration/test_all.py
+++ b/tests/integration/test_all.py
@@ -4,6 +4,9 @@
 individual subcommands in sequence (disk round-trip), then verifies that both
 approaches complete without errors, produce key derivative files, and yield
 identical outputs.
+
+The BOLD timeseries is truncated to a small number of volumes so that
+functional processing finishes in minutes rather than tens of minutes.
 """
 
 from __future__ import annotations
@@ -13,18 +16,24 @@
 from pathlib import Path
 from typing import TYPE_CHECKING
 
+import nibabel as nib
+import numpy as np
 import pytest
 
 if TYPE_CHECKING:
     from collections.abc import Sequence
 
-_TEST_DATASET = Path(__file__).parents[1] / "data" / "ds000001"
+_ORIG_DATASET = Path(__file__).parents[1] / "data" / "ds000001"
 
 # Subject with no session in ds000001.
 _SUB = "01"
 _TASK = "balloonanalogrisktask"
 _RUN = "01"
 
+# Number of BOLD volumes to keep. Must be enough for nuisance regression
+# (after discarding --start-tr 2) but small enough to be fast.
+_BOLD_VOLUMES = 25
+
 
 # ---------------------------------------------------------------------------
 # Helpers
@@ -73,14 +82,36 @@ def _runner(request: pytest.FixtureRequest) -> str:
 
 
 @pytest.fixture(scope="session")
-def all_output(tmp_path_factory: pytest.TempPathFactory, _runner: str) -> Path:
+def truncated_dataset(tmp_path_factory: pytest.TempPathFactory) -> Path:
+    """Copy ds000001 and truncate the BOLD to *_BOLD_VOLUMES* volumes."""
+    dest = tmp_path_factory.mktemp("ds") / "ds000001"
+    shutil.copytree(_ORIG_DATASET, dest)
+
+    bold = (
+        dest
+        / f"sub-{_SUB}"
+        / "func"
+        / f"sub-{_SUB}_task-{_TASK}_run-{_RUN}_bold.nii.gz"
+    )
+    img = nib.nifti1.load(bold)
+    data = np.asarray(img.dataobj[:, :, :, :_BOLD_VOLUMES])
+    nib.nifti1.save(nib.Nifti1Image(data, img.affine, img.header), bold)
+    return dest
+
+
+@pytest.fixture(scope="session")
+def all_output(
+    tmp_path_factory: pytest.TempPathFactory,
+    _runner: str,
+    truncated_dataset: Path,
+) -> Path:
     """Run ``rbc all`` once and return the output directory."""
     out = tmp_path_factory.mktemp("all") / "derivatives"
     out.mkdir()
     _run_rbc(
         [
             "all",
-            str(_TEST_DATASET),
+            str(truncated_dataset),
             "-o",
             str(out),
             "--runner",
@@ -92,13 +123,17 @@ def all_output(tmp_path_factory: pytest.TempPathFactory, _runner: str) -> Path:
 
 
 @pytest.fixture(scope="session")
-def sequential_output(tmp_path_factory: pytest.TempPathFactory, _runner: str) -> Path:
+def sequential_output(
+    tmp_path_factory: pytest.TempPathFactory,
+    _runner: str,
+    truncated_dataset: Path,
+) -> Path:
     """Run the four individual subcommands in order and return the output dir."""
     out = tmp_path_factory.mktemp("sequential") / "derivatives"
     out.mkdir()
 
     runner_args = ["--runner", _runner, *_COMMON_ARGS]
-    raw = str(_TEST_DATASET)
+    raw = str(truncated_dataset)
     deriv = str(out)
 
     # 1. anatomical (raw BIDS only)

From 8574005aca9396fdf430fc4c1e5c01a5500f3d3d Mon Sep 17 00:00:00 2001
From: Florian Rupprecht <floruppr@gmail.com>
Date: Thu, 9 Apr 2026 16:44:19 -0400
Subject: [PATCH 06/12] Bump BOLD truncation to 50 volumes for 36-parameter
 regression

25 volumes was too few: after discarding 2 TRs, only 23 timepoints
remained, which is fewer than the ~36 columns in the 36-parameter
nuisance regressor model, causing 3dTproject to fail.
---
 tests/integration/test_all.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/tests/integration/test_all.py b/tests/integration/test_all.py
index e365bd85..b1d09d2f 100644
--- a/tests/integration/test_all.py
+++ b/tests/integration/test_all.py
@@ -30,9 +30,10 @@
 _TASK = "balloonanalogrisktask"
 _RUN = "01"
 
-# Number of BOLD volumes to keep. Must be enough for nuisance regression
-# (after discarding --start-tr 2) but small enough to be fast.
-_BOLD_VOLUMES = 25
+# Number of BOLD volumes to keep. The 36-parameter regressor model needs
+# more timepoints than regressors (~36 columns + derivatives). After
+# discarding --start-tr 2, we need at least ~45 usable volumes.
+_BOLD_VOLUMES = 50
 
 
 # ---------------------------------------------------------------------------

From 5167f70e62b4f61f0451381f9fbae203fc3668f6 Mon Sep 17 00:00:00 2001
From: Florian Rupprecht <floruppr@gmail.com>
Date: Thu, 9 Apr 2026 16:46:35 -0400
Subject: [PATCH 07/12] Revert BOLD truncation, use full test dataset for now

Remove the volume truncation optimization so we can shake out all bugs
with the real data first. Can revisit truncation once the pipeline
passes end-to-end.
---
 tests/integration/test_all.py | 46 ++++-------------------------------
 1 file changed, 5 insertions(+), 41 deletions(-)

diff --git a/tests/integration/test_all.py b/tests/integration/test_all.py
index b1d09d2f..28cf0f01 100644
--- a/tests/integration/test_all.py
+++ b/tests/integration/test_all.py
@@ -4,9 +4,6 @@
 individual subcommands in sequence (disk round-trip), then verifies that both
 approaches complete without errors, produce key derivative files, and yield
 identical outputs.
-
-The BOLD timeseries is truncated to a small number of volumes so that
-functional processing finishes in minutes rather than tens of minutes.
 """
 
 from __future__ import annotations
@@ -16,25 +13,18 @@
 from pathlib import Path
 from typing import TYPE_CHECKING
 
-import nibabel as nib
-import numpy as np
 import pytest
 
 if TYPE_CHECKING:
     from collections.abc import Sequence
 
-_ORIG_DATASET = Path(__file__).parents[1] / "data" / "ds000001"
+_TEST_DATASET = Path(__file__).parents[1] / "data" / "ds000001"
 
 # Subject with no session in ds000001.
 _SUB = "01"
 _TASK = "balloonanalogrisktask"
 _RUN = "01"
 
-# Number of BOLD volumes to keep. The 36-parameter regressor model needs
-# more timepoints than regressors (~36 columns + derivatives). After
-# discarding --start-tr 2, we need at least ~45 usable volumes.
-_BOLD_VOLUMES = 50
-
 
 # ---------------------------------------------------------------------------
 # Helpers
@@ -83,36 +73,14 @@ def _runner(request: pytest.FixtureRequest) -> str:
 
 
 @pytest.fixture(scope="session")
-def truncated_dataset(tmp_path_factory: pytest.TempPathFactory) -> Path:
-    """Copy ds000001 and truncate the BOLD to *_BOLD_VOLUMES* volumes."""
-    dest = tmp_path_factory.mktemp("ds") / "ds000001"
-    shutil.copytree(_ORIG_DATASET, dest)
-
-    bold = (
-        dest
-        / f"sub-{_SUB}"
-        / "func"
-        / f"sub-{_SUB}_task-{_TASK}_run-{_RUN}_bold.nii.gz"
-    )
-    img = nib.nifti1.load(bold)
-    data = np.asarray(img.dataobj[:, :, :, :_BOLD_VOLUMES])
-    nib.nifti1.save(nib.Nifti1Image(data, img.affine, img.header), bold)
-    return dest
-
-
-@pytest.fixture(scope="session")
-def all_output(
-    tmp_path_factory: pytest.TempPathFactory,
-    _runner: str,
-    truncated_dataset: Path,
-) -> Path:
+def all_output(tmp_path_factory: pytest.TempPathFactory, _runner: str) -> Path:
     """Run ``rbc all`` once and return the output directory."""
     out = tmp_path_factory.mktemp("all") / "derivatives"
     out.mkdir()
     _run_rbc(
         [
             "all",
-            str(truncated_dataset),
+            str(_TEST_DATASET),
             "-o",
             str(out),
             "--runner",
@@ -124,17 +92,13 @@ def all_output(
 
 
 @pytest.fixture(scope="session")
-def sequential_output(
-    tmp_path_factory: pytest.TempPathFactory,
-    _runner: str,
-    truncated_dataset: Path,
-) -> Path:
+def sequential_output(tmp_path_factory: pytest.TempPathFactory, _runner: str) -> Path:
     """Run the four individual subcommands in order and return the output dir."""
     out = tmp_path_factory.mktemp("sequential") / "derivatives"
     out.mkdir()
 
     runner_args = ["--runner", _runner, *_COMMON_ARGS]
-    raw = str(truncated_dataset)
+    raw = str(_TEST_DATASET)
     deriv = str(out)
 
     # 1. anatomical (raw BIDS only)

From 4404b9a1e7579bdcc1b255a17b72b020f78d1a94 Mon Sep 17 00:00:00 2001
From: Florian Rupprecht <floruppr@gmail.com>
Date: Thu, 9 Apr 2026 20:54:14 -0400
Subject: [PATCH 08/12] Reuse anatomical outputs from rbc-all in sequential
 test

The sequential test now copies anat derivatives from the rbc-all run
instead of re-running brain extraction and registration, saving ~30-40
min of CI time. The disk round-trip handoff between functional, metrics,
and QC stages is still fully tested.
---
 tests/integration/test_all.py | 44 ++++++++++++++++++++++++-----------
 1 file changed, 31 insertions(+), 13 deletions(-)

diff --git a/tests/integration/test_all.py b/tests/integration/test_all.py
index 28cf0f01..a7a316a0 100644
--- a/tests/integration/test_all.py
+++ b/tests/integration/test_all.py
@@ -1,9 +1,12 @@
 """Integration test for ``rbc all`` pipeline stage handoff.
 
-Runs the full pipeline via ``rbc all`` (in-memory handoff) and via the four
-individual subcommands in sequence (disk round-trip), then verifies that both
-approaches complete without errors, produce key derivative files, and yield
-identical outputs.
+Runs the full pipeline via ``rbc all`` (in-memory handoff) and via the
+individual subcommands in sequence (disk round-trip), then verifies that
+both approaches complete without errors, produce key derivative files,
+and yield identical outputs.
+
+The sequential run reuses anatomical outputs from the ``rbc all`` run to
+avoid running brain extraction and registration twice.
 """
 
 from __future__ import annotations
@@ -92,25 +95,40 @@ def all_output(tmp_path_factory: pytest.TempPathFactory, _runner: str) -> Path:
 
 
 @pytest.fixture(scope="session")
-def sequential_output(tmp_path_factory: pytest.TempPathFactory, _runner: str) -> Path:
-    """Run the four individual subcommands in order and return the output dir."""
+def sequential_output(
+    tmp_path_factory: pytest.TempPathFactory,
+    _runner: str,
+    all_output: Path,
+) -> Path:
+    """Run functional/metrics/qc using anat outputs from ``rbc all``.
+
+    Copies the anatomical derivatives produced by the ``all_output``
+    fixture so that only the functional, metrics, and QC stages run,
+    saving ~30-40 min of redundant brain extraction and registration.
+    """
     out = tmp_path_factory.mktemp("sequential") / "derivatives"
     out.mkdir()
 
+    # Seed with anatomical outputs + dataset_description from rbc all
+    anat_src = all_output / f"sub-{_SUB}" / "anat"
+    anat_dst = out / f"sub-{_SUB}" / "anat"
+    shutil.copytree(anat_src, anat_dst)
+    shutil.copy2(
+        all_output / "dataset_description.json",
+        out / "dataset_description.json",
+    )
+
     runner_args = ["--runner", _runner, *_COMMON_ARGS]
     raw = str(_TEST_DATASET)
     deriv = str(out)
 
-    # 1. anatomical (raw BIDS only)
-    _run_rbc(["anatomical", raw, "-o", deriv, *runner_args])
-
-    # 2. functional (raw BIDS + anat derivatives)
+    # functional (raw BIDS + anat derivatives)
     _run_rbc(["functional", raw, deriv, "-o", deriv, *runner_args])
 
-    # 3. metrics (raw BIDS + derivatives from previous stages)
+    # metrics (derivatives from previous stages)
     _run_rbc(["metrics", raw, deriv, "-o", deriv, *runner_args])
 
-    # 4. qc (raw BIDS + derivatives from previous stages)
+    # qc (derivatives from previous stages)
     _run_rbc(["qc", raw, deriv, "-o", deriv, *runner_args])
 
     return out
@@ -179,7 +197,7 @@ def test_rbc_all_produces_derivatives(all_output: Path) -> None:
 
 @pytest.mark.slow
 def test_sequential_produces_derivatives(sequential_output: Path) -> None:
-    """Running anatomical/functional/metrics/qc individually produces the same files."""
+    """Running functional/metrics/qc individually produces the same files."""
     _assert_derivatives_exist(sequential_output)
 
 

From d064f8bd5312d535cf8306b33648728829dde234 Mon Sep 17 00:00:00 2001
From: Florian Rupprecht <floruppr@gmail.com>
Date: Thu, 9 Apr 2026 21:01:14 -0400
Subject: [PATCH 09/12] Print derivative file tree on assertion failure for CI
 debugging

---
 tests/integration/test_all.py | 25 +++++++++++++++++++------
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git a/tests/integration/test_all.py b/tests/integration/test_all.py
index a7a316a0..b5930c0d 100644
--- a/tests/integration/test_all.py
+++ b/tests/integration/test_all.py
@@ -65,6 +65,12 @@ def _relative_files(root: Path) -> set[str]:
     return {str(p.relative_to(root)) for p in root.rglob("*") if p.is_file()}
 
 
+def _file_tree(root: Path) -> str:
+    """Return a newline-separated listing of all files under *root*."""
+    files = sorted(p.relative_to(root) for p in root.rglob("*") if p.is_file())
+    return "\n".join(str(f) for f in files) if files else "(empty)"
+
+
 # ---------------------------------------------------------------------------
 # Fixtures — each pipeline variant runs once per session
 # ---------------------------------------------------------------------------
@@ -141,10 +147,13 @@ def sequential_output(
 
 def _assert_derivatives_exist(output_dir: Path) -> None:
     """Check that all expected derivative files are present."""
+    tree = _file_tree(output_dir)
     sub_dir = output_dir / f"sub-{_SUB}"
 
     # -- Dataset-level metadata --
-    assert (output_dir / "dataset_description.json").is_file()
+    assert (output_dir / "dataset_description.json").is_file(), (
+        f"Missing dataset_description.json\n--- file tree ---\n{tree}"
+    )
 
     # -- Anatomical derivatives --
     anat = sub_dir / "anat"
@@ -157,7 +166,9 @@ def _assert_derivatives_exist(output_dir: Path) -> None:
         f"sub-{_SUB}_desc-wmBBR_mask.nii.gz",
     ]
     for name in anat_files:
-        assert (anat / name).is_file(), f"Missing anatomical file: {name}"
+        assert (anat / name).is_file(), (
+            f"Missing anatomical file: {name}\n--- file tree ---\n{tree}"
+        )
 
     # -- Functional derivatives --
     func = sub_dir / "func"
@@ -169,18 +180,20 @@ def _assert_derivatives_exist(output_dir: Path) -> None:
         f"{bold_stem}_desc-brain_mask.nii.gz",
     ]
     for name in func_files:
-        assert (func / name).is_file(), f"Missing functional file: {name}"
+        assert (func / name).is_file(), (
+            f"Missing functional file: {name}\n--- file tree ---\n{tree}"
+        )
 
     # -- QC --
     qc_files = list(func.glob(f"{bold_stem}_space-*_desc-xcp_*_quality.tsv"))
-    assert qc_files, "No QC quality TSV files found"
+    assert qc_files, f"No QC quality TSV files found\n--- file tree ---\n{tree}"
 
     # -- Metrics --
     assert list(func.glob(f"{bold_stem}_space-*_*_timeseries.tsv")), (
-        "No timeseries TSV files found"
+        f"No timeseries TSV files found\n--- file tree ---\n{tree}"
     )
     assert list(func.glob(f"{bold_stem}_space-*_*_correlations.tsv")), (
-        "No correlation matrix TSV files found"
+        f"No correlation matrix TSV files found\n--- file tree ---\n{tree}"
     )
 
 

From 554e3b351df5d4e68c5ef3468b0d6037aa2f1864 Mon Sep 17 00:00:00 2001
From: Florian Rupprecht <floruppr@gmail.com>
Date: Thu, 9 Apr 2026 21:05:05 -0400
Subject: [PATCH 10/12] Fix run entity format and QC duplicate-match bug

The BIDS path builder formats integer run entities as "run-1" (not
"run-01"), so update the test assertion accordingly.

resolve_qc's template_bold query matched both the untagged preproc bold
and per-regressor preproc bolds. Add without=["reg"] to disambiguate.
---
 src/rbc/bids/qc.py            | 4 +++-
 tests/integration/test_all.py | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/rbc/bids/qc.py b/src/rbc/bids/qc.py
index e33154a7..e449a696 100644
--- a/src/rbc/bids/qc.py
+++ b/src/rbc/bids/qc.py
@@ -51,7 +51,9 @@ def resolve_qc(
         Dict with keys matching ``single_session_qc`` parameters.
     """
     return {
-        "template_bold": func_mni.expect(deriv_df, suffix=Suffix.BOLD, desc="preproc"),
+        "template_bold": func_mni.expect(
+            deriv_df, suffix=Suffix.BOLD, desc="preproc", without=["reg"]
+        ),
         "cleaned_bold": {
             reg: func_mni.expect(
                 deriv_df,
diff --git a/tests/integration/test_all.py b/tests/integration/test_all.py
index b5930c0d..511402fd 100644
--- a/tests/integration/test_all.py
+++ b/tests/integration/test_all.py
@@ -26,7 +26,7 @@
 # Subject with no session in ds000001.
 _SUB = "01"
 _TASK = "balloonanalogrisktask"
-_RUN = "01"
+_RUN = "1"
 
 
 # ---------------------------------------------------------------------------

From e91466a6eec1dd56e39b16982ca191ffaf0eb29e Mon Sep 17 00:00:00 2001
From: Florian Rupprecht <floruppr@gmail.com>
Date: Thu, 9 Apr 2026 22:55:01 -0400
Subject: [PATCH 11/12] Fix QC resolve to exclude reg extra entity and relax QC
 glob pattern

Use extra={"reg": False} instead of without=["reg"] since reg is an
extra entity (not a standard BIDS column). Also relax the QC glob
pattern since entity ordering puts reg before desc in the filename.
---
 src/rbc/bids/qc.py            | 2 +-
 tests/integration/test_all.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/rbc/bids/qc.py b/src/rbc/bids/qc.py
index e449a696..d036c05a 100644
--- a/src/rbc/bids/qc.py
+++ b/src/rbc/bids/qc.py
@@ -52,7 +52,7 @@ def resolve_qc(
     """
     return {
         "template_bold": func_mni.expect(
-            deriv_df, suffix=Suffix.BOLD, desc="preproc", without=["reg"]
+            deriv_df, suffix=Suffix.BOLD, desc="preproc", extra={"reg": False}
         ),
         "cleaned_bold": {
             reg: func_mni.expect(
diff --git a/tests/integration/test_all.py b/tests/integration/test_all.py
index 511402fd..fc6159fd 100644
--- a/tests/integration/test_all.py
+++ b/tests/integration/test_all.py
@@ -185,7 +185,7 @@ def _assert_derivatives_exist(output_dir: Path) -> None:
         )
 
     # -- QC --
-    qc_files = list(func.glob(f"{bold_stem}_space-*_desc-xcp_*_quality.tsv"))
+    qc_files = list(func.glob(f"{bold_stem}_space-*_*_quality.tsv"))
     assert qc_files, f"No QC quality TSV files found\n--- file tree ---\n{tree}"
 
     # -- Metrics --

From fb9ec6d208eb5627f0809979011b9e5aaf8dd763 Mon Sep 17 00:00:00 2001
From: Florian Rupprecht <floruppr@gmail.com>
Date: Fri, 10 Apr 2026 00:50:37 -0400
Subject: [PATCH 12/12] Route metrics and QC intermediates to niwrap work dir

compute_alff, compute_reho, and compute_timeseries default to writing
outputs next to their input files. When the standalone metrics/QC
orchestration resolves inputs from the derivatives dir, these
intermediates leak into the output alongside the BIDS-named exports.

Use generate_exec_folder() to create a dedicated work directory under
the niwrap runner's data_dir, and pass explicit out_file/out_dir paths
so intermediates stay out of the derivatives tree.
---
 src/rbc/workflows/metrics.py | 17 ++++++++++++++---
 src/rbc/workflows/qc.py      |  6 ++++--
 2 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/src/rbc/workflows/metrics.py b/src/rbc/workflows/metrics.py
index 06e4da0f..929f2d9d 100644
--- a/src/rbc/workflows/metrics.py
+++ b/src/rbc/workflows/metrics.py
@@ -17,6 +17,7 @@
 from rbc.core.metrics.smoothing import smooth
 from rbc.core.metrics.standardization import compute_zscore
 from rbc.core.metrics.timeseries import compute_timeseries
+from rbc.core.niwrap import generate_exec_folder
 
 if TYPE_CHECKING:
     from collections.abc import Mapping
@@ -76,15 +77,23 @@ def single_session_metrics(
     Returns:
         All metric outputs bundled in a :class:`MetricsOutputs` tuple.
     """
+    work_dir = generate_exec_folder("metrics")
+
     # 1. ALFF / fALFF on regressed BOLD (non-bandpassed)
     _logger.info("Computing ALFF/fALFF")
     alff_path, falff_path = compute_alff(
-        regressed_bold, template_brain_mask, tr=tr, method="qm"
+        regressed_bold,
+        template_brain_mask,
+        tr=tr,
+        method="qm",
+        out_file=work_dir / "alff.nii.gz",
     )
 
     # 2. ReHo on bandpass-filtered cleaned BOLD
     _logger.info("Computing ReHo")
-    reho_path = compute_reho(cleaned_bold, template_brain_mask)
+    reho_path = compute_reho(
+        cleaned_bold, template_brain_mask, out_file=work_dir / "reho.nii.gz"
+    )
 
     # 3. Smooth raw maps
     _logger.info("Smoothing maps (FWHM=%.1f mm)", fwhm)
@@ -103,7 +112,9 @@ def single_session_metrics(
     ts_outputs = {}
     for label, atlas_path in atlas_files.items():
         _logger.info("Extracting atlas timeseries (%s)", label)
-        ts_outputs[label] = compute_timeseries(cleaned_bold, atlas_path)
+        ts_outputs[label] = compute_timeseries(
+            cleaned_bold, atlas_path, out_dir=work_dir
+        )
 
     return MetricsOutputs(
         alff=alff_path,
diff --git a/src/rbc/workflows/qc.py b/src/rbc/workflows/qc.py
index 096cb14c..d2558428 100644
--- a/src/rbc/workflows/qc.py
+++ b/src/rbc/workflows/qc.py
@@ -17,6 +17,7 @@
 from niwrap import fsl
 
 from rbc.bids import TemplateSpace
+from rbc.core.niwrap import generate_exec_folder
 from rbc.core.qc.dvars import dvars_qc_metrics
 from rbc.core.qc.motion import framewise_displacement_jenkinson, motion_qc_metrics
 from rbc.core.qc.registration import registration_qc_metrics
@@ -85,6 +86,8 @@ def single_session_qc(
         All QC outputs bundled in a :class:`QCOutputs` tuple.
     """
     _logger.info("Computing QC metrics")
+    work_dir = generate_exec_folder("qc")
+
     # 1. Load motion data
     rms_values = np.loadtxt(rms_rel)
     motion_data = np.loadtxt(motion_params)
@@ -157,8 +160,7 @@ def single_session_qc(
         # 9. Write QC TSV
         qc_outputs.qc_file[regressor] = write_xcp_qc(
             qc_outputs.metrics[regressor],
-            template_bold.parent
-            / f"sub-{sub}_ses-{ses}_task-{task}_run-{run}_reg-{regressor}_qc.tsv",
+            work_dir / f"reg-{regressor}_qc.tsv",
         )
 
     # 10. RBC pass/fail