Fix per-atlas overwrite in single_session_metrics + atlas dtype (#344)

nx10 · web-flow · commit 7cfa7582a292 · 2026-05-22T20:19:18.000-04:00
`compute_timeseries` derives its output filename from the BOLD stem, so
every atlas iteration in `single_session_metrics` was writing to the same
file. Every `MetricsOutputs.timeseries[label]` then pointed at the
last-processed atlas's data, silently corrupting the regular RBC pipeline's
atlas outputs (timeseries + Pearson correlations). Give each atlas its own
`out_dir`.

Also switch `compute_timeseries` from `get_fdata().astype(int)` to
`np.asarray(atlas_img.dataobj).astype(int)` so integer atlas labels are read
in their on-disk dtype instead of round-tripping through float64. Caveat:
nibabel's array proxy still applies `scl_slope`/`scl_inter` when the header
carries non-trivial scaling (only `dataobj.get_unscaled()` skips it), so an
atlas that mistakenly ships with scaling must still be fixed at the source.

Regression test in `tests/unit/workflows/test_metrics.py` builds two
atlases with different ROI counts (3 and 5) and asserts each is preserved
in `MetricsOutputs`, with distinct file paths.
diff --git a/src/rbc/core/metrics/timeseries.py b/src/rbc/core/metrics/timeseries.py
@@ -148,7 +148,10 @@ def compute_timeseries(
     if atlas_img.shape[:3] != img.shape[:3]:
         atlas_img = resample_from_to(atlas_img, (img.shape[:3], img.affine), order=0)
 
-    atlas_data = atlas_img.get_fdata().astype(int)
+    # Read via ``dataobj`` so unscaled integer labels keep their on-disk dtype
+    # rather than round-tripping through float64. NOTE: nibabel still applies
+    # non-trivial ``scl_slope``/``scl_inter`` here; see ``get_unscaled()``.
+    atlas_data = np.asarray(atlas_img.dataobj).astype(int)
 
     data = img.get_fdata()
     labels = np.unique(atlas_data)
diff --git a/src/rbc/workflows/metrics.py b/src/rbc/workflows/metrics.py
@@ -108,12 +108,15 @@ def single_session_metrics(
     reho_zscored_path = compute_zscore(reho_smooth_path, template_brain_mask)
 
     # 5. Atlas timeseries + correlation matrix from nuisance-regressed,
-    # bandpass-filtered BOLD
+    # bandpass-filtered BOLD. Each atlas needs its own ``out_dir`` so the
+    # BOLD-stem-derived output filename doesn't collide across atlases.
     ts_outputs = {}
     for label, atlas_path in atlas_files.items():
         _logger.info("Extracting atlas timeseries (%s)", label)
+        atlas_dir = work_dir / f"atlas-{label}"
+        atlas_dir.mkdir(parents=True, exist_ok=True)
         ts_outputs[label] = compute_timeseries(
-            cleaned_bold, atlas_path, out_dir=work_dir
+            cleaned_bold, atlas_path, out_dir=atlas_dir
         )
 
     return MetricsOutputs(
diff --git a/tests/unit/workflows/__init__.py b/tests/unit/workflows/__init__.py
@@ -0,0 +1 @@
+"""Workflow module tests."""
diff --git a/tests/unit/workflows/test_metrics.py b/tests/unit/workflows/test_metrics.py
@@ -0,0 +1,111 @@
+"""Unit tests for rbc.workflows.metrics."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import nibabel as nib
+import numpy as np
+
+from rbc.workflows import metrics as metrics_mod
+from rbc.workflows.metrics import single_session_metrics
+
+if TYPE_CHECKING:
+    from pathlib import Path
+
+    import pytest
+
+
+def _save_nifti(path: Path, data: np.ndarray) -> None:
+    nib.nifti1.Nifti1Image(data, affine=np.eye(4)).to_filename(str(path))
+
+
+def test_atlas_outputs_are_per_atlas(
+    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
+) -> None:
+    """Each atlas's timeseries must land in its own file with its own ROI count.
+
+    Regression for a prior single_session_metrics bug where every atlas's
+    ``compute_timeseries`` call shared the same ``out_dir`` and overwrote
+    the previous atlas's file; ``MetricsOutputs.timeseries[label]`` ended
+    up pointing at the last-iterated atlas's data for every key.
+    """
+    rng = np.random.default_rng(0)
+    bold = rng.standard_normal((6, 6, 6, 8))
+    mask = np.ones((6, 6, 6), dtype=np.int16)
+
+    atlas_3 = np.zeros((6, 6, 6), dtype=np.int16)
+    atlas_3[0:2] = 1
+    atlas_3[2:4] = 2
+    atlas_3[4:6] = 3
+
+    atlas_5 = np.zeros((6, 6, 6), dtype=np.int16)
+    for i in range(5):
+        atlas_5[:, i, :] = i + 1
+    # last column unlabeled (kept as 0) so label set is exactly {1..5}
+    atlas_5[:, 5, :] = 0
+
+    bold_path = tmp_path / "bold.nii.gz"
+    mask_path = tmp_path / "mask.nii.gz"
+    atlas_3_path = tmp_path / "atlas3.nii.gz"
+    atlas_5_path = tmp_path / "atlas5.nii.gz"
+    _save_nifti(bold_path, bold)
+    _save_nifti(mask_path, mask.astype(np.float64))
+    _save_nifti(atlas_3_path, atlas_3.astype(np.float64))
+    _save_nifti(atlas_5_path, atlas_5.astype(np.float64))
+
+    # Skip the scalar maps -- this test only cares about the atlas loop.
+    from pathlib import Path as _Path
+
+    counter = {"n": 0}
+
+    def _next_scratch(name: str) -> _Path:
+        counter["n"] += 1
+        p = tmp_path / f"{name}_{counter['n']}.nii.gz"
+        _save_nifti(p, np.zeros((6, 6, 6)))
+        return p
+
+    def _scalar_pair(*_args: object, **kwargs: object) -> tuple[_Path, _Path]:
+        out_file = kwargs.get("out_file")
+        alff = (
+            _Path(out_file)  # type: ignore[arg-type]
+            if out_file is not None
+            else _next_scratch("alff")
+        )
+        if not alff.exists():
+            _save_nifti(alff, np.zeros((6, 6, 6)))
+        return alff, _next_scratch("falff")
+
+    def _scalar_single(*_args: object, **_kwargs: object) -> _Path:
+        return _next_scratch("scalar")
+
+    def _smooth(in_path: _Path, _mask: _Path, **_kwargs: object) -> _Path:
+        return in_path
+
+    monkeypatch.setattr(metrics_mod, "compute_alff", _scalar_pair)
+    monkeypatch.setattr(metrics_mod, "compute_reho", _scalar_single)
+    monkeypatch.setattr(metrics_mod, "smooth", _smooth)
+    monkeypatch.setattr(metrics_mod, "compute_zscore", _scalar_single)
+
+    outputs = single_session_metrics(
+        regressed_bold=bold_path,
+        cleaned_bold=bold_path,
+        template_brain_mask=mask_path,
+        tr=2.0,
+        atlas_files={"atl3": atlas_3_path, "atl5": atlas_5_path},
+        fwhm=6.0,
+    )
+
+    # Distinct files per atlas, never overwriting each other.
+    assert outputs.timeseries["atl3"] != outputs.timeseries["atl5"]
+    assert outputs.correlation_matrix["atl3"] != outputs.correlation_matrix["atl5"]
+
+    ts3 = np.loadtxt(outputs.timeseries["atl3"], delimiter="\t")
+    ts5 = np.loadtxt(outputs.timeseries["atl5"], delimiter="\t")
+    assert ts3.shape == (3, 8)
+    assert ts5.shape == (5, 8)
+
+    corr3 = np.loadtxt(outputs.correlation_matrix["atl3"], delimiter="\t")
+    corr5 = np.loadtxt(outputs.correlation_matrix["atl5"], delimiter="\t")
+    assert corr3.shape == (3, 3)
+    assert corr5.shape == (5, 5)