From f4b2ac50a5eef4b9e8d61bb728e62e6188e5bfa6 Mon Sep 17 00:00:00 2001 From: Janhavi Pillai Date: Thu, 16 Apr 2026 14:53:59 -0400 Subject: [PATCH 1/6] tsv to pq & corr to connectome --- src/rbc/bids/metrics.py | 8 ++-- src/rbc/bids/qc.py | 2 +- src/rbc/core/metrics/timeseries.py | 17 +++++---- src/rbc/core/qc/xcp.py | 5 ++- src/rbc/workflows/metrics.py | 10 ++--- src/rbc/workflows/qc.py | 2 +- tests/integration/functional/qc/test_xcp.py | 8 +--- tests/integration/test_all.py | 6 +-- tests/unit/bids/test_exports.py | 12 +++--- tests/unit/core/qc/test_xcp.py | 42 ++++++++++----------- tests/unit/core/test_timeseries.py | 17 +++++---- tests/unit/orchestration/test_qc.py | 2 +- 12 files changed, 63 insertions(+), 68 deletions(-) diff --git a/src/rbc/bids/metrics.py b/src/rbc/bids/metrics.py index 422a9918..fb81e460 100644 --- a/src/rbc/bids/metrics.py +++ b/src/rbc/bids/metrics.py @@ -89,13 +89,13 @@ def export_metrics( outputs.timeseries[atl], suffix="timeseries", desc="mean", - extension=".tsv", + extension=".parquet", atlas=bids_safe_label(atl), ) mex.save( - outputs.correlation_matrix[atl], - suffix="correlations", + outputs.connectome[atl], + suffix="connectome", desc="pearson", - extension=".tsv", + extension=".parquet", atlas=bids_safe_label(atl), ) diff --git a/src/rbc/bids/qc.py b/src/rbc/bids/qc.py index d036c05a..b5e52feb 100644 --- a/src/rbc/bids/qc.py +++ b/src/rbc/bids/qc.py @@ -114,6 +114,6 @@ def export_qc( outputs.qc_file[reg], suffix="quality", desc="xcp", - extension=".tsv", + extension=".parquet", extra={"reg": bids_safe_label(reg)}, ) diff --git a/src/rbc/core/metrics/timeseries.py b/src/rbc/core/metrics/timeseries.py index 3735f005..4beda412 100644 --- a/src/rbc/core/metrics/timeseries.py +++ b/src/rbc/core/metrics/timeseries.py @@ -11,6 +11,7 @@ from typing import NamedTuple import numpy as np +import polars as pl from rbc.core.niwrap import generate_exec_folder @@ -113,7 +114,7 @@ class TimeseriesOutputs(NamedTuple): """Outputs from :func:`compute_timeseries`.""" timeseries: Path - correlation_matrix: Path + connectome: Path labels: np.ndarray @@ -134,7 +135,7 @@ def compute_timeseries( *in_file*. Returns: - :class:`TimeseriesOutputs` containing paths to the TSV files and + :class:`TimeseriesOutputs` containing paths to the Parquet files and the ROI labels array. """ import nibabel as nib @@ -163,14 +164,16 @@ def compute_timeseries( out_dir.mkdir(parents=True, exist_ok=True) stem = in_file.name.split(".nii")[0] - ts_path = out_dir / f"{stem}_timeseries.tsv" - corr_path = out_dir / f"{stem}_correlation_matrix.tsv" + ts_path = out_dir / f"{stem}_timeseries.parquet" + corr_path = out_dir / f"{stem}_connectome.parquet" - np.savetxt(ts_path, ts, delimiter="\t") - np.savetxt(corr_path, corr, delimiter="\t") + col_names = [str(label) for label in labels] + + pl.DataFrame(ts, schema=col_names).write_parquet(ts_path) + pl.DataFrame(corr, schema=col_names).write_parquet(corr_path) return TimeseriesOutputs( timeseries=ts_path, - correlation_matrix=corr_path, + connectome=corr_path, labels=labels, ) diff --git a/src/rbc/core/qc/xcp.py b/src/rbc/core/qc/xcp.py index 63b07ff0..292031b3 100644 --- a/src/rbc/core/qc/xcp.py +++ b/src/rbc/core/qc/xcp.py @@ -143,11 +143,12 @@ def generate_xcp_qc( def write_xcp_qc(metrics: XCPQCMetrics, out_path: Path) -> Path: - """Write XCP QC metrics as a single-row TSV file. + """Write XCP QC metrics as a single-row Parquet file. Args: metrics: A populated :class:`XCPQCMetrics` row. out_path: Destination file path (parent dirs created if needed). + Should have a ``.parquet`` extension. Returns: The output path (same as *out_path*). @@ -156,7 +157,7 @@ def write_xcp_qc(metrics: XCPQCMetrics, out_path: Path) -> Path: out_path.parent.mkdir(parents=True, exist_ok=True) df = pl.DataFrame([metrics._asdict()]) - df.write_csv(out_path, separator="\t") + df.write_parquet(out_path) return out_path diff --git a/src/rbc/workflows/metrics.py b/src/rbc/workflows/metrics.py index 929f2d9d..701c8508 100644 --- a/src/rbc/workflows/metrics.py +++ b/src/rbc/workflows/metrics.py @@ -39,8 +39,8 @@ class MetricsOutputs(NamedTuple): reho: Raw ReHo map. reho_smooth: Smoothed ReHo map. reho_zscored: Z-scored (smoothed) ReHo map. - timeseries: Atlas-based mean timeseries TSV. - correlation_matrix: Pairwise correlation matrix TSV. + timeseries: Atlas-based mean timeseries file. + connectome: Pairwise connectome file. """ alff: Path @@ -53,7 +53,7 @@ class MetricsOutputs(NamedTuple): reho_smooth: Path reho_zscored: Path timeseries: dict[str, Path] - correlation_matrix: dict[str, Path] + connectome: dict[str, Path] def single_session_metrics( @@ -127,7 +127,5 @@ def single_session_metrics( reho_smooth=reho_smooth_path, reho_zscored=reho_zscored_path, timeseries={label: ts.timeseries for label, ts in ts_outputs.items()}, - correlation_matrix={ - label: ts.correlation_matrix for label, ts in ts_outputs.items() - }, + connectome={label: ts.connectome for label, ts in ts_outputs.items()}, ) diff --git a/src/rbc/workflows/qc.py b/src/rbc/workflows/qc.py index d2558428..46d27b40 100644 --- a/src/rbc/workflows/qc.py +++ b/src/rbc/workflows/qc.py @@ -160,7 +160,7 @@ def single_session_qc( # 9. Write QC TSV qc_outputs.qc_file[regressor] = write_xcp_qc( qc_outputs.metrics[regressor], - work_dir / f"reg-{regressor}_qc.tsv", + work_dir / f"reg-{regressor}_qc.parquet", ) # 10. RBC pass/fail diff --git a/tests/integration/functional/qc/test_xcp.py b/tests/integration/functional/qc/test_xcp.py index 5c7f81c4..212d4912 100644 --- a/tests/integration/functional/qc/test_xcp.py +++ b/tests/integration/functional/qc/test_xcp.py @@ -72,15 +72,11 @@ def test_xcp_qc_from_bold( assert metrics.meanDVInit >= 0 # Write TSV and verify - out_path = tmp_path / "xcp_qc.tsv" + out_path = tmp_path / "xcp_qc.parquet" write_xcp_qc(metrics, out_path) assert out_path.exists() - df = pl.read_csv( - out_path, - separator="\t", - schema_overrides={"sub": pl.Utf8, "ses": pl.Utf8}, - ) + df = pl.read_parquet(out_path) assert df.shape == (1, 24) assert df["sub"][0] == test_subject.subject_id assert df["meanFD"][0] == metrics.meanFD diff --git a/tests/integration/test_all.py b/tests/integration/test_all.py index fc6159fd..0ead5f8a 100644 --- a/tests/integration/test_all.py +++ b/tests/integration/test_all.py @@ -185,14 +185,14 @@ def _assert_derivatives_exist(output_dir: Path) -> None: ) # -- QC -- - qc_files = list(func.glob(f"{bold_stem}_space-*_*_quality.tsv")) + qc_files = list(func.glob(f"{bold_stem}_space-*_*_quality.parquet")) assert qc_files, f"No QC quality TSV files found\n--- file tree ---\n{tree}" # -- Metrics -- - assert list(func.glob(f"{bold_stem}_space-*_*_timeseries.tsv")), ( + assert list(func.glob(f"{bold_stem}_space-*_*_timeseries.parquet")), ( f"No timeseries TSV files found\n--- file tree ---\n{tree}" ) - assert list(func.glob(f"{bold_stem}_space-*_*_correlations.tsv")), ( + assert list(func.glob(f"{bold_stem}_space-*_*_connectome.parquet")), ( f"No correlation matrix TSV files found\n--- file tree ---\n{tree}" ) diff --git a/tests/unit/bids/test_exports.py b/tests/unit/bids/test_exports.py index 33ec4852..02dfc5fa 100644 --- a/tests/unit/bids/test_exports.py +++ b/tests/unit/bids/test_exports.py @@ -90,8 +90,8 @@ def _make_metrics_outputs(w: Path, atlases: list[str]) -> MetricsOutputs: reho=_dummy(w, "reho.nii.gz"), reho_smooth=_dummy(w, "reho_smooth.nii.gz"), reho_zscored=_dummy(w, "reho_z.nii.gz"), - timeseries={a: _dummy(w, f"ts_{a}.tsv") for a in atlases}, - correlation_matrix={a: _dummy(w, f"corr_{a}.tsv") for a in atlases}, + timeseries={a: _dummy(w, f"ts_{a}.parquet") for a in atlases}, + connectome={a: _dummy(w, f"connectome_{a}.parquet") for a in atlases}, ) @@ -296,9 +296,9 @@ class _FakeQC: qc_file: dict[str, Path] = field(default_factory=dict) mni = func_bids.derive(space="MNI152NLin6Asym") - qc = _FakeQC(qc_file={"36-parameter": _dummy(workdir, "qc.tsv")}) + qc = _FakeQC(qc_file={"36-parameter": _dummy(workdir, "qc.parquet")}) export_qc(mni, qc, regressors=["36-parameter"]) # type: ignore[arg-type] - saved = list(pipe_ctx.output_dir.rglob("*.tsv")) + saved = list(pipe_ctx.output_dir.rglob("*.parquet")) assert len(saved) == 1 assert "reg-36parameter" in saved[0].name @@ -314,7 +314,7 @@ class _FakeQC: regs = ["36-parameter", "aCompCor"] mni = func_bids.derive(space="MNI152NLin6Asym") - qc = _FakeQC(qc_file={r: _dummy(workdir, f"qc_{r}.tsv") for r in regs}) + qc = _FakeQC(qc_file={r: _dummy(workdir, f"qc_{r}.parquet") for r in regs}) export_qc(mni, qc, regressors=regs) # type: ignore[arg-type] - saved = list(pipe_ctx.output_dir.rglob("*.tsv")) + saved = list(pipe_ctx.output_dir.rglob("*.parquet")) assert len(saved) == 2 diff --git a/tests/unit/core/qc/test_xcp.py b/tests/unit/core/qc/test_xcp.py index 1fb43349..faf0b3ce 100644 --- a/tests/unit/core/qc/test_xcp.py +++ b/tests/unit/core/qc/test_xcp.py @@ -110,8 +110,8 @@ def _sample_xcp_metrics() -> XCPQCMetrics: class TestXCPQCMetrics: """Tests for XCPQCMetrics NamedTuple structure.""" - def test_field_names_match_tsv_columns(self) -> None: - """NamedTuple field names exactly match expected TSV columns.""" + def test_field_names_match_pq_columns(self) -> None: + """NamedTuple field names exactly match expected Parquet columns.""" assert list(XCPQCMetrics._fields) == EXPECTED_COLUMNS def test_field_count(self) -> None: @@ -224,38 +224,34 @@ class TestWriteXcpQc: def test_writes_file(self, tmp_path: Path) -> None: """Output file is created.""" - out = tmp_path / "qc.tsv" + out = tmp_path / "qc.parquet" result = write_xcp_qc(_sample_xcp_metrics(), out) assert result == out assert out.exists() def test_correct_headers(self, tmp_path: Path) -> None: - """TSV header row matches expected column names.""" - out = tmp_path / "qc.tsv" + """Parquet column names match expected column names.""" + out = tmp_path / "qc.parquet" write_xcp_qc(_sample_xcp_metrics(), out) - header = out.read_text().splitlines()[0].split("\t") - assert header == EXPECTED_COLUMNS + df = pl.read_parquet(out) + assert df.columns == EXPECTED_COLUMNS def test_correct_values(self, tmp_path: Path) -> None: """Values in TSV match the input metrics.""" m = _sample_xcp_metrics() - out = tmp_path / "qc.tsv" + out = tmp_path / "qc.parquet" write_xcp_qc(m, out) - values = out.read_text().splitlines()[1].split("\t") - assert values[0] == "01" # sub - assert values[3] == "1" # run - assert float(values[7]) == m.meanFD + df = pl.read_parquet(out) + assert df["sub"][0] == "01" # sub + assert df["run"][0] == 1 # run + assert df["meanFD"][0] == m.meanFD def test_round_trip_polars(self, tmp_path: Path) -> None: """Polars can read back the TSV and recover the values.""" m = _sample_xcp_metrics() - out = tmp_path / "qc.tsv" + out = tmp_path / "qc.parquet" write_xcp_qc(m, out) - df = pl.read_csv( - out, - separator="\t", - schema_overrides={"sub": pl.Utf8, "ses": pl.Utf8}, - ) + df = pl.read_parquet(out) assert df.shape == (1, 24) assert df["sub"][0] == "01" assert df["meanFD"][0] == m.meanFD @@ -263,16 +259,16 @@ def test_round_trip_polars(self, tmp_path: Path) -> None: def test_creates_parent_dirs(self, tmp_path: Path) -> None: """Parent directories are created if they don't exist.""" - out = tmp_path / "a" / "b" / "c" / "qc.tsv" + out = tmp_path / "a" / "b" / "c" / "qc.parquet" write_xcp_qc(_sample_xcp_metrics(), out) assert out.exists() def test_single_data_row(self, tmp_path: Path) -> None: - """TSV has exactly one header row and one data row.""" - out = tmp_path / "qc.tsv" + """Parquet has exactly one data row.""" + out = tmp_path / "qc.parquet" write_xcp_qc(_sample_xcp_metrics(), out) - lines = out.read_text().strip().splitlines() - assert len(lines) == 2 + df = pl.read_parquet(out) + assert df.shape[0] == 1 # =================================================================== diff --git a/tests/unit/core/test_timeseries.py b/tests/unit/core/test_timeseries.py index 56d89ff2..8a69a7a1 100644 --- a/tests/unit/core/test_timeseries.py +++ b/tests/unit/core/test_timeseries.py @@ -5,6 +5,7 @@ from typing import TYPE_CHECKING import numpy as np +import polars as pl import pytest from rbc.core.metrics.timeseries import ( @@ -247,7 +248,7 @@ def _make_nifti(self, data: np.ndarray, path: Path) -> None: img.to_filename(str(path)) def test_round_trip(self, tmp_path: Path) -> None: - """Should produce TSV files that can be loaded back.""" + """Should produce Parquet files that can be loaded back.""" rng = np.random.default_rng(20) data = rng.standard_normal((4, 4, 4, 10)) atlas = np.zeros((4, 4, 4), dtype=np.int16) @@ -262,13 +263,13 @@ def test_round_trip(self, tmp_path: Path) -> None: result = compute_timeseries(in_file, atlas_file) assert result.timeseries.exists() - assert result.correlation_matrix.exists() + assert result.connectome.exists() assert len(result.labels) == 2 - ts_loaded = np.loadtxt(result.timeseries, delimiter="\t") - assert ts_loaded.shape == (2, 10) + ts_loaded = pl.read_parquet(result.timeseries) + assert ts_loaded.shape == (10, 2) - corr_loaded = np.loadtxt(result.correlation_matrix, delimiter="\t") + corr_loaded = pl.read_parquet(result.connectome) assert corr_loaded.shape == (2, 2) def test_output_naming(self, tmp_path: Path) -> None: @@ -285,8 +286,8 @@ def test_output_naming(self, tmp_path: Path) -> None: result = compute_timeseries(in_file, atlas_file) - assert result.timeseries.name == "sub-01_bold_timeseries.tsv" - assert result.correlation_matrix.name == "sub-01_bold_correlation_matrix.tsv" + assert result.timeseries.name == "sub-01_bold_timeseries.parquet" + assert result.connectome.name == "sub-01_bold_connectome.parquet" def test_custom_out_dir(self, tmp_path: Path) -> None: """Should write to a custom output directory.""" @@ -304,7 +305,7 @@ def test_custom_out_dir(self, tmp_path: Path) -> None: result = compute_timeseries(in_file, atlas_file, out_dir=out_dir) assert result.timeseries.parent == out_dir - assert result.correlation_matrix.parent == out_dir + assert result.connectome.parent == out_dir def test_labels_in_output(self, tmp_path: Path) -> None: """Output labels should match the atlas ROI labels.""" diff --git a/tests/unit/orchestration/test_qc.py b/tests/unit/orchestration/test_qc.py index 4462b5eb..ab662691 100644 --- a/tests/unit/orchestration/test_qc.py +++ b/tests/unit/orchestration/test_qc.py @@ -24,7 +24,7 @@ def _mock_qc_outputs( ) -> QCOutputs: return QCOutputs( metrics={regressor: Mock(spec=XCPQCMetrics)}, - qc_file={regressor: Path("fake_workdir") / "qc.tsv"}, + qc_file={regressor: Path("fake_workdir") / "qc.parquet"}, passed=passed, ) From 12ebdee28776d032f9692f36028659703ae052aa Mon Sep 17 00:00:00 2001 From: Janhavi Pillai Date: Fri, 17 Apr 2026 15:05:15 -0400 Subject: [PATCH 2/6] docs changes --- src/rbc/core/qc/xcp.py | 2 +- src/rbc/workflows/qc.py | 4 ++-- tests/integration/functional/qc/test_xcp.py | 4 ++-- tests/unit/core/qc/test_xcp.py | 6 +++--- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/rbc/core/qc/xcp.py b/src/rbc/core/qc/xcp.py index 292031b3..37dde96e 100644 --- a/src/rbc/core/qc/xcp.py +++ b/src/rbc/core/qc/xcp.py @@ -148,7 +148,7 @@ def write_xcp_qc(metrics: XCPQCMetrics, out_path: Path) -> Path: Args: metrics: A populated :class:`XCPQCMetrics` row. out_path: Destination file path (parent dirs created if needed). - Should have a ``.parquet`` extension. + Parquet content will be written to this path. Returns: The output path (same as *out_path*). diff --git a/src/rbc/workflows/qc.py b/src/rbc/workflows/qc.py index 46d27b40..be6f1416 100644 --- a/src/rbc/workflows/qc.py +++ b/src/rbc/workflows/qc.py @@ -37,7 +37,7 @@ class QCOutputs: Attributes: metrics: All 24 XCP-style QC fields for the run. - qc_file: Path to the written single-row TSV. + qc_file: Path to the written single-row Parquet. passed: Whether the run passes RBC QC thresholds. """ @@ -157,7 +157,7 @@ def single_session_qc( norm=norm, ) - # 9. Write QC TSV + # 9. Write QC Parquet qc_outputs.qc_file[regressor] = write_xcp_qc( qc_outputs.metrics[regressor], work_dir / f"reg-{regressor}_qc.parquet", diff --git a/tests/integration/functional/qc/test_xcp.py b/tests/integration/functional/qc/test_xcp.py index 212d4912..1daf6ac1 100644 --- a/tests/integration/functional/qc/test_xcp.py +++ b/tests/integration/functional/qc/test_xcp.py @@ -32,7 +32,7 @@ def test_xcp_qc_from_bold( motion_corrected_bold: MotionCorrectedBOLD, tmp_path: Path, ) -> None: - """Compute all sub-metrics from real data, generate XCP TSV, and verify.""" + """Compute all sub-metrics from real data, generate XCP Parquet, and verify.""" mc = motion_corrected_bold.mc bold_data = motion_corrected_bold.bold_data mask = motion_corrected_bold.mask @@ -71,7 +71,7 @@ def test_xcp_qc_from_bold( assert metrics.meanFD >= 0 assert metrics.meanDVInit >= 0 - # Write TSV and verify + # Write Parquet and verify out_path = tmp_path / "xcp_qc.parquet" write_xcp_qc(metrics, out_path) assert out_path.exists() diff --git a/tests/unit/core/qc/test_xcp.py b/tests/unit/core/qc/test_xcp.py index faf0b3ce..11644875 100644 --- a/tests/unit/core/qc/test_xcp.py +++ b/tests/unit/core/qc/test_xcp.py @@ -220,7 +220,7 @@ def test_returns_named_tuple(self) -> None: # write_xcp_qc # =================================================================== class TestWriteXcpQc: - """Tests for writing XCP QC metrics to TSV.""" + """Tests for writing XCP QC metrics to Parquet.""" def test_writes_file(self, tmp_path: Path) -> None: """Output file is created.""" @@ -237,7 +237,7 @@ def test_correct_headers(self, tmp_path: Path) -> None: assert df.columns == EXPECTED_COLUMNS def test_correct_values(self, tmp_path: Path) -> None: - """Values in TSV match the input metrics.""" + """Values in Parquet match the input metrics.""" m = _sample_xcp_metrics() out = tmp_path / "qc.parquet" write_xcp_qc(m, out) @@ -247,7 +247,7 @@ def test_correct_values(self, tmp_path: Path) -> None: assert df["meanFD"][0] == m.meanFD def test_round_trip_polars(self, tmp_path: Path) -> None: - """Polars can read back the TSV and recover the values.""" + """Polars can read back the Parquet and recover the values.""" m = _sample_xcp_metrics() out = tmp_path / "qc.parquet" write_xcp_qc(m, out) From 35703b2d199ec5c7ece29f18b9eb36a9dcd6ccca Mon Sep 17 00:00:00 2001 From: Janhavi Pillai Date: Fri, 17 Apr 2026 15:47:12 -0400 Subject: [PATCH 3/6] match original tsv orientation --- src/rbc/core/metrics/timeseries.py | 9 ++++++--- tests/unit/core/test_timeseries.py | 2 +- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/rbc/core/metrics/timeseries.py b/src/rbc/core/metrics/timeseries.py index 4beda412..9df30e14 100644 --- a/src/rbc/core/metrics/timeseries.py +++ b/src/rbc/core/metrics/timeseries.py @@ -167,10 +167,13 @@ def compute_timeseries( ts_path = out_dir / f"{stem}_timeseries.parquet" corr_path = out_dir / f"{stem}_connectome.parquet" - col_names = [str(label) for label in labels] + # ROIs + roi_names = [str(label) for label in labels] + # Timepoints + tp_names = [str(i) for i in range(ts.shape[1])] - pl.DataFrame(ts, schema=col_names).write_parquet(ts_path) - pl.DataFrame(corr, schema=col_names).write_parquet(corr_path) + pl.DataFrame(ts, schema=tp_names).write_parquet(ts_path) + pl.DataFrame(corr, schema=roi_names).write_parquet(corr_path) return TimeseriesOutputs( timeseries=ts_path, diff --git a/tests/unit/core/test_timeseries.py b/tests/unit/core/test_timeseries.py index 8a69a7a1..60f377cb 100644 --- a/tests/unit/core/test_timeseries.py +++ b/tests/unit/core/test_timeseries.py @@ -267,7 +267,7 @@ def test_round_trip(self, tmp_path: Path) -> None: assert len(result.labels) == 2 ts_loaded = pl.read_parquet(result.timeseries) - assert ts_loaded.shape == (10, 2) + assert ts_loaded.shape == (2, 10) corr_loaded = pl.read_parquet(result.connectome) assert corr_loaded.shape == (2, 2) From 63e5012ee15b076327a78e57fc2a6a90f4c5cbd9 Mon Sep 17 00:00:00 2001 From: Janhavi Pillai Date: Fri, 17 Apr 2026 16:48:59 -0400 Subject: [PATCH 4/6] quality back to tsv --- src/rbc/bids/qc.py | 2 +- src/rbc/core/qc/xcp.py | 5 +- src/rbc/workflows/qc.py | 6 +-- tests/full_pipeline/longitudinal/test_all.py | 2 +- .../longitudinal/test_metrics.py | 8 ++-- tests/integration/functional/qc/test_xcp.py | 12 +++-- tests/unit/core/qc/test_xcp.py | 48 ++++++++++--------- tests/unit/orchestration/test_qc.py | 2 +- 8 files changed, 46 insertions(+), 39 deletions(-) diff --git a/src/rbc/bids/qc.py b/src/rbc/bids/qc.py index b5e52feb..d036c05a 100644 --- a/src/rbc/bids/qc.py +++ b/src/rbc/bids/qc.py @@ -114,6 +114,6 @@ def export_qc( outputs.qc_file[reg], suffix="quality", desc="xcp", - extension=".parquet", + extension=".tsv", extra={"reg": bids_safe_label(reg)}, ) diff --git a/src/rbc/core/qc/xcp.py b/src/rbc/core/qc/xcp.py index 37dde96e..63b07ff0 100644 --- a/src/rbc/core/qc/xcp.py +++ b/src/rbc/core/qc/xcp.py @@ -143,12 +143,11 @@ def generate_xcp_qc( def write_xcp_qc(metrics: XCPQCMetrics, out_path: Path) -> Path: - """Write XCP QC metrics as a single-row Parquet file. + """Write XCP QC metrics as a single-row TSV file. Args: metrics: A populated :class:`XCPQCMetrics` row. out_path: Destination file path (parent dirs created if needed). - Parquet content will be written to this path. Returns: The output path (same as *out_path*). @@ -157,7 +156,7 @@ def write_xcp_qc(metrics: XCPQCMetrics, out_path: Path) -> Path: out_path.parent.mkdir(parents=True, exist_ok=True) df = pl.DataFrame([metrics._asdict()]) - df.write_parquet(out_path) + df.write_csv(out_path, separator="\t") return out_path diff --git a/src/rbc/workflows/qc.py b/src/rbc/workflows/qc.py index be6f1416..d2558428 100644 --- a/src/rbc/workflows/qc.py +++ b/src/rbc/workflows/qc.py @@ -37,7 +37,7 @@ class QCOutputs: Attributes: metrics: All 24 XCP-style QC fields for the run. - qc_file: Path to the written single-row Parquet. + qc_file: Path to the written single-row TSV. passed: Whether the run passes RBC QC thresholds. """ @@ -157,10 +157,10 @@ def single_session_qc( norm=norm, ) - # 9. Write QC Parquet + # 9. Write QC TSV qc_outputs.qc_file[regressor] = write_xcp_qc( qc_outputs.metrics[regressor], - work_dir / f"reg-{regressor}_qc.parquet", + work_dir / f"reg-{regressor}_qc.tsv", ) # 10. RBC pass/fail diff --git a/tests/full_pipeline/longitudinal/test_all.py b/tests/full_pipeline/longitudinal/test_all.py index 87c77869..395b569e 100644 --- a/tests/full_pipeline/longitudinal/test_all.py +++ b/tests/full_pipeline/longitudinal/test_all.py @@ -62,7 +62,7 @@ def test_longitudinal_all_produces_derivatives( assert list(func.glob(f"{_STEM}_space-longitudinal_*_alff.nii.gz")), ( f"Missing ALFF\n--- file tree ---\n{tree}" ) - assert list(func.glob(f"{_STEM}_space-longitudinal_*_timeseries.tsv")), ( + assert list(func.glob(f"{_STEM}_space-longitudinal_*_timeseries.parquet")), ( f"Missing timeseries\n--- file tree ---\n{tree}" ) diff --git a/tests/full_pipeline/longitudinal/test_metrics.py b/tests/full_pipeline/longitudinal/test_metrics.py index 64d36f4c..f4cc93f4 100644 --- a/tests/full_pipeline/longitudinal/test_metrics.py +++ b/tests/full_pipeline/longitudinal/test_metrics.py @@ -58,8 +58,8 @@ def test_longitudinal_metrics_timeseries_exist( func = longitudinal_pipeline_data / f"sub-{_SUB}" / f"ses-{_SES}" / "func" tree = _file_tree(longitudinal_pipeline_data) - timeseries = list(func.glob(f"{_STEM}_space-longitudinal_*_timeseries.tsv")) - assert timeseries, f"No timeseries TSV found\n--- file tree ---\n{tree}" + timeseries = list(func.glob(f"{_STEM}_space-longitudinal_*_timeseries.parquet")) + assert timeseries, f"No timeseries Parquet found\n--- file tree ---\n{tree}" - correlations = list(func.glob(f"{_STEM}_space-longitudinal_*_correlations.tsv")) - assert correlations, f"No correlation TSV found\n--- file tree ---\n{tree}" + correlations = list(func.glob(f"{_STEM}_space-longitudinal_*_connectome.parquet")) + assert correlations, f"No connectome Parquet found\n--- file tree ---\n{tree}" diff --git a/tests/integration/functional/qc/test_xcp.py b/tests/integration/functional/qc/test_xcp.py index 1daf6ac1..5c7f81c4 100644 --- a/tests/integration/functional/qc/test_xcp.py +++ b/tests/integration/functional/qc/test_xcp.py @@ -32,7 +32,7 @@ def test_xcp_qc_from_bold( motion_corrected_bold: MotionCorrectedBOLD, tmp_path: Path, ) -> None: - """Compute all sub-metrics from real data, generate XCP Parquet, and verify.""" + """Compute all sub-metrics from real data, generate XCP TSV, and verify.""" mc = motion_corrected_bold.mc bold_data = motion_corrected_bold.bold_data mask = motion_corrected_bold.mask @@ -71,12 +71,16 @@ def test_xcp_qc_from_bold( assert metrics.meanFD >= 0 assert metrics.meanDVInit >= 0 - # Write Parquet and verify - out_path = tmp_path / "xcp_qc.parquet" + # Write TSV and verify + out_path = tmp_path / "xcp_qc.tsv" write_xcp_qc(metrics, out_path) assert out_path.exists() - df = pl.read_parquet(out_path) + df = pl.read_csv( + out_path, + separator="\t", + schema_overrides={"sub": pl.Utf8, "ses": pl.Utf8}, + ) assert df.shape == (1, 24) assert df["sub"][0] == test_subject.subject_id assert df["meanFD"][0] == metrics.meanFD diff --git a/tests/unit/core/qc/test_xcp.py b/tests/unit/core/qc/test_xcp.py index 11644875..1fb43349 100644 --- a/tests/unit/core/qc/test_xcp.py +++ b/tests/unit/core/qc/test_xcp.py @@ -110,8 +110,8 @@ def _sample_xcp_metrics() -> XCPQCMetrics: class TestXCPQCMetrics: """Tests for XCPQCMetrics NamedTuple structure.""" - def test_field_names_match_pq_columns(self) -> None: - """NamedTuple field names exactly match expected Parquet columns.""" + def test_field_names_match_tsv_columns(self) -> None: + """NamedTuple field names exactly match expected TSV columns.""" assert list(XCPQCMetrics._fields) == EXPECTED_COLUMNS def test_field_count(self) -> None: @@ -220,38 +220,42 @@ def test_returns_named_tuple(self) -> None: # write_xcp_qc # =================================================================== class TestWriteXcpQc: - """Tests for writing XCP QC metrics to Parquet.""" + """Tests for writing XCP QC metrics to TSV.""" def test_writes_file(self, tmp_path: Path) -> None: """Output file is created.""" - out = tmp_path / "qc.parquet" + out = tmp_path / "qc.tsv" result = write_xcp_qc(_sample_xcp_metrics(), out) assert result == out assert out.exists() def test_correct_headers(self, tmp_path: Path) -> None: - """Parquet column names match expected column names.""" - out = tmp_path / "qc.parquet" + """TSV header row matches expected column names.""" + out = tmp_path / "qc.tsv" write_xcp_qc(_sample_xcp_metrics(), out) - df = pl.read_parquet(out) - assert df.columns == EXPECTED_COLUMNS + header = out.read_text().splitlines()[0].split("\t") + assert header == EXPECTED_COLUMNS def test_correct_values(self, tmp_path: Path) -> None: - """Values in Parquet match the input metrics.""" + """Values in TSV match the input metrics.""" m = _sample_xcp_metrics() - out = tmp_path / "qc.parquet" + out = tmp_path / "qc.tsv" write_xcp_qc(m, out) - df = pl.read_parquet(out) - assert df["sub"][0] == "01" # sub - assert df["run"][0] == 1 # run - assert df["meanFD"][0] == m.meanFD + values = out.read_text().splitlines()[1].split("\t") + assert values[0] == "01" # sub + assert values[3] == "1" # run + assert float(values[7]) == m.meanFD def test_round_trip_polars(self, tmp_path: Path) -> None: - """Polars can read back the Parquet and recover the values.""" + """Polars can read back the TSV and recover the values.""" m = _sample_xcp_metrics() - out = tmp_path / "qc.parquet" + out = tmp_path / "qc.tsv" write_xcp_qc(m, out) - df = pl.read_parquet(out) + df = pl.read_csv( + out, + separator="\t", + schema_overrides={"sub": pl.Utf8, "ses": pl.Utf8}, + ) assert df.shape == (1, 24) assert df["sub"][0] == "01" assert df["meanFD"][0] == m.meanFD @@ -259,16 +263,16 @@ def test_round_trip_polars(self, tmp_path: Path) -> None: def test_creates_parent_dirs(self, tmp_path: Path) -> None: """Parent directories are created if they don't exist.""" - out = tmp_path / "a" / "b" / "c" / "qc.parquet" + out = tmp_path / "a" / "b" / "c" / "qc.tsv" write_xcp_qc(_sample_xcp_metrics(), out) assert out.exists() def test_single_data_row(self, tmp_path: Path) -> None: - """Parquet has exactly one data row.""" - out = tmp_path / "qc.parquet" + """TSV has exactly one header row and one data row.""" + out = tmp_path / "qc.tsv" write_xcp_qc(_sample_xcp_metrics(), out) - df = pl.read_parquet(out) - assert df.shape[0] == 1 + lines = out.read_text().strip().splitlines() + assert len(lines) == 2 # =================================================================== diff --git a/tests/unit/orchestration/test_qc.py b/tests/unit/orchestration/test_qc.py index ab662691..4462b5eb 100644 --- a/tests/unit/orchestration/test_qc.py +++ b/tests/unit/orchestration/test_qc.py @@ -24,7 +24,7 @@ def _mock_qc_outputs( ) -> QCOutputs: return QCOutputs( metrics={regressor: Mock(spec=XCPQCMetrics)}, - qc_file={regressor: Path("fake_workdir") / "qc.parquet"}, + qc_file={regressor: Path("fake_workdir") / "qc.tsv"}, passed=passed, ) From 6c7e389157c7e5b7fead37e7f2638b3e737507e6 Mon Sep 17 00:00:00 2001 From: Janhavi Pillai Date: Fri, 17 Apr 2026 17:07:45 -0400 Subject: [PATCH 5/6] update export tests for parquets --- tests/unit/bids/test_exports.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/unit/bids/test_exports.py b/tests/unit/bids/test_exports.py index 02dfc5fa..fe61af0d 100644 --- a/tests/unit/bids/test_exports.py +++ b/tests/unit/bids/test_exports.py @@ -296,9 +296,9 @@ class _FakeQC: qc_file: dict[str, Path] = field(default_factory=dict) mni = func_bids.derive(space="MNI152NLin6Asym") - qc = _FakeQC(qc_file={"36-parameter": _dummy(workdir, "qc.parquet")}) + qc = _FakeQC(qc_file={"36-parameter": _dummy(workdir, "qc.tsv")}) export_qc(mni, qc, regressors=["36-parameter"]) # type: ignore[arg-type] - saved = list(pipe_ctx.output_dir.rglob("*.parquet")) + saved = list(pipe_ctx.output_dir.rglob("*.tsv")) assert len(saved) == 1 assert "reg-36parameter" in saved[0].name @@ -314,7 +314,7 @@ class _FakeQC: regs = ["36-parameter", "aCompCor"] mni = func_bids.derive(space="MNI152NLin6Asym") - qc = _FakeQC(qc_file={r: _dummy(workdir, f"qc_{r}.parquet") for r in regs}) + qc = _FakeQC(qc_file={r: _dummy(workdir, f"qc_{r}.tsv") for r in regs}) export_qc(mni, qc, regressors=regs) # type: ignore[arg-type] - saved = list(pipe_ctx.output_dir.rglob("*.parquet")) + saved = list(pipe_ctx.output_dir.rglob("*.tsv")) assert len(saved) == 2 From 11ba5cbf77430c0e234dc2a94328e1358038ce0d Mon Sep 17 00:00:00 2001 From: Janhavi Pillai Date: Fri, 17 Apr 2026 17:48:51 -0400 Subject: [PATCH 6/6] revert qc to tsv --- tests/integration/test_all.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_all.py b/tests/integration/test_all.py index 0ead5f8a..fe996b0a 100644 --- a/tests/integration/test_all.py +++ b/tests/integration/test_all.py @@ -185,7 +185,7 @@ def _assert_derivatives_exist(output_dir: Path) -> None: ) # -- QC -- - qc_files = list(func.glob(f"{bold_stem}_space-*_*_quality.parquet")) + qc_files = list(func.glob(f"{bold_stem}_space-*_*_quality.tsv")) assert qc_files, f"No QC quality TSV files found\n--- file tree ---\n{tree}" # -- Metrics --