Skip to content

Commit a9ab564

Browse files
committed
tsv to pq & corr to connectome
1 parent 1e51d03 commit a9ab564

12 files changed

Lines changed: 63 additions & 68 deletions

File tree

src/rbc/bids/metrics.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -89,13 +89,13 @@ def export_metrics(
8989
outputs.timeseries[atl],
9090
suffix="timeseries",
9191
desc="mean",
92-
extension=".tsv",
92+
extension=".parquet",
9393
atlas=bids_safe_label(atl),
9494
)
9595
mex.save(
96-
outputs.correlation_matrix[atl],
97-
suffix="correlations",
96+
outputs.connectome[atl],
97+
suffix="connectome",
9898
desc="pearson",
99-
extension=".tsv",
99+
extension=".parquet",
100100
atlas=bids_safe_label(atl),
101101
)

src/rbc/bids/qc.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,6 @@ def export_qc(
114114
outputs.qc_file[reg],
115115
suffix="quality",
116116
desc="xcp",
117-
extension=".tsv",
117+
extension=".parquet",
118118
extra={"reg": bids_safe_label(reg)},
119119
)

src/rbc/core/metrics/timeseries.py

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from typing import NamedTuple
1212

1313
import numpy as np
14+
import polars as pl
1415

1516
from rbc.core.niwrap import generate_exec_folder
1617

@@ -113,7 +114,7 @@ class TimeseriesOutputs(NamedTuple):
113114
"""Outputs from :func:`compute_timeseries`."""
114115

115116
timeseries: Path
116-
correlation_matrix: Path
117+
connectome: Path
117118
labels: np.ndarray
118119

119120

@@ -134,7 +135,7 @@ def compute_timeseries(
134135
*in_file*.
135136
136137
Returns:
137-
:class:`TimeseriesOutputs` containing paths to the TSV files and
138+
:class:`TimeseriesOutputs` containing paths to the Parquet files and
138139
the ROI labels array.
139140
"""
140141
import nibabel as nib
@@ -163,14 +164,16 @@ def compute_timeseries(
163164
out_dir.mkdir(parents=True, exist_ok=True)
164165

165166
stem = in_file.name.split(".nii")[0]
166-
ts_path = out_dir / f"{stem}_timeseries.tsv"
167-
corr_path = out_dir / f"{stem}_correlation_matrix.tsv"
167+
ts_path = out_dir / f"{stem}_timeseries.parquet"
168+
corr_path = out_dir / f"{stem}_connectome.parquet"
168169

169-
np.savetxt(ts_path, ts, delimiter="\t")
170-
np.savetxt(corr_path, corr, delimiter="\t")
170+
col_names = [str(label) for label in labels]
171+
172+
pl.DataFrame(ts, schema=col_names).write_parquet(ts_path)
173+
pl.DataFrame(corr, schema=col_names).write_parquet(corr_path)
171174

172175
return TimeseriesOutputs(
173176
timeseries=ts_path,
174-
correlation_matrix=corr_path,
177+
connectome=corr_path,
175178
labels=labels,
176179
)

src/rbc/core/qc/xcp.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -143,11 +143,12 @@ def generate_xcp_qc(
143143

144144

145145
def write_xcp_qc(metrics: XCPQCMetrics, out_path: Path) -> Path:
146-
"""Write XCP QC metrics as a single-row TSV file.
146+
"""Write XCP QC metrics as a single-row Parquet file.
147147
148148
Args:
149149
metrics: A populated :class:`XCPQCMetrics` row.
150150
out_path: Destination file path (parent dirs created if needed).
151+
Should have a ``.parquet`` extension.
151152
152153
Returns:
153154
The output path (same as *out_path*).
@@ -156,7 +157,7 @@ def write_xcp_qc(metrics: XCPQCMetrics, out_path: Path) -> Path:
156157
out_path.parent.mkdir(parents=True, exist_ok=True)
157158

158159
df = pl.DataFrame([metrics._asdict()])
159-
df.write_csv(out_path, separator="\t")
160+
df.write_parquet(out_path)
160161

161162
return out_path
162163

src/rbc/workflows/metrics.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,8 @@ class MetricsOutputs(NamedTuple):
3939
reho: Raw ReHo map.
4040
reho_smooth: Smoothed ReHo map.
4141
reho_zscored: Z-scored (smoothed) ReHo map.
42-
timeseries: Atlas-based mean timeseries TSV.
43-
correlation_matrix: Pairwise correlation matrix TSV.
42+
timeseries: Atlas-based mean timeseries file.
43+
connectome: Pairwise connectome file.
4444
"""
4545

4646
alff: Path
@@ -53,7 +53,7 @@ class MetricsOutputs(NamedTuple):
5353
reho_smooth: Path
5454
reho_zscored: Path
5555
timeseries: dict[str, Path]
56-
correlation_matrix: dict[str, Path]
56+
connectome: dict[str, Path]
5757

5858

5959
def single_session_metrics(
@@ -127,7 +127,5 @@ def single_session_metrics(
127127
reho_smooth=reho_smooth_path,
128128
reho_zscored=reho_zscored_path,
129129
timeseries={label: ts.timeseries for label, ts in ts_outputs.items()},
130-
correlation_matrix={
131-
label: ts.correlation_matrix for label, ts in ts_outputs.items()
132-
},
130+
connectome={label: ts.connectome for label, ts in ts_outputs.items()},
133131
)

src/rbc/workflows/qc.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ def single_session_qc(
160160
# 9. Write QC Parquet file
161161
qc_outputs.qc_file[regressor] = write_xcp_qc(
162162
qc_outputs.metrics[regressor],
163-
work_dir / f"reg-{regressor}_qc.tsv",
163+
work_dir / f"reg-{regressor}_qc.parquet",
164164
)
165165

166166
# 10. RBC pass/fail

tests/integration/functional/qc/test_xcp.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -72,15 +72,11 @@ def test_xcp_qc_from_bold(
7272
assert metrics.meanDVInit >= 0
7373

7474
# Write Parquet and verify
75-
out_path = tmp_path / "xcp_qc.tsv"
75+
out_path = tmp_path / "xcp_qc.parquet"
7676
write_xcp_qc(metrics, out_path)
7777
assert out_path.exists()
7878

79-
df = pl.read_csv(
80-
out_path,
81-
separator="\t",
82-
schema_overrides={"sub": pl.Utf8, "ses": pl.Utf8},
83-
)
79+
df = pl.read_parquet(out_path)
8480
assert df.shape == (1, 24)
8581
assert df["sub"][0] == test_subject.subject_id
8682
assert df["meanFD"][0] == metrics.meanFD

tests/integration/test_all.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -185,14 +185,14 @@ def _assert_derivatives_exist(output_dir: Path) -> None:
185185
)
186186

187187
# -- QC --
188-
qc_files = list(func.glob(f"{bold_stem}_space-*_*_quality.tsv"))
188+
qc_files = list(func.glob(f"{bold_stem}_space-*_*_quality.parquet"))
189189
assert qc_files, f"No QC quality TSV files found\n--- file tree ---\n{tree}"
190190

191191
# -- Metrics --
192-
assert list(func.glob(f"{bold_stem}_space-*_*_timeseries.tsv")), (
192+
assert list(func.glob(f"{bold_stem}_space-*_*_timeseries.parquet")), (
193193
f"No timeseries TSV files found\n--- file tree ---\n{tree}"
194194
)
195-
assert list(func.glob(f"{bold_stem}_space-*_*_correlations.tsv")), (
195+
assert list(func.glob(f"{bold_stem}_space-*_*_connectome.parquet")), (
196196
f"No correlation matrix TSV files found\n--- file tree ---\n{tree}"
197197
)
198198

tests/unit/bids/test_exports.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -89,8 +89,8 @@ def _make_metrics_outputs(w: Path, atlases: list[str]) -> MetricsOutputs:
8989
reho=_dummy(w, "reho.nii.gz"),
9090
reho_smooth=_dummy(w, "reho_smooth.nii.gz"),
9191
reho_zscored=_dummy(w, "reho_z.nii.gz"),
92-
timeseries={a: _dummy(w, f"ts_{a}.tsv") for a in atlases},
93-
correlation_matrix={a: _dummy(w, f"corr_{a}.tsv") for a in atlases},
92+
timeseries={a: _dummy(w, f"ts_{a}.parquet") for a in atlases},
93+
connectome={a: _dummy(w, f"connectome_{a}.parquet") for a in atlases},
9494
)
9595

9696

@@ -294,9 +294,9 @@ class _FakeQC:
294294
qc_file: dict[str, Path] = field(default_factory=dict)
295295

296296
mni = func_bids.derive(space="MNI152NLin6Asym")
297-
qc = _FakeQC(qc_file={"36-parameter": _dummy(workdir, "qc.tsv")})
297+
qc = _FakeQC(qc_file={"36-parameter": _dummy(workdir, "qc.parquet")})
298298
export_qc(mni, qc, regressors=["36-parameter"]) # type: ignore[arg-type]
299-
saved = list(pipe_ctx.output_dir.rglob("*.tsv"))
299+
saved = list(pipe_ctx.output_dir.rglob("*.parquet"))
300300
assert len(saved) == 1
301301
assert "reg-36parameter" in saved[0].name
302302

@@ -312,7 +312,7 @@ class _FakeQC:
312312

313313
regs = ["36-parameter", "aCompCor"]
314314
mni = func_bids.derive(space="MNI152NLin6Asym")
315-
qc = _FakeQC(qc_file={r: _dummy(workdir, f"qc_{r}.tsv") for r in regs})
315+
qc = _FakeQC(qc_file={r: _dummy(workdir, f"qc_{r}.parquet") for r in regs})
316316
export_qc(mni, qc, regressors=regs) # type: ignore[arg-type]
317-
saved = list(pipe_ctx.output_dir.rglob("*.tsv"))
317+
saved = list(pipe_ctx.output_dir.rglob("*.parquet"))
318318
assert len(saved) == 2

tests/unit/core/qc/test_xcp.py

Lines changed: 19 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -110,8 +110,8 @@ def _sample_xcp_metrics() -> XCPQCMetrics:
110110
class TestXCPQCMetrics:
111111
"""Tests for XCPQCMetrics NamedTuple structure."""
112112

113-
def test_field_names_match_tsv_columns(self) -> None:
114-
"""NamedTuple field names exactly match expected TSV columns."""
113+
def test_field_names_match_pq_columns(self) -> None:
114+
"""NamedTuple field names exactly match expected Parquet columns."""
115115
assert list(XCPQCMetrics._fields) == EXPECTED_COLUMNS
116116

117117
def test_field_count(self) -> None:
@@ -224,55 +224,51 @@ class TestWriteXcpQc:
224224

225225
def test_writes_file(self, tmp_path: Path) -> None:
226226
"""Output file is created."""
227-
out = tmp_path / "qc.tsv"
227+
out = tmp_path / "qc.parquet"
228228
result = write_xcp_qc(_sample_xcp_metrics(), out)
229229
assert result == out
230230
assert out.exists()
231231

232232
def test_correct_headers(self, tmp_path: Path) -> None:
233-
"""TSV header row matches expected column names."""
234-
out = tmp_path / "qc.tsv"
233+
"""Parquet column names match expected column names."""
234+
out = tmp_path / "qc.parquet"
235235
write_xcp_qc(_sample_xcp_metrics(), out)
236-
header = out.read_text().splitlines()[0].split("\t")
237-
assert header == EXPECTED_COLUMNS
236+
df = pl.read_parquet(out)
237+
assert df.columns == EXPECTED_COLUMNS
238238

239239
def test_correct_values(self, tmp_path: Path) -> None:
240240
"""Values in TSV match the input metrics."""
241241
m = _sample_xcp_metrics()
242-
out = tmp_path / "qc.tsv"
242+
out = tmp_path / "qc.parquet"
243243
write_xcp_qc(m, out)
244-
values = out.read_text().splitlines()[1].split("\t")
245-
assert values[0] == "01" # sub
246-
assert values[3] == "1" # run
247-
assert float(values[7]) == m.meanFD
244+
df = pl.read_parquet(out)
245+
assert df["sub"][0] == "01" # sub
246+
assert df["run"][0] == 1 # run
247+
assert df["meanFD"][0] == m.meanFD
248248

249249
def test_round_trip_polars(self, tmp_path: Path) -> None:
250250
"""Polars can read back the TSV and recover the values."""
251251
m = _sample_xcp_metrics()
252-
out = tmp_path / "qc.tsv"
252+
out = tmp_path / "qc.parquet"
253253
write_xcp_qc(m, out)
254-
df = pl.read_csv(
255-
out,
256-
separator="\t",
257-
schema_overrides={"sub": pl.Utf8, "ses": pl.Utf8},
258-
)
254+
df = pl.read_parquet(out)
259255
assert df.shape == (1, 24)
260256
assert df["sub"][0] == "01"
261257
assert df["meanFD"][0] == m.meanFD
262258
assert df["normCoverage"][0] == m.normCoverage
263259

264260
def test_creates_parent_dirs(self, tmp_path: Path) -> None:
265261
"""Parent directories are created if they don't exist."""
266-
out = tmp_path / "a" / "b" / "c" / "qc.tsv"
262+
out = tmp_path / "a" / "b" / "c" / "qc.parquet"
267263
write_xcp_qc(_sample_xcp_metrics(), out)
268264
assert out.exists()
269265

270266
def test_single_data_row(self, tmp_path: Path) -> None:
271-
"""TSV has exactly one header row and one data row."""
272-
out = tmp_path / "qc.tsv"
267+
"""Parquet has exactly one data row."""
268+
out = tmp_path / "qc.parquet"
273269
write_xcp_qc(_sample_xcp_metrics(), out)
274-
lines = out.read_text().strip().splitlines()
275-
assert len(lines) == 2
270+
df = pl.read_parquet(out)
271+
assert df.shape[0] == 1
276272

277273

278274
# ===================================================================

0 commit comments

Comments
 (0)