diff --git a/src/rbc/bids/metrics.py b/src/rbc/bids/metrics.py index 422a9918..fb81e460 100644 --- a/src/rbc/bids/metrics.py +++ b/src/rbc/bids/metrics.py @@ -89,13 +89,13 @@ def export_metrics( outputs.timeseries[atl], suffix="timeseries", desc="mean", - extension=".tsv", + extension=".parquet", atlas=bids_safe_label(atl), ) mex.save( - outputs.correlation_matrix[atl], - suffix="correlations", + outputs.connectome[atl], + suffix="connectome", desc="pearson", - extension=".tsv", + extension=".parquet", atlas=bids_safe_label(atl), ) diff --git a/src/rbc/core/metrics/timeseries.py b/src/rbc/core/metrics/timeseries.py index 3735f005..9df30e14 100644 --- a/src/rbc/core/metrics/timeseries.py +++ b/src/rbc/core/metrics/timeseries.py @@ -11,6 +11,7 @@ from typing import NamedTuple import numpy as np +import polars as pl from rbc.core.niwrap import generate_exec_folder @@ -113,7 +114,7 @@ class TimeseriesOutputs(NamedTuple): """Outputs from :func:`compute_timeseries`.""" timeseries: Path - correlation_matrix: Path + connectome: Path labels: np.ndarray @@ -134,7 +135,7 @@ def compute_timeseries( *in_file*. Returns: - :class:`TimeseriesOutputs` containing paths to the TSV files and + :class:`TimeseriesOutputs` containing paths to the Parquet files and the ROI labels array. """ import nibabel as nib @@ -163,14 +164,19 @@ def compute_timeseries( out_dir.mkdir(parents=True, exist_ok=True) stem = in_file.name.split(".nii")[0] - ts_path = out_dir / f"{stem}_timeseries.tsv" - corr_path = out_dir / f"{stem}_correlation_matrix.tsv" + ts_path = out_dir / f"{stem}_timeseries.parquet" + corr_path = out_dir / f"{stem}_connectome.parquet" - np.savetxt(ts_path, ts, delimiter="\t") - np.savetxt(corr_path, corr, delimiter="\t") + # ROIs + roi_names = [str(label) for label in labels] + # Timepoints + tp_names = [str(i) for i in range(ts.shape[1])] + + pl.DataFrame(ts, schema=tp_names).write_parquet(ts_path) + pl.DataFrame(corr, schema=roi_names).write_parquet(corr_path) return TimeseriesOutputs( timeseries=ts_path, - correlation_matrix=corr_path, + connectome=corr_path, labels=labels, ) diff --git a/src/rbc/workflows/metrics.py b/src/rbc/workflows/metrics.py index 929f2d9d..701c8508 100644 --- a/src/rbc/workflows/metrics.py +++ b/src/rbc/workflows/metrics.py @@ -39,8 +39,8 @@ class MetricsOutputs(NamedTuple): reho: Raw ReHo map. reho_smooth: Smoothed ReHo map. reho_zscored: Z-scored (smoothed) ReHo map. - timeseries: Atlas-based mean timeseries TSV. - correlation_matrix: Pairwise correlation matrix TSV. + timeseries: Atlas-based mean timeseries file. + connectome: Pairwise connectome file. """ alff: Path @@ -53,7 +53,7 @@ class MetricsOutputs(NamedTuple): reho_smooth: Path reho_zscored: Path timeseries: dict[str, Path] - correlation_matrix: dict[str, Path] + connectome: dict[str, Path] def single_session_metrics( @@ -127,7 +127,5 @@ def single_session_metrics( reho_smooth=reho_smooth_path, reho_zscored=reho_zscored_path, timeseries={label: ts.timeseries for label, ts in ts_outputs.items()}, - correlation_matrix={ - label: ts.correlation_matrix for label, ts in ts_outputs.items() - }, + connectome={label: ts.connectome for label, ts in ts_outputs.items()}, ) diff --git a/tests/full_pipeline/longitudinal/test_all.py b/tests/full_pipeline/longitudinal/test_all.py index 87c77869..395b569e 100644 --- a/tests/full_pipeline/longitudinal/test_all.py +++ b/tests/full_pipeline/longitudinal/test_all.py @@ -62,7 +62,7 @@ def test_longitudinal_all_produces_derivatives( assert list(func.glob(f"{_STEM}_space-longitudinal_*_alff.nii.gz")), ( f"Missing ALFF\n--- file tree ---\n{tree}" ) - assert list(func.glob(f"{_STEM}_space-longitudinal_*_timeseries.tsv")), ( + assert list(func.glob(f"{_STEM}_space-longitudinal_*_timeseries.parquet")), ( f"Missing timeseries\n--- file tree ---\n{tree}" ) diff --git a/tests/full_pipeline/longitudinal/test_metrics.py b/tests/full_pipeline/longitudinal/test_metrics.py index 64d36f4c..f4cc93f4 100644 --- a/tests/full_pipeline/longitudinal/test_metrics.py +++ b/tests/full_pipeline/longitudinal/test_metrics.py @@ -58,8 +58,8 @@ def test_longitudinal_metrics_timeseries_exist( func = longitudinal_pipeline_data / f"sub-{_SUB}" / f"ses-{_SES}" / "func" tree = _file_tree(longitudinal_pipeline_data) - timeseries = list(func.glob(f"{_STEM}_space-longitudinal_*_timeseries.tsv")) - assert timeseries, f"No timeseries TSV found\n--- file tree ---\n{tree}" + timeseries = list(func.glob(f"{_STEM}_space-longitudinal_*_timeseries.parquet")) + assert timeseries, f"No timeseries Parquet found\n--- file tree ---\n{tree}" - correlations = list(func.glob(f"{_STEM}_space-longitudinal_*_correlations.tsv")) - assert correlations, f"No correlation TSV found\n--- file tree ---\n{tree}" + correlations = list(func.glob(f"{_STEM}_space-longitudinal_*_connectome.parquet")) + assert correlations, f"No connectome Parquet found\n--- file tree ---\n{tree}" diff --git a/tests/integration/test_all.py b/tests/integration/test_all.py index fc6159fd..fe996b0a 100644 --- a/tests/integration/test_all.py +++ b/tests/integration/test_all.py @@ -189,10 +189,10 @@ def _assert_derivatives_exist(output_dir: Path) -> None: assert qc_files, f"No QC quality TSV files found\n--- file tree ---\n{tree}" # -- Metrics -- - assert list(func.glob(f"{bold_stem}_space-*_*_timeseries.tsv")), ( + assert list(func.glob(f"{bold_stem}_space-*_*_timeseries.parquet")), ( f"No timeseries TSV files found\n--- file tree ---\n{tree}" ) - assert list(func.glob(f"{bold_stem}_space-*_*_correlations.tsv")), ( + assert list(func.glob(f"{bold_stem}_space-*_*_connectome.parquet")), ( f"No correlation matrix TSV files found\n--- file tree ---\n{tree}" ) diff --git a/tests/unit/bids/test_exports.py b/tests/unit/bids/test_exports.py index 33ec4852..fe61af0d 100644 --- a/tests/unit/bids/test_exports.py +++ b/tests/unit/bids/test_exports.py @@ -90,8 +90,8 @@ def _make_metrics_outputs(w: Path, atlases: list[str]) -> MetricsOutputs: reho=_dummy(w, "reho.nii.gz"), reho_smooth=_dummy(w, "reho_smooth.nii.gz"), reho_zscored=_dummy(w, "reho_z.nii.gz"), - timeseries={a: _dummy(w, f"ts_{a}.tsv") for a in atlases}, - correlation_matrix={a: _dummy(w, f"corr_{a}.tsv") for a in atlases}, + timeseries={a: _dummy(w, f"ts_{a}.parquet") for a in atlases}, + connectome={a: _dummy(w, f"connectome_{a}.parquet") for a in atlases}, ) diff --git a/tests/unit/core/test_timeseries.py b/tests/unit/core/test_timeseries.py index 56d89ff2..60f377cb 100644 --- a/tests/unit/core/test_timeseries.py +++ b/tests/unit/core/test_timeseries.py @@ -5,6 +5,7 @@ from typing import TYPE_CHECKING import numpy as np +import polars as pl import pytest from rbc.core.metrics.timeseries import ( @@ -247,7 +248,7 @@ def _make_nifti(self, data: np.ndarray, path: Path) -> None: img.to_filename(str(path)) def test_round_trip(self, tmp_path: Path) -> None: - """Should produce TSV files that can be loaded back.""" + """Should produce Parquet files that can be loaded back.""" rng = np.random.default_rng(20) data = rng.standard_normal((4, 4, 4, 10)) atlas = np.zeros((4, 4, 4), dtype=np.int16) @@ -262,13 +263,13 @@ def test_round_trip(self, tmp_path: Path) -> None: result = compute_timeseries(in_file, atlas_file) assert result.timeseries.exists() - assert result.correlation_matrix.exists() + assert result.connectome.exists() assert len(result.labels) == 2 - ts_loaded = np.loadtxt(result.timeseries, delimiter="\t") + ts_loaded = pl.read_parquet(result.timeseries) assert ts_loaded.shape == (2, 10) - corr_loaded = np.loadtxt(result.correlation_matrix, delimiter="\t") + corr_loaded = pl.read_parquet(result.connectome) assert corr_loaded.shape == (2, 2) def test_output_naming(self, tmp_path: Path) -> None: @@ -285,8 +286,8 @@ def test_output_naming(self, tmp_path: Path) -> None: result = compute_timeseries(in_file, atlas_file) - assert result.timeseries.name == "sub-01_bold_timeseries.tsv" - assert result.correlation_matrix.name == "sub-01_bold_correlation_matrix.tsv" + assert result.timeseries.name == "sub-01_bold_timeseries.parquet" + assert result.connectome.name == "sub-01_bold_connectome.parquet" def test_custom_out_dir(self, tmp_path: Path) -> None: """Should write to a custom output directory.""" @@ -304,7 +305,7 @@ def test_custom_out_dir(self, tmp_path: Path) -> None: result = compute_timeseries(in_file, atlas_file, out_dir=out_dir) assert result.timeseries.parent == out_dir - assert result.correlation_matrix.parent == out_dir + assert result.connectome.parent == out_dir def test_labels_in_output(self, tmp_path: Path) -> None: """Output labels should match the atlas ROI labels."""