diff --git a/.python-version b/.python-version index bd28b9c..24ee5b1 100644 --- a/.python-version +++ b/.python-version @@ -1 +1 @@ -3.9 +3.13 diff --git a/README.md b/README.md index a423c82..24d17ff 100644 --- a/README.md +++ b/README.md @@ -39,7 +39,7 @@ This project uses [uv](https://github.com/astral-sh/uv) for fast, reliable Pytho curl -LsSf https://astral.sh/uv/install.sh | sh # Install the package in development mode with all dependencies -uv pip install -e ".[analysis,dev]" +uv sync --all-extras ``` ### Running Scripts @@ -57,7 +57,7 @@ uv run scripts/fsstats_extraction.py --help ```bash # Start JupyterLab with all analysis dependencies -uv run --with "jupyterlab" jupyter lab +uv run jupyter lab ``` ## Development Workflow @@ -113,10 +113,10 @@ git config diff.ipynb.textconv 'nbstripout -t' ## Analysis Pipeline -1. **Data Processing**: Use `scripts/fsstats_extraction.py` to extract FreeSurfer statistics -2. **Exploration**: Run notebooks in `notebooks/` directory for analysis -3. **Figure Generation**: Use `notebooks/figures.ipynb` to create publication figures -4. **Outputs**: All results saved to `output/` directory, figures to `output/images/` +1. **Stats Extraction**: Run `scripts/fsstats_extraction.py` to extract FreeSurfer statistics (note: this script has tricky dependencies) +2. **Other exploration/figures**: Run the notebooks in `notebooks/` +3. **Outputs**: Most results are saved to `output/`, figures to `output/images/` +4. **Swarm output**: Larger outputs (FreeSurfer derivatives, swarm files, etc.) are stored in the appropriate directories on the cluster. ## Contributing diff --git a/dl_morphometrics_helpers/constants.py b/dl_morphometrics_helpers/constants.py index 84e8835..333a152 100644 --- a/dl_morphometrics_helpers/constants.py +++ b/dl_morphometrics_helpers/constants.py @@ -32,11 +32,13 @@ pipelines = [ ("recon-all", "_ra", "inner"), # ('recon-all-8', '_ra8', 'inner'), + ("recon-all-8_1", "_ra81", "inner"), ("recon-all_clinical_t1", "_ract1", "inner"), ("recon-all_clinical_t2", "_ract2", "inner"), ("recon-all_clinical_t1_resample-3", "_ract1r3", "inner"), ("recon-all_clinical_t1_resample-5", "_ract1r5", "inner"), # ('recon-all_not2', '_ranot2', 'inner'), + ("synthsr-rac-with-skullstrip", "_raseed", "inner"), ("recon-any_t2", "_ranyt2", "inner"), ("recon-any_t1", "_ranyt1", "inner"), ("recon-any_t1_resample-3", "_ranyt1r3", "inner"), @@ -47,6 +49,7 @@ diff_specs = { # key: (base_suffix, comp_suffix) # 'all8': ('_ra', '_ra8'), + "all81": ("_ra", "_ra81"), "ct1": ("_ra", "_ract1"), "ct2": ("_ra", "_ract2"), "ct1r3": ("_ra", "_ract1r3"), @@ -58,11 +61,14 @@ "anyt1r3": ("_ra", "_ranyt1r3"), "anyt1r5": ("_ra", "_ranyt1r5"), "any1any2": ("_ranyt1", "_ranyt2"), + # SynthSR-seeded recon-all vs 8.1 baseline + "raseed81": ("_ra81", "_raseed"), } comparison_labels = { # key: LABEL "all8": "RA_7-vs-8", + "all81": "RA_7-vs-8.1", "ct1": "RAC_T1", "ct2": "RAC_T2", "ct1r3": "RAC_T1-R3", @@ -74,12 +80,17 @@ "anyt1r3": "RANY_T1-R3", "anyt1r5": "RANY_T1-R5", "any1any2": "RANY_T1-vs-T2", + # SynthSR-seeded recon-all label + "raseed81": "RA_seeded_RAC_SynthSR", } # RAC / RANY split rac_groups = ["ct1", "ct1r3", "ct1r5", "ct2", "ct1ct2"] rany_groups = ["anyt1", "anyt1r3", "anyt1r5", "anyt2", "any1any2"] +# Optional RA variant comparisons (legacy vs 8.1, and 8.1 vs SynthSR-seeded) +ra_variant_groups = ["all81", "raseed81"] + # ============================================================================ # PATH CONFIGURATION # ============================================================================ @@ -88,10
+99,14 @@ # Update these as needed for your local setup project_dir = Path("/data/ABCD_MBDU/ohbm2024/") drv_dir = project_dir / "data/derivatives/" +abcd_data = Path("/data/ABCD_DSST/ABCD") stats_dir = drv_dir / "leej3/fs_stats_multi" tpl_root = project_dir / "templateflow" / "tpl-fsaverage" -age_tsv = "/data/ABCD_DSST/ABCD/imaging_data/fast_track/sessions.tsv" -fastages_tsv = "/data/ABCD_DSST/ABCD/imaging_data/fast_track/code/abcd_fastqc01_history/2024-05-01/abcd_fastqc01_ages_innerjoin_sessions_without_age.tsv" +age_tsv = abcd_data / "imaging_data/fast_track/sessions.tsv" +fastages_tsv = ( + abcd_data + / "imaging_data/fast_track/code/abcd_fastqc01_history/2024-05-01/abcd_fastqc01_ages_innerjoin_sessions_without_age.tsv" +) path_inputs = [drv_dir / "freesurfer" / p[0] for p in pipelines] diff --git a/dl_morphometrics_helpers/data_processing.py b/dl_morphometrics_helpers/data_processing.py index d443a23..cfb814f 100644 --- a/dl_morphometrics_helpers/data_processing.py +++ b/dl_morphometrics_helpers/data_processing.py @@ -5,12 +5,16 @@ particularly for handling hemisphere-specific data and region name extraction. """ -from typing import Optional +from __future__ import annotations + +from pathlib import Path import pandas as pd +from . import constants as cfg + -def strip_to_hemi(col: str) -> Optional[str]: +def strip_to_hemi(col: str) -> str | None: """ If `col` contains '_lh' or '_rh', return everything up through that hemisphere tag. Otherwise return None. @@ -55,3 +59,238 @@ def get_region_names(df: pd.DataFrame) -> set[str]: stripped for col in df.columns if (stripped := strip_to_hemi(col)) is not None } return regions + + +def parse_bids_ids( + df: pd.DataFrame, + participant_col: str = "participant_id", + session_col: str = "session_id", +) -> pd.DataFrame: + """ + Parse BIDS participant and session IDs into subject/session columns. + + Args: + df: DataFrame with BIDS-style participant_id and session_id columns + participant_col: Name of the participant ID column + session_col: Name of the session ID column + + Returns: + DataFrame with added 'subject' and 'session' columns + """ + df = df.copy() + df["subject"] = df[participant_col].str.split("-").str[1] + df["session"] = df[session_col].str.split("-").str[1] + return df + + +def load_balanced_scans(bids_dir: Path, project_dir: Path) -> pd.DataFrame: + """ + Load and prepare balanced scans DataFrame with full paths. + + Args: + bids_dir: Path to BIDS raw data directory + project_dir: Path to project directory containing balanced_scans.csv + + Returns: + DataFrame with balanced scans and full file paths + """ + balanced_scans = pd.read_csv(project_dir / "code/balanced_scans.csv") + balanced_scans["path"] = bids_dir / balanced_scans.filename + return balanced_scans + + +def prepare_scan_pairs( + balanced_scans: pd.DataFrame, max_per_session: int = 250 +) -> pd.DataFrame: + """ + Prepare T1w/T2w scan pairs for processing. 
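+
+    Example (an illustrative sketch; `bids_dir` and `project_dir` are whatever paths you pass to load_balanced_scans())::
+
+        scans = load_balanced_scans(bids_dir, project_dir)
+        pairs = prepare_scan_pairs(scans, max_per_session=100)
+        # columns: participant_id, session_id, t1_path, t2_path,
+        # subject, session; t2_path is NaN where no T2w scan exists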
+ + Args: + balanced_scans: DataFrame from load_balanced_scans() + max_per_session: Maximum number of scan pairs to keep per session + + Returns: + DataFrame with paired T1w/T2w scans + """ + t1w_scans = ( + balanced_scans.query("modality == 'T1w'") + .loc[:, ["participant_id", "session_id", "path"]] + .rename(columns={"path": "t1_path"}) + ) + + t2w_scans = ( + balanced_scans.query("modality == 'T2w'") + .loc[:, ["participant_id", "session_id", "path"]] + .rename(columns={"path": "t2_path"}) + ) + + scans_to_run = t1w_scans.merge( + t2w_scans, how="left", on=["participant_id", "session_id"] + ) + scans_to_run = parse_bids_ids(scans_to_run) + scans_to_run = scans_to_run.groupby("session").head(max_per_session) + + return scans_to_run + + +def load_session_info() -> pd.DataFrame: + """ + Load and process session info with age conversion. + + Returns: + DataFrame with processed session information including age_years + """ + ses_info = pd.read_csv(cfg.age_tsv, sep="\t") + ses_info = parse_bids_ids(ses_info) + ses_info["age_years"] = ses_info.age / 12 + return ses_info + + +def load_fastages_info() -> pd.DataFrame: + """ + Load and process fast ages info. + + Returns: + DataFrame with processed fast ages information + """ + fastages = pd.read_csv(cfg.fastages_tsv, sep="\t") + fastages = parse_bids_ids(fastages) + fastages["age_years"] = fastages.age / 12 + return fastages + + +def merge_age_data(df: pd.DataFrame) -> pd.DataFrame: + """ + Merge DataFrame with age data from both session sources. + + Args: + df: DataFrame with subject/session columns to merge age data into + + Returns: + DataFrame with age_years column populated + """ + ses_info = load_session_info() + fastages = load_fastages_info() + + # Merge with main session info + with_age = df.merge( + ses_info.loc[:, cfg.metadata_cols], how="left", on=["subject", "session"] + ) + + # Fill missing ages with fastages data + with_all_ages = with_age.merge( + fastages.loc[:, cfg.metadata_cols], + how="left", + on=["subject", "session"], + suffixes=("", "_fa"), + ) + with_all_ages["age_years"] = with_all_ages.age_years.fillna( + with_all_ages.age_years_fa + ) + + # Clean up temporary columns + with_all_ages = with_all_ages.drop( + columns=[col for col in with_all_ages.columns if col.endswith("_fa")] + ) + + return with_all_ages + + +def parse_synthsr_path(path: Path) -> dict[str, str | None]: + """ + Parse a SynthSR file path to extract subject, session, and pipeline information. + + Args: + path: Path to SynthSR file + + Returns: + Dictionary with 'subject', 'session', 'pipeline_src', and 't1_path' keys; 'session' is None when the path contains no ses- directory + """ + parts = path.parts + + subj_index = parts.index("fs_out") - 2 + sess_index = parts.index("fs_out") - 1 + subject_id = parts[subj_index] # e.g. 'sub-XXXX' + session_id = parts[sess_index] if parts[sess_index].startswith("ses-") else None + pipeline_name = parts[parts.index("freesurfer") + 1] # pipeline directory name + + return { + "subject": subject_id, + "session": session_id, + "pipeline_src": pipeline_name, + "t1_path": str(path), + } + + +def get_synthsr_for_pipeline( + fs_deriv_dir: Path, pipeline_name: str, glob_pattern="*/*/fs_out/*/mri/SynthSR.mgz" +) -> pd.DataFrame: + """ + Find and catalog SynthSR files across FreeSurfer pipeline outputs.
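+
+    Example (an illustrative sketch; the derivatives path is hypothetical)::
+
+        synth_df = get_synthsr_for_pipeline(
+            Path("/data/project/derivatives/freesurfer"), "recon-all-8"
+        )
+        # one row per SynthSR.mgz found, with subject, session,
+        # pipeline_src, and t1_path columns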
+ + Args: + fs_deriv_dir: Path to FreeSurfer derivatives directory + pipeline_name: Pipeline name to filter for (e.g., 'recon-any_t1') + glob_pattern: Glob pattern, relative to the pipeline directory, used to locate SynthSR outputs + + Returns: + DataFrame with SynthSR file information + """ + synth_files = [] + pipeline_dir = fs_deriv_dir / pipeline_name + assert pipeline_dir.exists() + synth_files.extend(pipeline_dir.glob(glob_pattern)) + + synth_records = [] + for path in synth_files: + record = parse_synthsr_path(path) + if record: + synth_records.append(record) + + return pd.DataFrame(synth_records) + + +def load_brain_confounds(pipeline: str, suffix: str) -> pd.DataFrame: + """ + Load BrainConfounds.tsv for a specific pipeline with proper column naming. + + Args: + pipeline: Pipeline name (e.g., 'recon-all', 'recon-all_clinical_t1') + suffix: Suffix to append to metric columns (e.g., '_ra', '_ract1') + + Returns: + DataFrame with renamed columns + """ + fn = cfg.stats_dir / pipeline / "BrainConfounds.tsv" + df = pd.read_csv(fn, sep="\t") + df.columns = ["subject", "session"] + [m + suffix for m in cfg.metric_cols] + return df + + +def load_and_merge_brain_confounds() -> pd.DataFrame: + """ + Load and merge all brain confounds data according to pipeline configuration. + + Returns: + Merged and scaled DataFrame with all pipeline data + """ + dfs = {} + + for pipeline, suffix, how in cfg.pipelines: + df = load_brain_confounds(pipeline, suffix) + dfs[suffix] = (df, how) + + # Merge them in sequence + merged, _ = dfs["_ra"] # start with recon-all (_ra) + for _, suffix, _how in cfg.pipelines[1:]: + df, merge_how = dfs[suffix] + merged = merged.merge(df, on=["subject", "session"], how=merge_how) + + # Fix column names + merged.columns = ( + merged.columns.str.strip().str.replace(r"[\s\-]+", "_", regex=True).str.lower() + ) + + # Scale to units of 10^4 mm^3 + merged.iloc[:, 2:] /= 10000 + + return merged diff --git a/dl_morphometrics_helpers/dlm_utils.py b/dl_morphometrics_helpers/dlm_utils.py new file mode 100644 index 0000000..669ec84 --- /dev/null +++ b/dl_morphometrics_helpers/dlm_utils.py @@ -0,0 +1,82 @@ +from pathlib import Path + + +def build_fs_swarm_cmd( + cmds, + *, + test=True, + # Naming + pipeline_version, + pipeline_dirname=None, # Directory name for pipeline (if different from pipeline_version) + run_tag="fs-8-leej3", # e.g., "fs-8-leej3" or "fs-8-rerun-leej3" + # Resources + ncpus=16, + g_mem=100, # GB for -g + lscratch=400, # GB local scratch + time="12:00:00", # e.g., "12:00:00" or "2-12:00:00" + partition="norm", + # Modules + freesurfer_module_version="freesurfer/7.4.1", + extra_modules=("fsl",), # extra modules to load along with freesurfer + # I/O + swarm_cmd_dir=Path("./swarm_cmds"), + swarm_log_dir=Path("./swarm_logs"), +): + """ + Consolidated swarm command builder for FreeSurfer jobs.
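+
+    Example (an illustrative sketch; `cmds` and the run tag are hypothetical)::
+
+        swarm_exec = build_fs_swarm_cmd(
+            cmds,
+            test=True,
+            pipeline_version="all",
+            run_tag="fs-8-demo",
+        )
+        # -> "swarm -f .../abcd_recon_all-fs-8-demo-test -g 100 -t 16 ..."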
+ + Args: + cmds: List of command strings to execute + test: If True, writes only the first two commands and appends '-test' to job name + pipeline_version: Version identifier for the pipeline (used in job naming) + pipeline_dirname: Directory name for pipeline outputs (not used by this function; accepted so callers can pass one shared kwargs dict) + run_tag: Tag to distinguish different runs (e.g., 'fs-8-leej3', 'fs-8-rerun-leej3') + ncpus: Number of CPUs to request per job + g_mem: Memory in GB to request per job + lscratch: Local scratch space in GB to request per job + time: Wall time limit (e.g., '12:00:00' or '2-12:00:00') + partition: SLURM partition to submit to + freesurfer_module_version: FreeSurfer module version to load + extra_modules: Additional modules to load alongside FreeSurfer + swarm_cmd_dir: Directory to write swarm command files + swarm_log_dir: Directory for swarm log files + + Returns: + Single-line swarm command string ready to copy/paste + + Notes: + - Creates `swarm_cmd_dir` and `swarm_log_dir` if they don't exist + - `pipeline_dirname` is accepted but unused here; callers use it to build their output directory structure + """ + # Build job/run name using pipeline_version (for consistency with existing naming) + run_name = f"abcd_recon_{pipeline_version}-{run_tag}{'-test' if test else ''}" + + # Ensure directories exist + swarm_cmd_dir.mkdir(parents=True, exist_ok=True) + swarm_log_dir.mkdir(parents=True, exist_ok=True) + + # Write swarm command file + swarm_cmd_file = swarm_cmd_dir / run_name + to_write = cmds[:2] if test else cmds + swarm_cmd_file.write_text("\n".join(to_write) + "\n") + + # Build module string + modules = ( + [freesurfer_module_version, *extra_modules] + if extra_modules + else [freesurfer_module_version] + ) + module_arg = ",".join(modules) + + # Compose single-line swarm command (no leading spaces/newlines) + swarm_exec = ( + f"swarm -f {swarm_cmd_file.absolute()} " + f"-g {g_mem} -t {ncpus} --gres=lscratch:{lscratch} " + f"--module {module_arg} --time {time} " + f"--logdir {swarm_log_dir.absolute()} --job-name {run_name} " + f"--partition {partition}" + ) + + return swarm_exec diff --git a/docs/synthsr-seeding.md b/docs/synthsr-seeding.md new file mode 100644 index 0000000..cfaf6b4 --- /dev/null +++ b/docs/synthsr-seeding.md @@ -0,0 +1,12 @@ +SynthSR is a deep learning tool that generates a high-resolution 1 mm isotropic T1-weighted image from an input scan of any resolution/contrast. By feeding these SynthSR-derived images into the standard recon-all pipeline, we obtain FreeSurfer results as if the scans were high-quality MPRAGE images. This allows us to assess how the canonical pipeline performs on enhanced inputs and compare results across pipelines.
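+
+A minimal sketch of one seeded run (paths are hypothetical; the real runs are generated per subject/session and submitted via `dlm_utils.build_fs_swarm_cmd`):
+
+```bash
+# Feed a single SynthSR output back through vanilla recon-all.
+# SynthSR.mgz is already brain-extracted, hence -noskullstrip.
+export SUBJECTS_DIR=/tmp/fs_out && mkdir -p "$SUBJECTS_DIR"
+recon-all -subjid sub-12345 -all -noskullstrip \
+    -i /path/to/freesurfer/recon-all-8/sub-12345/ses-ABC/fs_out/sub-12345/mri/SynthSR.mgz
+```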
+ +**We will run recon-all 8 using the SynthSR outputs from the following pipelines:** + +- recon-all 8 control +- recon-all clinical (run on T1 resampled to 1x1x5) +- recon any (run on T1 resampled to 1x1x5) + +The approach for this will be: + +- Find each SynthSR.mgz (a brain-extracted 1 mm MRI), something like `.../freesurfer/<pipeline>/sub-12345/ses-ABC/fs_out/sub-12345/mri/SynthSR.mgz` +- Run a FreeSurfer swarm for each pipeline (name each pipeline by appending `_synthsr` to the original) + - remember to use `-noskullstrip` diff --git a/notebooks/check_numbers.ipynb b/notebooks/check_numbers.ipynb index 1bef5e5..0a2b3b0 100644 --- a/notebooks/check_numbers.ipynb +++ b/notebooks/check_numbers.ipynb @@ -25,7 +25,8 @@ "from scipy import stats\n", "from matplotlib.gridspec import GridSpec\n", "\n", - "from dl_morphometrics_helpers import constants as cfg\n" + "from dl_morphometrics_helpers import constants as cfg\n", + "from dl_morphometrics_helpers import data_processing as dp\n" ] }, { @@ -36,14 +37,8 @@ "outputs": [], "source": [ "cp = sns.color_palette()\n", - "global_df = pd.read_csv('../data/derivatives/fs_stats/recon-all/BrainConfounds.tsv', sep='\\t')\n", - "\n", - "global_df_ct1 = pd.read_csv('../data/derivatives/fs_stats/recon-all_clinical_t1/BrainConfounds.tsv', sep='\\t')\n", - "global_df_ct2 = pd.read_csv('../data/derivatives/fs_stats/recon-all_clinical_t2/BrainConfounds.tsv', sep='\\t')\n", - "global_df_not2 = pd.read_csv('../data/derivatives/fs_stats/recon-all_not2/BrainConfounds.tsv', sep='\\t')\n", - "\n", - "global_df_ct1r5 = pd.read_csv('../data/derivatives/fs_stats/recon-all_clinical_t1_resample-5/BrainConfounds.tsv', sep='\\t')\n", - "global_df_ct1r3 = pd.read_csv('../data/derivatives/fs_stats/recon-all_clinical_t1_resample-3/BrainConfounds.tsv', sep='\\t')" + "# Use helper function instead of manual loading\n", + "global_df = dp.load_and_merge_brain_confounds()" ] }, { @@ -53,23 +48,8 @@ "metadata": {}, "outputs": [], "source": [ - "measures = ['Subcortical gray matter volume',\n", - " 'Total gray matter volume', 'Total cortical gray matter volume',\n", - " 'Mask Volume', 'Brain Segmentation Volume Without Ventricles',\n", - " 'Left hemisphere cerebral white matter volume',\n", - " 'Right hemisphere cerebral white matter volume',\n", - " 'Total cerebral white matter volume']\n", - "ct2_measures = [mm + '_ract2' for mm in measures]\n", - "global_df_ct2.columns = ['subject', 'session',] + ct2_measures\n", - "\n", - "not2_measures = [mm + '_ranot2' for mm in measures]\n", - "global_df_not2.columns = ['subject', 'session',] + not2_measures\n", - "\n", - "ct1r3_measures = [mm + '_ract1r3' for mm in measures]\n", - "global_df_ct1r3.columns = ['subject', 'session',] + ct1r3_measures\n", - "\n", - "ct1r5_measures = [mm + '_ract1r5' for mm in measures]\n", - "global_df_ct1r5.columns = ['subject', 'session',] + ct1r5_measures" + "# Column renaming is already handled by load_and_merge_brain_confounds()\n", + "# No need to manually set column names" ] }, { @@ -79,11 +59,8 @@ "metadata": {}, "outputs": [], "source": [ - "global_df = global_df.merge(global_df_ct1, how='inner', on=['subject', 'session'], suffixes=['_ra', '_ract1'])\n", - "global_df = global_df.merge(global_df_ct2, how='inner', on=['subject', 'session'])\n", - "global_df = global_df.merge(global_df_not2, how='inner', on=['subject', 'session'])\n", - "global_df = global_df.merge(global_df_ct1r3, how='left', on=['subject', 'session'])\n", - "global_df = global_df.merge(global_df_ct1r5, how='left', on=['subject',
'session'])\n" + "# Merging is already handled by load_and_merge_brain_confounds()\n", + "# No manual merging needed\n" ] }, { @@ -93,7 +70,8 @@ "metadata": {}, "outputs": [], "source": [ - "global_df.iloc[:, 2:] /= 10000" + "# Scaling is already handled by load_and_merge_brain_confounds()\n", + "# No manual scaling needed" ] }, { @@ -103,7 +81,8 @@ "metadata": {}, "outputs": [], "source": [ - "ses_info = pd.read_csv('/data/ABCD_DSST/ABCD/imaging_data/fast_track/sessions.tsv', sep='\\t')" + "# Use configured path instead of hardcoded path\n", + "ses_info = pd.read_csv(cfg.age_tsv, sep='\\t')" ] }, { @@ -113,8 +92,8 @@ "metadata": {}, "outputs": [], "source": [ - "ses_info['subject'] = ses_info.participant_id.str.split('-').str[1]\n", - "ses_info['session'] = ses_info.session_id.str.split('-').str[1]\n", + "# Use helper function instead of manual parsing\n", + "ses_info = dp.parse_bids_ids(ses_info)\n", "ses_info['age_years'] = ses_info.age / 12\n", "pml = len(global_df)\n", "global_df = global_df.merge(ses_info.loc[:, ['subject', 'session', 'age_years']], how='left', on=['subject', 'session'])\n", @@ -148,9 +127,9 @@ "outputs": [], "source": [ "\n", - "fastages = pd.read_csv('/data/ABCD_DSST/ABCD/imaging_data/fast_track/code/abcd_fastqc01_history/2024-05-01/abcd_fastqc01_ages_innerjoin_sessions_without_age.tsv', sep='\\t')\n", - "fastages['subject'] = fastages.participant_id.str.split('-').str[1]\n", - "fastages['session'] = fastages.session_id.str.split('-').str[1]\n", + "# Use configured path and helper function\n", + "fastages = pd.read_csv(cfg.fastages_tsv, sep='\\t')\n", + "fastages = dp.parse_bids_ids(fastages)\n", "fastages['age_years'] = fastages.age / 12\n" ] }, @@ -173,12 +152,7 @@ "metadata": {}, "outputs": [], "source": [ - "cfg.metric_cols = ['Subcortical gray matter volume',\n", - " 'Total gray matter volume', 'Total cortical gray matter volume',\n", - " 'Mask Volume', 'Brain Segmentation Volume Without Ventricles',\n", - " 'Left hemisphere cerebral white matter volume',\n", - " 'Right hemisphere cerebral white matter volume',\n", - " 'Total cerebral white matter volume']" + "# metric_cols are already defined in cfg.metric_cols - no need to redefine" ] }, { @@ -1876,8 +1850,8 @@ "gv_df = gv_df.merge(gv_df_ct1r5, how='left', on=['subject', 'session'])\n", "\n", "\n", - "ses_info['subject'] = ses_info.participant_id.str.split('-').str[1]\n", - "ses_info['session'] = ses_info.session_id.str.split('-').str[1]\n", + "# Use helper function instead of manual parsing\n", + "ses_info = dp.parse_bids_ids(ses_info)\n", "ses_info['age_years'] = ses_info.age / 12\n", "pml = len(gv_df)\n", "gv_df = gv_df.merge(ses_info.loc[:, ['subject', 'session', 'age_years', 'site_id']], how='left', on=['subject', 'session'])\n", diff --git a/notebooks/figures.ipynb b/notebooks/figures.ipynb index 720b364..908fe16 100644 --- a/notebooks/figures.ipynb +++ b/notebooks/figures.ipynb @@ -89,29 +89,8 @@ "metadata": {}, "outputs": [], "source": [ - "dfs = {}\n", - "\n", - "for pipeline, suffix, how in cfg.cfg.pipelines:\n", - " fn = cfg.stats_dir / pipeline / \"BrainConfounds.tsv\"\n", - " df = pd.read_csv(fn, sep=\"\\t\")\n", - " # rename metric cols by appending suffix\n", - " df.columns = [\"subject\", \"session\"] + [m + suffix for m in cfg.cfg.metric_cols]\n", - " dfs[suffix] = (df, how)\n", - "\n", - "# merge them in sequence\n", - "merged, _ = dfs[\"_ra\"] # start with recon-all (_ra)\n", - "for suffix, how in [(s, h) for (_, s, h) in cfg.cfg.pipelines[1:]]:\n", - " df, merge_how = dfs[suffix]\n", - " 
merged = merged.merge(df, on=[\"subject\", \"session\"], how=merge_how)\n", - "\n", - "# Fix col names\n", - "\n", - "merged.columns = (\n", - " merged.columns.str.strip().str.replace(r\"[\\s\\-]+\", \"_\", regex=True).str.lower()\n", - ")\n", - "\n", - "# Scale\n", - "merged.iloc[:, 2:] /= 10000" + "# Use helper function instead of manual loading and merging\n", + "merged = du.load_and_merge_brain_confounds()" ] }, { @@ -139,8 +118,8 @@ "metadata": {}, "outputs": [], "source": [ - "ses_info[\"subject\"] = ses_info.participant_id.str.split(\"-\").str[1]\n", - "ses_info[\"session\"] = ses_info.session_id.str.split(\"-\").str[1]\n", + "# Use helper function instead of manual parsing\n", + "ses_info = du.parse_bids_ids(ses_info)\n", "ses_info[\"age_years\"] = ses_info.age / 12\n", "pml = len(merged)\n", "with_age = merged.merge(\n", @@ -176,8 +155,8 @@ "outputs": [], "source": [ "fastages = pd.read_csv(cfg.fastages_tsv, sep=\"\\t\")\n", - "fastages[\"subject\"] = fastages.participant_id.str.split(\"-\").str[1]\n", - "fastages[\"session\"] = fastages.session_id.str.split(\"-\").str[1]\n", + "# Use helper function instead of manual parsing\n", + "fastages = du.parse_bids_ids(fastages)\n", "fastages[\"age_years\"] = fastages.age / 12\n", "\n", "with_all_ages = with_sex.merge(\n", diff --git a/notebooks/run_freesurfer8-recon-all.ipynb b/notebooks/run_freesurfer8-recon-all.ipynb index 0c0b203..df2fcae 100644 --- a/notebooks/run_freesurfer8-recon-all.ipynb +++ b/notebooks/run_freesurfer8-recon-all.ipynb @@ -13,7 +13,10 @@ "id": "1", "metadata": {}, "source": [ - "2025/05/08: Added FS8 recon-all" + "- 2025/05/08: Added FS8 recon-all\n", + "- 2025/09/19: Refactor, tidy\n", + "- 2025/09/21: added SynthSR variation pipeline.\n", + "- 2025/09/21: Run recon-all 8.1 to check for differences" ] }, { @@ -23,12 +26,8 @@ "metadata": {}, "outputs": [], "source": [ - "import shutil\n", - "from pathlib import Path\n", - "\n", - "import pandas as pd\n", - "\n", - "pd.set_option(\"display.max_rows\", 200)" + "%load_ext autoreload\n", + "%autoreload 2" ] }, { @@ -38,11 +37,16 @@ "metadata": {}, "outputs": [], "source": [ - "pipeline_version = \"all\"\n", - "freesurfer_module_version = \"freesurfer/8.0.0\"\n", - "pipeline_dirname = \"recon-all-8\"\n", - "ncpus = 10\n", - "zres_vals = [3, 5]" + "import shutil\n", + "from pathlib import Path\n", + "\n", + "import pandas as pd\n", + "\n", + "from dl_morphometrics_helpers import data_processing as dp\n", + "from dl_morphometrics_helpers.constants import abcd_data, drv_dir, project_dir\n", + "from dl_morphometrics_helpers.dlm_utils import build_fs_swarm_cmd\n", + "\n", + "pd.set_option(\"display.max_rows\", 200)" ] }, { @@ -51,38 +55,42 @@ "id": "4", "metadata": {}, "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5", - "metadata": {}, - "outputs": [], "source": [ - "project_dir = Path(\"/data/ABCD_MBDU/ohbm2024/\")\n", - "bids_dir = Path(\"/data/ABCD_DSST/ABCD/imaging_data/fast_track/rawdata/\")\n", - "fsres_dir = project_dir / \"data/derivatives/freesurfer/\"\n", - "swarm_cmd_dir = project_dir / \"swarm/swarm_commands\"\n", - "swarm_log_dir = project_dir / \"swarm/swarm_logs\"\n", - "\n", - "balanced_scans = pd.read_csv(project_dir / \"code/balanced_scans.csv\")" + "zres_vals = [3, 5]\n", + "bids_dir = abcd_data / \"imaging_data/fast_track/rawdata/\"\n", + "fsres_dir = drv_dir / \"freesurfer/\"\n", + "swarm_kwargs_default = {\n", + " \"test\": True,\n", + " \"pipeline_version\": \"all\",\n", + " \"pipeline_dirname\": 
\"recon-all-8\",\n", + " \"run_tag\": \"fs-8-leej3\",\n", + " \"ncpus\": 10,\n", + " \"g_mem\": 40,\n", + " \"lscratch\": 400,\n", + " \"time\": \"12:00:00\",\n", + " \"partition\": \"norm\",\n", + " \"freesurfer_module_version\": \"freesurfer/8.0.0\",\n", + " \"extra_modules\": (\"fsl\",),\n", + " \"swarm_cmd_dir\": project_dir / \"swarm/swarm_commands\",\n", + " \"swarm_log_dir\": project_dir / \"swarm/swarm_logs\",\n", + "}" ] }, { "cell_type": "code", "execution_count": null, - "id": "6", + "id": "5", "metadata": {}, "outputs": [], "source": [ + "balanced_scans = pd.read_csv(project_dir / \"code/balanced_scans.csv\")\n", "balanced_scans[\"path\"] = bids_dir / balanced_scans.filename" ] }, { "cell_type": "code", "execution_count": null, - "id": "7", + "id": "6", "metadata": {}, "outputs": [], "source": [ @@ -92,7 +100,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8", + "id": "7", "metadata": {}, "outputs": [], "source": [ @@ -119,7 +127,7 @@ { "cell_type": "code", "execution_count": null, - "id": "9", + "id": "8", "metadata": {}, "outputs": [], "source": [ @@ -128,7 +136,7 @@ }, { "cell_type": "markdown", - "id": "10", + "id": "9", "metadata": {}, "source": [ "# Run freesurfer 8 recon-all on full res T1 + T2\n" @@ -137,15 +145,15 @@ { "cell_type": "code", "execution_count": null, - "id": "11", + "id": "10", "metadata": {}, "outputs": [], "source": [ - "ncpus = 10\n", + "swarm_kwargs = swarm_kwargs_default.copy()\n", "cmds = []\n", "\n", "for _, row in scans_to_run.iterrows():\n", - " fs_outdir = fsres_dir / pipeline_dirname / row.subject / row.session\n", + " fs_outdir = fsres_dir / swarm_kwargs[\"pipeline_dirname\"] / row.subject / row.session\n", " fs_outdir.mkdir(exist_ok=True, parents=True)\n", "\n", " cmd = f\"\"\"\\\n", @@ -154,7 +162,7 @@ "source $FREESURFER_HOME/SetUpFreeSurfer.sh; \\\n", "recon-all \\\n", " -subjid {row.subject} \\\n", - " -openmp {ncpus} \\\n", + " -openmp {swarm_kwargs[\"ncpus\"]} \\\n", " -all \\\n", " -i {row.t1_path} \\\n", " -T2 {row.t2_path}; \\\n", @@ -170,7 +178,7 @@ { "cell_type": "code", "execution_count": null, - "id": "12", + "id": "11", "metadata": {}, "outputs": [], "source": [ @@ -180,27 +188,18 @@ { "cell_type": "code", "execution_count": null, - "id": "13", + "id": "12", "metadata": {}, "outputs": [], "source": [ - "def get_fs_swarm_cmd(test=True):\n", - " run_name = f\"abcd_recon_{pipeline_version}-fs-8-leej3{'-test' if test else ''}\"\n", - "\n", - " swarm_cmd_file = swarm_cmd_dir / run_name\n", - " swarm_cmd_file.write_text(\"\\n\".join(cmds[:2] if test else cmds))\n", - " swarm_exec = f\"\"\"swarm -f {swarm_cmd_file.resolve()} -g 100 -t {ncpus} --gres=lscratch:400 --module {freesurfer_module_version},fsl --time 12:00:00 --logdir {swarm_log_dir.resolve()} --job-name {run_name} --partition norm\"\"\"\n", - " return swarm_exec\n", - "\n", - "\n", - "swarm_exec = get_fs_swarm_cmd()\n", + "swarm_exec = build_fs_swarm_cmd(cmds, **swarm_kwargs)\n", "swarm_exec" ] }, { "cell_type": "code", "execution_count": null, - "id": "14", + "id": "13", "metadata": {}, "outputs": [], "source": [ @@ -211,7 +210,7 @@ { "cell_type": "code", "execution_count": null, - "id": "15", + "id": "14", "metadata": {}, "outputs": [], "source": [ @@ -221,18 +220,18 @@ { "cell_type": "code", "execution_count": null, - "id": "16", + "id": "15", "metadata": {}, "outputs": [], "source": [ - "full_swarm_exec = get_fs_swarm_cmd(False)\n", + "full_swarm_exec = build_fs_swarm_cmd(cmds, **{**swarm_kwargs, \"test\": False})\n", "full_swarm_exec" ] }, { "cell_type": 
"code", "execution_count": null, - "id": "17", + "id": "16", "metadata": {}, "outputs": [], "source": [ @@ -242,7 +241,7 @@ }, { "cell_type": "markdown", - "id": "18", + "id": "17", "metadata": {}, "source": [ "# Rerun for many failed runs" @@ -251,113 +250,41 @@ { "cell_type": "code", "execution_count": null, - "id": "19", + "id": "18", "metadata": {}, "outputs": [], "source": [ "# extracted manually with output from processing notebook.\n", - "failed_subs = [\n", - " \"sub-NDARINV01NAYMZH\",\n", - " \"sub-NDARINV04EUBGTM\",\n", - " \"sub-NDARINV086U18RD\",\n", - " \"sub-NDARINV08R2PTT1\",\n", - " \"sub-NDARINV0B7UGM1D\",\n", - " \"sub-NDARINV0JWTGKAD\",\n", - " \"sub-NDARINV0R5220TJ\",\n", - " \"sub-NDARINV0U317B9P\",\n", - " \"sub-NDARINV0X45NBYM\",\n", - " \"sub-NDARINV0Y8YJ2UR\",\n", - " \"sub-NDARINV0YVKYMJX\",\n", - " \"sub-NDARINV14C1N3KZ\",\n", - " \"sub-NDARINV1AKMLL9A\",\n", - " \"sub-NDARINV1EGW0J5N\",\n", - " \"sub-NDARINV1F5JEP46\",\n", - " \"sub-NDARINV1JXDFV9Z\",\n", - " \"sub-NDARINV1KXK7MDF\",\n", - " \"sub-NDARINV20GGC8X5\",\n", - " \"sub-NDARINV21EBHGF9\",\n", - " \"sub-NDARINV249JM0NY\",\n", - " \"sub-NDARINV29P0F670\",\n", - " \"sub-NDARINV2F51HZAP\",\n", - " \"sub-NDARINV2FV9YY14\",\n", - " \"sub-NDARINV2P5R504F\",\n", - " \"sub-NDARINV2RD4CZ7T\",\n", - " \"sub-NDARINV2T1G705T\",\n", - " \"sub-NDARINV2WYPW02N\",\n", - " \"sub-NDARINV2Z2HJFG1\",\n", - " \"sub-NDARINV32KLKRZC\",\n", - " \"sub-NDARINV330HNJEV\",\n", - " \"sub-NDARINV3AGKBZAW\",\n", - " \"sub-NDARINV3CW8YR0W\",\n", - " \"sub-NDARINV3HEHP1P4\",\n", - " \"sub-NDARINV45BG08PF\",\n", - " \"sub-NDARINV47UWAATW\",\n", - " \"sub-NDARINV47W6DHJC\",\n", - " \"sub-NDARINV486UUF4D\",\n", - " \"sub-NDARINV4PYRUPX6\",\n", - " \"sub-NDARINV512RTCH1\",\n", - " \"sub-NDARINV51ZUKMA3\",\n", - " \"sub-NDARINV52AE3MBX\",\n", - " \"sub-NDARINV52XG9LJ3\",\n", - " \"sub-NDARINV592319RL\",\n", - " \"sub-NDARINV5C9WJ4B3\",\n", - " \"sub-NDARINV5G3DP835\",\n", - " \"sub-NDARINV5N40CFL4\",\n", - " \"sub-NDARINV5V2AYDE7\",\n", - " \"sub-NDARINV60TVEFTU\",\n", - " \"sub-NDARINV665J58DF\",\n", - " \"sub-NDARINV68WXH24G\",\n", - " \"sub-NDARINV69FLDML4\",\n", - " \"sub-NDARINV6B3HFDAY\",\n", - " \"sub-NDARINV6KEGPJ6J\",\n", - " \"sub-NDARINV6X9U8P56\",\n", - " \"sub-NDARINV71136NP6\",\n", - " \"sub-NDARINV7FG8NTPP\",\n", - " \"sub-NDARINV7G30XLFU\",\n", - " \"sub-NDARINV7J9K5U1L\",\n", - " \"sub-NDARINV7WT9690H\",\n", - " \"sub-NDARINV8451EHXW\",\n", - " \"sub-NDARINV85UUUHN0\",\n", - " \"sub-NDARINV87L30DBB\",\n", - " \"sub-NDARINV87WRYJ7F\",\n", - " \"sub-NDARINV88V8C4GJ\",\n", - " \"sub-NDARINV8CBT9W65\",\n", - " \"sub-NDARINV8J5VU553\",\n", - " \"sub-NDARINV8L7MBY64\",\n", - " \"sub-NDARINV8MGGJ4FD\",\n", - " \"sub-NDARINV8T2NUL38\",\n", - " \"sub-NDARINV956C4ZGG\",\n", - " \"sub-NDARINV95YMR61C\",\n", - " \"sub-NDARINV974A9111\",\n", - " \"sub-NDARINV9EYHTYJT\",\n", - " \"sub-NDARINV9HB34LLU\",\n", - " \"sub-NDARINV9L4P2RJJ\",\n", - " \"sub-NDARINV9R0Z0T45\",\n", - " \"sub-NDARINV9UV6PK7Y\",\n", - " \"sub-NDARINVA3VX7WRD\",\n", - " \"sub-NDARINVA4KJXLYH\",\n", - " \"sub-NDARINVA4MKK2EJ\",\n", - " \"sub-NDARINVA68WCRUL\",\n", - " \"sub-NDARINVBBH4GW2D\",\n", - " \"sub-NDARINVBG7FNXZB\",\n", - "]" + "failed_subs = \"\"\"\n", + " sub-NDARINV01NAYMZH sub-NDARINV04EUBGTM sub-NDARINV086U18RD sub-NDARINV08R2PTT1 sub-NDARINV0B7UGM1D sub-NDARINV0JWTGKAD sub-NDARINV0R5220TJ sub-NDARINV0U317B9P sub-NDARINV0X45NBYM sub-NDARINV0Y8YJ2UR sub-NDARINV0YVKYMJX sub-NDARINV14C1N3KZ sub-NDARINV1AKMLL9A sub-NDARINV1EGW0J5N sub-NDARINV1F5JEP46 
sub-NDARINV1JXDFV9Z sub-NDARINV1KXK7MDF sub-NDARINV20GGC8X5 sub-NDARINV21EBHGF9 sub-NDARINV249JM0NY sub-NDARINV29P0F670 sub-NDARINV2F51HZAP sub-NDARINV2FV9YY14 sub-NDARINV2P5R504F sub-NDARINV2RD4CZ7T sub-NDARINV2T1G705T sub-NDARINV2WYPW02N sub-NDARINV2Z2HJFG1 sub-NDARINV32KLKRZC sub-NDARINV330HNJEV sub-NDARINV3AGKBZAW sub-NDARINV3CW8YR0W sub-NDARINV3HEHP1P4 sub-NDARINV45BG08PF sub-NDARINV47UWAATW sub-NDARINV47W6DHJC sub-NDARINV486UUF4D sub-NDARINV4PYRUPX6 sub-NDARINV512RTCH1\n", + " sub-NDARINV51ZUKMA3 sub-NDARINV52AE3MBX sub-NDARINV52XG9LJ3 sub-NDARINV592319RL sub-NDARINV5C9WJ4B3 sub-NDARINV5G3DP835 sub-NDARINV5N40CFL4 sub-NDARINV5V2AYDE7 sub-NDARINV60TVEFTU sub-NDARINV665J58DF sub-NDARINV68WXH24G sub-NDARINV69FLDML4 sub-NDARINV6B3HFDAY sub-NDARINV6KEGPJ6J sub-NDARINV6X9U8P56 sub-NDARINV71136NP6 sub-NDARINV7FG8NTPP sub-NDARINV7G30XLFU sub-NDARINV7J9K5U1L sub-NDARINV7WT9690H sub-NDARINV8451EHXW sub-NDARINV85UUUHN0 sub-NDARINV87L30DBB sub-NDARINV87WRYJ7F sub-NDARINV88V8C4GJ sub-NDARINV8CBT9W65 sub-NDARINV8J5VU553 sub-NDARINV8L7MBY64 sub-NDARINV8MGGJ4FD sub-NDARINV8T2NUL38 sub-NDARINV956C4ZGG sub-NDARINV95YMR61C sub-NDARINV974A9111 sub-NDARINV9EYHTYJT sub-NDARINV9HB34LLU sub-NDARINV9L4P2RJJ sub-NDARINV9R0Z0T45 sub-NDARINV9UV6PK7Y sub-NDARINVA3VX7WRD\n", + " sub-NDARINVA4KJXLYH sub-NDARINVA4MKK2EJ sub-NDARINVA68WCRUL sub-NDARINVBBH4GW2D sub-NDARINVBG7FNXZB\n", + " \"\"\".split()" ] }, { "cell_type": "code", "execution_count": null, - "id": "20", + "id": "19", "metadata": {}, "outputs": [], "source": [ - "ncpus = 10\n", + "rerun_swarm_kwargs = swarm_kwargs.copy()\n", + "rerun_swarm_kwargs.update(\n", + " test=True,\n", + " run_tag=\"fs-8-rerun-leej3\",\n", + " g_mem=50,\n", + " time=\"60:00:00\",\n", + ")\n", "rerun_cmds = []\n", "\n", "for _, row in scans_to_run.iterrows():\n", " if row.subject not in failed_subs:\n", " continue\n", - " fs_outdir = fsres_dir / pipeline_dirname / row.subject / row.session\n", - " shutil.rmtree(fs_outdir.parent, ignore_errors=True)\n", + " fs_outdir = (\n", + " fsres_dir / rerun_swarm_kwargs[\"pipeline_dirname\"] / row.subject / row.session\n", + " )\n", + " shutil.rmtree(fs_outdir, ignore_errors=True)\n", " fs_outdir.mkdir(exist_ok=True, parents=True)\n", "\n", " cmd = f\"\"\"\\\n", @@ -366,7 +293,7 @@ "source $FREESURFER_HOME/SetUpFreeSurfer.sh; \\\n", "recon-all \\\n", " -subjid {row.subject} \\\n", - " -openmp {ncpus} \\\n", + " -openmp {rerun_swarm_kwargs[\"ncpus\"]} \\\n", " -all \\\n", " -i {row.t1_path} \\\n", " -T2 {row.t2_path}; \\\n", @@ -382,13 +309,24 @@ { "cell_type": "code", "execution_count": null, - "id": "21", + "id": "20", "metadata": {}, "outputs": [], "source": [ "rerun_cmds[:1]" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "21", + "metadata": {}, + "outputs": [], + "source": [ + "swarm_exec = build_fs_swarm_cmd(rerun_cmds, **rerun_swarm_kwargs)\n", + "swarm_exec" + ] + }, { "cell_type": "code", "execution_count": null, @@ -396,25 +334,426 @@ "metadata": {}, "outputs": [], "source": [ - "def get_fs_swarm_cmd(cmds, test=True):\n", - " run_name = (\n", - " f\"abcd_recon_{pipeline_version}-fs-8-rerun-leej3{'-test' if test else ''}\"\n", + "job_id = !{swarm_exec}\n", + "print(job_id)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "23", + "metadata": {}, + "outputs": [], + "source": [ + "!scancel 56475212" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "24", + "metadata": {}, + "outputs": [], + "source": [ + "full_swarm_exec = build_fs_swarm_cmd(\n", + " rerun_cmds, 
**{**rerun_swarm_kwargs, \"test\": False}\n", + ")\n", + "full_swarm_exec" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "25", + "metadata": {}, + "outputs": [], + "source": [ + "job_id = !{full_swarm_exec}\n", + "print(job_id)" ] }, + { + "cell_type": "markdown", + "id": "26", + "metadata": {}, + "source": [ + "## Results\n" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "27", + "metadata": {}, + "outputs": [], + "source": [ + "!tree {fsres_dir}/{rerun_swarm_kwargs[\"pipeline_dirname\"]}/sub-NDARINV00HEV6HB" ] }, + { + "cell_type": "markdown", + "id": "28", + "metadata": {}, + "source": [ + "# SynthSR exploration" ] }, + { + "cell_type": "markdown", + "id": "29", + "metadata": {}, + "source": [ + "### Control" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "30", + "metadata": {}, + "outputs": [], + "source": [ + "synthsr_control_swarm_kwargs = swarm_kwargs_default.copy()\n", + "synthsr_control_swarm_kwargs.update(\n", + " test=True,\n", + " run_tag=\"fs-8-leej3-synthsr-control\",\n", + " pipeline_dirname=\"synthsr-control\",\n", + ")\n", + "synth_df = dp.get_synthsr_for_pipeline(fsres_dir, \"recon-all-8\")\n", + "\n", + "cmds_synthsr_control = []\n", + "\n", + "for _, row in synth_df.iterrows():\n", + " fs_outdir = (\n", + " fsres_dir\n", + " / synthsr_control_swarm_kwargs[\"pipeline_dirname\"]\n", + " / row.subject\n", + " / row.session\n", + " )\n", + " shutil.rmtree(fs_outdir, ignore_errors=True)\n", + " fs_outdir.mkdir(exist_ok=True, parents=True)\n", + "\n", + " cmd = f\"\"\"\\\n", + "export SUBJECTS_DIR=/lscratch/$SLURM_JOBID/fs_out; \\\n", + "mkdir $SUBJECTS_DIR; \\\n", + "source $FREESURFER_HOME/SetUpFreeSurfer.sh; \\\n", + "recon-all \\\n", + " -subjid {row.subject} \\\n", + " -openmp {synthsr_control_swarm_kwargs[\"ncpus\"]} \\\n", + " -all \\\n", + " -noskullstrip \\\n", + " -i {row.t1_path}; \\\n", + "rsync -a /lscratch/$SLURM_JOBID/fs_out {fs_outdir}; \\\n", + "chown -R :ABCD_MBDU {fs_outdir}; \\\n", + "chmod -R g+rw {fs_outdir}; \\\n", + "rm -rf /lscratch/$SLURM_JOBID/* \\\n", + "\"\"\"\n", + "\n", + " cmds_synthsr_control.append(cmd)" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "31", + "metadata": {}, + "outputs": [], + "source": [ + "fsres_dir" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "32", + "metadata": {}, + "outputs": [], + "source": [ + "cmds_synthsr_control[:1]" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "33", + "metadata": {}, + "outputs": [], + "source": [ + "swarm_exec = build_fs_swarm_cmd(cmds_synthsr_control, **synthsr_control_swarm_kwargs)\n", + "swarm_exec" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "34", + "metadata": {}, + "outputs": [], + "source": [ + "job_id = !{swarm_exec}\n", + "print(job_id)" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "35", + "metadata": {}, + "outputs": [], + "source": [ + "!scancel {job_id[0]}" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "36", + "metadata": {}, + "outputs": [], + "source": [ + "full_swarm_exec = build_fs_swarm_cmd(\n", + " cmds_synthsr_control, **{**synthsr_control_swarm_kwargs, \"test\": False}\n", + ")\n", + "full_swarm_exec" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "37", + "metadata": {}, + "outputs": [], + "source": [ + "job_id = !{full_swarm_exec}\n", + "print(job_id)" ] }, + { + "cell_type": "markdown", + "id": "38", + "metadata": {},
+ "source": [ + "### recon-any (not run in entirety)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39", + "metadata": {}, + "outputs": [], + "source": [ + "synthsr_any_swarm_kwargs = swarm_kwargs_default.copy()\n", + "synthsr_any_swarm_kwargs.update(\n", + " test=True,\n", + " run_tag=\"fs-8-leej3-synthsr-any\",\n", + " pipeline_dirname=\"synthsr-any\",\n", + " freesurfer_module_version=\"freesurfer/8.1.0\",\n", + ")\n", + "synth_df = dp.get_synthsr_for_pipeline(fsres_dir, \"recon-any_t1_resample-5\")\n", + "\n", + "cmds_synthsr_any = []\n", + "\n", + "for _, row in synth_df.iterrows():\n", + " fs_outdir = (\n", + " fsres_dir\n", + " / synthsr_any_swarm_kwargs[\"pipeline_dirname\"]\n", + " / row.subject\n", + " / row.session\n", " )\n", + " shutil.rmtree(fs_outdir, ignore_errors=True)\n", + " fs_outdir.mkdir(exist_ok=True, parents=True)\n", "\n", - " swarm_cmd_file = swarm_cmd_dir / run_name\n", - " swarm_cmd_file.write_text(\"\\n\".join(cmds[:2] if test else cmds))\n", - " swarm_exec = f\"\"\"swarm -f {swarm_cmd_file.resolve()} -g 50 -t {ncpus} --gres=lscratch:400 --module {freesurfer_module_version},fsl --time 60:00:00 --logdir {swarm_log_dir.resolve()} --job-name {run_name} --partition norm\"\"\"\n", - " return swarm_exec\n", + " cmd = f\"\"\"\\\n", + "export SUBJECTS_DIR=/lscratch/$SLURM_JOBID/fs_out; \\\n", + "mkdir $SUBJECTS_DIR; \\\n", + "source $FREESURFER_HOME/SetUpFreeSurfer.sh; \\\n", + "recon-all \\\n", + " -subjid {row.subject} \\\n", + " -openmp {synthsr_any_swarm_kwargs[\"ncpus\"]} \\\n", + " -all \\\n", + " -noskullstrip \\\n", + " -i {row.t1_path} \\\n", + " -xmask {Path(row.t1_path).with_name(\"brainmask.mgz\")}; \\\n", + "rsync -a /lscratch/$SLURM_JOBID/fs_out {fs_outdir}; \\\n", + "chown -R :ABCD_MBDU {fs_outdir}; \\\n", + "chmod -R g+rw {fs_outdir}; \\\n", + "rm -rf /lscratch/$SLURM_JOBID/* \\\n", + "\"\"\"\n", "\n", + " cmds_synthsr_any.append(cmd)\n", + "len(cmds_synthsr_any)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "40", + "metadata": {}, + "outputs": [], + "source": [ + "len(cmds_synthsr_any)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "41", + "metadata": {}, + "outputs": [], + "source": [ + "cmds_synthsr_any[:1]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "42", + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "43", + "metadata": {}, + "outputs": [], + "source": [ + "swarm_exec = build_fs_swarm_cmd(cmds_synthsr_any, **synthsr_any_swarm_kwargs)\n", + "swarm_exec" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "44", + "metadata": {}, + "outputs": [], + "source": [ + "job_id = !{swarm_exec}\n", + "print(job_id)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "45", + "metadata": {}, + "outputs": [], + "source": [ + "!scancel {job_id}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "46", + "metadata": {}, + "outputs": [], + "source": [ + "full_swarm_exec = build_fs_swarm_cmd(\n", + " cmds_synthsr_any, **{**synthsr_any_swarm_kwargs, \"test\": False}\n", + ")\n", + "full_swarm_exec" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "47", + "metadata": {}, + "outputs": [], + "source": [ + "job_id = !{full_swarm_exec}\n", + "print(job_id)" + ] + }, + { + "cell_type": "markdown", + "id": "48", + "metadata": {}, + "source": [ + "### recon-all clinical" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "id": "49", + "metadata": {}, + "outputs": [], + "source": [ + "synthsr_all_clinical_swarm_kwargs = swarm_kwargs_default.copy()\n", + "synthsr_all_clinical_swarm_kwargs.update(\n", + " test=True,\n", + " run_tag=\"fs-8-leej3-synthsr-rac-with-skullstrip\",\n", + " pipeline_dirname=\"synthsr-rac-with-skullstrip\",\n", + " freesurfer_module_version=\"freesurfer/8.1.0\",\n", + ")\n", + "synth_df = dp.get_synthsr_for_pipeline(\n", + " fsres_dir,\n", + " \"recon-all_clinical_t1_resample-5\",\n", + " glob_pattern=\"*/*/fs_out/*/mri/synthSR.raw.mgz\",\n", + ")\n", + "\n", + "cmds_synthsr_rac = []\n", + "\n", + "for _, row in synth_df.iterrows():\n", + " fs_outdir = (\n", + " fsres_dir\n", + " / synthsr_all_clinical_swarm_kwargs[\"pipeline_dirname\"]\n", + " / row.subject\n", + " / row.session\n", + " )\n", + " shutil.rmtree(fs_outdir, ignore_errors=True)\n", + " fs_outdir.mkdir(exist_ok=True, parents=True)\n", + "\n", + " cmd = f\"\"\"\\\n", + "export SUBJECTS_DIR=/lscratch/$SLURM_JOBID/fs_out; \\\n", + "mkdir $SUBJECTS_DIR; \\\n", + "source $FREESURFER_HOME/SetUpFreeSurfer.sh; \\\n", + "recon-all \\\n", + " -subjid {row.subject} \\\n", + " -openmp {synthsr_all_clinical_swarm_kwargs[\"ncpus\"]} \\\n", + " -all \\\n", + " -i {row.t1_path}; \\\n", + "rsync -a /lscratch/$SLURM_JOBID/fs_out {fs_outdir}; \\\n", + "chown -R :ABCD_MBDU {fs_outdir}; \\\n", + "chmod -R g+rw {fs_outdir}; \\\n", + "rm -rf /lscratch/$SLURM_JOBID/* \\\n", + "\"\"\"\n", "\n", - "swarm_exec = get_fs_swarm_cmd(rerun_cmds)\n", + " cmds_synthsr_rac.append(cmd)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "50", + "metadata": {}, + "outputs": [], + "source": [ + "cmds_synthsr_rac[:1]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51", + "metadata": {}, + "outputs": [], + "source": [ + "swarm_exec = build_fs_swarm_cmd(cmds_synthsr_rac, **synthsr_all_clinical_swarm_kwargs)\n", "swarm_exec" ] }, { "cell_type": "code", "execution_count": null, - "id": "23", + "id": "52", "metadata": {}, "outputs": [], "source": [ @@ -425,28 +764,30 @@ { "cell_type": "code", "execution_count": null, - "id": "24", + "id": "53", "metadata": {}, "outputs": [], "source": [ - "!scancel 56475212" + "!scancel {job_id[0]}" ] }, { "cell_type": "code", "execution_count": null, - "id": "25", + "id": "54", "metadata": {}, "outputs": [], "source": [ - "full_swarm_exec = get_fs_swarm_cmd(rerun_cmds, False)\n", + "full_swarm_exec = build_fs_swarm_cmd(\n", + " cmds_synthsr_rac, **{**synthsr_all_clinical_swarm_kwargs, \"test\": False}\n", + ")\n", "full_swarm_exec" ] }, { "cell_type": "code", "execution_count": null, - "id": "26", + "id": "55", "metadata": {}, "outputs": [], "source": [ @@ -456,45 +797,117 @@ }, { "cell_type": "markdown", - "id": "27", + "id": "56", "metadata": {}, "source": [ - "## Results\n" + "### recon-all 8.1 run" ] }, { "cell_type": "code", "execution_count": null, - "id": "28", + "id": "57", "metadata": {}, "outputs": [], "source": [ - "!tree {fsres_dir}/{pipeline_dirname}/sub-NDARINV00HEV6HB" + "swarm_kwargs_8_1 = swarm_kwargs_default.copy()\n", + "swarm_kwargs_8_1.update(\n", + " test=True,\n", + " run_tag=\"fs-8_1-leej3\",\n", + " pipeline_dirname=\"recon-all-8_1\",\n", + " freesurfer_module_version=\"freesurfer/8.1.0\",\n", + ")\n", + "cmds_8_1 = []\n", + "\n", + "for _, row in scans_to_run.iterrows():\n", + " fs_outdir = (\n", + " fsres_dir / swarm_kwargs_8_1[\"pipeline_dirname\"] / row.subject / row.session\n", + " )\n", + " 
shutil.rmtree(fs_outdir, ignore_errors=True)\n", + " fs_outdir.mkdir(exist_ok=True, parents=True)\n", + "\n", + " cmd = f\"\"\"\\\n", + "export SUBJECTS_DIR=/lscratch/$SLURM_JOBID/fs_out; \\\n", + "mkdir $SUBJECTS_DIR; \\\n", + "source $FREESURFER_HOME/SetUpFreeSurfer.sh; \\\n", + "recon-all \\\n", + " -subjid {row.subject} \\\n", + " -openmp {swarm_kwargs_8_1[\"ncpus\"]} \\\n", + " -all \\\n", + " -i {row.t1_path} \\\n", + " -T2 {row.t2_path}; \\\n", + "rsync -a /lscratch/$SLURM_JOBID/fs_out {fs_outdir}; \\\n", + "chown -R :ABCD_MBDU {fs_outdir}; \\\n", + "chmod -R g+rw {fs_outdir}; \\\n", + "rm -rf /lscratch/$SLURM_JOBID/* \\\n", + "\"\"\"\n", + "\n", + " cmds_8_1.append(cmd)" ] }, { "cell_type": "code", "execution_count": null, - "id": "29", + "id": "58", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "cmds_8_1[:1]" + ] }, { "cell_type": "code", "execution_count": null, - "id": "30", + "id": "59", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "swarm_exec = build_fs_swarm_cmd(cmds_8_1, **swarm_kwargs_8_1)\n", + "swarm_exec" + ] }, { "cell_type": "code", "execution_count": null, - "id": "31", + "id": "60", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "job_id = !{swarm_exec}\n", + "print(job_id)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "61", + "metadata": {}, + "outputs": [], + "source": [ + "!scancel 1212306" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "62", + "metadata": {}, + "outputs": [], + "source": [ + "full_swarm_exec = build_fs_swarm_cmd(cmds_8_1, **{**swarm_kwargs_8_1, \"test\": False})\n", + "full_swarm_exec" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "63", + "metadata": {}, + "outputs": [], + "source": [ + "job_id = !{full_swarm_exec}\n", + "print(job_id)" + ] } ], "metadata": { @@ -513,7 +926,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.18" + "version": "3.13.2" } }, "nbformat": 4, diff --git a/pyproject.toml b/pyproject.toml index 48961a8..dcb6dd7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,13 +3,15 @@ name = "dl_morphometrics_biases" version = "0.1.0" description = "Deep learning morphometrics bias analysis tools and utilities" readme = "README.md" -requires-python = ">=3.9" +requires-python = ">=3.13" authors = [ {name = "John Lee", email = "johnleenimh@gmail.com"}, ] dependencies = [ "pandas>=1.5.0", "numpy>=1.24.0", + "jupyterlab>=4.4.6", + "pre-commit>=4.3.0", ] [project.optional-dependencies] @@ -47,7 +49,7 @@ exclude = [ [tool.ruff] line-length = 88 -target-version = "py39" +target-version = "py313" [tool.ruff.lint] select = [