diff --git a/.github/workflows/download_dataset.yml b/.github/workflows/download_dataset.yml new file mode 100644 index 00000000..cb58884e --- /dev/null +++ b/.github/workflows/download_dataset.yml @@ -0,0 +1,75 @@ +name: Download dataset + +on: + push: + branches: + - jn/4813-debug-sct-installer-repo-issues + # release: + # types: [published] + +jobs: + run_dataset_download: + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Install Python 3 + uses: actions/setup-python@v4 + with: + # python-version: 3.9.16 + python-version: 3.8.18 + + # https://github.com/spinalcordtoolbox/spinalcordtoolbox/blob/master/.ci.sh + - name: Install SCT + run: | + cd .. # avoid long path name by not installing in repo subfolder + # source python/etc/profile.d/conda.sh >> $GITHUB_ENV + git clone https://github.com/spinalcordtoolbox/spinalcordtoolbox.git + cd spinalcordtoolbox + ./.ci.sh -i + # NB: install_sct edits ~/.bashrc, but those environment changes don't get passed to subsequent steps in GH Actions. + # So, we filter through the .bashrc and pass the values to $GITHUB_ENV and $GITHUB_PATH. 
+ # Relevant documentation: https://docs.github.com/en/actions/reference/workflow-commands-for-github-actions#environment-files + cat ~/.bashrc | grep "export SCT_DIR" | cut -d " " -f 2 >> $GITHUB_ENV + cat ~/.bashrc | grep "export PATH" | grep -o "/.*" | cut -d ':' -f 1 >> $GITHUB_PATH + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + # pip install -r requirements.txt + + # - name: Set up Python + # uses: actions/setup-python@v4 + # with: + # python-version: '3.9.16' + + # - name: Install dependencies + # run: | + # conda install -y git-annex + + # - name: Ensure git-annex is accessible via git + # run: | + # echo "$(conda info --base)/bin" >> $GITHUB_PATH + # export PATH="$(conda info --base)/bin:$PATH" + # which git-annex + # git-annex version + + - name: Configure Git identity + run: | + git config --global user.email "actions@github.com" + git config --global user.name "GitHub Actions" + + # - name: Run dataset setup script + # shell: bash -l {0} # Ensures the Conda environment is properly loaded + # run: | + # chmod +x scripts/compute_morphometrics_spine_generic.sh + # source scripts/compute_morphometrics_spine_generic.sh + + # - name: Run model script + # run: ./scripts/run_model.sh + + - name: Setup tmate session + if: ${{ failure() }} + uses: mxschmitt/action-tmate@v3 diff --git a/csa_generate_figures/analyse_csa_across_releases.py b/csa_generate_figures/analyse_csa_across_releases.py new file mode 100644 index 00000000..84478f3f --- /dev/null +++ b/csa_generate_figures/analyse_csa_across_releases.py @@ -0,0 +1,206 @@ +""" +Generate violin plots from CSV data across different model releases/versions. 
def fetch_participant_id(filename_path):
    """
    Get participant_id from the input BIDS-compatible filename or file path

    The ID is the first ``sub-<label>`` token that is followed by an underscore or a slash. The search is
    done on the whole input string, so both ``sub-001/anat/...`` and ``sub-001_T1w.nii.gz`` give ``sub-001``.

    :param filename_path: BIDS-compatible filename or file path
    :return: participant_id: subject ID (e.g., sub-001); empty string if no subject ID is found
    """
    # REGEX explanation
    # sub-   - literal prefix of the subject ID
    # (.*?)  - subject label: any characters, as few as possible
    # [_/]   - the label ends at the first underscore or slash (not part of the returned ID)
    participant = re.search(r'sub-(.*?)[_/]', filename_path)

    return f"sub-{participant.group(1)}" if participant else ""
def generate_figure_csa(file_path, data, method=None):
    """
    Generate a violinplot of the per-participant mean CSA for each MRI contrast and save it as a PNG file.

    :param file_path: path to the folder where the figure is saved
    :param data: dataframe with the columns 'Method', 'Contrast', 'Participant' and 'MEAN(area)'
    :param method: value of the 'Method' column to plot (e.g. a model version). If None, the rows of all
                   methods are pooled together (previously this case raised a NameError).
    """

    if method is not None:
        # keep only the rows of the given method
        data = data[data['Method'] == method]
        # define title for the plot
        title = f'Method: {method}; CSA across MRI contrasts'
        # define file name for the plot
        save_fname = f"csa__model_{method}.png"
    else:
        title = 'CSA across MRI contrasts'
        save_fname = "csa__all_methods.png"

    # get the mean CSA of each participant for each contrast
    df = data.groupby(['Contrast', 'Participant'])['MEAN(area)'].agg(['mean']).reset_index()

    # plot the mean CSA for each contrast in a violinplot
    fig = plt.figure(figsize=(12, 6))
    sns.violinplot(x='Contrast', y='mean', data=df, order=CONTRAST_ORDER, palette="Set2")
    # overlay swarm plot on the violin plot to show individual data points
    sns.swarmplot(x='Contrast', y='mean', data=df, color='k', order=CONTRAST_ORDER, size=3)
    plt.xlabel(None)
    plt.ylabel('CSA [mm^2]', fontweight='bold', fontsize=FONTSIZE)
    plt.title(title)
    # Add horizontal dashed grid
    plt.grid(axis='y', alpha=0.5, linestyle='dashed')
    # set the y-axis limits
    plt.ylim(40, 110)
    plt.yticks(range(40, 110, 5))

    plt.xticks(range(len(CONTRAST_ORDER)), CONTRAST_ORDER, fontsize=FONTSIZE)

    # Get the upper y-axis limit (used to position the text above the violins)
    _, ymax = plt.gca().get_ylim()

    # Compute the mean +- std across participants for each contrast and place it above the corresponding violin
    for position, contrast in enumerate(CONTRAST_ORDER):
        values = df.loc[df['Contrast'] == contrast, 'mean']
        plt.text(position, ymax-5, f'{values.mean():.2f} +- {values.std():.2f}',
                 ha='center', va='bottom', color='k', fontsize=FONTSIZE)

    # Save the figure in 300 DPI as a PNG file
    save_path = os.path.join(file_path, save_fname)
    plt.savefig(save_path, dpi=300)
    # release the figure; this function is called once per model version
    plt.close(fig)
    print(f'Figure saved to {save_path}')
def main(args):
    """
    Load the per-release CSA CSV files found in the folder `args.i` and generate the figures.

    Only the 5 most recent releases are plotted. The model version is parsed from the file name, which must
    be of the format `<prefix>__model_<version>.csv` (e.g. `csa_c2c3__model_v2.0.csv`).

    :param args: parsed arguments; `args.i` is the folder containing the CSV files (figures are saved there too)
    :raises FileNotFoundError: if the folder contains no CSV file
    """

    def natural_sort_key(path):
        # re.split with a capturing group alternates text / digits, so odd positions are always numbers;
        # comparing them as integers makes v10.0 sort after v9.0 (a plain string sort puts it before v2.0)
        chunks = re.split(r'(\d+)', os.path.basename(path))
        return [int(chunk) if position % 2 else chunk for position, chunk in enumerate(chunks)]

    # sort the list of CSV files from the oldest to the most recent release
    csvs_list = sorted(glob.glob(os.path.join(args.i, "*.csv")), key=natural_sort_key)
    if not csvs_list:
        raise FileNotFoundError(f'No CSV files found in {args.i}')

    # assuming 50 models released, we don't want to plot 50 violin plots,
    # hence only take the most recent 5 models
    csvs_list = csvs_list[-5:]

    # of the format: csa_c2c3__model_v2.0.csv, csa_c2c3__model_v3.0.csv
    # NOTE: splitext removes the '.csv' suffix only; str.strip('.csv') removes any of the characters
    # '.', 'c', 's', 'v' from both ends and can therefore eat into the version string
    models_to_compare = [os.path.splitext(os.path.basename(f))[0].split('__')[1] for f in csvs_list]
    model_versions = [model.split('_')[1] for model in models_to_compare]

    # define order of the violin plots
    hue_order = ['softseg_bin'] + model_versions

    # merge the CSV files for each model release (also works for a single file)
    data_avg_csa = pd.concat([pd.read_csv(f) for f in csvs_list], ignore_index=True)

    # Apply the function to extract participant ID
    data_avg_csa['Participant'] = data_avg_csa['Filename'].apply(fetch_participant_id)

    # Apply the function to extract method and the corresponding analysis details
    data_avg_csa['Contrast'], data_avg_csa['Method'] = zip(
        *data_avg_csa['Filename'].apply(extract_contrast_and_details, model_versions=model_versions))

    # Generate violinplot showing STD across participants for each method
    generate_figure_std_csa(data_avg_csa, file_path=args.i, across="Method", hue_order=hue_order)

    # Generate violinplot showing the CSA for each contrast for a given method
    for method in model_versions:
        generate_figure_csa(file_path=args.i, data=data_avg_csa, method=method)
parser.add_argument('-i', type=str, required=True, + help='Path to the folder containing CSV files C2-C3 CSA for all releases.' + 'Output will be saved in the same folder') + + args = parser.parse_args() + main(args) \ No newline at end of file diff --git a/scripts/compute_csa.sh b/scripts/compute_csa.sh new file mode 100644 index 00000000..da3cc970 --- /dev/null +++ b/scripts/compute_csa.sh @@ -0,0 +1,249 @@ +#!/bin/bash +# +# Compute the CSA of a lifelong learning contrast-agnostic segmentation model on the spine-generic test dataset. +# To be used within the `compute_morphometrics_spine_generic.sh` script to run the CSA analysis on the latest version +# of the contrast-agnostic model +# +# Author: Naga Karthik +# + +# Uncomment for full verbose +set -x + +# Immediately exit if error +set -e -o pipefail + +# Exit if user presses CTRL+C (Linux) or CMD+C (OSX) +trap "echo Caught Keyboard Interrupt within script. Exiting now.; exit" INT + +# Print retrieved variables from the sct_run_batch script to the log (to allow easier debug) +echo "Retrieved variables from from the caller sct_run_batch:" +echo "PATH_DATA: ${PATH_DATA}" +echo "PATH_DATA_PROCESSED: ${PATH_DATA_PROCESSED}" +echo "PATH_RESULTS: ${PATH_RESULTS}" +echo "PATH_LOG: ${PATH_LOG}" +echo "PATH_QC: ${PATH_QC}" + +# Variable passed by `sct_run_batch -script-args` +SUBJECT=$1 +MODEL_VERSION=$2 +# CUDA_DEVICE=$2 +PATH_NNUNET_SCRIPT=$3 # path to the nnUNet contrast-agnostic run_inference_single_subject.py +PATH_NNUNET_MODEL=$4 + +echo "SUBJECT: ${SUBJECT}" +echo "USING CUDA DEVICE ID: ${CUDA_DEVICE}" +echo "PATH_NNUNET_SCRIPT: ${PATH_NNUNET_SCRIPT}" +echo "PATH_NNUNET_MODEL: ${PATH_NNUNET_MODEL}" + +# ------------------------------------------------------------------------------ +# CONVENIENCE FUNCTIONS +# ------------------------------------------------------------------------------ + +# Check if manual label already exists. If it does, copy it locally. 
# Get vertebral levels by projecting the manual disc labels on the spinal cord segmentation.
# NOTE: manual disc labels should go from C1-C2 to C7-T1.
# Arguments:
#   $1 - file prefix (no extension); reads ${1}_softseg_bin.nii.gz and ${1}_discs.nii.gz
#   $2 - accepted for compatibility with existing callers, not used
# Globals set: FILESEG, FILELABEL
# Output: ${FILESEG}_labeled.nii.gz
label_vertebrae(){
  local file="$1"

  # Update global variable with segmentation file name
  FILESEG="${file}_softseg_bin"
  FILELABEL="${file}_discs"

  # Get vertebral levels by projecting discs on the spinal cord segmentation
  # Note: we are using sct_label_utils over sct_label_vertebrae here to avoid straightening (which takes a lot of time)
  # File names are quoted so that paths containing spaces or glob characters are passed as a single argument
  sct_label_utils -i "${FILESEG}.nii.gz" -disc "${FILELABEL}.nii.gz" -o "${FILESEG}_labeled.nii.gz"
}
# Copy GT soft binarized segmentation (located under derivatives/labels_softseg_bin)
# Arguments:
#   $1 - file prefix (no extension), e.g. ${SUBJECT}_T1w
#   $2 - data type sub-folder of the subject (anat or dwi)
# Globals read: PATH_DATA, SUBJECT, PATH_LOG
# Globals set: FILESEG (path to the GT segmentation under derivatives/)
# Output: ${1}_softseg_bin.nii.gz in the current directory; exits with status 1 if the GT file is missing
copy_gt_softseg_bin(){
  local file="$1"
  local type="$2"
  # Construct file name to GT segmentation located under derivatives/labels_softseg_bin
  # NOTE: the naming conventions are in the revised BIDS format
  FILESEG="${PATH_DATA}/derivatives/labels_softseg_bin/${SUBJECT}/${type}/${file}_desc-softseg_label-SC_seg.nii.gz"
  echo ""
  echo "Looking for manual segmentation: $FILESEG"
  if [[ -e "$FILESEG" ]]; then
    echo "Found! Copying ..."
    # quoted so that a path with spaces is not split into several rsync arguments
    rsync -avzh "$FILESEG" "${file}_softseg_bin.nii.gz"
  else
    echo "File ${FILESEG} does not exist" >> "${PATH_LOG}/missing_files.log"
    echo "ERROR: Manual Segmentation ${FILESEG} does not exist. Exiting."
    exit 1
  fi
}
all contrasts have thes same FoV (C2-C3 is what all contrasts have in common) + sct_process_segmentation -i ${FILESEG}.nii.gz -vert 2:3 -vertfile ${file_gt_vert_label}_labeled.nii.gz -o $PATH_RESULTS/csa_c2c3.csv -append 1 + +} + + +# ------------------------------------------------------------------------------ +# SCRIPT STARTS HERE +# ------------------------------------------------------------------------------ +# get starting time: +start=`date +%s` + +# Display useful info for the log, such as SCT version, RAM and CPU cores available +sct_check_dependencies -short + +# Go to folder where data will be copied and processed +cd $PATH_DATA_PROCESSED + +# Copy source images +# Note: we use '/./' in order to include the sub-folder 'ses-0X' +# We do a substitution '/' --> '_' in case there is a subfolder 'ses-0X/' +rsync -Ravzh ${PATH_DATA}/./${SUBJECT}/anat/* . +# copy DWI data +rsync -Ravzh ${PATH_DATA}/./${SUBJECT}/dwi/* . + +# ------------------------------------------------------------------------------ +# DEFINE CONTRASTS +# ------------------------------------------------------------------------------ +contrasts="T1w T2w T2star flip-1_mt-on_MTS flip-2_mt-off_MTS rec-average_dwi" +# contrasts="rec-average_dwi" + +# Loop across contrasts +for contrast in ${contrasts}; do + + if [[ $contrast == "rec-average_dwi" ]]; then + type="dwi" + else + type="anat" + fi + + # go to the folder where the data is + cd ${PATH_DATA_PROCESSED}/${SUBJECT}/${type} + + # Get file name + file="${SUBJECT}_${contrast}" + + # Check if file exists + if [[ ! -e ${file}.nii.gz ]]; then + echo "File ${file}.nii.gz does not exist" >> ${PATH_LOG}/missing_files.log + echo "ERROR: File ${file}.nii.gz does not exist. Exiting." 
+ exit 1 + fi + + # rename contrasts + if [[ $contrast == "flip-1_mt-on_MTS" ]]; then + contrast="MTon" + elif [[ $contrast == "flip-2_mt-off_MTS" ]]; then + contrast="MToff" + elif [[ $contrast == "rec-average_dwi" ]]; then + contrast="DWI" + fi + + # ------------------------------------------------------------------------------ + # COMPUTE CSA OF GT MASKS + # ------------------------------------------------------------------------------ + # # Copy GT spinal cord segmentation + # copy_gt_seg "${file}" "${type}" + + # Copy soft GT spinal cord segmentation + copy_gt_softseg_bin "${file}" "${type}" + + # Copy GT disc labels segmentation + copy_gt_disc_labels "${file}" "${type}" "${contrast}" + + # Label vertebral levels in the native resolution + label_vertebrae ${file} 't2' + + # Rename the softseg_bin GT with the shorter contrast name + FILEBIN="${file%%_*}_${contrast}_softseg_bin" + if [[ "${file}_softseg_bin.nii.gz" != "${FILEBIN}.nii.gz" ]]; then + mv ${file}_softseg_bin.nii.gz ${FILEBIN}.nii.gz + fi + + # Generate QC report + sct_qc -i ${file}.nii.gz -s ${FILEBIN}.nii.gz -p sct_deepseg_sc -qc ${PATH_QC} -qc-subject ${SUBJECT} + + # Compute CSA averaged across all slices C2-C3 vertebral levels for plotting the STD across contrasts + # NOTE: this is per-level because not all contrasts have thes same FoV (C2-C3 is what all contrasts have in common) + sct_process_segmentation -i ${FILEBIN}.nii.gz -vert 2:3 -vertfile ${file}_softseg_bin_labeled.nii.gz -o $PATH_RESULTS/csa_c2c3.csv -append 1 + + # ------------------------------------------------------------------------------ + # COMPUTE CSA OF AUTOMATIC PREDICTIONS + # ------------------------------------------------------------------------------ + # Segment SC (i.e. 
run inference) and compute CSA + # model_name=$(basename ${PATH_NNUNET_MODEL}) + # CUDA_VISIBLE_DEVICES=${CUDA_DEVICE} segment_sc_nnUNet ${file} "${file}_softseg_bin" ${model_name} ${contrast} '3d_fullres' + segment_sc ${file} "${file}_softseg_bin" ${MODEL_VERSION} ${contrast} + +done + +# ------------------------------------------------------------------------------ +# End +# ------------------------------------------------------------------------------ + +# Display results (to easily compare integrity across SCT versions) +end=`date +%s` +runtime=$((end-start)) +echo +echo "~~~" +echo "SCT version: `sct_version`" +echo "Ran on: `uname -nsr`" +echo "Duration: $(($runtime / 3600))hrs $((($runtime / 60) % 60))min $(($runtime % 60))sec" +echo "~~~" diff --git a/scripts/compute_morphometrics_spine_generic.sh b/scripts/compute_morphometrics_spine_generic.sh new file mode 100644 index 00000000..48025221 --- /dev/null +++ b/scripts/compute_morphometrics_spine_generic.sh @@ -0,0 +1,87 @@ +#!/bin/bash +# Post-training script for computing morphometrics on spine-generic dataset using lifelong learning contrast-agnostic +# spinal cord segmentation model. +# This script is one of the steps in the automated GitHub actions for computing spinal cord morphometrics (CSA) +# +# This script performs the following tasks: +# 1. Downloads the model using sct_deepseg -install seg_sc_contrast_agnostic -custom-url +# 2. Runs a batch analysis (sct_run_batch) to compute the spinal cord cross-sectional area (CSA) on a +# mini-batch of test subjects obtained as input. +# 3. 
Moves the logs/ and results/ to the an output folder +# +# Usage: +# bash compute_morphometrics_spine_generic.sh + +# Exit immediately if a command exits with a non-zero status +set -e + +# ============================== +# DEFINE GLOBAL VARIABLES +# ============================== + +# get current working directory before doing anything else +CWD=${PWD} + +# get list of subjects as input +TEST_SUBJECTS=($1) +echo "Running analysis on ${TEST_SUBJECTS[@]}" + +# Path to the output folder; the data, model, results, etc. will be stored in this folder +PATH_OUTPUT="csa-analysis" + +# Path to the folder where the model exists, will be copied to the output folder PATH_OUTPUT +# for testing purposes, replace the PATH_MODEL with the path to the model downloaded from the latest release +MODEL_URL=$2 +echo "Using model at: ${MODEL_URL}" +# MODEL_URL="https://github.com/sct-pipeline/contrast-agnostic-softseg-spinalcord/releases/download/v3.1/model_contrast_agnostic_20250123.zip" + +# Get model version +MODEL_VERSION=$(echo "$MODEL_URL" | sed -E 's#.*/download/([^/]+)/.*#\1#') + +# Number of parallel processes to run (choose a smaller number as inference is run only on 1 gpu) +NUM_WORKERS=4 + +# Exit if user presses CTRL+C (Linux) or CMD+C (OSX) +trap "echo Caught Keyboard Interrupt within script. Exiting now.; exit" INT + +echo "==============================" +echo "Downloading model from URL ${MODEL_URL} ..." +echo "==============================" + +sct_deepseg -install seg_sc_contrast_agnostic -custom-url ${MODEL_URL} + +echo "Model download complete." + +# ============================== +# RUN BATCH ANALYSIS +# NOTE: this section piggybacks on the sct_run_batch argument provided by SCT +# Instead of providing a config file for batch processing script, we will provide the input arguments below +# ============================== + +echo "==============================" +echo "Running batch analysis ..." 
#!/bin/bash
#
# Download the frozen test split of the spine-generic `data-multi-subject` dataset using git-annex.
# The dataset is cloned into ./data-multi-subject (relative to the directory the script is run from),
# unless that folder already exists.
#
# Usage:
#     bash download_spine_generic_test_data.sh

# Exit immediately if a command exits with a non-zero status
set -e

# ==============================
# DEFINE GLOBAL VARIABLES
# ==============================

# get current working directory before doing anything else
CWD=${PWD}

# Path to the output folder; the data, model, results, etc. will be stored in this folder
# adding an extra folder interferes with the caching process of GHA
# PATH_OUTPUT="csa-analysis"

# Exit if user presses CTRL+C (Linux) or CMD+C (OSX)
trap "echo Caught Keyboard Interrupt within script. Exiting now.; exit" INT

# ==============================
# DOWNLOAD DATA
# ==============================

echo "=============================="
echo "Downloading test data ..."
echo "=============================="

# Clone the dataset and initialize a git annex repository
url_dataset="https://github.com/spine-generic/data-multi-subject"
tag="r20250310"
clone_folder="data-multi-subject"

# Ref: https://stackoverflow.com/questions/36498981/shell-dont-fail-git-clone-if-folder-already-exists/36499031#36499031
if [ ! -d "$clone_folder" ] ; then
    echo "Cloning dataset ..."
    git clone --branch "$tag" "$url_dataset" "$clone_folder"
else
    echo "Dataset already exists, skipping cloning."
fi

# change directory
# NOTE: this must also happen when cloning was skipped, otherwise the git-annex commands below
# run in the directory the script was started from instead of in the dataset
cd "$clone_folder"

# Initialize an empty git-annex repository
git annex init

# # Extract test subjects and store them in an array
# readarray -t TEST_SUBJECTS < <(python -c 'import yaml, sys;
# test_subjects = yaml.safe_load(open(sys.argv[1]))["test"];
# for subject in test_subjects: print(subject)' "${PATH_REPO}/scripts/spine_generic_test_split_for_csa_drift_monitoring.yaml")

TEST_SUBJECTS=(
    "sub-barcelona06" "sub-beijingPrisma01" "sub-beijingPrisma02" "sub-brnoCeitec04"
    "sub-brnoUhb01" "sub-cardiff03" "sub-cmrra02" "sub-cmrra05"
    "sub-cmrrb01" "sub-cmrrb03" "sub-cmrrb05" "sub-fslAchieva04"
    "sub-fslPrisma01" "sub-fslPrisma02" "sub-fslPrisma04" "sub-fslPrisma05"
    "sub-geneva03" "sub-juntendo750w01" "sub-juntendo750w02" "sub-juntendo750w03"
    "sub-juntendo750w06" "sub-milan03" "sub-mniS03" "sub-mountSinai01"
    "sub-nottwil01" "sub-nottwil04" "sub-nwu01" "sub-oxfordFmrib06"
    "sub-oxfordFmrib09" "sub-oxfordFmrib10" "sub-oxfordOhba01" "sub-oxfordOhba05"
    "sub-pavia02" "sub-pavia05" "sub-queensland01" "sub-sherbrooke02"
    "sub-sherbrooke05" "sub-sherbrooke06" "sub-stanford04" "sub-strasbourg04"
    "sub-tehranS03" "sub-tokyoIngenia05" "sub-ubc06" "sub-ucl02"
    "sub-unf04" "sub-vuiisAchieva04" "sub-vuiisIngenia03" "sub-vuiisIngenia04" "sub-vuiisIngenia05"
)

# Download test split using git-annex
for subject in "${TEST_SUBJECTS[@]}"; do
    echo "Downloading: $subject"
    # download images
    git annex get "${subject}"
    # download all kinds of labels (located under derivatives/)
    # NOTE: `find -exec ... +` does not run the command when nothing matches. With an unquoted
    # `$(find ...)`, an empty result would call `git annex get` without any path, which is not
    # restricted to this subject. The subshell keeps the working directory of the loop unchanged.
    (
        cd derivatives
        find . -name "${subject}" -exec git annex get {} +
    )
done

echo "Dataset download complete."

# Return to the directory the script was started from
cd "${CWD}"  # this will go to the root of the repository
def merge_csvs(args):
    """
    Merge the per-batch `csa_c2c3.csv` files into a single CSV file.

    The input files are read from `<path_results>/csa-results-batch-<idx>/results/csa_c2c3.csv` for
    idx = 1..16 and the merged table is written to `<path_output>/csa_c2c3_merged.csv`. The output folder is
    created if it does not exist.

    :param args: parsed arguments with the attributes `path_results` and `path_output`
    :raises FileNotFoundError: if one or more of the expected per-batch CSV files are missing
    """

    # 49 test subjects for 15 batches with 3 subjects each, 16th batch with 4 subjects
    max_batches = 16

    # Get list of CSV files
    list_csv = [
        os.path.join(args.path_results, f'csa-results-batch-{idx}', 'results', 'csa_c2c3.csv')
        for idx in range(1, max_batches + 1)
    ]

    # Report all missing batches at once instead of failing on the first unreadable file
    missing = [f for f in list_csv if not os.path.isfile(f)]
    if missing:
        raise FileNotFoundError('Missing per-batch CSV file(s):\n' + '\n'.join(missing))

    # Merge CSV files
    df = pd.concat([pd.read_csv(f) for f in list_csv], ignore_index=True)

    # to_csv does not create missing parent folders
    os.makedirs(args.path_output, exist_ok=True)
    path_save = os.path.join(args.path_output, 'csa_c2c3_merged.csv')
    print(f'Saving merged CSV file to: {path_save}')
    df.to_csv(path_save, index=False)
b/scripts/spine_generic_test_split_for_csa_drift_monitoring.yaml @@ -0,0 +1,68 @@ +# This file contains the frozen test split for the spine-generic `data-multi-subject` dataset on which the +# morphometric variability (CSA) was computed on using while developing the contrast-agnostic model. +# To monitor the drift in the CSA variability as the model evolves, we freeze the test split and use it to +# evaluate the model at different stages of its evolution (i.e. when new datasets and pathologies are added). +# +# How this split was obtained: +# ```python +# from sklearn.model_selection import train_test_split +# import numpy as np +# +# np.random.seed(seed=50) +# test_ratio = 0.2 +# train_subjects, test_subjects = train_test_split(all_subjects, test_size=test_ratio) +# ``` +# +# NOTE: `all_subjects` is a list of all the subjects in the spine-generic dataset +# https://github.com/spine-generic/data-multi-subject + +test: +- sub-barcelona06 +- sub-beijingPrisma01 +- sub-beijingPrisma02 +- sub-brnoCeitec04 +- sub-brnoUhb01 +- sub-cardiff03 +- sub-cmrra02 +- sub-cmrra05 +- sub-cmrrb01 +- sub-cmrrb03 +- sub-cmrrb05 +- sub-fslAchieva04 +- sub-fslPrisma01 +- sub-fslPrisma02 +- sub-fslPrisma04 +- sub-fslPrisma05 +- sub-geneva03 +- sub-juntendo750w01 +- sub-juntendo750w02 +- sub-juntendo750w03 +- sub-juntendo750w06 +- sub-milan03 +- sub-mniS03 +- sub-mountSinai01 +- sub-nottwil01 +- sub-nottwil04 +- sub-nwu01 +- sub-oxfordFmrib06 +- sub-oxfordFmrib09 +- sub-oxfordFmrib10 +- sub-oxfordOhba01 +- sub-oxfordOhba05 +- sub-pavia02 +- sub-pavia05 +- sub-queensland01 +- sub-sherbrooke02 +- sub-sherbrooke05 +- sub-sherbrooke06 +- sub-stanford04 +- sub-strasbourg04 +- sub-tehranS03 +- sub-tokyoIngenia05 +- sub-ubc06 +- sub-ucl02 +- sub-unf04 +- sub-vuiisAchieva04 +- sub-vuiisIngenia03 +- sub-vuiisIngenia04 +- sub-vuiisIngenia05