Temporal analysis of MS lesions #40
data_analysis.py (new file, 231 added lines):

"""
This Python file performs data analysis on the canproco dataset.

Args:
    -d, --dataset-path: path to the dataset
    -o, --output-path: path to the output directory

Returns:
    - a json file (subjects_info.json) and a text file (results.txt) containing the results of the analysis

Example:
    python data_analysis.py -d /path/to/dataset -o /path/to/output

To do:
    *

Pierre-Louis Benveniste
"""

import argparse
import os
import json


def get_parser():
    """
    This function parses the arguments given to the script.

    Args:
        None

    Returns:
        parser: parser containing the arguments
    """
    parser = argparse.ArgumentParser(description='Perform data analysis on the canproco dataset')
    parser.add_argument('-d', '--dataset-path', type=str, required=True, help='path to the dataset')
    parser.add_argument('-o', '--output-path', type=str, required=True, help='path to the output directory')

    return parser


def main():
    """
    This function performs the data analysis.

    Args:
        None

    Returns:
        None
    """
    # Get the parser
    parser = get_parser()
    args = parser.parse_args()

    # Get the arguments
    dataset_path = args.dataset_path
    output_path = args.output_path

    # Time points considered in the analysis
    time_points = ['ses-M0', 'ses-M12']

    # Get the list of subjects
    subjects = os.listdir(dataset_path)
    subjects = [subject for subject in subjects if 'sub-' in subject]
    print("Total number of subjects: {}".format(len(subjects)))

    # Initialize lists
    subjects_all_time_points = []
    subjects_no_M0 = []
    subjects_no_M12 = []
    subjects_PSIR = []
    subjects_STIR = []
    subjects_PSIR_STIR = []
    subjects_no_PSIR_no_STIR = []
    subjects_no_PSIR_no_STIR_once = []

    subjects_info = {}

    # Iterate over the subjects
    for subject in subjects:
        # Find which time points exist for this subject
        print("Subject: {}".format(subject))
        sub_time_points = []
        for time_point in time_points:
            # If the time point exists for the subject
            if os.path.exists(os.path.join(dataset_path, subject, time_point)):
                sub_time_points.append(time_point)
        print("Time points available: {}".format(sub_time_points))
        # Initialize the contrast_subject dictionary
        contrast_subject = {}
        for time_point in sub_time_points:
            contrast_subject[time_point] = []
        # Iterate over the time points
        for time_point in sub_time_points:
            print("Time point: {}".format(time_point))
            # Get the MRI files for the subject
            subject_path = os.path.join(dataset_path, subject, time_point, 'anat')
            subject_files = os.listdir(subject_path)
            subject_files = [file for file in subject_files if '.nii.gz' in file]
            # Get the contrast for each file (assumes names of the form sub-<ID>_ses-<timepoint>_<contrast>.nii.gz)
            for file in subject_files:
                contrast_subject[time_point].append(file.split('_')[2].split('.')[0])
            # Print the contrasts available for the subject
            print("Contrasts available: {}".format(sorted(contrast_subject[time_point])))
        print(contrast_subject)
        print("-----------------------------------")
        subject_info = {'subject': subject, 'time_points': sub_time_points, 'contrasts': contrast_subject}
        subjects_info[subject] = subject_info

        # Subjects with all the time points
        if len(sub_time_points) == len(time_points):
            subjects_all_time_points.append(subject)
        # Subjects with no M0
        if 'ses-M0' not in sub_time_points:
            subjects_no_M0.append(subject)
        # Subjects with no M12
        if 'ses-M12' not in sub_time_points:
            subjects_no_M12.append(subject)

        # Subjects with PSIR at every time point that they have
        psir_present = True
        for time_point in sub_time_points:
            if 'PSIR' not in contrast_subject[time_point]:
                psir_present = False
        if psir_present:
            subjects_PSIR.append(subject)
        # Subjects with STIR at every time point that they have
        stir_present = True
        for time_point in sub_time_points:
            if 'STIR' not in contrast_subject[time_point]:
                stir_present = False
        if stir_present:
            subjects_STIR.append(subject)
        # Subjects with PSIR and STIR at every time point that they have
        psir_stir_present = True
        for time_point in sub_time_points:
            if 'PSIR' not in contrast_subject[time_point] or 'STIR' not in contrast_subject[time_point]:
                psir_stir_present = False
        if psir_stir_present:
            subjects_PSIR_STIR.append(subject)
        # Subjects with no PSIR and no STIR at every time point that they have
        psir_stir_not_present = True
        for time_point in sub_time_points:
            if 'PSIR' in contrast_subject[time_point] or 'STIR' in contrast_subject[time_point]:
                psir_stir_not_present = False
        if psir_stir_not_present:
            subjects_no_PSIR_no_STIR.append(subject)
        # Subjects with no PSIR and no STIR at least once
        psir_stir_not_present_once = False
        for time_point in sub_time_points:
            if 'PSIR' not in contrast_subject[time_point] and 'STIR' not in contrast_subject[time_point]:
                psir_stir_not_present_once = True
        if psir_stir_not_present_once:
            subjects_no_PSIR_no_STIR_once.append(subject)

    # Print the results
    print("Total number of subjects: {}".format(len(subjects)))
    print("Number of subjects with all time points: {}".format(len(subjects_all_time_points)))
    print("Number of subjects with no M0: {}".format(len(subjects_no_M0)))
    print("Number of subjects with no M12: {}".format(len(subjects_no_M12)))
    print("Number of subjects with PSIR at every time point they have: {}".format(len(subjects_PSIR)))
    print("Number of subjects with STIR at every time point they have: {}".format(len(subjects_STIR)))
    print("Number of subjects with PSIR and STIR at every time point they have: {}".format(len(subjects_PSIR_STIR)))
    print("Number of subjects with no PSIR and no STIR at every time point they have: {}".format(len(subjects_no_PSIR_no_STIR)))
    print("Number of subjects with no PSIR and no STIR at least once: {}".format(len(subjects_no_PSIR_no_STIR_once)))
    print("-----------------------------------")

    # Save the subjects_info dictionary in a json file
    with open(os.path.join(output_path, 'subjects_info.json'), 'w') as fp:
        json.dump(subjects_info, fp, indent=4)

    # Write a txt file with the results
    with open(os.path.join(output_path, 'results.txt'), 'w') as f:
        f.write("Total number of subjects: {}\n".format(len(subjects)))
        f.write("Number of subjects with all time points: {}\n".format(len(subjects_all_time_points)))
        f.write("Number of subjects with no M0: {}\n".format(len(subjects_no_M0)))
        f.write("Number of subjects with no M12: {}\n".format(len(subjects_no_M12)))
        f.write("Number of subjects with PSIR at every time point they have: {}\n".format(len(subjects_PSIR)))
        f.write("Number of subjects with STIR at every time point they have: {}\n".format(len(subjects_STIR)))
        f.write("Number of subjects with PSIR and STIR at every time point they have: {}\n".format(len(subjects_PSIR_STIR)))
        f.write("Number of subjects with no PSIR and no STIR at every time point they have: {}\n".format(len(subjects_no_PSIR_no_STIR)))
        f.write("Number of subjects with no PSIR and no STIR at least once: {}\n".format(len(subjects_no_PSIR_no_STIR_once)))
        f.write("-----------------------------------\n")
        f.write("Subjects with all time points:\n")
        for subject in subjects_all_time_points:
            f.write("{}\n".format(subject))
        f.write("-----------------------------------\n")
        f.write("Subjects with no M0:\n")
        for subject in subjects_no_M0:
            f.write("{}\n".format(subject))
        f.write("-----------------------------------\n")
        f.write("Subjects with no M12:\n")
        for subject in subjects_no_M12:
            f.write("{}\n".format(subject))
        f.write("-----------------------------------\n")
        f.write("Subjects with PSIR at every time point they have:\n")
        for subject in subjects_PSIR:
            f.write("{}\n".format(subject))
        f.write("-----------------------------------\n")
        f.write("Subjects with STIR at every time point they have:\n")
        for subject in subjects_STIR:
            f.write("{}\n".format(subject))
        f.write("-----------------------------------\n")
        f.write("Subjects with PSIR and STIR at every time point they have:\n")
        for subject in subjects_PSIR_STIR:
            f.write("{}\n".format(subject))
        f.write("-----------------------------------\n")
        f.write("Subjects with no PSIR and no STIR at every time point they have:\n")
        for subject in subjects_no_PSIR_no_STIR:
            f.write("{}\n".format(subject))
        f.write("-----------------------------------\n")
        f.write("Subjects with no PSIR and no STIR at least once:\n")
        for subject in subjects_no_PSIR_no_STIR_once:
            f.write("{}\n".format(subject))
        f.write("-----------------------------------\n")

    return None


if __name__ == '__main__':
    main()
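The contrast extraction in data_analysis.py relies on the anatomical file names following the pattern sub-<ID>_ses-<timepoint>_<contrast>.nii.gz, since the contrast is read from the third underscore-separated token. A minimal sketch of that parsing assumption, using hypothetical file names rather than real dataset entries:

# Minimal sketch of the filename convention the script assumes (hypothetical file names).
example_files = [
    "sub-001_ses-M0_PSIR.nii.gz",
    "sub-001_ses-M12_STIR.nii.gz",
]
for name in example_files:
    # The contrast is the third underscore-separated token, with the extension stripped.
    contrast = name.split('_')[2].split('.')[0]
    print(name, "->", contrast)  # prints PSIR, then STIR

Note that file names carrying extra BIDS entities (for example an acq- tag) would make this indexing return the wrong token, so it may be worth a check in the script.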
lesion_seg_analysis.py (new file, 152 added lines):

Review comments on this file:
- apparently this file is not used anymore-- so, consider removing. Also, consider discussing why not using
- Also see #38 (comment)

"""
In this file we analyse the results of the segmentation of MS lesions on the spinal cord.
The objective is to output the number of lesions segmented on the spinal cord for each patient.
We also want to output a segmentation file in which each lesion has a different label (color).

Usage:
    python lesion_seg_analysis.py -i <input_image> -seg <segmentation> -o <output_folder>

Args:
    -i/--input_image: path to the image
    -seg/--segmentation: path to the segmentation
    -o/--output_folder: path to the output folder
    --plot: whether to plot the results or not

Returns:
    None (writes clustered_seg.nii.gz and lesion_analysis.txt to the output folder)

Todo:
    *

Pierre-Louis Benveniste
"""

import os
import argparse
from pathlib import Path
import nibabel as nib
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import DBSCAN


def get_parser():
    """
    This function parses the arguments given to the script.

    Args:
        None

    Returns:
        parser: parser containing the arguments
    """
    parser = argparse.ArgumentParser(description='Analyse the results of the segmentation of the MS lesion on the spinal cord.')
    parser.add_argument('-i', '--input_image', required=True,
                        help='Path to the image')
    parser.add_argument('-seg', '--segmentation', required=True,
                        help='Path to the segmentation')
    parser.add_argument('-o', '--output_folder', required=True,
                        help='Path to the output folder')
    parser.add_argument('--plot', action='store_true',
                        help='Whether to plot the results or not')
    return parser


def main():
    """
    This function is the main function of the script.

    Args:
        None

    Returns:
        None
    """
    # Get the parser
    parser = get_parser()
    args = parser.parse_args()

    # Load image
    img = nib.load(args.input_image)
    img_data = img.get_fdata()

    # Load segmentation
    seg = nib.load(args.segmentation)
    seg_data = seg.get_fdata()

    # Perform clustering on the entire segmentation volume
    ## First we collect the coordinates of the non-zero (lesion) voxels
    X = []
    Y = []
    Z = []
    for x in range(seg_data.shape[0]):
        for y in range(seg_data.shape[1]):
            for z in range(seg_data.shape[2]):
                if seg_data[x, y, z] != 0:
                    X.append(x)
                    Y.append(y)
                    Z.append(z)
    coords = np.stack((X, Y, Z), axis=1)

    ## Then we perform the clustering using DBSCAN
    ## (eps and min_samples are expressed in voxel index units: voxels closer than eps=10 voxels are grouped,
    ## and points with fewer than min_samples=5 neighbours within eps are treated as noise and labelled -1)
    db = DBSCAN(eps=10, min_samples=5).fit(coords)
    core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
    core_samples_mask[db.core_sample_indices_] = True
    labels = db.labels_
    # Number of clusters in labels, ignoring noise if present.
    n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)

    print('Estimated number of clusters: %d' % n_clusters_)

    plot = args.plot
    if plot:
        # Build the color dictionary (one random color per cluster, black for noise)
        colors = {}
        for i in range(n_clusters_):
            colors[i] = np.random.rand(3,)
        colors[-1] = [0, 0, 0]

        # Plot the clusters for each slice
        fig, axs = plt.subplots(ncols=seg_data.shape[2], nrows=1, figsize=(20, 3))
        for i in range(seg_data.shape[2]):
            slice_coords = coords[coords[:, 2] == i]
            slice_labels = labels[coords[:, 2] == i]
            axs[i].scatter(slice_coords[:, 0], slice_coords[:, 1], color=[colors[x] for x in slice_labels])
            # plt.savefig(os.path.join(args.output_folder, f'cluster_slice_{i}.png'))
            # plt.close()
            axs[i].set_xlim(0, seg_data.shape[0])
            axs[i].set_ylim(0, seg_data.shape[1])
        plt.show()

    # Save the results in a nifti file
    # First we create the new segmentation: lesions are numbered from 1, noise voxels fall back to 0 (background)
    new_seg_data = np.zeros_like(seg_data)
    for i in range(len(labels)):
        new_seg_data[coords[i, 0], coords[i, 1], coords[i, 2]] = labels[i] + 1
    # Then we save it
    new_seg = nib.Nifti1Image(new_seg_data, seg.affine, seg.header)
    nib.save(new_seg, os.path.join(args.output_folder, 'clustered_seg.nii.gz'))

    # For each lesion, compute its volume and its center
    ## First we get the volume of one voxel
    voxel_volume = np.prod(seg.header.get_zooms())
    ## Then we get the volume of each lesion and its center
    lesion_volumes = []
    lesion_centers = []
    for i in range(n_clusters_):
        lesion_volumes.append(len(labels[labels == i]) * voxel_volume)
        lesion_centers.append(np.mean(coords[labels == i], axis=0))

    # Save the results in a text file
    with open(os.path.join(args.output_folder, 'lesion_analysis.txt'), 'w') as f:
        f.write(f'Number of lesions: {n_clusters_}\n')
        f.write('Volume and center of each lesion (mm3):\n')
        for i in range(n_clusters_):
            f.write(f'Lesion {i+1} : volume: {round(lesion_volumes[i], 2)} mm3, center: {lesion_centers[i]}\n')
    return None


if __name__ == '__main__':
    main()
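As a usage illustration, the clustered segmentation written by the script can be inspected after the fact. A minimal sketch, assuming the script has already been run and that /path/to/output is a placeholder for the folder passed to -o (clustered_seg.nii.gz is the file name used above):

# Minimal sketch: inspect the clustered segmentation produced by lesion_seg_analysis.py.
# '/path/to/output' is a hypothetical placeholder for the output folder passed to -o.
import os
import nibabel as nib
import numpy as np

clustered = nib.load(os.path.join('/path/to/output', 'clustered_seg.nii.gz'))
data = clustered.get_fdata()

# Label 0 is background (including DBSCAN noise); labels 1..N are the individual lesions.
labels, counts = np.unique(data[data > 0], return_counts=True)
voxel_volume = np.prod(clustered.header.get_zooms())
for label, count in zip(labels.astype(int), counts):
    print(f"Lesion {label}: {count} voxels, {count * voxel_volume:.2f} mm3")

The per-lesion volumes printed here should match the values reported in lesion_analysis.txt, which gives a quick consistency check on the saved output.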