-
Notifications
You must be signed in to change notification settings - Fork 1
Temporal analysis of MS lesions #40
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 17 commits
8739639
c495a26
cfe61fd
24bcb0c
919477f
926078d
8b8e1a9
65138b0
fc33182
db106a4
511c168
6e70cb1
86a77d9
51374e1
7188349
c63ba5f
b195245
c9ab813
11a1f5b
ee3e865
af03d7e
4b7d247
6406dac
accea6e
f18e846
cf8ca41
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,231 @@ | ||
""" | ||
This python files performs data analysis on the canproco dataset. | ||
|
||
Args: | ||
-d, --dataset-path: path to the dataset | ||
-o, --output-path: path to the output directory | ||
|
||
Returns: | ||
- a csv file containing the results of the analysis | ||
|
||
Example: | ||
python data_analysis.py -d /path/to/dataset -o /path/to/output -c STIR,PSIR | ||
plbenveniste marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
||
To do: | ||
* | ||
|
||
Pierre-Louis Benveniste | ||
""" | ||
|
||
|
||
import argparse | ||
import os | ||
import json | ||
|
||
|
||
def get_parser(): | ||
""" | ||
This function parses the arguments given to the script. | ||
|
||
Args: | ||
None | ||
|
||
Returns: | ||
parser: parser containing the arguments | ||
""" | ||
|
||
parser = argparse.ArgumentParser(description='Perform data analysis on the canproco dataset') | ||
parser.add_argument('-d', '--dataset-path', type=str, required=True, help='path to the dataset') | ||
parser.add_argument('-o', '--output-path', type=str, required=True, help='path to the output directory') | ||
|
||
return parser | ||
|
||
|
||
def main(): | ||
""" | ||
This function performs the data analysis. | ||
|
||
Args: | ||
None | ||
|
||
Returns: | ||
None | ||
""" | ||
# Get the parser | ||
parser = get_parser() | ||
args = parser.parse_args() | ||
|
||
# Get the arguments | ||
dataset_path = args.dataset_path | ||
output_path = args.output_path | ||
|
||
#time points (for now we only work on M0) | ||
time_points = ['ses-M0', 'ses-M12'] | ||
|
||
# Get the list of subjects | ||
subjects = os.listdir(dataset_path) | ||
subjects = [subject for subject in subjects if 'sub-' in subject] | ||
print("Total number of subjects: {}".format(len(subjects))) | ||
|
||
#initialize lists | ||
subjects_all_time_points = [] | ||
subjects_no_M0 = [] | ||
subjects_no_M12 = [] | ||
subjects_PSIR = [] | ||
subjects_STIR = [] | ||
subjects_PSIR_STIR = [] | ||
subjects_no_PSIR_no_STIR = [] | ||
subjects_no_PSIR_no_STIR_once = [] | ||
|
||
subjects_info = {} | ||
|
||
#Iterate over the subjects | ||
for subject in subjects: | ||
#iterate over the time_points | ||
print("Subject: {}".format(subject)) | ||
sub_time_points = [] | ||
for time_point in time_points: | ||
#if time_point exists for the subject | ||
if os.path.exists(os.path.join(dataset_path, subject, time_point)): | ||
sub_time_points.append(time_point) | ||
print("Time points available: {}".format(sub_time_points)) | ||
#initialize the contrast_subject dictionary | ||
contrast_subject = {} | ||
for time_point in sub_time_points: | ||
contrast_subject[time_point] = [] | ||
#iterate over the time points | ||
for time_point in sub_time_points: | ||
print("Time point: {}".format(time_point)) | ||
#get the MRI files for the subject | ||
subject_path = os.path.join(dataset_path, subject, time_point, 'anat') | ||
subject_files = os.listdir(subject_path) | ||
subject_files = [file for file in subject_files if '.nii.gz' in file] | ||
#we get the contrast for each file | ||
for file in subject_files: | ||
contrast_subject[time_point].append(file.split('_')[2].split('.')[0]) | ||
#we print the contrasts available for the subject | ||
print("Contrasts available: {}".format(sorted(contrast_subject[time_point]))) | ||
print(contrast_subject) | ||
print("-----------------------------------") | ||
subject_info = {'subject': subject, 'time_points': sub_time_points, 'contrasts': contrast_subject} | ||
subjects_info[subject] = subject_info | ||
|
||
#we get the list of the subjects with all the time points | ||
if len(sub_time_points) == len(time_points): | ||
subjects_all_time_points.append(subject) | ||
#we get the list of the subjects with no M0 | ||
if 'ses-M0' not in sub_time_points: | ||
subjects_no_M0.append(subject) | ||
#we get the list of the subjects with no M12 | ||
if 'ses-M12' not in sub_time_points: | ||
subjects_no_M12.append(subject) | ||
|
||
#we get the list of the subjects with PSIR at every time point that they have | ||
psir_present = True | ||
for time_point in sub_time_points: | ||
if 'PSIR' not in contrast_subject[time_point]: | ||
psir_present = False | ||
if psir_present: | ||
subjects_PSIR.append(subject) | ||
#we get the list of the subjects with STIR at every time point that they have | ||
stir_present = True | ||
for time_point in sub_time_points: | ||
if 'STIR' not in contrast_subject[time_point]: | ||
stir_present = False | ||
if stir_present: | ||
subjects_STIR.append(subject) | ||
#we get the list of the subjects with PSIR and STIR at every time point that they have | ||
psir_stir_present = True | ||
for time_point in sub_time_points: | ||
if 'PSIR' not in contrast_subject[time_point] or 'STIR' not in contrast_subject[time_point]: | ||
psir_stir_present = False | ||
if psir_stir_present: | ||
subjects_PSIR_STIR.append(subject) | ||
#we get the list of the subjects with no PSIR and no STIR at every time point that they have | ||
psir_stir_not_present = True | ||
for time_point in sub_time_points: | ||
if 'PSIR' in contrast_subject[time_point] or 'STIR' in contrast_subject[time_point]: | ||
psir_stir_not_present = False | ||
if psir_stir_not_present: | ||
subjects_no_PSIR_no_STIR.append(subject) | ||
#we get the list of the subjects with no PSIR and no STIR at least once | ||
psir_stir_not_present_once = False | ||
for time_point in sub_time_points: | ||
if 'PSIR' not in contrast_subject[time_point] and 'STIR' not in contrast_subject[time_point]: | ||
psir_stir_not_present_once = True | ||
if psir_stir_not_present_once: | ||
subjects_no_PSIR_no_STIR_once.append(subject) | ||
|
||
#we print the results | ||
print("Total number of subjects: {}".format(len(subjects))) | ||
print("Number of subjects with all time points: {}".format(len(subjects_all_time_points))) | ||
print("Number of subjects with no M0: {}".format(len(subjects_no_M0))) | ||
print("Number of subjects with no M12: {}".format(len(subjects_no_M12))) | ||
print("Number of subjects with PSIR at every time point they have: {}".format(len(subjects_PSIR))) | ||
print("Number of subjects with STIR at every time point they have: {}".format(len(subjects_STIR))) | ||
print("Number of subjects with PSIR and STIR at every time point they have: {}".format(len(subjects_PSIR_STIR))) | ||
print("Number of subjects with no PSIR and no STIR at every time point they have: {}".format(len(subjects_no_PSIR_no_STIR))) | ||
print("Number of subjects with no PSIR and no STIR at least once: {}".format(len(subjects_no_PSIR_no_STIR_once))) | ||
print("-----------------------------------") | ||
|
||
#we save the subjects_info dictionary in a json file | ||
with open(os.path.join(output_path, 'subjects_info.json'), 'w') as fp: | ||
json.dump(subjects_info, fp, indent=4) | ||
|
||
#we write a txt file with the results | ||
with open(os.path.join(output_path, 'results.txt'), 'w') as f: | ||
f.write("Total number of subjects: {}\n".format(len(subjects))) | ||
f.write("Number of subjects with all time points: {}\n".format(len(subjects_all_time_points))) | ||
f.write("Number of subjects with no M0: {}\n".format(len(subjects_no_M0))) | ||
f.write("Number of subjects with no M12: {}\n".format(len(subjects_no_M12))) | ||
f.write("Number of subjects with PSIR at every time point they have: {}\n".format(len(subjects_PSIR))) | ||
f.write("Number of subjects with STIR at every time point they have: {}\n".format(len(subjects_STIR))) | ||
f.write("Number of subjects with PSIR and STIR at every time point they have: {}\n".format(len(subjects_PSIR_STIR))) | ||
f.write("Number of subjects with no PSIR and no STIR at every time point they have: {}\n".format(len(subjects_no_PSIR_no_STIR))) | ||
f.write("Number of subjects with no PSIR and no STIR at least once: {}\n".format(len(subjects_no_PSIR_no_STIR_once))) | ||
f.write("-----------------------------------\n") | ||
f.write("Subjects with all time points:\n") | ||
for subject in subjects_all_time_points: | ||
f.write("{}\n".format(subject)) | ||
f.write("-----------------------------------\n") | ||
f.write("Subjects with no M0:\n") | ||
for subject in subjects_no_M0: | ||
f.write("{}\n".format(subject)) | ||
f.write("-----------------------------------\n") | ||
f.write("Subjects with no M12:\n") | ||
for subject in subjects_no_M12: | ||
f.write("{}\n".format(subject)) | ||
f.write("-----------------------------------\n") | ||
f.write("Subjects with PSIR at every time point they have:\n") | ||
for subject in subjects_PSIR: | ||
f.write("{}\n".format(subject)) | ||
f.write("-----------------------------------\n") | ||
f.write("Subjects with STIR at every time point they have:\n") | ||
for subject in subjects_STIR: | ||
f.write("{}\n".format(subject)) | ||
f.write("-----------------------------------\n") | ||
f.write("Subjects with PSIR and STIR at every time point they have:\n") | ||
for subject in subjects_PSIR_STIR: | ||
f.write("{}\n".format(subject)) | ||
f.write("-----------------------------------\n") | ||
f.write("Subjects with no PSIR and no STIR at every time point they have:\n") | ||
for subject in subjects_no_PSIR_no_STIR: | ||
f.write("{}\n".format(subject)) | ||
f.write("-----------------------------------\n") | ||
f.write("Subjects with no PSIR and no STIR at least once:\n") | ||
for subject in subjects_no_PSIR_no_STIR_once: | ||
f.write("{}\n".format(subject)) | ||
f.write("-----------------------------------\n") | ||
|
||
return None | ||
|
||
|
||
if __name__ == '__main__': | ||
|
||
main() | ||
|
||
|
||
|
||
|
||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,152 @@ | ||
""" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. apparently this file is not used anymore-- so, consider removing. Also, consider discussing why not using There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also see #38 (comment) |
||
In this file we analyse the results of the segmentation of the MS lesion on the spinal cord. | ||
The objective is to output the number of lesions segmented on the spinal cord for each patient. | ||
Also, we want to output the segmentation file of the lesions in different color | ||
|
||
Usage: | ||
python lesion_seg_analysis.py -i <input_image> -seg <segmentation> -o <output_folder> | ||
|
||
Args: | ||
-i/--input_image: path to the image | ||
-seg/--segmentation: path to the segmentation | ||
-o/--output_folder: path to the output folder | ||
--plot: whether to plot the results or not | ||
|
||
Returns: | ||
None | ||
|
||
Todo: | ||
* | ||
|
||
Pierre-Louis Benveniste | ||
""" | ||
|
||
import os | ||
import argparse | ||
from pathlib import Path | ||
import nibabel as nib | ||
import numpy as np | ||
import matplotlib.pyplot as plt | ||
from sklearn.cluster import DBSCAN | ||
|
||
|
||
def get_parser(): | ||
""" | ||
This function parses the arguments given to the script. | ||
|
||
Args: | ||
None | ||
|
||
Returns: | ||
parser: parser containing the arguments | ||
""" | ||
|
||
parser = argparse.ArgumentParser(description='Analyse the results of the segmentation of the MS lesion on the spinal cord.') | ||
parser.add_argument('-i', '--input_image', required=True, | ||
help='Path to the image') | ||
parser.add_argument('-seg', '--segmentation', required=True, | ||
help='Path to the segmentation') | ||
parser.add_argument('-o', '--output_folder', required=True, | ||
help='Path to the output folder') | ||
parser.add_argument('--plot', action='store_true', | ||
help='Whether to plot the results or not') | ||
return parser | ||
|
||
plbenveniste marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
def main(): | ||
""" | ||
This function is the main function of the script. | ||
|
||
Args: | ||
None | ||
|
||
Returns: | ||
None | ||
""" | ||
#get the parser | ||
parser = get_parser() | ||
args = parser.parse_args() | ||
|
||
#load image | ||
img = nib.load(args.input_image) | ||
img_data = img.get_fdata() | ||
|
||
#load segmentation | ||
seg = nib.load(args.segmentation) | ||
seg_data = seg.get_fdata() | ||
|
||
#perform clustering on the entire segmentation volume | ||
##first we modify the seg data | ||
X = [] | ||
Y = [] | ||
Z = [] | ||
for x in range(seg_data.shape[0]): | ||
for y in range(seg_data.shape[1]): | ||
for z in range(seg_data.shape[2]): | ||
if seg_data[x,y,z] != 0: | ||
X.append(x) | ||
Y.append(y) | ||
Z.append(z) | ||
coords = np.stack((X,Y,Z), axis=1) | ||
|
||
##then we perform the clustering using DBSCAN | ||
db = DBSCAN(eps=10, min_samples=5).fit(coords) | ||
core_samples_mask = np.zeros_like(db.labels_, dtype=bool) | ||
core_samples_mask[db.core_sample_indices_] = True | ||
labels = db.labels_ | ||
# Number of clusters in labels, ignoring noise if present. | ||
n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0) | ||
|
||
print('Estimated number of clusters: %d' % n_clusters_) | ||
|
||
plot = args.plot | ||
if plot: | ||
#build color dictionnary | ||
colors = {} | ||
for i in range(n_clusters_): | ||
colors[i] = np.random.rand(3,) | ||
colors[-1] = [0,0,0] | ||
|
||
#plot the clusters for each slice | ||
fig, axs = plt.subplots(ncols=seg_data.shape[2], nrows=1,figsize=(20,3)) | ||
for i in range(seg_data.shape[2]): | ||
slice_coords = coords[coords[:,2] == i] | ||
slice_labels = labels[coords[:,2] == i] | ||
axs[i].scatter(slice_coords[:,0], slice_coords[:,1], color=[colors[x] for x in slice_labels]) | ||
# plt.savefig(os.path.join(args.output_folder, f'cluster_slice_{i}.png')) | ||
# plt.close() | ||
axs[i].set_xlim(0,seg_data.shape[0]) | ||
axs[i].set_ylim(0,seg_data.shape[1]) | ||
plt.show() | ||
|
||
#saving the results in a nifti file | ||
#first we create the new segmentation | ||
new_seg_data = np.zeros_like(seg_data) | ||
for i in range(len(labels)): | ||
new_seg_data[coords[i,0], coords[i,1], coords[i,2]] = labels[i] + 1 | ||
#then we save it | ||
new_seg = nib.Nifti1Image(new_seg_data, seg.affine, seg.header) | ||
nib.save(new_seg, os.path.join(args.output_folder, 'clustered_seg.nii.gz')) | ||
|
||
#for each lesion calculate volume and get center | ||
##first we get the volume of one voxel | ||
voxel_volume = np.prod(seg.header.get_zooms()) | ||
##then we get the volume of each lesion and its center | ||
lesion_volumes = [] | ||
lesion_centers = [] | ||
for i in range(n_clusters_): | ||
lesion_volumes.append(len(labels[labels == i])*voxel_volume) | ||
lesion_centers.append(np.mean(coords[labels == i], axis=0)) | ||
|
||
#save the results in a text file | ||
with open(os.path.join(args.output_folder, 'lesion_analysis.txt'), 'w') as f: | ||
f.write(f'Number of lesions: {n_clusters_}\n') | ||
f.write('Volume and center of each lesion (mm3):\n') | ||
for i in range(n_clusters_): | ||
f.write(f'Lesion {i+1} : volume: {round(lesion_volumes[i],2)} mm3, center: {lesion_centers[i]}\n') | ||
return None | ||
|
||
|
||
if __name__ == '__main__': | ||
main() | ||
|
Uh oh!
There was an error while loading. Please reload this page.