diff --git a/build.m b/build.m new file mode 100644 index 0000000..ebc46ff --- /dev/null +++ b/build.m @@ -0,0 +1,5 @@ +addpath(genpath('/N/u/brlife/git/vistasoft')) +addpath(genpath('/N/u/brlife/git/jsonlab')) +addpath(genpath('/N/u/brlife/git/wma_tools')) +mcc -m -R -nodisplay -d compiled parcRoiStats +exit diff --git a/compile.sh b/compile.sh new file mode 100644 index 0000000..32953b8 --- /dev/null +++ b/compile.sh @@ -0,0 +1,13 @@ +#!/bin/bash +module load matlab/2019a + +mkdir compiled + +cat > build.m <>mcrinstaller + +at the MATLAB prompt. + +Alternatively, download and install the Linux version of the MATLAB Runtime for R2019a +from the following link on the MathWorks website: + + http://www.mathworks.com/products/compiler/mcr/index.html + +For more information about the MATLAB Runtime and the MATLAB Runtime installer, see +"Distribute Applications" in the MATLAB Compiler documentation +in the MathWorks Documentation Center. + +2. Files to Deploy and Package + +Files to Package for Standalone +================================ +-parcRoiStats +-run_parcRoiStats.sh (shell script for temporarily setting environment variables and + executing the application) + -to run the shell script, type + + ./run_parcRoiStats.sh + + at Linux or Mac command prompt. is the directory + where version 9.6 of the MATLAB Runtime is installed or the directory where + MATLAB is installed on the machine. is all the + arguments you want to pass to your application. 
For example, + + If you have version 9.6 of the MATLAB Runtime installed in + /mathworks/home/application/v96, run the shell script as: + + ./run_parcRoiStats.sh /mathworks/home/application/v96 + + If you have MATLAB installed in /mathworks/devel/application/matlab, + run the shell script as: + + ./run_parcRoiStats.sh /mathworks/devel/application/matlab +-MCRInstaller.zip + Note: if end users are unable to download the MATLAB Runtime using the + instructions in the previous section, include it when building your + component by clicking the "Runtime included in package" link in the + Deployment Tool. +-This readme file + + + +3. Definitions + +For information on deployment terminology, go to +http://www.mathworks.com/help and select MATLAB Compiler > +Getting Started > About Application Deployment > +Deployment Product Terms in the MathWorks Documentation +Center. + +4. Appendix + +A. Linux systems: +In the following directions, replace MR/v96 by the directory on the target machine where + MATLAB is installed, or MR by the directory where the MATLAB Runtime is installed. + +(1) Set the environment variable XAPPLRESDIR to this value: + +MR/v96/X11/app-defaults + + +(2) If the environment variable LD_LIBRARY_PATH is undefined, set it to the following: + +MR/v96/runtime/glnxa64:MR/v96/bin/glnxa64:MR/v96/sys/os/glnxa64:MR/v96/sys/opengl/lib/glnxa64 + +If it is defined, set it to the following: + +${LD_LIBRARY_PATH}:MR/v96/runtime/glnxa64:MR/v96/bin/glnxa64:MR/v96/sys/os/glnxa64:MR/v96/sys/opengl/lib/glnxa64 + + For more detailed information about setting the MATLAB Runtime paths, see Package and + Distribute in the MATLAB Compiler documentation in the MathWorks Documentation Center. + + + + NOTE: To make these changes persistent after logout on Linux + or Mac machines, modify the .cshrc file to include this + setenv command. + NOTE: The environment variable syntax utilizes forward + slashes (/), delimited by colons (:). 
+ NOTE: When deploying standalone applications, you can + run the shell script file run_parcRoiStats.sh + instead of setting environment variables. See + section 2 "Files to Deploy and Package". + + + + + + diff --git a/compiled/requiredMCRProducts.txt b/compiled/requiredMCRProducts.txt new file mode 100644 index 0000000..3ed6af0 --- /dev/null +++ b/compiled/requiredMCRProducts.txt @@ -0,0 +1 @@ +35000 35010 \ No newline at end of file diff --git a/compiled/run_parcRoiStats.sh b/compiled/run_parcRoiStats.sh new file mode 100755 index 0000000..7a9c815 --- /dev/null +++ b/compiled/run_parcRoiStats.sh @@ -0,0 +1,33 @@ +#!/bin/sh +# script for execution of deployed applications +# +# Sets up the MATLAB Runtime environment for the current $ARCH and executes +# the specified command. +# +exe_name=$0 +exe_dir=`dirname "$0"` +echo "------------------------------------------" +if [ "x$1" = "x" ]; then + echo Usage: + echo $0 \ args +else + echo Setting up environment variables + MCRROOT="$1" + echo --- + LD_LIBRARY_PATH=.:${MCRROOT}/runtime/glnxa64 ; + LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${MCRROOT}/bin/glnxa64 ; + LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${MCRROOT}/sys/os/glnxa64; + LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${MCRROOT}/sys/opengl/lib/glnxa64; + export LD_LIBRARY_PATH; + echo LD_LIBRARY_PATH is ${LD_LIBRARY_PATH}; + shift 1 + args= + while [ $# -gt 0 ]; do + token=$1 + args="${args} \"${token}\"" + shift + done + eval "\"${exe_dir}/parcRoiStats\"" $args +fi +exit + diff --git a/create-data-csv-old.py b/create-data-csv-old.py deleted file mode 100755 index 5b02135..0000000 --- a/create-data-csv-old.py +++ /dev/null @@ -1,114 +0,0 @@ -#!/usr/bin/env python - -import csv -import json -import numpy -import glob -import sys -import os -import pandas as pd - -with open('config.json') as config_f: - config = json.load(config_f) - output_dir = config["freesurfer"] - parc = config["parcellation"] - -# left hemisphere -parc_l_verts = {} -parc_l_surf_area = {} -parc_l_gray_vol = {} 
-parc_l_thick = {} -parc_l_mean_curv = {} -parc_l_gaus_curv = {} -parc_l_fold_ind = {} -parc_l_curv_ind = {} - -stat = open(output_dir+'/stats/lh.'+parc+'.stats') -for line in stat.readlines(): - if line[0] == "#": - continue - values = line.split() - struct_name = values[0] - num_vert = int(values[1]) - surf_area = int(values[2]) - gray_vol = int(values[3]) - thick_avg = float(values[4]) - thick_std = float(values[5]) - mean_curv = float(values[6]) - gaus_curv = float(values[7]) - fold_ind = int(values[8]) - curv_ind = float(values[9]) - - #print index, struct_name, n_voxels - if not struct_name in parc_l_gray_vol: - parc_l_verts[struct_name] = [] - parc_l_surf_area[struct_name] = [] - parc_l_gray_vol[struct_name] = [] - parc_l_thick[struct_name] = [] - parc_l_mean_curv[struct_name] = [] - parc_l_gaus_curv[struct_name] = [] - parc_l_fold_ind[struct_name] = [] - parc_l_curv_ind[struct_name] = [] - parc_l_verts[struct_name] = num_vert - parc_l_surf_area[struct_name] = surf_area - parc_l_gray_vol[struct_name] = gray_vol - parc_l_thick[struct_name] = thick_avg - parc_l_mean_curv[struct_name] = mean_curv - parc_l_gaus_curv[struct_name] = gaus_curv - parc_l_fold_ind[struct_name] = fold_ind - parc_l_curv_ind[struct_name] = curv_ind - -dl = {'col1': parc_l_verts.keys(),'col2': parc_l_verts.values(),'col3': parc_l_surf_area.values(),'col4': parc_l_gray_vol.values(),'col5': parc_l_thick.values(),'col6': parc_l_mean_curv.values(),'col7': parc_l_gaus_curv.values(),'col8': parc_l_fold_ind.values(),'col9': parc_l_curv_ind.values()} -dfl = pd.DataFrame(data=dl) -dfl = dfl.rename(columns={'col1': "label name",'col2': "number of vertices",'col3': "surface area",'col4': "volume",'col5': "cortical thickness",'col6': "mean curvature",'col7': "gaussian curvature",'col8': "fold index",'col9': "curvature index"}) -dfl.to_csv('lh.'+parc+'.csv',index=False) - -# right hemisphere -parc_r_verts = {} -parc_r_surf_area = {} -parc_r_gray_vol = {} -parc_r_thick = {} -parc_r_mean_curv = {} 
-parc_r_gaus_curv = {} -parc_r_fold_ind = {} -parc_r_curv_ind = {} - -stat = open(output_dir+'/stats/rh.'+parc+'.stats') -for line in stat.readlines(): - if line[0] == "#": - continue - values = line.split() - struct_name = values[0] - num_vert = int(values[1]) - surf_area = int(values[2]) - gray_vol = int(values[3]) - thick_avg = float(values[4]) - thick_std = float(values[5]) - mean_curv = float(values[6]) - gaus_curv = float(values[7]) - fold_ind = int(values[8]) - curv_ind = float(values[9]) - - #print index, struct_name, n_voxels - if not struct_name in parc_r_gray_vol: - parc_r_verts[struct_name] = [] - parc_r_surf_area[struct_name] = [] - parc_r_gray_vol[struct_name] = [] - parc_r_thick[struct_name] = [] - parc_r_mean_curv[struct_name] = [] - parc_r_gaus_curv[struct_name] = [] - parc_r_fold_ind[struct_name] = [] - parc_r_curv_ind[struct_name] = [] - parc_r_verts[struct_name] = num_vert - parc_r_surf_area[struct_name] = surf_area - parc_r_gray_vol[struct_name] = gray_vol - parc_r_thick[struct_name] = thick_avg - parc_r_mean_curv[struct_name] = mean_curv - parc_r_gaus_curv[struct_name] = gaus_curv - parc_r_fold_ind[struct_name] = fold_ind - parc_r_curv_ind[struct_name] = curv_ind - -dr = {'col1': parc_r_verts.keys(),'col2': parc_r_verts.values(),'col3': parc_r_surf_area.values(),'col4': parc_r_gray_vol.values(),'col5': parc_r_thick.values(),'col6': parc_r_mean_curv.values(),'col7': parc_r_gaus_curv.values(),'col8': parc_r_fold_ind.values(),'col9': parc_r_curv_ind.values()} -dfr = pd.DataFrame(data=dl) -dfr = dfl.rename(columns={'col1': "label name",'col2': "number of vertices",'col3': "surface area",'col4': "volume",'col5': "cortical thickness",'col6': "mean curvature",'col7': "gaussian curvature",'col8': "fold index",'col9': "curvature index"}) -dfr.to_csv('rh.'+parc+'.csv',index=False) \ No newline at end of file diff --git a/create_data_csv.py b/create_data_csv.py index 3ce22c0..54dd675 100755 --- a/create_data_csv.py +++ b/create_data_csv.py @@ -9,49 
+9,121 @@ import pandas as pd from freesurfer_stats import CorticalParcellationStats -def extract_wm_stats(input_data_lines): +def extract_wholebrain_stats(input_data_lines,version): + if version == 'v5': + tissueName = 'Cortical' + tissueNameLower = 'cortical' + else: + tissueName = 'Cerebral' + tissueNameLower = 'cerebral' + + lines_var = input_data_lines.readlines() + dataStructure = {} + + for tissues in ['gm','wm']: + if tissues == 'gm': + name = 'CortexVol' + total_name = 'Total cortical gray matter volume' + else: + name = tissueName+'WhiteMatter' + total_name = 'Total '+tissueNameLower+' white matter volume' + + dataStructure[tissues] = {} + dataStructure[tissues]['lh_volume'] = float(lines_var[lines_var.index([ f for f in lines_var if 'lh'+name in f ][0])].replace(',','').split()[10]) + dataStructure[tissues]['rh_volume'] = float(lines_var[lines_var.index([ f for f in lines_var if 'rh'+name in f ][0])].replace(',','').split()[10]) + dataStructure[tissues]['total_volume'] = float(lines_var[lines_var.index([ f for f in lines_var if total_name in f ][0])].replace(',','').split()[9]) + + dataStructure['brain'] = {} + dataStructure['brain']['total_volume'] = float(lines_var[lines_var.index([ f for f in lines_var if 'BrainSegVol' in f ][0])].replace(',','').split()[7]) + dataStructure['brain']['total_intracranial_volume'] = float(lines_var[lines_var.index([ f for f in lines_var if 'EstimatedTotalIntraCranialVol' in f ][0])].replace(',','').split()[8]) + + return dataStructure + +def extract_subcortical_stats(input_data_lines,version,subjectID): + lines_var = input_data_lines.readlines() - lh = lines_var[14] - lh = lh.replace(',','') - rh = lines_var[15] - rh = rh.replace(',','') - tot = lines_var[16] - tot = tot.replace(',','') - lh_wm_vol = float(lh.split()[10]) - rh_wm_vol = float(rh.split()[10]) - tot_wm_vol = float(tot.split()[9]) + subcort_data = [ f for f in lines_var if '#' not in f ] - return [lh_wm_vol,rh_wm_vol,tot_wm_vol] + outdata = 
pd.DataFrame(columns={'segID','subjectID','structureID', 'nodeID', 'number_of_voxels', 'gray_matter_volume_mm^3'}) + outdata['segID'] = [ f.split()[1] for f in subcort_data ] + outdata['structureID'] = [ f.split()[4] for f in subcort_data ] + outdata['subjectID'] = [ subjectID for f in range(len(outdata['structureID'])) ] + outdata['nodeID'] = [ 1 for f in range(len(outdata['structureID'])) ] + outdata['number_of_voxels'] = [ f.split()[2] for f in subcort_data ] + outdata['gray_matter_volume_mm^3'] = [ f.split()[3] for f in subcort_data ] + outdata = outdata.reindex(columns=['segID','subjectID','structureID', 'nodeID', 'number_of_voxels', 'gray_matter_volume_mm^3']) + + return outdata + +def create_wholebrain_csv(wb_data,lh_data,rh_data,subjectID): + whole_brain = pd.DataFrame([],dtype=object) + whole_brain = whole_brain.append({'subjectID': subjectID},ignore_index=True) + whole_brain.insert(1,"Total_Brain_volume",wb_data['brain']['total_volume'],True) + whole_brain.insert(2,"Total_Intracranial_volume",wb_data['brain']['total_intracranial_volume'],True) + whole_brain.insert(3,"Total_Gray_Matter_volume",wb_data['gm']['total_volume'],True) + whole_brain.insert(4,"Total_White_Matter_volume",wb_data['wm']['total_volume'],True) + whole_brain.insert(5,"Left_Hemisphere_Gray_Matter_volume",wb_data['gm']['lh_volume'],True) + whole_brain.insert(6,"Right_Hemisphere_Gray_Matter_volume",wb_data['gm']['rh_volume'],True) + whole_brain.insert(7,"Left_Hemisphere_White_Matter_volume",wb_data['wm']['lh_volume'],True) + whole_brain.insert(8,"Right_Hemisphere_White_Matter_volume",wb_data['wm']['rh_volume'],True) + whole_brain.insert(9,"Left_Hemisphere_Mean_Gray_Matter_thickness",lh_data.whole_brain_measurements['mean_thickness_mm'],True) + whole_brain.insert(10,"Right_Hemisphere_Mean_Gray_Matter_thickness",rh_data.whole_brain_measurements['mean_thickness_mm'],True) + + return whole_brain with open('config.json') as config_f: config = json.load(config_f) output_dir = 
config["freesurfer"] parc = config["parcellation"] + subjectID = config['_inputs'][0]['meta']['subject'] + fsurf_tags = config['_inputs'][0]['tags'] + +# set flag if freesurfer version is v5 +if 'v5' in fsurf_tags: + fsurf_version = 'v5' +else: + fsurf_version = 'v6+' # left hemisphere lh_stats = CorticalParcellationStats.read(output_dir+'/stats/lh.'+parc+'.stats') dfl = lh_stats.structural_measurements -dfl.to_csv('lh.cortex.csv') +dfl.rename(columns={'structure_name': 'structureID'},inplace=True) +dfl['structureID'] = [ 'lh_'+dfl['structureID'][f] for f in range(len(dfl['structureID'])) ] +dfl['subjectID'] = [ subjectID for x in range(len(dfl['structureID'])) ] +dfl['parcID'] = [ f+1 for f in range(len(dfl['structureID']))] +dfl['nodeID'] = [ int(1) for f in range(len(dfl['structureID'])) ] +dfl = dfl.reindex(columns=['parcID','subjectID','structureID','nodeID','number_of_vertices', 'surface_area_mm^2','gray_matter_volume_mm^3', 'average_thickness_mm','thickness_stddev_mm', 'integrated_rectified_mean_curvature_mm^-1','integrated_rectified_gaussian_curvature_mm^-2', 'folding_index','intrinsic_curvature_index']) +dfl.to_csv('lh.cortex.csv',index=False) # right hemisphere rh_stats = CorticalParcellationStats.read(output_dir+'/stats/rh.'+parc+'.stats') dfr = rh_stats.structural_measurements -dfr.to_csv('rh.cortex.csv') +dfr.rename(columns={'structure_name': 'structureID'},inplace=True) +dfr['structureID'] = [ 'rh_'+dfr['structureID'][f] for f in range(len(dfr['structureID'])) ] +dfr['subjectID'] = [ subjectID for x in range(len(dfr['structureID'])) ] +dfr['parcID'] = [ f+1 for f in range(len(dfr['structureID']))] +dfr['nodeID'] = [ int(1) for f in range(len(dfr['structureID'])) ] +dfr = dfr.reindex(columns=['parcID','subjectID','structureID','nodeID','number_of_vertices', 'surface_area_mm^2','gray_matter_volume_mm^3', 'average_thickness_mm','thickness_stddev_mm', 'integrated_rectified_mean_curvature_mm^-1','integrated_rectified_gaussian_curvature_mm^-2', 
'folding_index','intrinsic_curvature_index']) +dfr.to_csv('rh.cortex.csv',index=False) + +# concat left and right hemispheres +dft = pd.concat([dfl,dfr],ignore_index=True) +dft.to_csv('cortex.csv',index=False) # whole brain -white_matter_stats = open(output_dir+'/stats/wmparc.stats') -[lh_wm_vol,rh_wm_vol,tot_wm_vol] = extract_wm_stats(white_matter_stats) - -whole_brain = lh_stats.whole_brain_measurements[['brain_segmentation_volume_mm^3','estimated_total_intracranial_volume_mm^3']] -whole_brain = whole_brain.rename(columns={"brain_segmentation_volume_mm^3": "Total Brain Volume", "estimated_total_intracranial_volume_mm^3": "Total Intracranial Volume"}) -whole_brain.insert(2,"Total Cortical Gray Matter Volume",lh_stats.whole_brain_measurements['total_cortical_gray_matter_volume_mm^3'],True) -whole_brain.insert(3,"Total White Matter Volume",tot_wm_vol,True) -whole_brain.insert(4,"Left Hemisphere Cortical Gray Matter Volume",numpy.sum(lh_stats.structural_measurements['gray_matter_volume_mm^3']),True) -whole_brain.insert(5,"Right Hemisphere Cortical Gray Matter Volume",numpy.sum(rh_stats.structural_measurements['gray_matter_volume_mm^3']),True) -whole_brain.insert(6,"Left Hemisphere White Matter Volume",lh_wm_vol,True) -whole_brain.insert(7,"Right Hemisphere White Matter Volume",rh_wm_vol,True) -whole_brain.insert(8,"Left Hemisphere Mean Cortical Gray Matter Thickness",lh_stats.whole_brain_measurements['mean_thickness_mm'],True) -whole_brain.insert(9,"Right Hemisphere Mean Cortical Gray Matter Thickness",rh_stats.whole_brain_measurements['mean_thickness_mm'],True) - -whole_brain.to_csv('whole_brain.csv') +wholebrain = open(output_dir+'/stats/aseg.stats') +wholebrain_data = extract_wholebrain_stats(wholebrain,fsurf_version) +whole_brain = create_wholebrain_csv(wholebrain_data,lh_stats,rh_stats,subjectID) +whole_brain.to_csv('whole_brain.csv',index=False) + +# subcortical stats +wholebrain = open(output_dir+'/stats/aseg.stats') +subcortical = 
extract_subcortical_stats(wholebrain,fsurf_version,subjectID) +subcortical.to_csv('subcortical.csv',index=False) + +# append subject ID to data with coordinates from dan's code +rois = pd.read_csv('rois.csv') +rois['subjectID'] = [ subjectID for x in range(len(rois['ROI_name'])) ] +rois = rois[ ['subjectID'] + [ f for f in rois.columns if f != 'subjectID']] +rois.to_csv('rois.csv',index=False) diff --git a/main b/main index a48c925..fc3f072 100755 --- a/main +++ b/main @@ -1,10 +1,30 @@ #!/bin/bash - -#PBS -l nodes=1:ppn=1 -#PBS -l vmem=16gb -#PBS -l walltime=0:10:00 -#PBS -N app-freesurer-stats +#PBS -l nodes=1:ppn=4,walltime=00:30:00 +#PBS -N app-freesurfer-stats #PBS -V -time singularity exec -e docker://brainlife/freesurfer-stats:1.2 ./create_data_csv.py +fsdir=`jq -r '.freesurfer' config.json` +parc=`jq -r '.parcellation' config.json` + +[ -z "$FREESURFER_LICENSE" ] && echo "Please set FREESURFER_LICENSE in .bashrc" && exit 1; +echo $FREESURFER_LICENSE > license.txt + +if [ ! -f ${parc}+aseg.nii.gz ]; then + time singularity exec -e -B `pwd`/license.txt:/usr/local/freesurfer/license.txt docker://brainlife/freesurfer-mini:6.0.1 bash -c "mri_convert ${fsdir}/mri/${parc}+aseg.mgz ./${parc}+aseg.nii.gz" +fi + +if [ ! -f rois.csv ]; then + time singularity exec -e docker://brainlife/mcr:r2019a ./compiled/parcRoiStats +fi + +if [ ! -f whole_brain.csv ]; then + time singularity exec -e docker://brainlife/freesurfer-stats:1.2 ./create_data_csv.py +fi +if [ ! -f whole_brain.csv ] || [ ! 
-f rois.csv ]; then + echo "output missing" && exit 1 +else + echo "complete" && mkdir -p ./output ./output/parc-stats/ && mv *.csv ./output/parc-stats/ && rm -rf *.nii.gz + echo "{\"tags\": [\"${parc}\"]}" > product.json + exit 0 +fi diff --git a/parcRoiStats.m b/parcRoiStats.m new file mode 100755 index 0000000..c0e0b41 --- /dev/null +++ b/parcRoiStats.m @@ -0,0 +1,33 @@ +function [] = parcRoiStats() + +if ~isdeployed + disp('loading path') + + %for IU HPC + addpath(genpath('/N/u/brlife/git/vistasoft')) + addpath(genpath('/N/u/brlife/git/jsonlab')) + addpath(genpath('/N/u/brlife/git/wma_tools')) + + %for old VM + addpath(genpath('/usr/local/vistasoft')) + addpath(genpath('/usr/local/jsonlab')) + addpath(genpath('/usr/local/wma_tools')) +end + +% Set top directory +topdir = pwd; + +% Load configuration file +config = loadjson('config.json'); + +% set path for parcellation +parcellation = fullfile(topdir,sprintf('%s+aseg.nii.gz',config.parcellation)); + +% run stats code +[parcStats] = bsc_computeAtlasStats_v2(parcellation); + +% save stats file +writetable(parcStats,'rois.csv'); + +exit +end