From be2ceb9010341695f67bc92716bde76af4754066 Mon Sep 17 00:00:00 2001
From: Francesco D'Antonio
Date: Mon, 18 Sep 2023 18:33:15 +0100
Subject: [PATCH 1/6] changed some descriptions and added a function to
 calculate differences between voxels in timeseries

---
 findoutlie/dvars_distribution.py | 33 +++++++++++++++++++++++++++++---
 1 file changed, 30 insertions(+), 3 deletions(-)

diff --git a/findoutlie/dvars_distribution.py b/findoutlie/dvars_distribution.py
index c4b66d3..d7a222e 100644
--- a/findoutlie/dvars_distribution.py
+++ b/findoutlie/dvars_distribution.py
@@ -11,7 +11,7 @@ import numpy as np
 
 
 def distribution_mean(data):
-    """ Calculate mean of standard distribution on Nibabel image `img`
+    """ Calculate mean of standard distribution on 4D data
     used to calculate p-value for DVARS metrics
 
     Calculate the sum of variances for voxel intensities
@@ -22,7 +22,7 @@
 
     Parameters
     ----------
-    img : nibabel image
+    data: 4D timeseries
 
     Returns
     -------
@@ -32,4 +32,31 @@
     # calculate the variance of all voxel timeseries
     voxel_variances = np.var(data, axis=3)
     # sum of variances divided by sum of all intensities
-    return np.sum(voxel_variances)/np.sum(data)
\ No newline at end of file
+    return np.sum(voxel_variances)/np.sum(data)
+
+def dvars_data(data):
+    """ Calculate dvars metric on 4D data
+
+    The dvars calculation between two volumes is defined as the square root of
+    (the mean of the (voxel differences squared)).
+
+    Parameters
+    ----------
+    data: 4D timeseries
+
+    Returns
+    -------
+    dvals : 1D array
+        One-dimensional array with n-1 elements, where n is the number of
+        volumes in data.
+    """
+    # create two timeseries with n-1 volumes and find the difference between them
+    # remove the last volume
+    img_start = data[...,:-1]
+    # remove the first volume
+    img_end = data[..., 1:]
+    return np.sqrt(np.mean((img_start - img_end)**2, axis = (0,1,2)))
+
+def distribution_variance(data):
+
+    dvars_data(data)
\ No newline at end of file
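As a quick illustration of the DVARS definition used in `dvars_data` above, the same calculation can be reproduced on a toy array (the shape here is illustrative, not from the repository):

    import numpy as np

    # toy 4D timeseries: 4 x 4 x 4 voxels over 10 volumes
    rng = np.random.default_rng(0)
    data = rng.normal(size=(4, 4, 4, 10))

    # difference between each volume and the next...
    diffs = data[..., :-1] - data[..., 1:]
    # ...then the root mean square over all voxels, per volume pair
    dvals = np.sqrt(np.mean(diffs ** 2, axis=(0, 1, 2)))
    assert dvals.shape == (9,)  # n - 1 values for n volumes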
From 3c368f112ac644fbac1a270aa2ef3dc4e0428bef Mon Sep 17 00:00:00 2001
From: Francesco D'Antonio
Date: Mon, 18 Sep 2023 23:17:40 +0100
Subject: [PATCH 2/6] fixed incorrect calculation of null distribution mean by
 using the IQR of differences in voxel timeseries

---
 findoutlie/dvars_distribution.py            | 71 +++++++++++++++------
 findoutlie/tests/test_dvars_distribution.py | 48 ++++++++++----
 2 files changed, 89 insertions(+), 30 deletions(-)

diff --git a/findoutlie/dvars_distribution.py b/findoutlie/dvars_distribution.py
index d7a222e..8a8be1f 100644
--- a/findoutlie/dvars_distribution.py
+++ b/findoutlie/dvars_distribution.py
@@ -9,15 +9,18 @@
 """
 
 import numpy as np
+import scipy
+from scipy.special import ndtri
+
 
 def distribution_mean(data):
-    """ Calculate mean of standard distribution on 4D data
+    """ Calculate mean of null distribution on 4D data
     used to calculate p-value for DVARS metrics
 
-    Calculate the sum of variances for voxel intensities
-    and divide by the sum of intensities
-    For more information see equation 12:
+    Calculate the mean variances of voxel intensities
+
+    For more information see equation 12 and 13:
     https://www.sciencedirect.com/science/article/pii/S1053811917311229?via%3Dihub#appsecE
 
     Parameters
@@ -27,36 +30,66 @@
 
     Returns
     -------
     mu_0 : float
-        decimal to describe to mean for the standard distribution of the timeseries
+        decimal to describe the mean for the null distribution of the timeseries
     """
     # calculate the variance of all voxel timeseries
-    voxel_variances = np.var(data, axis=3)
-    # sum of variances divided by sum of all intensities
-    return np.sum(voxel_variances)/np.sum(data)
+    voxel_variances = voxel_iqr_variance(data)
+
+    # sum of variances divided by voxels per volume (mean)
+    return np.mean(voxel_variances)
 
-def dvars_data(data):
-    """ Calculate dvars metric on 4D data
-
-    The dvars calculation between two volumes is defined as the square root of
-    (the mean of the (voxel differences squared)).
+def voxel_differences(data):
+    """ Calculate difference between voxel i at time t
+    and the same voxel at time t + 1
+
+    V(t+1)-V(t)
 
     Parameters
     ----------
-    data: 4D timeseries
+    data: normalised 4D timeseries
 
     Returns
     -------
-    dvals : 1D array
-        One-dimensional array with n-1 elements, where n is the number of
-        volumes in data.
+    dvals : 4D array with one less volume than 'data'
     """
     # create two timeseries with n-1 volumes and find the difference between them
     # remove the last volume
     img_start = data[...,:-1]
     # remove the first volume
     img_end = data[..., 1:]
-    return np.sqrt(np.mean((img_start - img_end)**2, axis = (0,1,2)))
+    return img_start - img_end
+
+def voxel_iqr_variance(data):
+    """ Calculate variance of voxel timeseries using iqr
+    used to calculate mean of null distribution
+
+    For more information see equation 13:
+    https://www.sciencedirect.com/science/article/pii/S1053811917311229?via%3Dihub#appsecE
+
+    Parameters
+    ----------
+    data: 4D difference of adjacent voxel intensities
+
+    Returns
+    -------
+    variance: 3D array of variances for each voxel
+    """
+    # from the data calculate the differences between voxels at t and t+1
+    all_voxel_differences = voxel_differences(data)
+
+    # initiate variable for loop
+    iqr_dvars_values = np.zeros(data.shape[:-1])
+
+    # initiate loop over x,y,z
+    for x_value in range(data.shape[0]):
+        for y_value in range(data.shape[1]):
+            for z_value in range(data.shape[2]):
+
+                # calculate iqr for each voxel timeseries
+                q1, q3 = np.percentile(all_voxel_differences[x_value,y_value,z_value,:], [25, 75])
+                iqr_dvars_values[x_value,y_value,z_value] = q3 - q1
 
-def distribution_variance(data):
-
-    dvars_data(data)
\ No newline at end of file
+    # IQR os standard normal distribution
+    iqr_0 = ndtri(0.75) - ndtri(0.25)
+
+    return iqr_dvars_values/iqr_0
\ No newline at end of file
diff --git a/findoutlie/tests/test_dvars_distribution.py b/findoutlie/tests/test_dvars_distribution.py
index a413c6a..4abdabc 100644
--- a/findoutlie/tests/test_dvars_distribution.py
+++ b/findoutlie/tests/test_dvars_distribution.py
@@ -15,21 +15,47 @@
 """
 
 import numpy as np
-from findoutlie.dvars_distribution import distribution_mean
+from findoutlie.dvars_distribution import distribution_mean, voxel_differences, voxel_iqr_variance
+from scipy.special import ndtri
+from math import isclose
 
 def test_dvars_distribution():
     example_values1 = np.ones((3,3,3,6))
-    example_values2 = np.append(example_values1,example_values1+1, axis=3)
+    example_values2 = np.zeros((3,3,3,12))
+    for time_values in range(example_values2.shape[-1]):
+        example_values2[...,time_values] = time_values**2
+
+    example_answer2_array = voxel_differences(example_values2)[1,1,1,:]
+    q1, q3 = np.percentile(example_answer2_array, [25, 75])
+    answer2 = (q3 - q1)/(ndtri(0.75) - ndtri(0.25))
+
+    assert distribution_mean(example_values1) == 0
+    assert isclose(distribution_mean(example_values2), answer2)
+
+def test_voxel_differences():
+    example_values = np.ones((3,3,3,6))
+    example_values1 = np.append(example_values,example_values+1, axis=3)
+
+    # check correct shape for answer
+    assert voxel_differences(example_values).shape == (3,3,3,5)
+
+    # check correct order of operations and values
+    example_answer1 = np.zeros(example_values.shape[:-1]+(example_values.shape[-1] - 1,))
+    example_answer1 = np.append(example_answer1,example_values, axis=3)
+    assert (voxel_differences(example_values1) == example_answer1).all
+
+def test_voxel_iqr_variance():
+
+    example_values = np.zeros((3,3,3,12))
+    for time_values in range(example_values.shape[-1]):
+        example_values[...,time_values] = time_values
+
+    q1, q3 = np.percentile(example_values[1,1,1,:], [25, 75])
+    example_answer = np.zeros(example_values.shape[:-1]) + q3 - q1
+    assert example_answer.shape == voxel_iqr_variance(example_values).shape
+    assert (example_answer == voxel_iqr_variance(example_values)).all
 
-    # Values for exmple 2
-    example2_intensity_sum = 27*6 + 27*6*2
-    example2_variance = 0.25
-    example2_variance_sum = 0.25*27
-    example_distr_mean2 = example2_variance_sum/example2_intensity_sum
-
-    dvars_mean1 = distribution_mean(example_values1)
-    dvars_mean2 = distribution_mean(example_values2)
-
-    assert dvars_mean1 == 0
-    assert dvars_mean2 == example_distr_mean2
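A side note on the triple loop in `voxel_iqr_variance`: `np.percentile` accepts an `axis` argument, so the per-voxel IQR could be computed without explicit loops. A minimal sketch of an equivalent formulation (same maths, not the committed code):

    import numpy as np
    from scipy.special import ndtri

    def voxel_iqr_variance_vectorised(data):
        # adjacent-volume differences, as in voxel_differences
        diffs = data[..., :-1] - data[..., 1:]
        # 25th/75th percentiles along time for every voxel at once
        q1, q3 = np.percentile(diffs, [25, 75], axis=-1)
        # scale by the IQR of the standard normal distribution
        return (q3 - q1) / (ndtri(0.75) - ndtri(0.25))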
From 90fba03530de2d358c0f507caf545f9af8331408 Mon Sep 17 00:00:00 2001
From: Francesco D'Antonio
Date: Tue, 19 Sep 2023 13:28:31 +0100
Subject: [PATCH 3/6] Added and clarified comments of functions, fixed
 calculation in distribution_mean function to calculate the median of voxel
 IQR timeseries differences instead of the mean (look at reference paper
 beneath equation 13)

---
 findoutlie/dvars_distribution.py            | 32 ++++++++++++++-------
 findoutlie/tests/test_dvars_distribution.py |  6 +++-
 2 files changed, 26 insertions(+), 12 deletions(-)

diff --git a/findoutlie/dvars_distribution.py b/findoutlie/dvars_distribution.py
index 8a8be1f..a306924 100644
--- a/findoutlie/dvars_distribution.py
+++ b/findoutlie/dvars_distribution.py
@@ -15,10 +15,18 @@
 
 def distribution_mean(data):
     """ Calculate mean of null distribution on 4D data
-    used to calculate p-value for DVARS metrics
+    using robust IQR measurements of voxel intensity differences.
 
-    Calculate the mean variances of voxel intensities
+    Maths:
+
+    Step 1
+    Calculate IQR_i/IQR_0 where IQR_i is the IQR for voxel timeseries
+    in location i and IQR_0 is the IQR for the standard null distribution.
+    see function voxel_iqr_variance
+
+    Step 2
+    The median of {IQR_i/IQR_0} is an estimate for the mean of the null
+    distribution, other estimates are available
 
     For more information see equation 12 and 13:
     https://www.sciencedirect.com/science/article/pii/S1053811917311229?via%3Dihub#appsecE
@@ -35,22 +43,24 @@
     # calculate the variance of all voxel timeseries
     voxel_variances = voxel_iqr_variance(data)
 
-    # sum of variances divided by voxels per volume (mean)
-    return np.mean(voxel_variances)
+    # the median of the iqr
+    return np.median(voxel_variances)
 
 def voxel_differences(data):
     """ Calculate difference between voxel i at time t
     and the same voxel at time t + 1
 
-    V(t+1)-V(t)
+    Maths:
+
+    V_i(t+1)-V_i(t)
 
     Parameters
     ----------
-    data: normalised 4D timeseries
+    data: normalised 4D timeseries of shape (X,Y,Z,T)
 
     Returns
     -------
-    dvals : 4D array with one less volume than 'data'
+    dvals : 4D array with one less volume than 'data' of shape (X,Y,Z,T-1)
     """
     # create two timeseries with n-1 volumes and find the difference between them
     # remove the last volume
@@ -60,7 +70,7 @@
     return img_start - img_end
 
 def voxel_iqr_variance(data):
-    """ Calculate variance of voxel timeseries using iqr
+    """ Calculate IQR of voxel timeseries differences
     used to calculate mean of null distribution
 
     For more information see equation 13:
@@ -68,11 +78,11 @@
 
     Parameters
     ----------
-    data: 4D difference of adjacent voxel intensities
+    data: 4D timeseries
 
     Returns
     -------
-    variance: 3D array of variances for each voxel
+    IQR_values: 3D array of IQR for each voxel timeseries difference
     """
     # from the data calculate the differences between voxels at t and t+1
     all_voxel_differences = voxel_differences(data)
@@ -89,7 +99,7 @@
                 q1, q3 = np.percentile(all_voxel_differences[x_value,y_value,z_value,:], [25, 75])
                 iqr_dvars_values[x_value,y_value,z_value] = q3 - q1
 
-    # IQR os standard normal distribution
+    # IQR of standard normal distribution
     iqr_0 = ndtri(0.75) - ndtri(0.25)
 
     return iqr_dvars_values/iqr_0
\ No newline at end of file
diff --git a/findoutlie/tests/test_dvars_distribution.py b/findoutlie/tests/test_dvars_distribution.py
index 4abdabc..07b1eb3 100644
--- a/findoutlie/tests/test_dvars_distribution.py
+++ b/findoutlie/tests/test_dvars_distribution.py
@@ -19,9 +19,12 @@
 from scipy.special import ndtri
 from math import isclose
 
-def test_dvars_distribution():
+def test_distribution_mean():
+
+    # create example matrices
     example_values1 = np.ones((3,3,3,6))
     example_values2 = np.zeros((3,3,3,12))
+
     for time_values in range(example_values2.shape[-1]):
         example_values2[...,time_values] = time_values**2
 
@@ -32,6 +35,7 @@
 
     assert distribution_mean(example_values1) == 0
     assert isclose(distribution_mean(example_values2), answer2)
 
+
 def test_voxel_differences():
     example_values = np.ones((3,3,3,6))
     example_values1 = np.append(example_values,example_values+1, axis=3)
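For reference, the `iqr_0` constant in `voxel_iqr_variance` is the IQR of the standard normal distribution; `ndtri` is the standard normal quantile function, so its value is easy to check:

    from scipy.special import ndtri

    iqr_0 = ndtri(0.75) - ndtri(0.25)
    print(iqr_0)  # about 1.349, i.e. twice the 75th-percentile z of 0.6745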
From b91b1f2ddc322527f30647d055df7be7d9adf043 Mon Sep 17 00:00:00 2001
From: Francesco D'Antonio
Date: Tue, 19 Sep 2023 17:24:44 +0100
Subject: [PATCH 4/6] added estimation of variance for null distribution,
 correction of variance using power calculation and calculation of z-scores
 using the null distribution

---
 findoutlie/dvars_distribution.py            | 170 +++++++++++++++++++-
 findoutlie/tests/test_dvars_distribution.py |  30 +++-
 2 files changed, 192 insertions(+), 8 deletions(-)

diff --git a/findoutlie/dvars_distribution.py b/findoutlie/dvars_distribution.py
index a306924..4229bae 100644
--- a/findoutlie/dvars_distribution.py
+++ b/findoutlie/dvars_distribution.py
@@ -13,7 +13,7 @@
 from scipy.special import ndtri
 
 
-def distribution_mean(data):
+def null_distribution_mean(data):
     """ Calculate mean of null distribution on 4D data
     using robust IQR measurements of voxel intensity differences.
 
@@ -38,7 +38,7 @@
     Returns
     -------
     mu_0 : float
-        decimal to describe the mean for the null distribution of the timeseries
+        estimate of the mean for the null distribution of the timeseries
     """
     # calculate the variance of all voxel timeseries
     voxel_variances = voxel_iqr_variance(data)
@@ -102,4 +102,168 @@
     # IQR of standard normal distribution
     iqr_0 = ndtri(0.75) - ndtri(0.25)
 
-    return iqr_dvars_values/iqr_0
\ No newline at end of file
+    return iqr_dvars_values/iqr_0
+
+def volume_normalisation(raw_data):
+    """ Calculate normalised volume intensity values from raw
+    4D timeseries
+
+    Maths:
+
+    M_Ri = raw mean value for voxel i
+    Y_Rit = raw value for voxel i in volume t
+    m_R = mean (or median) raw value of {M_Ri}
+
+    Normalised value for voxel i at time t
+    Y_it = 100*(Y_Rit-M_Ri)/m_R
+
+    see equation 1:
+    https://www.sciencedirect.com/science/article/pii/S1053811917311229?via%3Dihub#appsecE
+
+    Parameters
+    ----------
+    raw_data: raw 4D timeseries
+
+    Returns
+    -------
+    data : normalised 4D timeseries
+    """
+
+    # overall mean value (note this could be changed to the median)
+    m_R = np.mean(raw_data)
+
+    normalised_data = np.zeros(raw_data.shape)
+
+    # initiate loop over x,y,z
+    for x_value in range(raw_data.shape[0]):
+        for y_value in range(raw_data.shape[1]):
+            for z_value in range(raw_data.shape[2]):
+
+                # calculate mean for voxel timeseries
+                M_Ri = np.mean(raw_data[x_value,y_value,z_value,:])
+
+                # loop over volumes
+                for t_value in range(raw_data.shape[3]):
+
+                    # get individual raw intensities
+                    Y_Rit = raw_data[x_value,y_value,z_value,t_value]
+                    # calculate normalised intensity
+                    Y_it = 100*(Y_Rit-M_Ri)/m_R
+                    normalised_data[x_value,y_value,z_value,t_value] = Y_it
+
+    return normalised_data
+
+def dvars_squared(data):
+    """ Calculate dvars^2 from 4D data
+
+    The dvars calculation between two volumes is defined as the square root of
+    (the mean of the (voxel differences squared)).
+
+    Parameters
+    ----------
+    data : 4D volumes
+
+    Returns
+    -------
+    dvals^2 : 1D array
+        One-dimensional array with n-1 elements, where n is the number of
+        volumes in data.
+    """
+    # create two timeseries with n-1 volumes and find the difference between them
+    # remove the last volume
+    img_start = data[...,:-1]
+    # remove the first volume
+    img_end = data[..., 1:]
+
+    return np.mean((img_start - img_end)**2, axis = (0,1,2))
+
+def null_distribution_variance(data):
+    """ Calculate estimate of variance for null distribution on 4D data
+
+    Maths:
+
+    Step 1
+    Calculate dvars_squared (from function)
+
+    Step 2
+    Find hIQR({dvars^2_t}) the half IQR defined as the difference between
+    the median and lower quartile.
+
+    Step 3
+    Calculate the variance of the null distribution = hIQR({dvars^2_t})/hIQR_0
+    where hIQR_0 is half of the IQR for the standard normal distribution
+
+    For more information see equation 14:
+    https://www.sciencedirect.com/science/article/pii/S1053811917311229?via%3Dihub#appsecE
+
+    Parameters
+    ----------
+    data: 4D timeseries
+
+    Returns
+    -------
+    variance_0 : float
+        estimate of the variance for the null distribution of the timeseries
+    """
+
+    # calculate dvars values
+    dvars_squared_values = dvars_squared(data)
+
+    # calculate hIQR for dvars values
+    q1, q2 = np.percentile(dvars_squared_values, [25, 50])
+    hIQR = q2 - q1
+
+    # define hIQR_0 and calculate null distribution variance
+    hIQR_0 = (ndtri(0.75) - ndtri(0.25))/2
+    return hIQR/hIQR_0
+
+def dvars_z_scores(data):
+    """ Calculate z_scores for dvars values testing if dvars values
+    belong to the null distribution
+
+    Maths:
+
+    Z(dvars_t) = (dvars^2_t - mu_0)/sqrt(variance_0)
+
+    For more information see equation 15:
+    https://www.sciencedirect.com/science/article/pii/S1053811917311229?via%3Dihub#appsecE
+
+    Parameters
+    ----------
+    data: 4D timeseries
+
+    Returns
+    -------
+    z_scores: 1D array
+        One-dimensional array with n-1 elements, where n is the number of
+        volumes in data.
+    """
+
+    dvars_squared_values = dvars_squared(data)
+    null_mean = null_distribution_mean(data)
+    null_variance = null_distribution_variance(data)
+
+    return (dvars_squared_values-null_mean)/np.sqrt(null_variance)
+
+def null_variance_correction(null_mean,null_variance, d=3):
+    """ Correcting from possible positive skew of estimated null distribution using
+    power transformation (cube root transformation)
+
+    Maths:
+
+    Variance(dvars_t) = (1/d)*variance_0*mu_0^(2/d-2)
+
+    For more information see Appendix F.2:
+    https://www.sciencedirect.com/science/article/pii/S1053811917311229?via%3Dihub#appsecE
+
+    Parameters
+    ----------
+    null_mean: estimated mean of the null distribution
+    null_variance: uncorrected estimate for the null variance
+    d: transformation factor (optimal at 3)
+
+    Returns
+    -------
+    corrected_variance: float
+    """
+    return (1/d)*null_variance*null_mean**(2/d-2)
\ No newline at end of file
diff --git a/findoutlie/tests/test_dvars_distribution.py b/findoutlie/tests/test_dvars_distribution.py
index 07b1eb3..cf97d8e 100644
--- a/findoutlie/tests/test_dvars_distribution.py
+++ b/findoutlie/tests/test_dvars_distribution.py
@@ -15,11 +15,13 @@
 """
 
 import numpy as np
-from findoutlie.dvars_distribution import distribution_mean, voxel_differences, voxel_iqr_variance
+from findoutlie.dvars_distribution import null_distribution_mean, voxel_differences, voxel_iqr_variance, dvars_squared
 from scipy.special import ndtri
 from math import isclose
+import nipraxis as npx
+import nibabel as nib
 
-def test_distribution_mean():
+def test_null_distribution_mean():
 
     # create example matrices
     example_values1 = np.ones((3,3,3,6))
@@ -32,8 +34,8 @@
     q1, q3 = np.percentile(example_answer2_array, [25, 75])
     answer2 = (q3 - q1)/(ndtri(0.75) - ndtri(0.25))
 
-    assert distribution_mean(example_values1) == 0
-    assert isclose(distribution_mean(example_values2), answer2)
+    assert null_distribution_mean(example_values1) == 0
+    assert isclose(null_distribution_mean(example_values2), answer2)
 
 
 def test_voxel_differences():
@@ -60,6 +62,24 @@
     assert (example_answer == voxel_iqr_variance(example_values)).all
 
-
+TEST_FNAME = npx.fetch_file('ds114_sub009_t2r1.nii')
+
+def test_dvars_squared():
+    img = nib.load(TEST_FNAME)
+    n_trs = img.shape[-1]
+    n_voxels = np.prod(img.shape[:-1])
+    data = img.get_fdata()
+    dvals = dvars_squared(data)
+    assert len(dvals) == n_trs - 1
+    # Calculate the values the long way round
+    data = img.get_fdata()
+    prev_vol = data[..., 0]
+    long_dvals = []
+    for i in range(1, n_trs):
+        this_vol = data[..., i]
+        d = this_vol - prev_vol
+        long_dvals.append(np.sum(d ** 2) / n_voxels)
+        prev_vol = this_vol
+    assert np.allclose(dvals, long_dvals)
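As with the IQR loop earlier, the nested loops in `volume_normalisation` can be expressed with broadcasting; a sketch of an equivalent implementation of equation 1 (same arithmetic, not the committed code):

    import numpy as np

    def volume_normalisation_vectorised(raw_data):
        # per-voxel mean over time, kept 4D so it broadcasts over volumes
        M_Ri = np.mean(raw_data, axis=-1, keepdims=True)
        # overall mean intensity m_R
        m_R = np.mean(raw_data)
        # Y_it = 100 * (Y_Rit - M_Ri) / m_R for all voxels and volumes at once
        return 100 * (raw_data - M_Ri) / m_R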
From 8b69c21a19bb8ce634c4a2a0c544ad3d2ca3a12b Mon Sep 17 00:00:00 2001
From: Francesco D'Antonio
Date: Tue, 19 Sep 2023 17:46:51 +0100
Subject: [PATCH 5/6] minor changes to comments

---
 findoutlie/dvars_distribution.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/findoutlie/dvars_distribution.py b/findoutlie/dvars_distribution.py
index 4229bae..4eac662 100644
--- a/findoutlie/dvars_distribution.py
+++ b/findoutlie/dvars_distribution.py
@@ -241,9 +241,10 @@
 
     dvars_squared_values = dvars_squared(data)
     null_mean = null_distribution_mean(data)
-    null_variance = null_distribution_variance(data)
-
-    return (dvars_squared_values-null_mean)/np.sqrt(null_variance)
+    null_variance_uncorrected = null_distribution_variance(data)
+    null_variance_corrected = null_variance_correction(null_mean,null_variance_uncorrected)
+
+    return (dvars_squared_values-null_mean)/np.sqrt(null_variance_corrected)
 
 def null_variance_correction(null_mean,null_variance, d=3):
     """ Correcting from possible positive skew of estimated null distribution using
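Taken together, patches 4 and 5 suggest usage along these lines (the filename and the z threshold of 3 are illustrative assumptions, not part of the repository):

    import numpy as np
    import nibabel as nib
    from findoutlie.dvars_distribution import volume_normalisation, dvars_z_scores

    img = nib.load('example_4d.nii')  # hypothetical filename
    data = volume_normalisation(img.get_fdata())
    z_scores = dvars_z_scores(data)

    # indices of volume pairs whose DVARS^2 sits far from the null distribution
    outlier_pairs = np.where(np.abs(z_scores) > 3)[0]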
- see function voxel_iqr_variance - - Step 2 - The median of {IQR_i/IQR_0} is an estimate for the mean of the null - distribution, other estimates are available - - For more information see equation 12 and 13: - https://www.sciencedirect.com/science/article/pii/S1053811917311229?via%3Dihub#appsecE +def dvars_pvals(raw_data, alpha=0.05): + """ Calculate Bonferroni corrected p_values using chi2 stats and finds + rejected null hypothesys Parameters ---------- - data: 4D timeseries + raw_data: 4D timeseries + alpha: significance threshold Returns ------- - mu_0 : float - estimate of the mean for the null distribution of the timeseries + p_vals_adjusted: 1D array of adjusted p_values + reject_null: 1D array, true if null hypothesys is rejected """ - # calculate the variance of all voxel timeseries - voxel_variances = voxel_iqr_variance(data) - - # the median of the iqr - return np.median(voxel_variances) - -def voxel_differences(data): - """ Calculate difference between voxel i at time t - and the same voxel at time t + 1 + # scale the data + data = volume_scaling(raw_data) - Maths: - - V_i(t+1)-V_i(t) - - Parameters - ---------- - data: normalised 4D timeseries of shape (X,Y,Z,T) - - Returns - ------- - dvals : 4D array with one less volume than 'data' of shape (X,Y,Z,T-1) - """ - # create two timeseries with n-1 volumes and find the difference between them - # remove the last volume - img_start = data[...,:-1] - # remove the first volume - img_end = data[..., 1:] - return img_start - img_end - -def voxel_iqr_variance(data): - """ Calculate IQR of voxel timeseries differences - used to calculate mean of null distribution - - For more information see equation 13: - https://www.sciencedirect.com/science/article/pii/S1053811917311229?via%3Dihub#appsecE - - Parameters - ---------- - data: 4D timeseries - - Returns - ------- - IQR_values: 3D array of IQR for each voxel timeseries difference - """ - # from the data calculate the differences between voxels at t and t+1 - all_voxel_differences = voxel_differences(data) + # calculate chi2 stats + chisquared_stats, degrees_of_freedom = dvars_chisquared_stat(data) - # initiate variable for loop - iqr_dvars_values = np.zeros(data.shape[:-1]) - - # initiate loop over x,y,z - for x_value in range(data.shape[0]): - for y_value in range(data.shape[1]): - for z_value in range(data.shape[2]): - - # calculate iqr for each voxel timeseries - q1, q3 = np.percentile(all_voxel_differences[x_value,y_value,z_value,:], [25, 75]) - iqr_dvars_values[x_value,y_value,z_value] = q3 - q1 - - # IQR of standard normal distribution - iqr_0 = ndtri(0.75) - ndtri(0.25) - - return iqr_dvars_values/iqr_0 + # find p-vals + p_vals = 1 - scipy.stats.chi2.cdf(chisquared_stats,degrees_of_freedom) + + # Bonferroni adjusted p_values + p_vals = p_vals*len(p_vals) + + # significance testing + reject_null = p_vals <= alpha + + return p_vals, reject_null -def volume_normalisation(raw_data): - """ Calculate normalised volume intensity values from raw +def volume_scaling(raw_data): + """ Rescales volume intensity values from raw 4D timeseries Maths: @@ -114,7 +53,7 @@ def volume_normalisation(raw_data): Y_Rit = raw value for voxel i in volume t m_R = mean (or median) raw value of {M_Ri} - Normalised value for voxel i at time t + Scaled value for voxel i at time t Y_it = 100*(Y_Rit-M_Ri)/m_R see equation 1: @@ -126,13 +65,13 @@ def volume_normalisation(raw_data): Returns ------- - data : normalised 4D timeseries + data : Scaled 4D timeseries """ - # overall mean value (note this could be changed 
-def volume_normalisation(raw_data):
-    """ Calculate normalised volume intensity values from raw
+def volume_scaling(raw_data):
+    """ Rescales volume intensity values from raw
     4D timeseries
 
     Maths:
 
     M_Ri = raw mean value for voxel i
     Y_Rit = raw value for voxel i in volume t
     m_R = mean (or median) raw value of {M_Ri}
 
-    Normalised value for voxel i at time t
+    Scaled value for voxel i at time t
     Y_it = 100*(Y_Rit-M_Ri)/m_R
 
     see equation 1:
     https://www.sciencedirect.com/science/article/pii/S1053811917311229?via%3Dihub#appsecE
 
     Parameters
     ----------
     raw_data: raw 4D timeseries
 
     Returns
     -------
-    data : normalised 4D timeseries
+    data : Scaled 4D timeseries
     """
 
-    # overall mean value (note this could be changed to the median)
+    # overall mean value
     m_R = np.mean(raw_data)
 
-    normalised_data = np.zeros(raw_data.shape)
+    scaled_data = np.zeros(raw_data.shape)
 
     # initiate loop over x,y,z
     for x_value in range(raw_data.shape[0]):
         for y_value in range(raw_data.shape[1]):
             for z_value in range(raw_data.shape[2]):
 
                 # calculate mean for voxel timeseries
                 M_Ri = np.mean(raw_data[x_value,y_value,z_value,:])
 
                 # loop over volumes
                 for t_value in range(raw_data.shape[3]):
 
                     # get individual raw intensities
                     Y_Rit = raw_data[x_value,y_value,z_value,t_value]
-                    # calculate normalised intensity
+                    # calculate scaled intensity
                     Y_it = 100*(Y_Rit-M_Ri)/m_R
-                    normalised_data[x_value,y_value,z_value,t_value] = Y_it
+                    scaled_data[x_value,y_value,z_value,t_value] = Y_it
 
-    return normalised_data
+    return scaled_data
+
+def dvars_chisquared_stat(data):
+    """ Calculate chi squared stats and degrees of freedom for 4D timeseries
+
+    Maths:
+
+    X(dvars_t) = 2*mu_0*(dvars^2_t)/(variance_0)
+
+    dof = 2*mu_0^2/(variance_0)
+
+    For more information see equation 11:
+    https://www.sciencedirect.com/science/article/pii/S1053811917311229?via%3Dihub#appsecE
+
+    Parameters
+    ----------
+    data: 4D timeseries
+
+    Returns
+    -------
+    chi_squared_stats: 1D array
+        One-dimensional array with n-1 elements, where n is the number of
+        volumes in data.
+    dof: degrees of freedom
+    """
+    # calculate dvars^2 values
+    dvars_squared_values = dvars_squared(data)
+
+    # calculate mean for the null distribution
+    null_mean = null_distribution_mean(data)
+    #null_mean = np.median(dvars_squared_values)
+
+    # calculate the variance for the null distribution
+    null_variance = null_distribution_variance(data)
+
+    # calculate chi squared stats
+    return 2*null_mean*(dvars_squared_values)/(null_variance), 2*null_mean**2/(null_variance)
 
 def dvars_squared(data):
     """ Calculate dvars^2 from 4D data
 
     The dvars calculation between two volumes is defined as the square root of
     (the mean of the (voxel differences squared)).
 
     Parameters
     ----------
     data : 4D volumes
 
     Returns
     -------
     dvals^2 : 1D array
         One-dimensional array with n-1 elements, where n is the number of
         volumes in data.
     """
+
+    return np.mean((voxel_differences(data))**2, axis = (0,1,2))
+
+def voxel_differences(data):
+    """ Calculate difference between voxel i at time t
+    and the same voxel at time t + 1
+
+    Maths:
+
+    Y_i(t+1)-Y_i(t)
+
+    Parameters
+    ----------
+    data: 4D timeseries of shape (X,Y,Z,T)
+
+    Returns
+    -------
+    dvals : 4D array with one less volume than 'data' of shape (X,Y,Z,T-1)
+    """
     # create two timeseries with n-1 volumes and find the difference between them
     # remove the last volume
     img_start = data[...,:-1]
     # remove the first volume
     img_end = data[..., 1:]
+    return img_start - img_end
 
-    return np.mean((img_start - img_end)**2, axis = (0,1,2))
 
- - Step 3 - Calculate the variance of the null distribution = hIQR({dvars^2_t})/hIQR_0 - where hIQR_0 is half of the IQR for the standard normal distribution + The median of {IQR_i/IQR_0} is an estimate for the mean of the null + distribution, other estimates are available - For more information see equation 14: + For more information see equation 12 and 13: https://www.sciencedirect.com/science/article/pii/S1053811917311229?via%3Dihub#appsecE Parameters @@ -202,30 +194,20 @@ def null_distribution_variance(data): Returns ------- - variance_0 : float - estimate of the variance for the null distribution of the timeseries + mu_0 : float + estimate of the mean for the null distribution of the timeseries """ + # calculate the IQR of all voxel timeseries + voxel_iqrs = voxel_iqr_variance(data) - # calculate dvars values - dvars_squared_values = dvars_squared(data) - - # calculate hIQR for dvars values - q1, q2 = np.percentile(dvars_squared_values, [25, 50]) - hIQR = q2 - q1 - - # define hIQR_0 and calculate null distribution variance - hIQR_0 = (ndtri(0.75) - ndtri(0.25))/2 - return hIQR/hIQR_0 - -def dvars_z_scores(data): - """ Calculate z_scores for dvars values testing if dvars values - belong to the null distribution - - Maths: + # the median of the iqr + return np.median(voxel_iqrs) - Z(dvars_t) = (dvars^2_t - mu_0)/sqrt(variance_0) +def voxel_iqr_variance(data): + """ Calculate IQR of voxel timeseries differences + used to calculate mean of null distribution - For more information see equation 15: + For more information see equation 13: https://www.sciencedirect.com/science/article/pii/S1053811917311229?via%3Dihub#appsecE Parameters @@ -234,37 +216,65 @@ def dvars_z_scores(data): Returns ------- - z_scores: 1D array - One-dimensional array with n-1 elements, where n is the number of - volumes in data. + IQR_values: 3D array of IQR for each voxel timeseries difference """ + # from the data calculate the differences between voxels at t and t+1 + all_voxel_differences = voxel_differences(data) + # all_voxel_differences = np.sqrt(dvars_squared(data)) - dvars_squared_values = dvars_squared(data) - null_mean = null_distribution_mean(data) - null_variance_uncorrected = null_distribution_variance(data) - null_variance_corrected = null_variance_correction(null_mean,null_variance_uncorrected) + # initiate variable for loop + iqr_dvars_values = np.zeros(data.shape[:-1]) + + # initiate loop over x,y,z + for x_value in range(data.shape[0]): + for y_value in range(data.shape[1]): + for z_value in range(data.shape[2]): + + # calculate iqr for each voxel timeseries + q1, q3 = np.percentile(all_voxel_differences[x_value,y_value,z_value,:], [25, 75]) + iqr_dvars_values[x_value,y_value,z_value] = q3 - q1 + + # IQR of standard normal distribution + iqr_0 = ndtri(0.75) - ndtri(0.25) - return (dvars_squared_values-null_mean)/np.sqrt(null_variance_corrected) + return iqr_dvars_values/iqr_0 -def null_variance_correction(null_mean,null_variance, d=3): - """ Correcting from possible positive skew of estimated null distribution using - power transformation (cube root transformation) +def null_distribution_variance(data): + """ Calculate estimate of variance for null distribution on 4D data Maths: - Variance(dvars_t) = (1/d)*variance_0*mu_0^(2/d-2) + Step 1 + Calculate dvars_squared (from function) + + Step 2 + Find hIQR({dvars^2_t}) the half IQR defined as the difference between + the median and lower quartile. 
- For more information see Appendix F.2: + Step 3 + Calculate the variance of the null distribution = hIQR({dvars^2_t})/hIQR_0 + where hIQR_0 is half of the IQR for the standard normal distribution + + For more information see equation 14: https://www.sciencedirect.com/science/article/pii/S1053811917311229?via%3Dihub#appsecE Parameters ---------- - null_mean: estimated mean of the null distribution - null_variance: uncorrected estimate for the null variance - d: transformation factor (optimal at 3) + data: 4D timeseries Returns ------- - corrected_variance: float + variance_0 : float + estimate of the variance for the null distribution of the timeseries """ - return (1/d)*null_variance*null_mean**(2/d-2) \ No newline at end of file + + # calculate dvars values + dvars_squared_values = dvars_squared(data) + + # calculate hIQR for dvars values + q1, q2 = np.percentile(dvars_squared_values, [25, 50]) + hIQR = q2 - q1 + + # define hIQR_0 and calculate null distribution variance + hIQR_0 = (ndtri(0.75) - ndtri(0.25))/2 + return hIQR/hIQR_0 diff --git a/findoutlie/tests/test_dvars_distribution.py b/findoutlie/tests/test_dvars_distribution.py index cf97d8e..3ef4349 100644 --- a/findoutlie/tests/test_dvars_distribution.py +++ b/findoutlie/tests/test_dvars_distribution.py @@ -21,6 +21,10 @@ import nipraxis as npx import nibabel as nib +# create test volume timeseries filled with normally distributed random numbers +TEST_NORMALISED_VOLUMES = np.random.normal(size = (10,10,10,1000)) + + def test_null_distribution_mean(): # create example matrices @@ -36,11 +40,13 @@ def test_null_distribution_mean(): assert null_distribution_mean(example_values1) == 0 assert isclose(null_distribution_mean(example_values2), answer2) + #assert isclose(null_distribution_mean(TEST_NORMALISED_VOLUMES), 0) def test_voxel_differences(): example_values = np.ones((3,3,3,6)) example_values1 = np.append(example_values,example_values+1, axis=3) + example_answer3 = TEST_NORMALISED_VOLUMES[...,1]-TEST_NORMALISED_VOLUMES[...,0] # check correct shape for answer assert voxel_differences(example_values).shape == (3,3,3,5) @@ -49,6 +55,8 @@ def test_voxel_differences(): example_answer1 = np.zeros(example_values.shape[:-1]+(example_values.shape[-1] - 1,)) example_answer1 = np.append(example_answer1,example_values, axis=3) assert (voxel_differences(example_values1) == example_answer1).all + assert (voxel_differences(TEST_NORMALISED_VOLUMES)[...,0] == example_answer3).all + def test_voxel_iqr_variance():