269 changes: 257 additions & 12 deletions findoutlie/dvars_distribution.py
@@ -9,27 +9,272 @@
"""

import numpy as np
from scipy.special import ndtri
import scipy.stats

def dvars_pvals(raw_data, alpha=0.05):
""" Calculate Bonferroni-corrected p-values using chi-squared statistics
and flag volumes for which the null hypothesis is rejected

Parameters
----------
raw_data: 4D timeseries
alpha: significance threshold

Returns
-------
p_vals: 1D array of Bonferroni-adjusted p-values
reject_null: 1D array, True where the null hypothesis is rejected
"""
# scale the data
data = volume_scaling(raw_data)

# calculate chi2 stats
chisquared_stats, degrees_of_freedom = dvars_chisquared_stat(data)

# find p-values via the chi-squared survival function (1 - cdf)
p_vals = scipy.stats.chi2.sf(chisquared_stats, degrees_of_freedom)

# Bonferroni adjusted p_values, capped at 1
p_vals = np.minimum(p_vals*len(p_vals), 1.0)

# significance testing
reject_null = p_vals <= alpha

return p_vals, reject_null

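# A hedged usage sketch, not part of the module API: random normal data
# stands in for a real 4D fMRI timeseries (a hypothetical example).
def _example_dvars_pvals():
    rng = np.random.default_rng(0)
    # hypothetical stand-in for scanner data: mean ~1000, small noise
    raw_data = rng.normal(loc=1000, scale=10, size=(8, 8, 8, 20))
    p_vals, reject_null = dvars_pvals(raw_data)
    # with pure noise we expect few, if any, volume pairs to be flagged
    return p_vals, reject_null
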
def volume_scaling(raw_data):
""" Rescales volume intensity values from raw
4D timeseries

Maths:

M_Ri = raw mean value for voxel i
Y_Rit = raw value for voxel i in volume t
m_R = mean (or median) raw value of {M_Ri}

Scaled value for voxel i at time t
Y_it = 100*(Y_Rit-M_Ri)/m_R

see equation 1:
https://www.sciencedirect.com/science/article/pii/S1053811917311229?via%3Dihub#appsecE

Parameters
----------
raw_data: raw 4D timeseries

Returns
-------
scaled_data : scaled 4D timeseries
"""

# overall mean value
m_R = np.mean(raw_data)

scaled_data = np.zeros(raw_data.shape)

# initiate loop over x,y,z
for x_value in range(raw_data.shape[0]):
for y_value in range(raw_data.shape[1]):
for z_value in range(raw_data.shape[2]):

# calculate mean for voxel timeseries
M_Ri = np.mean(raw_data[x_value,y_value,z_value,:])

# loop over volumes
for t_value in range(raw_data.shape[3]):

# get individual raw intensities
Y_Rit = raw_data[x_value,y_value,z_value,t_value]

# calculate scaled intensity
Y_it = 100*(Y_Rit-M_Ri)/m_R
scaled_data[x_value,y_value,z_value,t_value] = Y_it

return scaled_data

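# A hedged, vectorized sketch of the same scaling as volume_scaling above,
# using NumPy broadcasting instead of the explicit loops; behaviour should
# match, assuming `raw_data` is an ordinary 4D ndarray.
def _volume_scaling_vectorized(raw_data):
    # per-voxel mean over time, kept 4D so it broadcasts across volumes
    M_Ri = np.mean(raw_data, axis=-1, keepdims=True)
    # overall mean of the raw data, as in volume_scaling
    m_R = np.mean(raw_data)
    return 100 * (raw_data - M_Ri) / m_R
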
def dvars_chisquared_stat(data):
""" Calculate chi squared stats for and degrees of freedom for 4D timeseries

Maths:

X(dvars_t) = 2*mu_0*(dvars^2_t)/(variance_0)

dof = 2*mu_0^2/(variance_0)

For more information see equation 11:
https://www.sciencedirect.com/science/article/pii/S1053811917311229?via%3Dihub#appsecE

Parameters
----------
data: 4D timeseries

Returns
-------
chi_squared_stats: 1D array
One-dimensional array with n-1 elements, where n is the number of
volumes in data.
dof: degrees of freedom
"""
# calculate dvars^2 values
dvars_squared_values = dvars_squared(data)

# calculate mean for the null distribution
null_mean = null_distribution_mean(data)

# calculate the variance for the null distribution
null_variance = null_distribution_variance(data)

# calculate chi squared stats and degrees of freedom
chi_squared_stats = 2*null_mean*dvars_squared_values/null_variance
degrees_of_freedom = 2*null_mean**2/null_variance
return chi_squared_stats, degrees_of_freedom

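# A hedged sanity check of the moment matching above (illustration only):
# for a scaled chi-squared variable, 2*mean/variance should undo the scale
# and 2*mean**2/variance should recover the degrees of freedom.
def _check_chi2_moment_matching(dof=30, scale=2.5, n=100_000, seed=0):
    rng = np.random.default_rng(seed)
    samples = scale * rng.chisquare(dof, size=n)
    mu_0 = np.mean(samples)
    variance_0 = np.var(samples)
    # approximately (1/scale, dof) for large n
    return 2 * mu_0 / variance_0, 2 * mu_0 ** 2 / variance_0
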
def dvars_squared(data):
""" Calculate dvars^2 from 4D data

DVARS between two volumes is the square root of the mean of the squared
voxel differences; this function returns DVARS^2, i.e. the mean squared
difference without the square root.

Parameters
----------
data : 4D volumes

Returns
-------
dvars^2 : 1D array
One-dimensional array with n-1 elements, where n is the number of
volumes in data.
"""

return np.mean(voxel_differences(data)**2, axis=(0, 1, 2))
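
# A hedged equivalence sketch: the same quantity as dvars_squared in one
# expression, using np.diff for the temporal difference (illustration only).
def _dvars_squared_oneliner(data):
    return np.mean(np.diff(data, axis=-1) ** 2, axis=(0, 1, 2))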

def voxel_differences(data):
""" Calculate difference between voxel i at time t
and the same voxel at time t + 1

Maths:

Y_i(t+1)-Y_i(t)

Parameters
----------
data: 4D timeseries of shape (X,Y,Z,T)

Returns
-------
differences : 4D array of shape (X,Y,Z,T-1), one volume fewer than `data`
"""
# create two shifted timeseries with n-1 volumes and subtract them
# all volumes except the last: Y_i(t)
img_start = data[..., :-1]
# all volumes except the first: Y_i(t+1)
img_end = data[..., 1:]
# Y_i(t+1) - Y_i(t), matching the docstring
return img_end - img_start
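
# A hedged note: with the ordering above, voxel_differences matches NumPy's
# built-in first difference along the last axis (illustration only).
def _voxel_differences_via_diff(data):
    # np.diff computes data[..., 1:] - data[..., :-1]
    return np.diff(data, axis=-1)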

def null_distribution_mean(data):
""" Calculate mean of null distribution on 4D data
using robust IQR measurements of voxel intensity differences.

Maths:

Step 1
Calculate IQR_i/IQR_0 where IQR_i is the IQR for voxel timeseries
in location i and IQR_0 is the IQR for the standard null distribution.
see function voxel_iqr_variance

Step 2
The median of {IQR_i/IQR_0} is an estimate of the mean of the null
distribution; other estimates are available

For more information see equation 12 and 13:
https://www.sciencedirect.com/science/article/pii/S1053811917311229?via%3Dihub#appsecE

Parameters
----------
data: 4D timeseries

Returns
-------
mu_0 : float
estimate of the mean for the null distribution of the timeseries
"""
# calculate the IQR of all voxel timeseries
voxel_iqrs = voxel_iqr_variance(data)

# the median of the iqr
return np.median(voxel_iqrs)

def voxel_iqr_variance(data):
""" Calculate IQR of voxel timeseries differences
used to calculate mean of null distribution

For more information see equation 13:
https://www.sciencedirect.com/science/article/pii/S1053811917311229?via%3Dihub#appsecE

Parameters
----------
data: 4D timeseries

Returns
-------
IQR_values: 3D array of the IQR of each voxel's timeseries differences,
scaled by the IQR of the standard normal distribution
"""
# from the data calculate the differences between voxels at t and t+1
all_voxel_differences = voxel_differences(data)

# initiate variable for loop
iqr_dvars_values = np.zeros(data.shape[:-1])

# initiate loop over x,y,z
for x_value in range(data.shape[0]):
for y_value in range(data.shape[1]):
for z_value in range(data.shape[2]):

# calculate iqr for each voxel timeseries
q1, q3 = np.percentile(all_voxel_differences[x_value,y_value,z_value,:], [25, 75])
iqr_dvars_values[x_value,y_value,z_value] = q3 - q1

# IQR of standard normal distribution
iqr_0 = ndtri(0.75) - ndtri(0.25)

return iqr_dvars_values/iqr_0
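
# A hedged, vectorized sketch of the same per-voxel IQR without the x/y/z
# loops, using np.percentile's axis argument (illustration only).
def _voxel_iqr_variance_vectorized(data):
    q1, q3 = np.percentile(voxel_differences(data), [25, 75], axis=-1)
    return (q3 - q1) / (ndtri(0.75) - ndtri(0.25))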

def null_distribution_variance(data):
""" Calculate estimate of variance for null distribution on 4D data

Maths:

Step 1
Calculate dvars_squared (from function)

Step 2
Find hIQR({dvars^2_t}) the half IQR defined as the difference between
the median and lower quartile.

Step 3
Calculate the variance of the null distribution = hIQR({dvars^2_t})/hIQR_0
where hIQR_0 is half of the IQR for the standard normal distribution

For more information see equation 14:
https://www.sciencedirect.com/science/article/pii/S1053811917311229?via%3Dihub#appsecE

Parameters
----------
data: 4D timeseries

Returns
-------
variance_0 : float
estimate of the variance for the null distribution of the timeseries
"""
# calculate dvars^2 values
dvars_squared_values = dvars_squared(data)

# calculate hIQR for dvars values
q1, q2 = np.percentile(dvars_squared_values, [25, 50])
hIQR = q2 - q1

# define hIQR_0 and calculate null distribution variance
hIQR_0 = (ndtri(0.75) - ndtri(0.25))/2
return hIQR/hIQR_0

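# A hedged equivalence sketch using scipy.stats.iqr (scipy.stats is already
# imported above): rng=(25, 50) gives the half IQR and scale='normal'
# divides by the full standard-normal IQR, hence the factor of 2
# (illustration only, not part of the module API).
def _null_variance_via_scipy(data):
    return 2 * scipy.stats.iqr(dvars_squared(data), rng=(25, 50), scale='normal')
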
82 changes: 70 additions & 12 deletions findoutlie/tests/test_dvars_distribution.py
Expand Up @@ -15,21 +15,79 @@
"""

import numpy as np
from findoutlie.dvars_distribution import null_distribution_mean, voxel_differences, voxel_iqr_variance, dvars_squared
from scipy.special import ndtri
from math import isclose
import nipraxis as npx
import nibabel as nib

# create test volume timeseries filled with normally distributed random numbers
TEST_NORMALISED_VOLUMES = np.random.normal(size=(10, 10, 10, 1000))


def test_null_distribution_mean():

# create example matrices
example_values1 = np.ones((3,3,3,6))
example_values2 = np.zeros((3,3,3,12))

for time_values in range(example_values2.shape[-1]):
example_values2[...,time_values] = time_values**2

example_answer2_array = voxel_differences(example_values2)[1,1,1,:]
q1, q3 = np.percentile(example_answer2_array, [25, 75])
answer2 = (q3 - q1)/(ndtri(0.75) - ndtri(0.25))

assert null_distribution_mean(example_values1) == 0
assert isclose(null_distribution_mean(example_values2), answer2)


def test_voxel_differences():
example_values = np.ones((3,3,3,6))
example_values1 = np.append(example_values,example_values+1, axis=3)
example_answer3 = TEST_NORMALISED_VOLUMES[...,1]-TEST_NORMALISED_VOLUMES[...,0]

# check correct shape for answer
assert voxel_differences(example_values).shape == (3,3,3,5)

# check correct order of operations and values
example_answer1 = np.zeros(example_values.shape[:-1] + (example_values1.shape[-1] - 1,))
# the only non-zero difference is at the join between the two blocks of volumes
example_answer1[..., 5] = 1
assert (voxel_differences(example_values1) == example_answer1).all()
assert (voxel_differences(TEST_NORMALISED_VOLUMES)[..., 0] == example_answer3).all()


def test_voxel_iqr_variance():

example_values = np.zeros((3,3,3,12))
for time_values in range(example_values.shape[-1]):
example_values[...,time_values] = time_values

# IQR of the voxel differences, scaled by the IQR of the standard normal
q1, q3 = np.percentile(voxel_differences(example_values)[1,1,1,:], [25, 75])
example_answer = np.zeros(example_values.shape[:-1]) + (q3 - q1)/(ndtri(0.75) - ndtri(0.25))
assert example_answer.shape == voxel_iqr_variance(example_values).shape
assert (example_answer == voxel_iqr_variance(example_values)).all()


TEST_FNAME = npx.fetch_file('ds114_sub009_t2r1.nii')

def test_dvars_squared():
img = nib.load(TEST_FNAME)
n_trs = img.shape[-1]
n_voxels = np.prod(img.shape[:-1])
data = img.get_fdata()
dvals = dvars_squared(data)
assert len(dvals) == n_trs - 1
# Calculate the values the long way round
prev_vol = data[..., 0]
long_dvals = []
for i in range(1, n_trs):
this_vol = data[..., i]
d = this_vol - prev_vol
long_dvals.append(np.sum(d ** 2) / n_voxels)
prev_vol = this_vol
assert np.allclose(dvals, long_dvals)
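

def test_voxel_differences_matches_np_diff():
    # A hedged extra check (assumes differences are ordered t+1 minus t,
    # as the voxel_differences docstring states): compare against np.diff.
    rng = np.random.default_rng(42)
    data = rng.normal(size=(4, 4, 4, 10))
    assert np.allclose(voxel_differences(data), np.diff(data, axis=-1))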

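
def test_dvars_pvals_smoke():
    # A hedged smoke test (assumes dvars_pvals is importable from the same
    # module): checks shapes and ranges only, not statistical behaviour.
    from findoutlie.dvars_distribution import dvars_pvals
    rng = np.random.default_rng(1)
    data = rng.normal(loc=100, scale=1, size=(5, 5, 5, 8))
    p_vals, reject_null = dvars_pvals(data)
    assert len(p_vals) == data.shape[-1] - 1
    assert len(reject_null) == len(p_vals)
    assert ((p_vals >= 0) & (p_vals <= 1)).all()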