269 changes: 257 additions & 12 deletions findoutlie/dvars_distribution.py
@@ -9,27 +9,272 @@
"""

import numpy as np
from scipy.special import ndtri
import scipy.stats

def dvars_pvals(raw_data, alpha=0.05):
""" Calculate Bonferroni-corrected p-values using chi-squared statistics
and flag volumes for which the null hypothesis is rejected

Parameters
----------
raw_data: 4D timeseries
alpha: significance threshold

Returns
-------
p_vals: 1D array of Bonferroni-adjusted p-values
reject_null: 1D array, True where the null hypothesis is rejected
"""
# scale the data
data = volume_scaling(raw_data)

# calculate chi2 stats
chisquared_stats, degrees_of_freedom = dvars_chisquared_stat(data)

# find p-values via the chi-squared survival function (1 - cdf)
p_vals = scipy.stats.chi2.sf(chisquared_stats, degrees_of_freedom)

# Bonferroni adjusted p_values, capped at 1
p_vals = np.minimum(p_vals*len(p_vals), 1.0)

# significance testing
reject_null = p_vals <= alpha

return p_vals, reject_null

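# A hedged usage sketch, not part of the module API: random normal data
# stands in for a real 4D fMRI timeseries (a hypothetical example).
def _example_dvars_pvals():
    rng = np.random.default_rng(0)
    # hypothetical stand-in for scanner data: mean ~1000, small noise
    raw_data = rng.normal(loc=1000, scale=10, size=(8, 8, 8, 20))
    p_vals, reject_null = dvars_pvals(raw_data)
    # with pure noise we expect few, if any, volume pairs to be flagged
    return p_vals, reject_null
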
def volume_scaling(raw_data):
""" Rescales volume intensity values from raw
4D timeseries

Maths:

M_Ri = raw mean value for voxel i
Y_Rit = raw value for voxel i in volume t
m_R = mean (or median) raw value of {M_Ri}

Scaled value for voxel i at time t
Y_it = 100*(Y_Rit-M_Ri)/m_R

see equation 1:
https://www.sciencedirect.com/science/article/pii/S1053811917311229?via%3Dihub#appsecE

Parameters
----------
raw_data: raw 4D timeseries

Returns
-------
scaled_data : scaled 4D timeseries
"""

# overall mean value
m_R = np.mean(raw_data)

scaled_data = np.zeros(raw_data.shape)

# initiate loop over x,y,z
for x_value in range(raw_data.shape[0]):
for y_value in range(raw_data.shape[1]):
for z_value in range(raw_data.shape[2]):

# calculate mean for voxel timeseries
M_Ri = np.mean(raw_data[x_value,y_value,z_value,:])

# loop over volumes
for t_value in range(raw_data.shape[3]):

# get individual raw intensities
Y_Rit = raw_data[x_value,y_value,z_value,t_value]

# calculate scaled intensity
Y_it = 100*(Y_Rit-M_Ri)/m_R
scaled_data[x_value,y_value,z_value,t_value] = Y_it

return scaled_data

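# A hedged, vectorized sketch of the same scaling as volume_scaling above,
# using NumPy broadcasting instead of the explicit loops; behaviour should
# match, assuming `raw_data` is an ordinary 4D ndarray.
def _volume_scaling_vectorized(raw_data):
    # per-voxel mean over time, kept 4D so it broadcasts across volumes
    M_Ri = np.mean(raw_data, axis=-1, keepdims=True)
    # overall mean of the raw data, as in volume_scaling
    m_R = np.mean(raw_data)
    return 100 * (raw_data - M_Ri) / m_R
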
def dvars_chisquared_stat(data):
""" Calculate chi squared stats for and degrees of freedom for 4D timeseries

Maths:

X(dvars_t) = 2*mu_0*(dvars^2_t)/(variance_0)

dof = 2*mu_0^2/(variance_0)

For more information see equation 11:
https://www.sciencedirect.com/science/article/pii/S1053811917311229?via%3Dihub#appsecE

Parameters
----------
data: 4D timeseries

Returns
-------
chi_squared_stats: 1D array
One-dimensional array with n-1 elements, where n is the number of
volumes in data.
dof: degrees of freedom
"""
# calculate dvars^2 values
dvars_squared_values = dvars_squared(data)

# calculate mean for the null distribution
null_mean = null_distribution_mean(data)

# calculate the variance for the null distribution
null_variance = null_distribution_variance(data)

# calculate chi squared stats and degrees of freedom
chi_squared_stats = 2*null_mean*dvars_squared_values/null_variance
degrees_of_freedom = 2*null_mean**2/null_variance
return chi_squared_stats, degrees_of_freedom

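# A hedged sanity check of the moment matching above (illustration only):
# for a scaled chi-squared variable, 2*mean/variance should undo the scale
# and 2*mean**2/variance should recover the degrees of freedom.
def _check_chi2_moment_matching(dof=30, scale=2.5, n=100_000, seed=0):
    rng = np.random.default_rng(seed)
    samples = scale * rng.chisquare(dof, size=n)
    mu_0 = np.mean(samples)
    variance_0 = np.var(samples)
    # approximately (1/scale, dof) for large n
    return 2 * mu_0 / variance_0, 2 * mu_0 ** 2 / variance_0
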
def dvars_squared(data):
""" Calculate dvars^2 from 4D data

DVARS between two volumes is the square root of the mean of the squared
voxel differences; this function returns DVARS^2, i.e. the mean squared
difference without the square root.

Parameters
----------
data : 4D volumes

Returns
-------
dvars^2 : 1D array
One-dimensional array with n-1 elements, where n is the number of
volumes in data.
"""

return np.mean(voxel_differences(data)**2, axis=(0, 1, 2))
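
# A hedged equivalence sketch: the same quantity as dvars_squared in one
# expression, using np.diff for the temporal difference (illustration only).
def _dvars_squared_oneliner(data):
    return np.mean(np.diff(data, axis=-1) ** 2, axis=(0, 1, 2))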

def voxel_differences(data):
""" Calculate difference between voxel i at time t
and the same voxel at time t + 1

Maths:

Y_i(t+1)-Y_i(t)

Parameters
----------
data: 4D timeseries of shape (X,Y,Z,T)

Returns
-------
differences : 4D array of shape (X,Y,Z,T-1), one volume fewer than `data`
"""
# create two shifted timeseries with n-1 volumes and subtract them
# all volumes except the last: Y_i(t)
img_start = data[..., :-1]
# all volumes except the first: Y_i(t+1)
img_end = data[..., 1:]
# Y_i(t+1) - Y_i(t), matching the docstring
return img_end - img_start
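
# A hedged note: with the ordering above, voxel_differences matches NumPy's
# built-in first difference along the last axis (illustration only).
def _voxel_differences_via_diff(data):
    # np.diff computes data[..., 1:] - data[..., :-1]
    return np.diff(data, axis=-1)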

def null_distribution_mean(data):
""" Calculate mean of null distribution on 4D data
using robust IQR measurements of voxel intensity differences.

Maths:

Step 1
Calculate IQR_i/IQR_0 where IQR_i is the IQR for voxel timeseries
in location i and IQR_0 is the IQR for the standard null distribution.
see function voxel_iqr_variance

Step 2
The median of {IQR_i/IQR_0} is an estimate of the mean of the null
distribution; other estimates are available

For more information see equation 12 and 13:
https://www.sciencedirect.com/science/article/pii/S1053811917311229?via%3Dihub#appsecE

Parameters
----------
data: 4D timeseries

Returns
-------
mu_0 : float
estimate of the mean for the null distribution of the timeseries
"""
# calculate the IQR of all voxel timeseries
voxel_iqrs = voxel_iqr_variance(data)

# the median of the iqr
return np.median(voxel_iqrs)

def voxel_iqr_variance(data):
""" Calculate IQR of voxel timeseries differences
used to calculate mean of null distribution

For more information see equation 13:
https://www.sciencedirect.com/science/article/pii/S1053811917311229?via%3Dihub#appsecE

Parameters
----------
data: 4D timeseries

Returns
-------
IQR_values: 3D array of the IQR of each voxel's timeseries differences,
scaled by the IQR of the standard normal distribution
"""
# from the data calculate the differences between voxels at t and t+1
all_voxel_differences = voxel_differences(data)

# initiate variable for loop
iqr_dvars_values = np.zeros(data.shape[:-1])

# initiate loop over x,y,z
for x_value in range(data.shape[0]):
for y_value in range(data.shape[1]):
for z_value in range(data.shape[2]):

# calculate iqr for each voxel timeseries
q1, q3 = np.percentile(all_voxel_differences[x_value,y_value,z_value,:], [25, 75])
iqr_dvars_values[x_value,y_value,z_value] = q3 - q1

# IQR of standard normal distribution
iqr_0 = ndtri(0.75) - ndtri(0.25)

return iqr_dvars_values/iqr_0
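
# A hedged, vectorized sketch of the same per-voxel IQR without the x/y/z
# loops, using np.percentile's axis argument (illustration only).
def _voxel_iqr_variance_vectorized(data):
    q1, q3 = np.percentile(voxel_differences(data), [25, 75], axis=-1)
    return (q3 - q1) / (ndtri(0.75) - ndtri(0.25))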

def null_distribution_variance(data):
""" Calculate estimate of variance for null distribution on 4D data

Maths:

Step 1
Calculate dvars_squared (from function)

Step 2
Find hIQR({dvars^2_t}) the half IQR defined as the difference between
the median and lower quartile.

Step 3
Calculate the variance of the null distribution = hIQR({dvars^2_t})/hIQR_0
where hIQR_0 is half of the IQR for the standard normal distribution

For more information see equation 14:
https://www.sciencedirect.com/science/article/pii/S1053811917311229?via%3Dihub#appsecE

Parameters
----------
data: 4D timeseries

Returns
-------
variance_0 : float
estimate of the variance for the null distribution of the timeseries
"""
# calculate dvars^2 values
dvars_squared_values = dvars_squared(data)

# calculate hIQR for dvars values
q1, q2 = np.percentile(dvars_squared_values, [25, 50])
hIQR = q2 - q1

# define hIQR_0 and calculate null distribution variance
hIQR_0 = (ndtri(0.75) - ndtri(0.25))/2
return hIQR/hIQR_0

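# A hedged equivalence sketch using scipy.stats.iqr (scipy.stats is already
# imported above): rng=(25, 50) gives the half IQR and scale='normal'
# divides by the full standard-normal IQR, hence the factor of 2
# (illustration only, not part of the module API).
def _null_variance_via_scipy(data):
    return 2 * scipy.stats.iqr(dvars_squared(data), rng=(25, 50), scale='normal')
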
82 changes: 70 additions & 12 deletions findoutlie/tests/test_dvars_distribution.py
Expand Up @@ -15,21 +15,79 @@
"""

import numpy as np
from findoutlie.dvars_distribution import null_distribution_mean, voxel_differences, voxel_iqr_variance, dvars_squared
from scipy.special import ndtri
from math import isclose
import nipraxis as npx
import nibabel as nib

# create test volume timeseries filled with normally distributed random numbers
TEST_NORMALISED_VOLUMES = np.random.normal(size=(10, 10, 10, 1000))


def test_null_distribution_mean():

# create example matrices
example_values1 = np.ones((3,3,3,6))
example_values2 = np.zeros((3,3,3,12))

for time_values in range(example_values2.shape[-1]):
example_values2[...,time_values] = time_values**2

example_answer2_array = voxel_differences(example_values2)[1,1,1,:]
q1, q3 = np.percentile(example_answer2_array, [25, 75])
answer2 = (q3 - q1)/(ndtri(0.75) - ndtri(0.25))

assert null_distribution_mean(example_values1) == 0
assert isclose(null_distribution_mean(example_values2), answer2)


def test_voxel_differences():
example_values = np.ones((3,3,3,6))
example_values1 = np.append(example_values,example_values+1, axis=3)
example_answer3 = TEST_NORMALISED_VOLUMES[...,1]-TEST_NORMALISED_VOLUMES[...,0]

# check correct shape for answer
assert voxel_differences(example_values).shape == (3,3,3,5)

# check correct order of operations and values
example_answer1 = np.zeros(example_values.shape[:-1] + (example_values1.shape[-1] - 1,))
# the only non-zero difference is at the join between the two blocks of volumes
example_answer1[..., 5] = 1
assert (voxel_differences(example_values1) == example_answer1).all()
assert (voxel_differences(TEST_NORMALISED_VOLUMES)[..., 0] == example_answer3).all()


def test_voxel_iqr_variance():

example_values = np.zeros((3,3,3,12))
for time_values in range(example_values.shape[-1]):
example_values[...,time_values] = time_values

# IQR of the voxel differences, scaled by the IQR of the standard normal
q1, q3 = np.percentile(voxel_differences(example_values)[1,1,1,:], [25, 75])
example_answer = np.zeros(example_values.shape[:-1]) + (q3 - q1)/(ndtri(0.75) - ndtri(0.25))
assert example_answer.shape == voxel_iqr_variance(example_values).shape
assert (example_answer == voxel_iqr_variance(example_values)).all()


TEST_FNAME = npx.fetch_file('ds114_sub009_t2r1.nii')

def test_dvars_squared():
img = nib.load(TEST_FNAME)
n_trs = img.shape[-1]
n_voxels = np.prod(img.shape[:-1])
data = img.get_fdata()
dvals = dvars_squared(data)
assert len(dvals) == n_trs - 1
# Calculate the values the long way round
prev_vol = data[..., 0]
long_dvals = []
for i in range(1, n_trs):
this_vol = data[..., i]
d = this_vol - prev_vol
long_dvals.append(np.sum(d ** 2) / n_voxels)
prev_vol = this_vol
assert np.allclose(dvals, long_dvals)
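

def test_voxel_differences_matches_np_diff():
    # A hedged extra check (assumes differences are ordered t+1 minus t,
    # as the voxel_differences docstring states): compare against np.diff.
    rng = np.random.default_rng(42)
    data = rng.normal(size=(4, 4, 4, 10))
    assert np.allclose(voxel_differences(data), np.diff(data, axis=-1))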

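
def test_dvars_pvals_smoke():
    # A hedged smoke test (assumes dvars_pvals is importable from the same
    # module): checks shapes and ranges only, not statistical behaviour.
    from findoutlie.dvars_distribution import dvars_pvals
    rng = np.random.default_rng(1)
    data = rng.normal(loc=100, scale=1, size=(5, 5, 5, 8))
    p_vals, reject_null = dvars_pvals(data)
    assert len(p_vals) == data.shape[-1] - 1
    assert len(reject_null) == len(p_vals)
    assert ((p_vals >= 0) & (p_vals <= 1)).all()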