First level residuals (#410)

jdkent · Gilles86 · bthirion · kchawla-pi · commit 839f8c9a5dcb · 2020-01-13T11:38:16.000+01:00
* Allow for storing residuals in First Level Models * import missing modules and update calls to functions * fix flake8 errors * remove @set_attr_on_read * modify tests to reflect updated code base * fix typo and simplifiy loop * respond to review comments: - add docstrings - re-add @setattr_on_read - change rsq to r_square * change rsq to r_square * change rsq to r_square in tests * fix function calls * Example of how to use for . * Made ValueError for storing model attributes more verbose. * Also include R-squared * fix heading underlines in example * fix grammar Co-Authored-By: bthirion <bertrand.thirion@inria.fr> * fix code formatting and do not standardize * change parameter timeseries to result_as_time_series * attempt to address @bthirion comments * split imports statements * always return list get_voxelwise_model_attribute_ * change docstrings for output to always be a list * modify tests to treat output as list * make _get_voxelwise_model_attribute private and improve documentation * fix formatting of function call * add empty line back in * revert regression.py to master * make result_as_time_series mandatory * add newlines to docs * add newline to end of file * fix missing newline * add James Kent to .mailmap * add entry for the new attributes to FirstLevelModel Co-authored-by: Gilles de Hollander <Gilles.de.Hollander@gmail.com> Co-authored-by: bthirion <bertrand.thirion@inria.fr> Co-authored-by: Kshitij Chawla <kc.insight.pi@gmail.com>
diff --git a/.mailmap b/.mailmap
@@ -21,6 +21,9 @@ Fabian Pedregosa <f@bianp.net>
 Franz Liem <franz.liem@gmail.com>
 Gael Varoquaux <gael.varoquaux@normalesup.org>
 Greg Kiar <gkiar07@gmail.com>
+James D. Kent <james-kent@uiowa.edu>
+James D. Kent <james-kent@uiowa.edu> <jamesdkent21@gmail.com>
+James D. Kent <james-kent@uiowa.edu> Fred Mertz <mertzf@bargle.argle>
 Jan Margeta <jmargeta@gmail.com>
 Jaques Grobler <jaquesgrobler@gmail.com>
 Jason Gors <jason.gors.work@gmail.com>
diff --git a/doc/whats_new.rst b/doc/whats_new.rst
@@ -9,6 +9,9 @@
 New
 ---
 
+* :class:`nistats.first_level_model.FirstLevelModel` now has the attributes ``residuals``, ``predicted``, and ``r_square``,
+  each of which returns a list of Niimg-like objects in the space of the input Niimg-like objects (requires ``minimize_memory=False``).
+  Additionally, there is an example showcasing the use of these attributes.
 * Use :func:`nistats.reporting.make_glm_report` to easily generate HTML reports from fitted first and second level models and contrasts.
 * New dataset fetcher, :func:`nistats.datasets.fetch_language_localizer_demo_dataset` , BIDS 1.2 compatible.
 * New example showcasing the use of a GLM to get beta maps for decoding experiments (aka beta-regression).
diff --git a/examples/02_first_level_models/plot_predictions_residuals.py b/examples/02_first_level_models/plot_predictions_residuals.py
@@ -0,0 +1,171 @@
+"""
+Predicted time series and residuals
+===================================
+
+Here we fit a First Level GLM with the `minimize_memory`-argument set to `False`.
+By doing so, the `FirstLevelModel`-object stores the residuals, which we can then inspect.
+Also, the predicted time series can be extracted, which is useful to assess the quality of the model fit.
+"""
+
+
+#########################################################################
+# Import modules
+# --------------
+from nistats.datasets import fetch_spm_auditory
+from nilearn import image
+from nilearn import masking
+import pandas as pd
+
+
+# load fMRI data and concatenate the functional images into one image
+subject_data = fetch_spm_auditory()
+fmri_img = image.concat_imgs(subject_data.func)
+
+# Make an average image; it is used to compute the mask and as plot background
+mean_img = image.mean_img(fmri_img)
+mask = masking.compute_epi_mask(mean_img)
+
+# Clean (without standardizing) and smooth the data
+fmri_img = image.clean_img(fmri_img, standardize=False)
+fmri_img = image.smooth_img(fmri_img, 5.)
+
+# load events
+events = pd.read_table(subject_data['events'])
+
+
+#########################################################################
+# Fit model
+# ---------
+# Note that `minimize_memory` is set to `False` so that `FirstLevelModel`
+# stores the residuals.
+# `signal_scaling` is set to False, so we keep the same scaling as the
+# original data in `fmri_img`.
+from nistats.first_level_model import FirstLevelModel
+
+fmri_glm = FirstLevelModel(t_r=7,
+                           drift_model='cosine',
+                           signal_scaling=False,
+                           mask_img=mask,
+                           minimize_memory=False)
+
+fmri_glm = fmri_glm.fit(fmri_img, events)
+
+
+#########################################################################
+# Calculate and plot contrast
+# ---------------------------
+from nilearn import plotting
+
+z_map = fmri_glm.compute_contrast('active - rest')
+
+plotting.plot_stat_map(z_map, bg_img=mean_img, threshold=3.1)
+
+#########################################################################
+# Extract the largest clusters
+# ----------------------------
+from nistats.reporting import get_clusters_table
+from nilearn import input_data
+
+table = get_clusters_table(z_map, stat_threshold=3.1,
+                           cluster_threshold=20).set_index('Cluster ID', drop=True)
+table.head()
+
+# get the x, y, and z coordinates listed for cluster IDs 1 through 6
+coords = table.loc[range(1, 7), ['X', 'Y', 'Z']].values
+
+# extract the observed and predicted time series at each coordinate
+masker = input_data.NiftiSpheresMasker(coords)
+real_timeseries = masker.fit_transform(fmri_img)
+predicted_timeseries = masker.fit_transform(fmri_glm.predicted[0])
+
+
+#########################################################################
+# Plot predicted and actual time series for 6 most significant clusters
+# ---------------------------------------------------------------------
+import matplotlib.pyplot as plt
+
+# colors for each of the clusters
+colors = ['blue', 'navy', 'purple', 'magenta', 'olive', 'teal']
+# plot the time series and corresponding locations
+fig1, axs1 = plt.subplots(2, 6)
+for i in range(0, 6):
+    # plotting time series
+    axs1[0, i].set_title('Cluster peak {}\n'.format(coords[i]))
+    axs1[0, i].plot(real_timeseries[:, i], c=colors[i], lw=2)
+    axs1[0, i].plot(predicted_timeseries[:, i], c='r',  ls='--', lw=2)
+    axs1[0, i].set_xlabel('Time')
+    axs1[0, i].set_ylabel('Signal intensity', labelpad=0)
+    # plotting image below the time series
+    roi_img = plotting.plot_stat_map(
+        z_map, cut_coords=[coords[i][2]], threshold=3.1, figure=fig1,
+        axes=axs1[1, i], display_mode='z', colorbar=False, bg_img=mean_img)
+    roi_img.add_markers([coords[i]], colors[i], 300)
+fig1.set_size_inches(24, 14)
+
+
+#########################################################################
+# Get residuals
+# -------------
+resid = masker.fit_transform(fmri_glm.residuals[0])
+
+
+#########################################################################
+# Plot distribution of residuals
+# ------------------------------
+# Note that residuals are not really distributed normally.
+fig2, axs2 = plt.subplots(2, 3)
+axs2 = axs2.flatten()
+for i in range(0, 6):
+    axs2[i].set_title('Cluster peak {}\n'.format(coords[i]))
+    axs2[i].hist(resid[:, i], color=colors[i])
+    print('Mean residuals: {}'.format(resid[:, i].mean()))
+
+fig2.set_size_inches(12, 7)
+fig2.tight_layout()
+
+
+#########################################################################
+# Plot R-squared
+# --------------
+# Because we stored the residuals, we can plot the R-squared: the proportion
+# of explained variance of the GLM as a whole. Note that the R-squared is markedly
+# lower deep down the brain, where there is more physiological noise and we
+# are further away from the receive coils. However, R-Squared should be interpreted
+# with a grain of salt. The R-squared value will necessarily increase with
+# the addition of more factors (such as rest, active, drift, motion) into the GLM.
+# Additionally, we are looking at the overall fit of the model, so we are
+# unable to say whether a voxel/region has a large R-squared value because
+# the voxel/region is responsive to the experiment (such as active or rest)
+# or because the voxel/region fits the noise factors (such as drift or motion)
+# that could be present in the GLM. To isolate the influence of the experiment,
+# we can use an F-test as shown in the next section.
+plotting.plot_stat_map(fmri_glm.r_square[0],
+                       bg_img=mean_img, threshold=.1, display_mode='z', cut_coords=7)
+
+
+#########################################################################
+# Calculate and Plot F-test
+# -------------------------
+# The F-test tells you how well the GLM fits effects of interest such as
+# the active and rest conditions together. This is different from R-squared,
+# which tells you how well the overall GLM fits the data, including active, rest
+# and all the other columns in the design matrix such as drift and motion.
+import numpy as np
+
+design_matrix = fmri_glm.design_matrices_[0]
+
+# contrast with a one for "active" and zero everywhere else
+active = np.array([1 if c == 'active' else 0 for c in design_matrix.columns])
+
+# contrast with a one for "rest" and zero everywhere else
+rest = np.array([1 if c == 'rest' else 0 for c in design_matrix.columns])
+
+effects_of_interest = np.vstack((active, rest))
+# F-test over the "active" and "rest" regressors jointly
+z_map_ftest = fmri_glm.compute_contrast(
+    effects_of_interest,
+    stat_type='F',
+    output_type='z_score')
+
+plotting.plot_stat_map(z_map_ftest,
+                       bg_img=mean_img, threshold=3.1, display_mode='z', cut_coords=7)
diff --git a/nistats/first_level_model.py b/nistats/first_level_model.py
@@ -18,6 +18,7 @@
 import numpy as np
 import pandas as pd
 from nibabel import Nifti1Image
+from nibabel.onetime import setattr_on_read
 
 from sklearn.base import (BaseEstimator,
                           clone,
@@ -36,6 +37,7 @@
 from .regression import (ARModel,
                          OLSModel,
                          SimpleRegressionResults,
+                         RegressionResults
                          )
 from .utils import (_basestring,
                     _check_run_tables,
@@ -117,14 +119,14 @@ def run_glm(Y, X, noise_model='ar1', bins=100, n_jobs=1, verbose=0):
     acceptable_noise_models = ['ar1', 'ols']
     if noise_model not in acceptable_noise_models:
         raise ValueError(
-            "Acceptable noise models are {0}. You provided 'noise_model={1}'".\
-                format(acceptable_noise_models, noise_model))
+            "Acceptable noise models are {0}. You provided 'noise_model={1}'".
+            format(acceptable_noise_models, noise_model))
 
     if Y.shape[0] != X.shape[0]:
         raise ValueError(
             'The number of rows of Y should match the number of rows of X.'
-            ' You provided X with shape {0} and Y with shape {1}'.\
-                format(X.shape, Y.shape))
+            ' You provided X with shape {0} and Y with shape {1}'.
+            format(X.shape, Y.shape))
 
     # Create the model
     ols_result = OLSModel(X).fit(Y)
@@ -309,6 +311,7 @@ def __init__(self, t_r=None, slice_time_ref=0., hrf_model='glover',
         else:
             raise ValueError('signal_scaling must be "False", "0", "1"'
                              ' or "(0, 1)"')
+
         self.noise_model = noise_model
         self.verbose = verbose
         self.n_jobs = n_jobs
@@ -583,6 +586,96 @@ def compute_contrast(self, contrast_def, stat_type=None,
 
         return outputs if output_type == 'all' else output
 
+    def _get_voxelwise_model_attribute(self, attribute, result_as_time_series):
+        """Project a voxelwise RegressionResults attribute back to image space.
+
+        For every entry of ``self.design_matrices_``, the values of
+        `attribute` are gathered from the RegressionResults instances stored
+        in the matching ``self.results_`` dictionary (whose keys represent
+        the autoregressive coefficient under the 'ar1' noise model or only
+        0.0 under 'ols' noise_model) and passed through
+        ``self.masker_.inverse_transform``.
+
+        Parameters
+        ----------
+        attribute : str
+            an attribute of a RegressionResults instance.
+            possible values include: resid, norm_resid, predicted,
+            SSE, r_square, MSE.
+        result_as_time_series : bool
+            whether the RegressionResult attribute has a value
+            per timepoint of the input nifti image.
+
+        Returns
+        -------
+        output : list
+            a list of Nifti1Image(s), one per entry of
+            ``self.design_matrices_``.
+
+        Raises
+        ------
+        ValueError
+            if `attribute` is not a name defined on RegressionResults, if
+            ``self.minimize_memory`` is set, or if the model has not been
+            fit yet.
+        """
+        # Names defined directly on RegressionResults, minus dunder entries.
+        possible_attributes = [prop for prop in vars(RegressionResults)
+                               if '__' not in prop]
+        if attribute not in possible_attributes:
+            msg = "attribute must be one of: {attr}".format(
+                attr=possible_attributes)
+            raise ValueError(msg)
+
+        if self.minimize_memory:
+            raise ValueError(
+                'To access voxelwise attributes like R-squared, residuals, '
+                'and predictions, the `FirstLevelModel`-object needs to store '
+                'these attributes. To do so, set `minimize_memory` to `False` '
+                'when initializing the `FirstLevelModel`-object.')
+
+        if self.labels_ is None or self.results_ is None:
+            raise ValueError('The model has not been fit yet')
+
+        output = []
+        for design_matrix, labels, results in zip(
+                self.design_matrices_, self.labels_, self.results_):
+            # One row per design-matrix row for time series, else a single row.
+            n_rows = design_matrix.shape[0] if result_as_time_series else 1
+            voxelwise_attribute = np.zeros((n_rows, len(labels)))
+
+            # Fill in the columns (voxels) belonging to each label.
+            for label_ in results:
+                label_mask = labels == label_
+                voxelwise_attribute[:, label_mask] = getattr(results[label_],
+                                                             attribute)
+
+            output.append(self.masker_.inverse_transform(voxelwise_attribute))
+
+        # Return only once every entry has been processed; returning inside
+        # the loop would drop all images but the first.
+        return output
+
+    @setattr_on_read
+    def residuals(self):
+        """Voxelwise residuals of the model, mapped back to image space.
+
+        Returns
+        -------
+        output : list
+            a list of Nifti1Image(s)
+        """
+        return self._get_voxelwise_model_attribute(
+            'resid', result_as_time_series=True)
+
+    @setattr_on_read
+    def predicted(self):
+        """Voxelwise predicted values of the model, mapped back to image space.
+
+        Returns
+        -------
+        output : list
+            a list of Nifti1Image(s)
+        """
+        return self._get_voxelwise_model_attribute(
+            'predicted', result_as_time_series=True)
+
+    @setattr_on_read
+    def r_square(self):
+        """Voxelwise r-squared values of the model, mapped back to image space.
+
+        Returns
+        -------
+        output : list
+            a list of Nifti1Image(s)
+        """
+        return self._get_voxelwise_model_attribute(
+            'r_square', result_as_time_series=False)
+
 
 @replace_parameters({'mask': 'mask_img'}, end_version='next')
 def first_level_models_from_bids(
diff --git a/nistats/regression.py b/nistats/regression.py
@@ -276,6 +276,7 @@ def __init__(self, theta, Y, model, wY, wresid, cov=None, dispersion=1.,
                                         dispersion, nuisance)
         self.wY = wY
         self.wresid = wresid
+        self.wdesign = model.wdesign
 
     @setattr_on_read
     def resid(self):
@@ -310,7 +311,7 @@ def predicted(self):
         """
         beta = self.theta
         # the LikelihoodModelResults has parameters named 'theta'
-        X = self.model.design
+        X = self.wdesign
         return np.dot(X, beta)
 
     @setattr_on_read
@@ -319,6 +320,13 @@ def SSE(self):
         """
         return (self.wresid ** 2).sum(0)
 
+    @setattr_on_read
+    def r_square(self):
+        """Ratio of the variance of ``predicted`` to the variance of ``wY``.
+        For models other than OLS this is only a "pseudo"-R2.
+        """
+        return np.var(self.predicted, axis=0) / np.var(self.wY, axis=0)
+
     @setattr_on_read
     def MSE(self):
         """ Mean square (error) """
diff --git a/nistats/tests/test_first_level_model.py b/nistats/tests/test_first_level_model.py
diff --git a/nistats/tests/test_regression.py b/nistats/tests/test_regression.py

-Original file line number
+Diff line change
 Franz Liem <[email protected]>
 Gael Varoquaux <[email protected]>
 Greg Kiar <[email protected]>
 +James D. Kent <[email protected]>
 +James D. Kent <[email protected]> <[email protected]>
 +James D. Kent <[email protected]> Fred Mertz <[email protected]>
 Jan Margeta <[email protected]>
 Jaques Grobler <[email protected]>
 Jason Gors <[email protected]>