[ENH] make ALE/FWE faster (#999)

jdkent · web-flow · commit c4adf4735053 · 2026-04-02T18:53:42.000-05:00
* make ALE faster

JIT-compile the approximate-null histogram update
change Monte Carlo/FWE permutations to pass around precomputed ijk voxel indices instead of doing the xyz->ijk conversion each iteration

* change the null-approximate calculation:

kept the simple per-study loop and removed the slower “all-in-one” compiled histogram path
replaced int(np.floor(...)) with direct truncation for nonnegative binning in _study_ma_histogram and _update_ale_histogram
normalized study histograms with a precomputed reciprocal instead of exp_hist.sum()
stopped forcing extra per-study float64 and astype(...) copies before histogram merging

* style fix and test

* fix style

* fix style

* fix outdated comments and add edge case tests

* fix style
diff --git a/nimare/meta/cbma/ale.py b/nimare/meta/cbma/ale.py
@@ -9,6 +9,7 @@
 import numpy as np
 import pandas as pd
 from joblib import Memory, Parallel, delayed
+from numba import jit
 from scipy import ndimage
 from scipy import sparse as sp_sparse
 from tqdm.auto import tqdm
@@ -66,6 +67,47 @@ def _compute_ale_summarystat(ma_values):
     raise ValueError(f"Unsupported data type '{type(ma_values)}'")
 
 
+@jit(nopython=True, cache=True)
+def _study_ma_histogram(study_ma_values, n_zero_voxels, mask_voxel_recip, inv_step_size, n_bins):
+    """Bin one study's nonzero ALE values onto the fixed approximate-null grid."""
+    exp_hist = np.zeros(n_bins, dtype=np.float64)
+    for i_val in range(study_ma_values.shape[0]):
+        idx = int(study_ma_values[i_val] * inv_step_size)
+        if idx < 0:
+            idx = 0
+        elif idx >= n_bins:
+            idx = n_bins - 1
+        exp_hist[idx] += 1.0
+
+    exp_hist[0] += n_zero_voxels
+    exp_hist *= mask_voxel_recip
+    return exp_hist
+
+
+@jit(nopython=True, cache=True)
+def _update_ale_histogram(
+    ale_idx, ale_probs, exp_idx, exp_probs, bin_centers, inv_step_size, n_bins, out
+):
+    """Combine two nonzero ALE histograms using a reusable output buffer."""
+    for i_bin in range(n_bins):
+        out[i_bin] = 0.0
+
+    for i_exp in range(exp_idx.shape[0]):
+        exp_center = bin_centers[exp_idx[i_exp]]
+        exp_prob = exp_probs[i_exp]
+        exp_one_minus = 1.0 - exp_center
+        for i_ale in range(ale_idx.shape[0]):
+            score = 1.0 - exp_one_minus * (1.0 - bin_centers[ale_idx[i_ale]])
+            score_idx = int(score * inv_step_size)
+            if score_idx < 0:
+                score_idx = 0
+            elif score_idx >= n_bins:
+                score_idx = n_bins - 1
+            out[score_idx] += exp_prob * ale_probs[i_ale]
+
+    return out
+
+
 def _collect_masked_ma_maps(estimator, coords_key="coordinates", maps_key="ma_maps"):
     """Collect ALE-family MA maps in masked CSR form."""
     estimator._study_max_ma_values = None
@@ -345,18 +387,18 @@ def _compute_null_approximate(self, ma_maps):
 
         assert "histogram_bins" in self.null_distributions_.keys()
 
-        # Derive bin edges from histogram bin centers for numpy histogram function
-        bin_centers = self.null_distributions_["histogram_bins"]
+        # Reuse the fixed histogram grid derived earlier in _determine_histogram_bins.
+        bin_centers = self.null_distributions_["histogram_bins"].astype(np.float64, copy=False)
         step_size = bin_centers[1] - bin_centers[0]
         inv_step_size = 1 / step_size
-        bin_edges = bin_centers - (step_size / 2)
-        bin_edges = np.append(bin_centers, bin_centers[-1] + step_size)
-
+        n_bins = bin_centers.shape[0]
+        mask_voxel_recip = 1.0 / self.__n_mask_voxels
         n_exp = ma_maps.shape[0]
         data = ma_maps.data
         indptr = ma_maps.indptr
 
         ale_hist = None
+        tmp_hist = np.zeros(n_bins, dtype=np.float64)
         for exp_idx in range(n_exp):
             start = indptr[exp_idx]
             end = indptr[exp_idx + 1]
@@ -365,32 +407,31 @@ def _compute_null_approximate(self, ma_maps):
             n_nonzero_voxels = study_ma_values.shape[0]
             n_zero_voxels = self.__n_mask_voxels - n_nonzero_voxels
 
-            exp_hist = np.histogram(study_ma_values, bins=bin_edges, density=False)[0].astype(
-                float
+            exp_hist = _study_ma_histogram(
+                study_ma_values,
+                n_zero_voxels,
+                mask_voxel_recip,
+                inv_step_size,
+                n_bins,
             )
-            exp_hist[0] += n_zero_voxels
-            exp_hist /= exp_hist.sum()
 
             if ale_hist is None:
                 ale_hist = exp_hist.copy()
                 continue
 
-            # Find histogram bins with nonzero values for each histogram.
             ale_idx = np.where(ale_hist > 0)[0]
-            exp_idx = np.where(exp_hist > 0)[0]
-
-            # Compute output MA values, ale_hist indices, and probabilities
-            ale_scores = (
-                1 - np.outer((1 - bin_centers[exp_idx]), (1 - bin_centers[ale_idx])).ravel()
+            exp_hist_idx = np.where(exp_hist > 0)[0]
+            _update_ale_histogram(
+                ale_idx,
+                ale_hist[ale_idx],
+                exp_hist_idx,
+                exp_hist[exp_hist_idx],
+                bin_centers,
+                inv_step_size,
+                n_bins,
+                tmp_hist,
             )
-            score_idx = np.floor(ale_scores * inv_step_size).astype(int)
-            probabilities = np.outer(exp_hist[exp_idx], ale_hist[ale_idx]).ravel()
-
-            # Reset histogram and set probabilities.
-            # Use at() instead of setting values directly (ale_hist[score_idx] = probabilities)
-            # because there can be redundant values in score_idx.
-            ale_hist = np.zeros(ale_hist.shape)
-            np.add.at(ale_hist, score_idx, probabilities)
+            ale_hist, tmp_hist = tmp_hist, ale_hist
 
         self.null_distributions_["histweights_corr-none_method-approximate"] = ale_hist
 
diff --git a/nimare/meta/cbma/base.py b/nimare/meta/cbma/base.py
@@ -28,7 +28,6 @@
     _mask_img_to_bool,
     get_masker,
     mm2vox,
-    vox2mm,
 )
 
 LGR = logging.getLogger(__name__)
@@ -590,7 +589,7 @@ def _compute_null_reduced_montecarlo(self, ma_maps, n_iters=5000):
         null_dist = self._compute_summarystat(iter_ma_values)
         self.null_distributions_["values_corr-none_method-reducedMontecarlo"] = null_dist
 
-    def _compute_null_montecarlo_permutation(self, iter_xyz, iter_df, bin_edges=None):
+    def _compute_null_montecarlo_permutation(self, iter_ijk, iter_df, bin_edges=None):
         """Run a single Monte Carlo permutation of a dataset.
 
         Does the shared work between uncorrected stat-to-p conversion and vFWE.
@@ -610,8 +609,8 @@ def _compute_null_montecarlo_permutation(self, iter_xyz, iter_df, bin_edges=None
         # be safe.
         iter_df = iter_df.copy()
 
-        iter_xyz = np.squeeze(iter_xyz)
-        iter_df[["x", "y", "z"]] = iter_xyz
+        iter_ijk = np.squeeze(iter_ijk)
+        iter_df[["i", "j", "k"]] = iter_ijk
 
         iter_ma_maps = self.kernel_transformer.transform(
             iter_df, masker=self.masker, return_type="sparse"
@@ -655,9 +654,8 @@ def _compute_null_montecarlo(self, n_iters, n_cores):
             size=(self.inputs_["coordinates"].shape[0], n_iters),
         )
         rand_ijk = null_ijk[rand_idx, :]
-        rand_xyz = vox2mm(rand_ijk, self.masker.mask_img.affine)
-        iter_xyzs = np.split(rand_xyz, rand_xyz.shape[1], axis=1)
-        iter_df = self.inputs_["coordinates"].copy()
+        iter_ijks = np.split(rand_ijk, rand_ijk.shape[1], axis=1)
+        iter_df = self.inputs_["coordinates"].drop(columns=["x", "y", "z"], errors="ignore").copy()
         parallel_kwargs = {"return_as": "generator", "n_jobs": n_cores}
         if getattr(self, "_permutation_parallel_backend", None) is not None:
             parallel_kwargs["backend"] = self._permutation_parallel_backend
@@ -669,7 +667,7 @@ def _compute_null_montecarlo(self, n_iters, n_cores):
 
         perm_histograms = Parallel(**parallel_kwargs)(
             delayed(self._compute_null_montecarlo_permutation)(
-                iter_xyzs[i_iter],
+                iter_ijks[i_iter],
                 iter_df=iter_df,
                 bin_edges=bin_edges,
             )
@@ -693,7 +691,7 @@ def _compute_null_montecarlo(self, n_iters, n_cores):
 
     def _correct_fwe_montecarlo_permutation(
         self,
-        iter_xyz,
+        iter_ijk,
         iter_df,
         conn,
         voxel_thresh,
@@ -705,9 +703,9 @@ def _correct_fwe_montecarlo_permutation(
 
         Parameters
         ----------
-        iter_xyz : :obj:`numpy.ndarray` of shape (C, 3)
-            The permuted coordinates. One row for each peak.
-            Columns correspond to x, y, and z coordinates.
+        iter_ijk : :obj:`numpy.ndarray` of shape (C, 3)
+            The permuted matrix indices. One row for each peak.
+            Columns correspond to i, j, and k coordinates.
         iter_df : :obj:`pandas.DataFrame`
-            The coordinates DataFrame, to be filled with the permuted coordinates in ``iter_xyz``
+            The coordinates DataFrame, to be filled with the permuted indices in ``iter_ijk``
             before permutation MA maps are generated.
@@ -727,8 +725,8 @@ def _correct_fwe_montecarlo_permutation(
         """
         iter_df = iter_df.copy()
 
-        iter_xyz = np.squeeze(iter_xyz)
-        iter_df[["x", "y", "z"]] = iter_xyz
+        iter_ijk = np.squeeze(iter_ijk)
+        iter_df[["i", "j", "k"]] = iter_ijk
 
         iter_ma_maps = self.kernel_transformer.transform(
             iter_df, masker=self.masker, return_type="sparse"
@@ -864,23 +862,22 @@ def correct_fwe_montecarlo(
                     "Running permutations from scratch."
                 )
 
-            null_xyz = vox2mm(
-                np.vstack(np.where(_mask_img_to_bool(self.masker.mask_img))).T,
-                self.masker.mask_img.affine,
-            )
+            null_ijk = np.vstack(np.where(_mask_img_to_bool(self.masker.mask_img))).T
 
             n_cores = _check_ncores(n_cores)
 
             # Identify summary statistic corresponding to intensity threshold
             ss_thresh = self._p_to_summarystat(voxel_thresh)
 
             rand_idx = np.random.choice(
-                null_xyz.shape[0],
+                null_ijk.shape[0],
                 size=(self.inputs_["coordinates"].shape[0], n_iters),
             )
-            rand_xyz = null_xyz[rand_idx, :]
-            iter_xyzs = np.split(rand_xyz, rand_xyz.shape[1], axis=1)
-            iter_df = self.inputs_["coordinates"].copy()
+            rand_ijk = null_ijk[rand_idx, :]
+            iter_ijks = np.split(rand_ijk, rand_ijk.shape[1], axis=1)
+            iter_df = (
+                self.inputs_["coordinates"].drop(columns=["x", "y", "z"], errors="ignore").copy()
+            )
             parallel_kwargs = {"return_as": "generator", "n_jobs": n_cores}
             if getattr(self, "_permutation_parallel_backend", None) is not None:
                 parallel_kwargs["backend"] = self._permutation_parallel_backend
@@ -890,7 +887,7 @@ def correct_fwe_montecarlo(
 
             perm_results = Parallel(**parallel_kwargs)(
                 delayed(self._correct_fwe_montecarlo_permutation)(
-                    iter_xyzs[i_iter],
+                    iter_ijks[i_iter],
                     iter_df=iter_df,
                     conn=conn,
                     voxel_thresh=ss_thresh,
diff --git a/nimare/meta/kernel.py b/nimare/meta/kernel.py
@@ -151,6 +151,9 @@ def transform(self, dataset, masker=None, return_type="image"):
         dataset : :obj:`~nimare.dataset.Dataset`, :obj:`~nimare.nimads.Studyset`, \
                 or :obj:`pandas.DataFrame`
             Collection for which to make images. Can be a DataFrame if necessary.
+            DataFrame inputs may provide precomputed matrix indices in ``i``, ``j``, and ``k``.
+            When those columns are present, they are used directly and ``x``, ``y``, and ``z``
+            are ignored.
         masker : img_like or None, optional
             Mask to apply to MA maps. Required if ``dataset`` is a DataFrame.
             If None, the input collection's masker attribute will be used.
@@ -195,11 +198,12 @@ def transform(self, dataset, masker=None, return_type="image"):
                 masker is not None
             ), "Argument 'masker' must be provided if dataset is a DataFrame."
             mask = masker.mask_img
-            coordinates = dataset
+            coordinates = dataset.copy()
 
-            # Calculate IJK. Must assume that the masker is in same space,
-            # but has different affine, from original IJK.
-            coordinates[["i", "j", "k"]] = mm2vox(dataset[["x", "y", "z"]], mask.affine)
+            if not {"i", "j", "k"}.issubset(coordinates.columns):
+                # Calculate IJK. Must assume that the masker is in same space,
+                # but has different affine, from original IJK.
+                coordinates[["i", "j", "k"]] = mm2vox(dataset[["x", "y", "z"]], mask.affine)
         else:
             if not isinstance(dataset, Dataset):
                 dataset = normalize_collection(dataset)
diff --git a/nimare/tests/test_meta_ale.py b/nimare/tests/test_meta_ale.py
@@ -112,6 +112,38 @@ def _prepare_ale_inputs(dataset, kernel_transformer=None):
     return meta
 
 
+def _study_ma_histogram_reference(
+    study_ma_values, n_zero_voxels, mask_voxel_recip, inv_step_size, n_bins
+):
+    """Reference implementation for ALE study-histogram binning."""
+    exp_hist = np.zeros(n_bins, dtype=np.float64)
+    for value in study_ma_values:
+        idx = int(np.floor(value * inv_step_size))
+        idx = min(max(idx, 0), n_bins - 1)
+        exp_hist[idx] += 1.0
+
+    exp_hist[0] += n_zero_voxels
+    exp_hist *= mask_voxel_recip
+    return exp_hist
+
+
+def _update_ale_histogram_reference(
+    ale_idx, ale_probs, exp_idx, exp_probs, bin_centers, inv_step_size, n_bins
+):
+    """Reference implementation for ALE histogram updates."""
+    out = np.zeros(n_bins, dtype=np.float64)
+    for i_exp in range(exp_idx.shape[0]):
+        exp_center = bin_centers[exp_idx[i_exp]]
+        exp_prob = exp_probs[i_exp]
+        exp_one_minus = 1.0 - exp_center
+        for i_ale in range(ale_idx.shape[0]):
+            score = 1.0 - exp_one_minus * (1.0 - bin_centers[ale_idx[i_ale]])
+            score_idx = int(np.floor(score * inv_step_size))
+            score_idx = min(max(score_idx, 0), n_bins - 1)
+            out[score_idx] += exp_prob * ale_probs[i_ale]
+    return out
+
+
 def test_ALE_missing_sample_sizes_raises_informative_error(testdata_cbma_full):
     """Raise a helpful error listing ids when sample sizes are missing."""
     dset = copy.deepcopy(testdata_cbma_full)
@@ -374,6 +406,69 @@ def test_ALE_csr_approximate_null_matches_dense_reference():
     )
 
 
+def test_ALE_study_ma_histogram_edge_bins():
+    """Study histogram binning should match the legacy floor-based implementation at edges."""
+    inv_step_size = 10.0
+    n_bins = 11
+    n_zero_voxels = 3
+    mask_voxel_recip = 1.0 / (n_zero_voxels + 6)
+    study_ma_values = np.array(
+        [0.0, 0.099999999, 0.1, 0.199999999, 0.9, 0.999999999],
+        dtype=np.float64,
+    )
+
+    actual = ale._study_ma_histogram(
+        study_ma_values,
+        n_zero_voxels,
+        mask_voxel_recip,
+        inv_step_size,
+        n_bins,
+    )
+    expected = _study_ma_histogram_reference(
+        study_ma_values,
+        n_zero_voxels,
+        mask_voxel_recip,
+        inv_step_size,
+        n_bins,
+    )
+
+    np.testing.assert_allclose(actual, expected)
+
+
+def test_ALE_update_histogram_edge_bins():
+    """Histogram updates should match the legacy floor-based implementation at bin edges."""
+    bin_centers = np.linspace(0.0, 1.0, 11, dtype=np.float64)
+    inv_step_size = 10.0
+    n_bins = bin_centers.shape[0]
+    ale_idx = np.array([0, 1, 9, 10], dtype=np.int64)
+    exp_idx = np.array([0, 1, 9, 10], dtype=np.int64)
+    ale_probs = np.array([0.1, 0.2, 0.3, 0.4], dtype=np.float64)
+    exp_probs = np.array([0.4, 0.3, 0.2, 0.1], dtype=np.float64)
+    out = np.empty(n_bins, dtype=np.float64)
+
+    actual = ale._update_ale_histogram(
+        ale_idx,
+        ale_probs,
+        exp_idx,
+        exp_probs,
+        bin_centers,
+        inv_step_size,
+        n_bins,
+        out,
+    )
+    expected = _update_ale_histogram_reference(
+        ale_idx,
+        ale_probs,
+        exp_idx,
+        exp_probs,
+        bin_centers,
+        inv_step_size,
+        n_bins,
+    )
+
+    np.testing.assert_allclose(actual, expected)
+
+
 @pytest.mark.parametrize(
     ("kernel_transformer", "sample_sizes"),
     [
diff --git a/nimare/tests/test_meta_kernel.py b/nimare/tests/test_meta_kernel.py