remydubois · remydubois · Apr 26, 2026 · Apr 25, 2026 · Apr 25, 2026 · Apr 26, 2026
diff --git a/changelog.md b/changelog.md
@@ -1,6 +1,10 @@
 Changelog
 =========
 
+Version 0.5.1
+------------
+- Fixes bug when sparse matrices contain strictly negative values, cf [this discussion](https://scverse.zulipchat.com/#narrow/channel/557094-differential-gene-expression/topic/Nonnegative.20expectation/near/590691572).
+
 Version 0.5.0rc2
 ------------
 - For better compatibility with `scanpy`, lower bounds on dependencies have been relaxed.

diff --git a/illico/ovo/dense_ovo.py b/illico/ovo/dense_ovo.py
@@ -44,7 +44,7 @@ def dense_ovo_mwu_kernel(
     n = n_ref + n_tgt
     mu = n_ref * n_tgt / 2.0
     for j in range(ncols):
-        ranksum, tie_sum = rank_sum_and_ties_from_sorted(sorted_ref_data[:, j], sorted_tgt_data[:, j])
+        ranksum, tie_sum, _ = rank_sum_and_ties_from_sorted(sorted_ref_data[:, j], sorted_tgt_data[:, j])
 
         # Compute U-stat
         U1 = ranksum - n_tgt * (n_tgt + 1) / 2.0

diff --git a/illico/ovo/sparse_ovo.py b/illico/ovo/sparse_ovo.py
@@ -73,15 +73,14 @@ def single_group_sparse_ovo_mwu_kernel(
         lbr, ubr = sorted_ref_data.indptr[j], sorted_ref_data.indptr[j + 1]
 
         # Compute ranksum and tie sum for non zero values
-        ranksum, tie_sum = rank_sum_and_ties_from_sorted(sorted_ref_data.data[lbr:ubr], sorted_tgt_data.data[lbt:ubt])
-
-        # Offset the ranks of the number of zeros in ref and perturbed
-        ranksum += n_zeros_combined * (ubt - lbt)
+        nz_ranksum, tie_sum, zpos = rank_sum_and_ties_from_sorted(
+            sorted_ref_data.data[lbr:ubr], sorted_tgt_data.data[lbt:ubt], zero_values_offset=n_zeros_combined
+        )
 
         # Compute ranksum
         n0 = n_zeros_tgt[j]
-        R1_nz = ranksum  # Sum ranks
-        R1 = R1_nz + n0 * (n_zeros_ref[j] + n0 + 1) / 2.0  # Add sumranks of zeros
+        z_ranksum = (zpos + (n_zeros_ref[j] + n0 + 1) / 2.0) * n0  # Add sumranks of zeros
+        R1 = nz_ranksum + z_ranksum  # Add sumranks of zeros
 
         # Compute U-stat
         U1 = R1 - n_tgt * (n_tgt + 1) / 2

diff --git a/illico/ovr/dense_ovr.py b/illico/ovr/dense_ovr.py
@@ -53,7 +53,7 @@ def dense_ovr_mwu_kernel_over_contiguous_col_chunk(
     ranksums = np.zeros(shape=(grpc.counts.size, chunk.shape[1]), dtype=np.float64)
     for j in range(chunk.shape[1]):
         idxs = np.argsort(chunk[:, j])
-        col_tie_sum = _accumulate_group_ranksums_from_argsort(chunk[:, j], idxs, grpc.encoded_groups, ranksums[:, j])
+        col_tie_sum, _ = _accumulate_group_ranksums_from_argsort(chunk[:, j], idxs, grpc.encoded_groups, ranksums[:, j])
         tie_sum[j] = col_tie_sum
 
     # Compute U stats

diff --git a/illico/ovr/sparse_ovr.py b/illico/ovr/sparse_ovr.py
@@ -66,19 +66,18 @@ def sparse_ovr_mwu_kernel(
         nz_idx = X.indices[start:end]
         """Step 1: compute ranksum of non-zero elements, per group"""
         _idxs = np.argsort(X.data[start:end])
-        tie_sum = _accumulate_group_ranksums_from_argsort(X.data[start:end], _idxs, groups[nz_idx], R1_nz[:, j])
+        tie_sum, zero_pos = _accumulate_group_ranksums_from_argsort(
+            X.data[start:end], _idxs, groups[nz_idx], R1_nz[:, j], zero_values_offset=int(n_zeros[j])
+        )
         n0 = n_zeros[j]
         """Step 2: offset non-zero elements ranks by the number of zeros that precedes them"""
         if nz_idx.size:
             _add_at_scalar(nnz_per_group[:, j], groups[nz_idx], 1.0)
         # Deduce number of zeros per group
         nz_per_group = group_counts - nnz_per_group[:, j]
-        # Offset the non-zero ranks by the amount of 0 that precedes them
-        # All ranks must be shifted, so the sum is shifted by that many elements.
-        R1_nz[:, j] += n0 * nnz_per_group[:, j]
         """ Step 3: Add ranksums of zero elements, per group"""
         # add zero contribution: number of zeros * avg rank
-        R1 = R1_nz[:, j] + nz_per_group * (n0 + 1) / 2.0
+        R1 = R1_nz[:, j] + nz_per_group * (n0 + 1 + 2 * zero_pos) / 2.0
         U[:, j] = R1 - n_tgt * (n_tgt + 1) / 2
         tie_sum += n0**3 - n0
 

diff --git a/illico/utils/ranking.py b/illico/utils/ranking.py
@@ -6,8 +6,8 @@
 
 @njit(nogil=True, cache=False, fastmath=True)
 def _accumulate_group_ranksums_from_argsort(
-    arr: np.ndarray, idx: np.ndarray, groups: np.ndarray, ranksums: np.ndarray
-) -> np.ndarray:
+    arr: np.ndarray, idx: np.ndarray, groups: np.ndarray, ranksums: np.ndarray, zero_values_offset: int = 0
+) -> tuple[np.ndarray, int]:
     """From a given array of values, indices of sorted values (result of np.argsort) and groups, accumulate group rank
     sums in the placegolder `ranksums`.
 
@@ -16,41 +16,53 @@ def _accumulate_group_ranksums_from_argsort(
         idx (np.ndarray): Array of indices of sorted values
         groups (np.ndarray): Array of group indicator
         ranksums (np.ndarray): Plaeholder of shape (n_groups, ) where to accumulate rank sums
+        zero_values_offset (int): If > 0, it means that there are zeros not present in the input arrays but that
+        they should be accounted for. This is only used when the input adata is sparse, and ranksum is computed
+        on non zero values.
 
     Returns:
         np.ndarray: tie sums
+        int: position of zeros in the sorted array (if zero_values_offset > 0)
 
     Author: Rémy Dubois
 
     """
-    # if ranks is None:
-    #     ranks = np.empty(arr.size, dtype=np.float64)
+    zero_pos = -1
     n = idx.size
     i = 0
+    rank = 0
     tie_sum = 0.0
+
     while i < n:
         # find tie block
         j = i + 1
+        if zero_values_offset > 0 and arr[idx[i]] > 0:
+            zero_pos = i
+            rank += zero_values_offset
+            zero_values_offset = 0
         while j < n and arr[idx[j]] == arr[idx[i]]:
             j += 1
-        # average rank for indices i..j-1 (1-based)
-        avg_rank = 0.5 * (i + 1 + j)  # mean of [i+1, ..., j]
+        avg_rank = rank + 0.5 * (j - i + 1)
         for k in range(i, j):
             if groups is not None:
                 ranksums[groups[idx[k]]] += avg_rank
             else:
                 # If no group is specified, ranks are sum reduced into one value for the whole column
                 ranksums += avg_rank
         tie_count = j - i
-        # Tie sum is the same for all froups
+        # Tie sum is the same for all groups
         tie_sum += tie_count**3 - tie_count
         i = j
+        rank += tie_count
+
+    if zero_pos == -1 and zero_values_offset > 0:
+        zero_pos = n
 
-    return tie_sum
+    return tie_sum, zero_pos
 
 
 @njit(nogil=True)
-def rank_sum_and_ties_from_sorted(A: np.ndarray, B: np.ndarray) -> tuple[np.ndarray]:
+def rank_sum_and_ties_from_sorted(A: np.ndarray, B: np.ndarray, zero_values_offset: int = 0) -> tuple[np.ndarray]:
     """Compute rank sums and tie sums from two 1-d sorted arrays.
 
     This routine is similar to the leetcode "merge two sorted arrays", except it
@@ -62,6 +74,9 @@ def rank_sum_and_ties_from_sorted(A: np.ndarray, B: np.ndarray) -> tuple[np.ndar
     Args:
         A (np.ndarray): The first sorted array (controls)
         B (np.ndarray): The second sorted array (perturbed)
+        zero_values_offset (int): If > 0, it means that there are zeros not present in the input arrays but that
+        they should be accounted for. This is only used when the input adata is sparse, and ranksum is computed
+        on non zero values.
 
     Returns:
         tuple[np.ndarray]: Ranks sum from the second array, and tie sums for the combined
@@ -76,8 +91,9 @@ def rank_sum_and_ties_from_sorted(A: np.ndarray, B: np.ndarray) -> tuple[np.ndar
     i = 0
     j = 0
     k = 0  # number of items processed so far (0-based)
+    zero_pos = -1  # Default setting
 
-    sum_ranks_B = 0.0
+    sum_ranks_B = 0
     tie_sum = 0.0
 
     # main sweep
@@ -88,6 +104,11 @@ def rank_sum_and_ties_from_sorted(A: np.ndarray, B: np.ndarray) -> tuple[np.ndar
         else:
             v = B[j]
 
+        if v > 0 and zero_values_offset > 0:
+            zero_pos = k
+            k += zero_values_offset
+            zero_values_offset = 0
+
         # count occurrences in A
         tA = 0
         ii = i
@@ -118,6 +139,11 @@ def rank_sum_and_ties_from_sorted(A: np.ndarray, B: np.ndarray) -> tuple[np.ndar
     while i < nA:
         v = A[i]
 
+        if v > 0 and zero_values_offset > 0:
+            zero_pos = k
+            k += zero_values_offset
+            zero_values_offset = 0
+
         # count in A
         tA = 0
         ii = i
@@ -139,6 +165,11 @@ def rank_sum_and_ties_from_sorted(A: np.ndarray, B: np.ndarray) -> tuple[np.ndar
     while j < nB:
         v = B[j]
 
+        if v > 0 and zero_values_offset > 0:
+            zero_pos = k
+            k += zero_values_offset
+            zero_values_offset = 0
+
         # count in B
         tB = 0
         jj = j
@@ -156,7 +187,12 @@ def rank_sum_and_ties_from_sorted(A: np.ndarray, B: np.ndarray) -> tuple[np.ndar
         k += t
         j = jj
 
-    return sum_ranks_B, tie_sum
+    # if zero_pos is still not set, it means there are no positive values in the entire column.
+    # so zeros come at the end of it
+    if zero_pos == -1 and zero_values_offset > 0:
+        zero_pos = k
+
+    return sum_ranks_B, tie_sum, zero_pos
 
 
 @njit(nogil=True, cache=False)

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "illico"
-version = "0.5.0rc2"
+version = "0.5.1"
 description = "Fast asymptotic mannwhitney-u test"
 authors = [
     {name = "remydubois",email = "remydubois14@gmail.com"}

diff --git a/src/dense_ovo.rs b/src/dense_ovo.rs
@@ -31,7 +31,7 @@ pub fn dense_ovo_kernel<D: SparseFloat>(
 
     let remainder = &n_tgt * (&n_tgt + 1.) / 2.;
     for j in 0..n_cols as usize {
-        let (rs, ts) = rank_sum_and_ties(sorted_controls.column(j), sorted_tgt.column(j));
+        let (rs, ts, _) = rank_sum_and_ties(sorted_controls.column(j), sorted_tgt.column(j), 0);
 
         let u = rs - remainder;
 

diff --git a/src/dense_ovr.rs b/src/dense_ovr.rs
@@ -33,12 +33,13 @@ pub fn dense_ovr_kernel<D: SparseFloat>(
     let mut tie_sums = Array1::zeros(chunk_ub - chunk_lb);
     for j in 0..chunk.dim().1 {
         let sorted_indices = argsort(chunk.column(j));
-        accumulate_rank_and_tie_sums_from_argsort(
+        _ = accumulate_rank_and_tie_sums_from_argsort(
             chunk.column(j),
             sorted_indices,
             grpc.encoded_groups,
             ranksums.column_mut(j),
             tie_sums.slice_mut(s![j]),
+            0,
         )?;
     }
 

diff --git a/src/ranking.rs b/src/ranking.rs
@@ -50,14 +50,19 @@ pub fn sort_along_axis_0_inplace_rust(mut x: PyReadwriteArray2<f32>) -> PyResult
     Ok(())
 }
 
-pub fn rank_sum_and_ties<D: SparseFloat>(ctrl: ArrayView1<D>, tgt: ArrayView1<D>) -> (f64, f64) {
+pub fn rank_sum_and_ties<D: SparseFloat>(
+    ctrl: ArrayView1<D>,
+    tgt: ArrayView1<D>,
+    mut zero_values_offset: usize,
+) -> (f64, f64, usize) {
     let n_ctrl = ctrl.len();
     let n_tgt = tgt.len();
 
     // Initialize pointers
     let mut i = 0;
     let mut j = 0;
     let mut k: usize = 0;
+    let mut zero_pos: isize = -1;
 
     // Initialize accumulators
     let mut rank_sum_tgt: f64 = 0.;
@@ -67,6 +72,12 @@ pub fn rank_sum_and_ties<D: SparseFloat>(ctrl: ArrayView1<D>, tgt: ArrayView1<D>
     while i < n_ctrl && j < n_tgt {
         let v = ctrl[i].min(tgt[j]);
 
+        if (v.to_f64() > 0.) & (zero_values_offset > 0) {
+            zero_pos = k as isize;
+            k += zero_values_offset;
+            zero_values_offset = 0;
+        }
+
         // Count unique values in control values
         let mut count_ctrl: usize = 0;
         let mut offset_i = i;
@@ -119,6 +130,12 @@ pub fn rank_sum_and_ties<D: SparseFloat>(ctrl: ArrayView1<D>, tgt: ArrayView1<D>
     while i < n_ctrl {
         let v = ctrl[i];
 
+        if (v.to_f64() > 0.) & (zero_values_offset > 0) {
+            zero_pos = k as isize;
+            k += zero_values_offset;
+            zero_values_offset = 0;
+        }
+
         // Count unique values in control values
         let mut count_ctrl: usize = 0;
         for l in i..n_ctrl {
@@ -140,6 +157,12 @@ pub fn rank_sum_and_ties<D: SparseFloat>(ctrl: ArrayView1<D>, tgt: ArrayView1<D>
     while j < n_tgt {
         let v = tgt[j];
 
+        if (v.to_f64() > 0.) & (zero_values_offset > 0) {
+            zero_pos = k as isize;
+            k += zero_values_offset;
+            zero_values_offset = 0;
+        }
+
         // Count unique values in target values
         let mut count_tgt: usize = 0;
         for l in j..n_tgt {
@@ -161,7 +184,11 @@ pub fn rank_sum_and_ties<D: SparseFloat>(ctrl: ArrayView1<D>, tgt: ArrayView1<D>
         k += count_tgt;
     }
 
-    return (rank_sum_tgt, tie_sum);
+    if (zero_pos == -1) & (zero_values_offset > 0) {
+        zero_pos = k as isize;
+    }
+
+    return (rank_sum_tgt, tie_sum, zero_pos as usize);
 }
 
 #[pyfunction]
@@ -171,7 +198,7 @@ pub fn rank_sum_and_ties_rust(
 ) -> (f64, f64) {
     let controls = controls.as_array();
     let target = target.as_array();
-    let (ranksum, tiesum) = rank_sum_and_ties(controls, target);
+    let (ranksum, tiesum, _) = rank_sum_and_ties(controls, target, 0);
     return (ranksum, tiesum);
 }
 
@@ -181,7 +208,8 @@ pub fn accumulate_rank_and_tie_sums_from_argsort<D: SparseFloat>(
     group_labels: ArrayView1<usize>,
     mut ranksums: ArrayViewMut1<f64>,
     mut tie_sum: ArrayViewMut0<f64>,
-) -> Result<(), String> {
+    mut zero_values_offset: usize,
+) -> Result<usize, String> {
     let n_sorted_idx = sorted_indices.len();
     let n_values = x.len();
     let n_grp_indices = group_labels.len();
@@ -217,15 +245,22 @@ pub fn accumulate_rank_and_tie_sums_from_argsort<D: SparseFloat>(
     }
 
     let mut i: usize = 0;
+    let mut rank: usize = 0;
+    let mut zero_pos: isize = -1;
     while i < n_values {
         // Find tie block
         let mut j = i + 1;
+        if (zero_values_offset > 0) & (x[sorted_indices[i]].to_f64() > 0.) {
+            zero_pos = i as isize;
+            rank += zero_values_offset;
+            zero_values_offset = 0;
+        }
         while j < n_values && x[sorted_indices[j]] == x[sorted_indices[i]] {
             j += 1
         }
         // Now by def: j - i gives the size of the tie block
         // Compute the average rank for this block
-        let avg_rank = 0.5 * (i + j + 1) as f64;
+        let avg_rank = rank as f64 + 0.5 * (j - i + 1) as f64;
 
         // Accumulate this avg rank in each group's palceholder
         for k in i..j {
@@ -237,12 +272,17 @@ pub fn accumulate_rank_and_tie_sums_from_argsort<D: SparseFloat>(
         }
 
         // Now take care of the tie sum
-        let tie_block_size: f64 = (j - i) as f64;
-        tie_sum += tie_block_size.powi(3) - tie_block_size;
+        let tie_block_size = j - i;
+        tie_sum += tie_block_size.pow(3) as f64 - tie_block_size as f64;
+        rank += tie_block_size;
         i = j;
     }
 
-    Ok(())
+    if (zero_pos == -1) & (zero_values_offset > 0) {
+        zero_pos = n_values as isize;
+    }
+
+    Ok(zero_pos as usize)
 }
 
 // Because only the indices are mutated in argsort, input values do not need to be a vec or a contiguous memory alloc