diff --git a/changelog.md b/changelog.md index 868e765..0f1e280 100644 --- a/changelog.md +++ b/changelog.md @@ -1,6 +1,10 @@ Changelog ========= +Version 0.5.1 +------------ +- Fixes bug when sparse matrices contain strictly negative values, cf [this discussion](https://scverse.zulipchat.com/#narrow/channel/557094-differential-gene-expression/topic/Nonnegative.20expectation/near/590691572). + Version 0.5.0rc2 ------------ - For better compatibility with `scanpy`, lower bounds on dependencies have been relaxed. diff --git a/illico/ovo/dense_ovo.py b/illico/ovo/dense_ovo.py index e39f87a..072d874 100644 --- a/illico/ovo/dense_ovo.py +++ b/illico/ovo/dense_ovo.py @@ -44,7 +44,7 @@ def dense_ovo_mwu_kernel( n = n_ref + n_tgt mu = n_ref * n_tgt / 2.0 for j in range(ncols): - ranksum, tie_sum = rank_sum_and_ties_from_sorted(sorted_ref_data[:, j], sorted_tgt_data[:, j]) + ranksum, tie_sum, _ = rank_sum_and_ties_from_sorted(sorted_ref_data[:, j], sorted_tgt_data[:, j]) # Compute U-stat U1 = ranksum - n_tgt * (n_tgt + 1) / 2.0 diff --git a/illico/ovo/sparse_ovo.py b/illico/ovo/sparse_ovo.py index 2e56bfe..3066128 100644 --- a/illico/ovo/sparse_ovo.py +++ b/illico/ovo/sparse_ovo.py @@ -73,15 +73,14 @@ def single_group_sparse_ovo_mwu_kernel( lbr, ubr = sorted_ref_data.indptr[j], sorted_ref_data.indptr[j + 1] # Compute ranksum and tie sum for non zero values - ranksum, tie_sum = rank_sum_and_ties_from_sorted(sorted_ref_data.data[lbr:ubr], sorted_tgt_data.data[lbt:ubt]) - - # Offset the ranks of the number of zeros in ref and perturbed - ranksum += n_zeros_combined * (ubt - lbt) + nz_ranksum, tie_sum, zpos = rank_sum_and_ties_from_sorted( + sorted_ref_data.data[lbr:ubr], sorted_tgt_data.data[lbt:ubt], zero_values_offset=n_zeros_combined + ) # Compute ranksum n0 = n_zeros_tgt[j] - R1_nz = ranksum # Sum ranks - R1 = R1_nz + n0 * (n_zeros_ref[j] + n0 + 1) / 2.0 # Add sumranks of zeros + z_ranksum = (zpos + (n_zeros_ref[j] + n0 + 1) / 2.0) * n0 # Add sumranks of zeros + R1 = nz_ranksum + z_ranksum # Add sumranks of zeros # Compute U-stat U1 = R1 - n_tgt * (n_tgt + 1) / 2 diff --git a/illico/ovr/dense_ovr.py b/illico/ovr/dense_ovr.py index 1f1f744..6e96664 100644 --- a/illico/ovr/dense_ovr.py +++ b/illico/ovr/dense_ovr.py @@ -53,7 +53,7 @@ def dense_ovr_mwu_kernel_over_contiguous_col_chunk( ranksums = np.zeros(shape=(grpc.counts.size, chunk.shape[1]), dtype=np.float64) for j in range(chunk.shape[1]): idxs = np.argsort(chunk[:, j]) - col_tie_sum = _accumulate_group_ranksums_from_argsort(chunk[:, j], idxs, grpc.encoded_groups, ranksums[:, j]) + col_tie_sum, _ = _accumulate_group_ranksums_from_argsort(chunk[:, j], idxs, grpc.encoded_groups, ranksums[:, j]) tie_sum[j] = col_tie_sum # Compute U stats diff --git a/illico/ovr/sparse_ovr.py b/illico/ovr/sparse_ovr.py index 9535cf7..b33a51f 100644 --- a/illico/ovr/sparse_ovr.py +++ b/illico/ovr/sparse_ovr.py @@ -66,19 +66,18 @@ def sparse_ovr_mwu_kernel( nz_idx = X.indices[start:end] """Step 1: compute ranksum of non-zero elements, per group""" _idxs = np.argsort(X.data[start:end]) - tie_sum = _accumulate_group_ranksums_from_argsort(X.data[start:end], _idxs, groups[nz_idx], R1_nz[:, j]) + tie_sum, zero_pos = _accumulate_group_ranksums_from_argsort( + X.data[start:end], _idxs, groups[nz_idx], R1_nz[:, j], zero_values_offset=int(n_zeros[j]) + ) n0 = n_zeros[j] """Step 2: offset non-zero elements ranks by the number of zeros that precedes them""" if nz_idx.size: _add_at_scalar(nnz_per_group[:, j], groups[nz_idx], 1.0) # Deduce number of zeros per group nz_per_group = group_counts - nnz_per_group[:, j] - # Offset the non-zero ranks by the amount of 0 that precedes them - # All ranks must be shifted, so the sum is shifted by that many elements. - R1_nz[:, j] += n0 * nnz_per_group[:, j] """ Step 3: Add ranksums of zero elements, per group""" # add zero contribution: number of zeros * avg rank - R1 = R1_nz[:, j] + nz_per_group * (n0 + 1) / 2.0 + R1 = R1_nz[:, j] + nz_per_group * (n0 + 1 + 2 * zero_pos) / 2.0 U[:, j] = R1 - n_tgt * (n_tgt + 1) / 2 tie_sum += n0**3 - n0 diff --git a/illico/utils/ranking.py b/illico/utils/ranking.py index 5266694..e43aa0e 100644 --- a/illico/utils/ranking.py +++ b/illico/utils/ranking.py @@ -6,8 +6,8 @@ @njit(nogil=True, cache=False, fastmath=True) def _accumulate_group_ranksums_from_argsort( - arr: np.ndarray, idx: np.ndarray, groups: np.ndarray, ranksums: np.ndarray -) -> np.ndarray: + arr: np.ndarray, idx: np.ndarray, groups: np.ndarray, ranksums: np.ndarray, zero_values_offset: int = 0 +) -> tuple[float, int]: """From a given array of values, indices of sorted values (result of np.argsort) and groups, accumulate group rank sums in the placegolder `ranksums`. @@ -16,25 +16,33 @@ def _accumulate_group_ranksums_from_argsort( idx (np.ndarray): Array of indices of sorted values groups (np.ndarray): Array of group indicator ranksums (np.ndarray): Plaeholder of shape (n_groups, ) where to accumulate rank sums + zero_values_offset (int): If > 0, it means that there are zeros not present in the input arrays but that + they should be accounted for. This is only used when the input adata is sparse, and ranksum is computed + on non zero values. Returns: - np.ndarray: tie sums + float: tie sums + int: position of zeros in the sorted array (if zero_values_offset > 0) Author: Rémy Dubois """ - # if ranks is None: - # ranks = np.empty(arr.size, dtype=np.float64) + zero_pos = -1 n = idx.size i = 0 + rank = 0 tie_sum = 0.0 + while i < n: # find tie block j = i + 1 + if zero_values_offset > 0 and arr[idx[i]] > 0: + zero_pos = i + rank += zero_values_offset + zero_values_offset = 0 while j < n and arr[idx[j]] == arr[idx[i]]: j += 1 - # average rank for indices i..j-1 (1-based) - avg_rank = 0.5 * (i + 1 + j) # mean of [i+1, ..., j] + avg_rank = rank + 0.5 * (j - i + 1) for k in range(i, j): if groups is not None: ranksums[groups[idx[k]]] += avg_rank @@ -42,15 +50,19 @@ def _accumulate_group_ranksums_from_argsort( # If no group is specified, ranks are sum reduced into one value for the whole column ranksums += avg_rank tie_count = j - i - # Tie sum is the same for all froups + # Tie sum is the same for all groups tie_sum += tie_count**3 - tie_count i = j + rank += tie_count + + if zero_pos == -1 and zero_values_offset > 0: + zero_pos = n - return tie_sum + return tie_sum, zero_pos @njit(nogil=True) -def rank_sum_and_ties_from_sorted(A: np.ndarray, B: np.ndarray) -> tuple[np.ndarray]: +def rank_sum_and_ties_from_sorted(A: np.ndarray, B: np.ndarray, zero_values_offset: int = 0) -> tuple[np.ndarray]: """Compute rank sums and tie sums from two 1-d sorted arrays. This routine is similar to the leetcode "merge two sorted arrays", except it @@ -62,6 +74,9 @@ def rank_sum_and_ties_from_sorted(A: np.ndarray, B: np.ndarray) -> tuple[np.ndar Args: A (np.ndarray): The first sorted array (controls) B (np.ndarray): The second sorted array (perturbed) + zero_values_offset (int): If > 0, it means that there are zeros not present in the input arrays but that + they should be accounted for. This is only used when the input adata is sparse, and ranksum is computed + on non zero values. Returns: tuple[np.ndarray]: Ranks sum from the second array, and tie sums for the combined @@ -76,6 +91,7 @@ def rank_sum_and_ties_from_sorted(A: np.ndarray, B: np.ndarray) -> tuple[np.ndar i = 0 j = 0 k = 0 # number of items processed so far (0-based) + zero_pos = -1 # Default setting sum_ranks_B = 0.0 tie_sum = 0.0 @@ -88,6 +104,11 @@ def rank_sum_and_ties_from_sorted(A: np.ndarray, B: np.ndarray) -> tuple[np.ndar else: v = B[j] + if v > 0 and zero_values_offset > 0: + zero_pos = k + k += zero_values_offset + zero_values_offset = 0 + # count occurrences in A tA = 0 ii = i @@ -118,6 +139,11 @@ def rank_sum_and_ties_from_sorted(A: np.ndarray, B: np.ndarray) -> tuple[np.ndar while i < nA: v = A[i] + if v > 0 and zero_values_offset > 0: + zero_pos = k + k += zero_values_offset + zero_values_offset = 0 + # count in A tA = 0 ii = i @@ -139,6 +165,11 @@ def rank_sum_and_ties_from_sorted(A: np.ndarray, B: np.ndarray) -> tuple[np.ndar while j < nB: v = B[j] + if v > 0 and zero_values_offset > 0: + zero_pos = k + k += zero_values_offset + zero_values_offset = 0 + # count in B tB = 0 jj = j @@ -156,7 +187,12 @@ def rank_sum_and_ties_from_sorted(A: np.ndarray, B: np.ndarray) -> tuple[np.ndar k += t j = jj - return sum_ranks_B, tie_sum + # if zero_pos is still not set, it means there are no positive values in the entire column. + # so zeros come at the end of it + if zero_pos == -1 and zero_values_offset > 0: + zero_pos = k + + return sum_ranks_B, tie_sum, zero_pos @njit(nogil=True, cache=False) diff --git a/pyproject.toml b/pyproject.toml index e9a0dd9..03e9a8f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "illico" -version = "0.5.0rc2" +version = "0.5.1" description = "Fast asymptotic mannwhitney-u test" authors = [ {name = "remydubois",email = "remydubois14@gmail.com"} diff --git a/src/dense_ovo.rs b/src/dense_ovo.rs index ab8f78b..ceef251 100644 --- a/src/dense_ovo.rs +++ b/src/dense_ovo.rs @@ -31,7 +31,7 @@ pub fn dense_ovo_kernel( let remainder = &n_tgt * (&n_tgt + 1.) / 2.; for j in 0..n_cols as usize { - let (rs, ts) = rank_sum_and_ties(sorted_controls.column(j), sorted_tgt.column(j)); + let (rs, ts, _) = rank_sum_and_ties(sorted_controls.column(j), sorted_tgt.column(j), 0); let u = rs - remainder; diff --git a/src/dense_ovr.rs b/src/dense_ovr.rs index ead4635..1170486 100644 --- a/src/dense_ovr.rs +++ b/src/dense_ovr.rs @@ -39,6 +39,7 @@ pub fn dense_ovr_kernel( grpc.encoded_groups, ranksums.column_mut(j), tie_sums.slice_mut(s![j]), + 0, )?; } diff --git a/src/ranking.rs b/src/ranking.rs index 9ea5474..acc6971 100644 --- a/src/ranking.rs +++ b/src/ranking.rs @@ -50,7 +50,11 @@ pub fn sort_along_axis_0_inplace_rust(mut x: PyReadwriteArray2) -> PyResult Ok(()) } -pub fn rank_sum_and_ties(ctrl: ArrayView1, tgt: ArrayView1) -> (f64, f64) { +pub fn rank_sum_and_ties( + ctrl: ArrayView1, + tgt: ArrayView1, + mut zero_values_offset: usize, +) -> (f64, f64, usize) { let n_ctrl = ctrl.len(); let n_tgt = tgt.len(); @@ -58,6 +62,7 @@ pub fn rank_sum_and_ties(ctrl: ArrayView1, tgt: ArrayView1 let mut i = 0; let mut j = 0; let mut k: usize = 0; + let mut zero_pos: isize = -1; // Initialize accumulators let mut rank_sum_tgt: f64 = 0.; @@ -67,6 +72,12 @@ pub fn rank_sum_and_ties(ctrl: ArrayView1, tgt: ArrayView1 while i < n_ctrl && j < n_tgt { let v = ctrl[i].min(tgt[j]); + if (v.to_f64() > 0.) & (zero_values_offset > 0) { + zero_pos = k as isize; + k += zero_values_offset; + zero_values_offset = 0; + } + // Count unique values in control values let mut count_ctrl: usize = 0; let mut offset_i = i; @@ -119,6 +130,12 @@ pub fn rank_sum_and_ties(ctrl: ArrayView1, tgt: ArrayView1 while i < n_ctrl { let v = ctrl[i]; + if (v.to_f64() > 0.) & (zero_values_offset > 0) { + zero_pos = k as isize; + k += zero_values_offset; + zero_values_offset = 0; + } + // Count unique values in control values let mut count_ctrl: usize = 0; for l in i..n_ctrl { @@ -140,6 +157,12 @@ pub fn rank_sum_and_ties(ctrl: ArrayView1, tgt: ArrayView1 while j < n_tgt { let v = tgt[j]; + if (v.to_f64() > 0.) && (zero_values_offset > 0) { + zero_pos = k as isize; + k += zero_values_offset; + zero_values_offset = 0; + } + // Count unique values in target values let mut count_tgt: usize = 0; for l in j..n_tgt { @@ -161,7 +184,11 @@ pub fn rank_sum_and_ties(ctrl: ArrayView1, tgt: ArrayView1 k += count_tgt; } - return (rank_sum_tgt, tie_sum); + if (zero_pos == -1) && (zero_values_offset > 0) { + zero_pos = k as isize; + } + + return (rank_sum_tgt, tie_sum, zero_pos as usize); } #[pyfunction] @@ -171,7 +198,7 @@ pub fn rank_sum_and_ties_rust( ) -> (f64, f64) { let controls = controls.as_array(); let target = target.as_array(); - let (ranksum, tiesum) = rank_sum_and_ties(controls, target); + let (ranksum, tiesum, _) = rank_sum_and_ties(controls, target, 0); return (ranksum, tiesum); } @@ -181,7 +208,8 @@ pub fn accumulate_rank_and_tie_sums_from_argsort( group_labels: ArrayView1, mut ranksums: ArrayViewMut1, mut tie_sum: ArrayViewMut0, -) -> Result<(), String> { + mut zero_values_offset: usize, +) -> Result { let n_sorted_idx = sorted_indices.len(); let n_values = x.len(); let n_grp_indices = group_labels.len(); @@ -217,15 +245,22 @@ pub fn accumulate_rank_and_tie_sums_from_argsort( } let mut i: usize = 0; + let mut rank: usize = 0; + let mut zero_pos: isize = -1; while i < n_values { // Find tie block let mut j = i + 1; + if (zero_values_offset > 0) && (x[sorted_indices[i]].to_f64() > 0.) { + zero_pos = i as isize; + rank += zero_values_offset; + zero_values_offset = 0; + } while j < n_values && x[sorted_indices[j]] == x[sorted_indices[i]] { j += 1 } // Now by def: j - i gives the size of the tie block // Compute the average rank for this block - let avg_rank = 0.5 * (i + j + 1) as f64; + let avg_rank = rank as f64 + 0.5 * (j - i + 1) as f64; // Accumulate this avg rank in each group's palceholder for k in i..j { @@ -237,12 +272,17 @@ pub fn accumulate_rank_and_tie_sums_from_argsort( } // Now take care of the tie sum - let tie_block_size: f64 = (j - i) as f64; - tie_sum += tie_block_size.powi(3) - tie_block_size; + let tie_block_size = j - i; + tie_sum += tie_block_size.pow(3) as f64 - tie_block_size as f64; + rank += tie_block_size; i = j; } - Ok(()) + if (zero_pos == -1) && (zero_values_offset > 0) { + zero_pos = n_values as isize; + } + + Ok(zero_pos as usize) } // Because only the indices are mutated in argsort, input values do not need to be a vec or a contiguous memory alloc diff --git a/src/sparse_ovo.rs b/src/sparse_ovo.rs index ba2da45..c8bc1bb 100644 --- a/src/sparse_ovo.rs +++ b/src/sparse_ovo.rs @@ -51,13 +51,14 @@ pub fn single_group_sparse_ovo_mwu_kernel( let (lbt, ubt) = (tgt.indptr[j].to_usize(), tgt.indptr[j + 1].to_usize()); // Compute ranksum and tiesum on non zeros only first - let (mut ranksum, mut tiesum) = - rank_sum_and_ties(ctrl.data.slice(s![lbc..ubc]), tgt.data.slice(s![lbt..ubt])); + let (mut ranksum, mut tiesum, zero_pos) = rank_sum_and_ties( + ctrl.data.slice(s![lbc..ubc]), + tgt.data.slice(s![lbt..ubt]), + n_zeros_total as usize, + ); - // Offset ranksum of nonzeros elements: all the ranks must be increase by n_zeros_total, and all the target elements (ubt - lbt) contribute to the ranksum - ranksum += n_zeros_total * (ubt - lbt) as f64; // Now compute the ranksum of zero elements, those contribute as well - let rank_of_zeros = (n_zeros_ctrl[j] + n_zeros_tgt[j] + 1.) * 0.5; + let rank_of_zeros = (n_zeros_ctrl[j] + n_zeros_tgt[j] + 1.) * 0.5 + zero_pos as f64; ranksum += rank_of_zeros * n_zeros_tgt[j]; // Now, icnrement the tiesum with the zeros diff --git a/src/sparse_ovr.rs b/src/sparse_ovr.rs index fb05f8f..1efe700 100644 --- a/src/sparse_ovr.rs +++ b/src/sparse_ovr.rs @@ -50,12 +50,13 @@ pub fn sparse_ovr_mwu_kernel( .indices .slice(s![start..end]) .mapv(|x| grpc.encoded_groups[x.to_usize()]); - accumulate_rank_and_tie_sums_from_argsort( + let zero_pos = accumulate_rank_and_tie_sums_from_argsort( col_nz_values, argsorted_idxs, nz_group_labels.view(), ranksum.column_mut(j), tiesum.slice_mut(s![j]), + n_zeros[j], )?; // Need to offset the ranks by the number of zeros per group @@ -66,10 +67,9 @@ pub fn sparse_ovr_mwu_kernel( // This is messy: why nz_per_group is column-unique while nnz_per_group is all columns let nz_per_group = &grpc.counts - &nnz_per_group.column(j); let mut rs = ranksum.column_mut(j); - rs += &(n_zeros[j] * &nnz_per_group.column(j)).mapv(|x| x as f64); // nnz and not nz ! // Now need to add the contributions of zero to the ranksum of each group - rs += &((nz_per_group * (n_zeros[j] + 1)).mapv(|x| x as f64) / 2.); + rs += &((nz_per_group * (n_zeros[j] + 1 + 2 * zero_pos)).mapv(|x| x as f64) / 2.); let mut ts = tiesum.slice_mut(s![j]); ts += (n_zeros[j].pow(3) - n_zeros[j]) as f64; diff --git a/tests/conftest.py b/tests/conftest.py index b3b671e..458f249 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -101,6 +101,14 @@ def rand_adata(request, tmp_path): # Create groups associated groups = rng.randint(0, n_groups, size=n_cells) # Add one ref group + # Now cover all possible negative value scenarios: some with all negative values, some with a mix of both + # By default dense_counts only contains positive values + dense_counts[:, 0] *= -1.0 # all negative in the first column + dense_counts[groups == 0, 1] *= -1.0 # First group all negative, rest all positives in the second column + dense_counts[np.isin(groups, [0, 1]), 2] *= -1.0 # Two groups all negative, rest all positives in the third column + dense_counts[groups == 0, 3][::2] *= -1.0 # Ref is both pos and neg in the fourth column + dense_counts[groups == 1, 4][::2] *= -1.0 # Target is both pos and neg in the fifth column + fmt, lazy = request.param if fmt == "dense": data_matrix = dense_counts diff --git a/tests/utils/test_ranking.py b/tests/utils/test_ranking.py index 852e438..f2bf75b 100644 --- a/tests/utils/test_ranking.py +++ b/tests/utils/test_ranking.py @@ -1,4 +1,5 @@ import numpy as np +import pytest from scipy import sparse as sc_sparse from scipy.stats import rankdata @@ -10,47 +11,75 @@ from illico.utils.sparse.csc import CSCMatrix -def test_rank_sum_and_ties_from_sorted(): +@pytest.mark.parametrize("format", ["dense", "sparse"]) +def test_rank_sum_and_ties_from_sorted(format): rng = np.random.RandomState(0) - A = rng.randint(0, 10, size=20) - B = rng.randint(0, 10, size=15) - A.sort() - B.sort() + A = rng.randint(-10, 10, size=20) + A[:2] = 0 # Add some zeros manually + B = rng.randint(-10, 10, size=15) + B[:3] = 0 # Add some zeros manually - ranksum_B, tie_sum = rank_sum_and_ties_from_sorted(A, B) - - # Now manually compute ranksum + # First compute real ranksum and tie sum combined = np.concatenate([A, B]) ranks = rankdata(combined, method="average") ranksum_B_manual = ranks[len(A) :].sum() - # Manually compute tie sum _, tie_counts = np.unique(combined, return_counts=True) manual_tie_sum = (tie_counts**3 - tie_counts).sum() + if format == "sparse": + n_zeros_A = (A == 0).sum() + n_zeros_B = (B == 0).sum() + n_zeros = n_zeros_A + n_zeros_B + A, B = A[A != 0], B[B != 0] # Keep only non-null values to have ties + else: + n_zeros_A = n_zeros_B = n_zeros = 0 + A.sort() + B.sort() + + ranksum_B, tie_sum, zero_pos = rank_sum_and_ties_from_sorted(A, B, n_zeros) + # Add contributions of zeros to the ranksum + ranksum_B += n_zeros_B * (zero_pos + (n_zeros + 1) / 2.0) + # Add contributions of zeros to the tie sum + tie_sum += n_zeros * (n_zeros**2 - 1) # Check np.testing.assert_allclose(ranksum_B, ranksum_B_manual) np.testing.assert_allclose(tie_sum, manual_tie_sum) -def test_group_ranksum_accumulation(): +@pytest.mark.parametrize("format", ["dense", "sparse"]) +def test_group_ranksum_accumulation(format): rng = np.random.RandomState(0) arr = rng.rand(30) + arr[:5] = 0 # Add some zeros manually groups = rng.randint(0, 3, size=30) - idx = np.argsort(arr) - ranksums = np.zeros(3, dtype=np.float64) - tie_sum = _accumulate_group_ranksums_from_argsort(arr, idx, groups, ranksums) - - # Manually compute ranks + # Manually compute ranks and tie sums on the whole array ranks = rankdata(arr, method="average") manual_ranksums = np.zeros(3, dtype=np.float64) for i in range(len(arr)): manual_ranksums[groups[i]] += ranks[i] - - # Manually compute tie sum _, tie_counts = np.unique(arr, return_counts=True) manual_tie_sum = (tie_counts**3 - tie_counts).sum() + # Now compute them with illico utils + if format == "sparse": + n_zeros = (arr == 0).sum() + nz_per_group = np.array([((groups == g) & (arr == 0)).sum() for g in range(3)]) + groups = groups[arr != 0] + arr = arr[arr != 0] + else: + n_zeros = 0 + nz_per_group = np.zeros(3, dtype=np.float64) + + idx = np.argsort(arr) + ranksums = np.zeros(3, dtype=np.float64) + tie_sum, zero_pos = _accumulate_group_ranksums_from_argsort(arr, idx, groups, ranksums, n_zeros) + + # Add contributions of zeros to the ranksums + ranksums += nz_per_group * (zero_pos + (n_zeros + 1) / 2.0) + # Add contributions of zeros to the tie sum + tie_sum += n_zeros * (n_zeros**2 - 1) + # Check np.testing.assert_allclose(ranksums, manual_ranksums) np.testing.assert_allclose(tie_sum, manual_tie_sum)