Skip to content

Commit dcb21ee

Browse files
committed
Added explicit negative values test in ranking
1 parent 94d61a5 commit dcb21ee

1 file changed

Lines changed: 46 additions & 17 deletions

File tree

tests/utils/test_ranking.py

Lines changed: 46 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
import numpy as np
2+
import pytest
23
from scipy import sparse as sc_sparse
34
from scipy.stats import rankdata
45

@@ -10,47 +11,75 @@
1011
from illico.utils.sparse.csc import CSCMatrix
1112

1213

13-
def test_rank_sum_and_ties_from_sorted():
14+
@pytest.mark.parametrize("format", ["dense", "sparse"])
15+
def test_rank_sum_and_ties_from_sorted(format):
1416
rng = np.random.RandomState(0)
15-
A = rng.randint(0, 10, size=20)
16-
B = rng.randint(0, 10, size=15)
17-
A.sort()
18-
B.sort()
17+
A = rng.randint(-10, 10, size=20)
18+
A[:2] = 0 # Add some zeros manually
19+
B = rng.randint(-10, 10, size=15)
20+
B[:3] = 0 # Add some zeros manually
1921

20-
ranksum_B, tie_sum = rank_sum_and_ties_from_sorted(A, B)
21-
22-
# Now manually compute ranksum
22+
# First compute real ranksum and tie sum
2323
combined = np.concatenate([A, B])
2424
ranks = rankdata(combined, method="average")
2525
ranksum_B_manual = ranks[len(A) :].sum()
26-
# Manually compute tie sum
2726
_, tie_counts = np.unique(combined, return_counts=True)
2827
manual_tie_sum = (tie_counts**3 - tie_counts).sum()
2928

29+
if format == "sparse":
30+
n_zeros_A = (A == 0).sum()
31+
n_zeros_B = (B == 0).sum()
32+
n_zeros = n_zeros_A + n_zeros_B
33+
A, B = A[A != 0], B[B != 0] # Keep only non-zero values (negatives included); zeros are accounted for via n_zeros
34+
else:
35+
n_zeros_A = n_zeros_B = n_zeros = 0
36+
A.sort()
37+
B.sort()
38+
39+
ranksum_B, tie_sum, zero_pos = rank_sum_and_ties_from_sorted(A, B, n_zeros)
40+
# Add contributions of zeros to the ranksum
41+
ranksum_B += n_zeros_B * (zero_pos + (n_zeros + 1) / 2.0)
42+
# Add contributions of zeros to the tie sum
43+
tie_sum += n_zeros * (n_zeros**2 - 1)
3044
# Check
3145
np.testing.assert_allclose(ranksum_B, ranksum_B_manual)
3246
np.testing.assert_allclose(tie_sum, manual_tie_sum)
3347

3448

35-
def test_group_ranksum_accumulation():
49+
@pytest.mark.parametrize("format", ["dense", "sparse"])
50+
def test_group_ranksum_accumulation(format):
3651
rng = np.random.RandomState(0)
3752
arr = rng.rand(30)
53+
arr[:5] = 0 # Add some zeros manually
3854
groups = rng.randint(0, 3, size=30)
39-
idx = np.argsort(arr)
4055

41-
ranksums = np.zeros(3, dtype=np.float64)
42-
tie_sum = _accumulate_group_ranksums_from_argsort(arr, idx, groups, ranksums)
43-
44-
# Manually compute ranks
56+
# Manually compute ranks and tie sums on the whole array
4557
ranks = rankdata(arr, method="average")
4658
manual_ranksums = np.zeros(3, dtype=np.float64)
4759
for i in range(len(arr)):
4860
manual_ranksums[groups[i]] += ranks[i]
49-
50-
# Manually compute tie sum
5161
_, tie_counts = np.unique(arr, return_counts=True)
5262
manual_tie_sum = (tie_counts**3 - tie_counts).sum()
5363

64+
# Now compute them with illico utils
65+
if format == "sparse":
66+
n_zeros = (arr == 0).sum()
67+
nz_per_group = np.array([((groups == g) & (arr == 0)).sum() for g in range(3)])
68+
groups = groups[arr != 0]
69+
arr = arr[arr != 0]
70+
else:
71+
n_zeros = 0
72+
nz_per_group = np.zeros(3, dtype=np.float64)
73+
74+
idx = np.argsort(arr)
75+
ranksums = np.zeros(3, dtype=np.float64)
76+
tie_sum, zero_pos = _accumulate_group_ranksums_from_argsort(arr, idx, groups, ranksums, n_zeros)
77+
78+
# Add contributions of zeros to the ranksums
79+
ranksums += nz_per_group * (zero_pos + (n_zeros + 1) / 2.0)
80+
# Add contributions of zeros to the tie sum
81+
tie_sum += n_zeros * (n_zeros**2 - 1)
82+
5483
# Check
5584
np.testing.assert_allclose(ranksums, manual_ranksums)
5685
np.testing.assert_allclose(tie_sum, manual_tie_sum)

0 commit comments

Comments
 (0)