|
1 | 1 | import numpy as np |
| 2 | +import pytest |
2 | 3 | from scipy import sparse as sc_sparse |
3 | 4 | from scipy.stats import rankdata |
4 | 5 |
|
|
10 | 11 | from illico.utils.sparse.csc import CSCMatrix |
11 | 12 |
|
12 | 13 |
|
13 | | -def test_rank_sum_and_ties_from_sorted(): |
| 14 | +@pytest.mark.parametrize("format", ["dense", "sparse"]) |
| 15 | +def test_rank_sum_and_ties_from_sorted(format): |
14 | 16 | rng = np.random.RandomState(0) |
15 | | - A = rng.randint(0, 10, size=20) |
16 | | - B = rng.randint(0, 10, size=15) |
17 | | - A.sort() |
18 | | - B.sort() |
| 17 | + A = rng.randint(-10, 10, size=20) |
| 18 | + A[:2] = 0 # Add some zeros manually |
| 19 | + B = rng.randint(-10, 10, size=15) |
| 20 | + B[:3] = 0 # Add some zeros manually |
19 | 21 |
|
20 | | - ranksum_B, tie_sum = rank_sum_and_ties_from_sorted(A, B) |
21 | | - |
22 | | - # Now manually compute ranksum |
| 22 | + # First compute real ranksum and tie sum |
23 | 23 | combined = np.concatenate([A, B]) |
24 | 24 | ranks = rankdata(combined, method="average") |
25 | 25 | ranksum_B_manual = ranks[len(A) :].sum() |
26 | | - # Manually compute tie sum |
27 | 26 | _, tie_counts = np.unique(combined, return_counts=True) |
28 | 27 | manual_tie_sum = (tie_counts**3 - tie_counts).sum() |
29 | 28 |
|
| 29 | + if format == "sparse": |
| 30 | + n_zeros_A = (A == 0).sum() |
| 31 | + n_zeros_B = (B == 0).sum() |
| 32 | + n_zeros = n_zeros_A + n_zeros_B |
| 33 | + A, B = A[A != 0], B[B != 0] # Keep only positive values to have ties |
| 34 | + else: |
| 35 | + n_zeros_A = n_zeros_B = n_zeros = 0 |
| 36 | + A.sort() |
| 37 | + B.sort() |
| 38 | + |
| 39 | + ranksum_B, tie_sum, zero_pos = rank_sum_and_ties_from_sorted(A, B, n_zeros) |
| 40 | + # Add contributions of zeros to the ranksum |
| 41 | + ranksum_B += n_zeros_B * (zero_pos + (n_zeros + 1) / 2.0) |
| 42 | + # Add contributions of zeros to the tie sum |
| 43 | + tie_sum += n_zeros * (n_zeros**2 - 1) |
30 | 44 | # Check |
31 | 45 | np.testing.assert_allclose(ranksum_B, ranksum_B_manual) |
32 | 46 | np.testing.assert_allclose(tie_sum, manual_tie_sum) |
33 | 47 |
|
34 | 48 |
|
35 | | -def test_group_ranksum_accumulation(): |
| 49 | +@pytest.mark.parametrize("format", ["dense", "sparse"]) |
| 50 | +def test_group_ranksum_accumulation(format): |
36 | 51 | rng = np.random.RandomState(0) |
37 | 52 | arr = rng.rand(30) |
| 53 | + arr[:5] = 0 # Add some zeros manually |
38 | 54 | groups = rng.randint(0, 3, size=30) |
39 | | - idx = np.argsort(arr) |
40 | 55 |
|
41 | | - ranksums = np.zeros(3, dtype=np.float64) |
42 | | - tie_sum = _accumulate_group_ranksums_from_argsort(arr, idx, groups, ranksums) |
43 | | - |
44 | | - # Manually compute ranks |
| 56 | + # Manually compute ranks and tie sums on the whole array |
45 | 57 | ranks = rankdata(arr, method="average") |
46 | 58 | manual_ranksums = np.zeros(3, dtype=np.float64) |
47 | 59 | for i in range(len(arr)): |
48 | 60 | manual_ranksums[groups[i]] += ranks[i] |
49 | | - |
50 | | - # Manually compute tie sum |
51 | 61 | _, tie_counts = np.unique(arr, return_counts=True) |
52 | 62 | manual_tie_sum = (tie_counts**3 - tie_counts).sum() |
53 | 63 |
|
| 64 | + # Now compute them with illico utils |
| 65 | + if format == "sparse": |
| 66 | + n_zeros = (arr == 0).sum() |
| 67 | + nz_per_group = np.array([((groups == g) & (arr == 0)).sum() for g in range(3)]) |
| 68 | + groups = groups[arr != 0] |
| 69 | + arr = arr[arr != 0] |
| 70 | + else: |
| 71 | + n_zeros = 0 |
| 72 | + nz_per_group = np.zeros(3, dtype=np.float64) |
| 73 | + |
| 74 | + idx = np.argsort(arr) |
| 75 | + ranksums = np.zeros(3, dtype=np.float64) |
| 76 | + tie_sum, zero_pos = _accumulate_group_ranksums_from_argsort(arr, idx, groups, ranksums, n_zeros) |
| 77 | + |
| 78 | + # Add contributions of zeros to the ranksums |
| 79 | + ranksums += nz_per_group * (zero_pos + (n_zeros + 1) / 2.0) |
| 80 | + # Add contributions of zeros to the tie sum |
| 81 | + tie_sum += n_zeros * (n_zeros**2 - 1) |
| 82 | + |
54 | 83 | # Check |
55 | 84 | np.testing.assert_allclose(ranksums, manual_ranksums) |
56 | 85 | np.testing.assert_allclose(tie_sum, manual_tie_sum) |
|
0 commit comments