Skip to content

Commit 8ad194a

Browse files
committed
Improve decomposer tests
1 parent 1625a15 commit 8ad194a

File tree

6 files changed

+193
-158
lines changed

6 files changed

+193
-158
lines changed
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
# Helper script that estimates safe lower bounds for the
2+
# compression-factor assertions in the decomposer tests.
3+
4+
import numpy as np
5+
import graphblas
6+
from tqdm import tqdm
7+
import graphblas as gb
8+
from graphblas.core.matrix import Matrix
9+
from graphblas.core.dtypes import BOOL
10+
11+
from cfpq_decomposer.high_performance_decomposer import HighPerformanceDecomposer
12+
from cfpq_decomposer.prototype_decomposer import PrototypeDecomposer
13+
from test.cfpq_decomposer.synthetic_data import similar_rows_matrix, multiple_patterns_matrix, double_threshold_matrix, \
14+
similar_columns_matrix, random_matrix_with_patterns
15+
16+
# Synthetic matrix generators keyed by case name; the keys are emitted
# verbatim as the entries of the printed MIN_COMPRESSION_FACTORS table.
MATRIX_FNS = {
    "similar_rows": similar_rows_matrix,
    "multiple_patterns": multiple_patterns_matrix,
    "double_threshold": double_threshold_matrix,
    "similar_columns": similar_columns_matrix,
    "random_patterns": random_matrix_with_patterns,
}
23+
24+
def compute_case_quantile(gen_fn, runs=2500, percentile=0.1):
    """Estimate a conservative lower bound on one case's compression factor.

    For each decomposer class (``HighPerformanceDecomposer`` and
    ``PrototypeDecomposer``) the generator is called ``runs`` times. Every
    generated matrix is decomposed and its compression factor
    ``M.nvals / (left.nvals + right.nvals + rem)`` is recorded, where ``rem``
    counts the entries of ``M`` that the product of the two factors does not
    reproduce. Samples from both decomposers are pooled.

    Args:
        gen_fn: Zero-argument callable returning the matrix to decompose.
        runs: Number of matrices generated per decomposer class; must be >= 1.
        percentile: Percentile of the pooled samples to return, on numpy's
            0-100 scale. The default ``0.1`` is the 0.1th percentile (i.e. the
            0.001 quantile), not the 10th percentile.

    Returns:
        The requested percentile of all recorded compression factors.

    Raises:
        ValueError: If ``runs`` is smaller than 1, since no samples would be
            collected.
    """
    if runs < 1:
        raise ValueError(f"runs must be at least 1, got {runs}")
    values = []
    for cls in (HighPerformanceDecomposer, PrototypeDecomposer):
        desc = f"{gen_fn.__name__}::{cls.__name__}"
        for _ in tqdm(range(runs), desc=desc, leave=False):
            M = gen_fn()
            left, right = cls().decompose(M)
            left_right = left.mxm(right, op=graphblas.semiring.any_pair).new(dtype=BOOL)
            # Entries of M at positions missing from the product's structure
            # still have to be stored separately, so they count as cost.
            rem = M.dup(mask=~left_right.S).nvals
            values.append(M.nvals / (left.nvals + right.nvals + rem))
    return np.percentile(values, percentile)
35+
36+
if __name__ == "__main__":
    # Compute one threshold per synthetic case, then print the results as a
    # dict literal named MIN_COMPRESSION_FACTORS.
    quantiles = {
        key: compute_case_quantile(fn)
        for key, fn in tqdm(MATRIX_FNS.items(), desc="Cases")
    }
    print("MIN_COMPRESSION_FACTORS = {")
    for key, val in quantiles.items():
        print(f' "{key}": {val:.3f},')
    print("}")
Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
import numpy as np
2+
from graphblas.core.dtypes import BOOL
3+
from graphblas.core.matrix import Matrix
4+
5+
# Per-case thresholds that a decomposition's compression factor is compared
# against, keyed by synthetic case name.
MIN_COMPRESSION_FACTORS = {
    "similar_rows": 1.5,
    "multiple_patterns": 11,
    "double_threshold": 11,
    "similar_columns": 3.5,
    "random_patterns": 2.5,
}
12+
13+
def similar_rows_matrix():
    """Build a 15x15 boolean matrix whose first ten rows share a column pattern.

    Rows 0-9 each store ``True`` in columns 0-5; rows 10-14 each store a
    single ``True`` on the main diagonal.

    Returns:
        The populated ``Matrix`` with dtype ``BOOL``.
    """
    size = 15
    M = Matrix(BOOL, nrows=size, ncols=size)
    for row in range(10):
        for col in range(6):
            M[row, col] = True
        # NOTE(review): column 3 is already part of the shared pattern above,
        # so this assignment adds no new entry. Nesting was reconstructed on
        # the assumption that it is a sibling of the column loop -- confirm.
        if row % 3 == 0:
            M[row, 3] = True
    for row in range(10, size):
        M[row, row % size] = True
    return M
25+
26+
def multiple_patterns_matrix():
    """Build a 300x100 boolean matrix made of three dense row bands.

    Each band covers 100 consecutive rows and 20 consecutive columns. Within a
    band, rows whose index is a multiple of the band's period additionally
    store ``True`` in one marker column outside the band's column range.

    Returns:
        The populated ``Matrix`` with dtype ``BOOL``.
    """
    M = Matrix(BOOL, nrows=300, ncols=100)
    # (rows, dense columns, marker period, marker column) for each band.
    bands = (
        (range(0, 100), range(0, 20), 10, 25),
        (range(100, 200), range(30, 50), 15, 55),
        (range(200, 300), range(60, 80), 20, 85),
    )
    for band_rows, band_cols, period, marker_col in bands:
        for row in band_rows:
            for col in band_cols:
                M[row, col] = True
            if row % period == 0:
                M[row, marker_col] = True
    return M
45+
46+
def double_threshold_matrix():
    """Build a 100x50 boolean matrix of five column blocks of differing height.

    Columns 0-9 are filled in all 100 rows; columns 10-19, 20-29, 30-39 and
    40-49 are filled in the first 75, 74, 76 and 80 rows respectively.

    Returns:
        The populated ``Matrix`` with dtype ``BOOL``.
    """
    nrows, ncols = 100, 50
    M = Matrix(BOOL, nrows=nrows, ncols=ncols)
    # (number of leading rows filled, first column of the 10-wide block)
    blocks = ((nrows, 0), (75, 10), (74, 20), (76, 30), (80, 40))
    for height, first_col in blocks:
        for row in range(height):
            for col in range(first_col, first_col + 10):
                M[row, col] = True
    return M
65+
66+
def similar_columns_matrix():
    """Build a 100x200 boolean matrix whose first 50 columns are near-identical.

    Columns 0-49 are filled in rows 0-79; every tenth of those columns is also
    filled in rows 80-84. Afterwards each row receives five assignments at
    columns drawn with ``np.random.randint(50, ncols)``; draws may repeat, so a
    row gains at most five extra entries. The result is not deterministic
    because the global numpy random state is used.

    Returns:
        The populated ``Matrix`` with dtype ``BOOL``.
    """
    nrows, ncols = 100, 200
    M = Matrix(BOOL, nrows=nrows, ncols=ncols)
    for col in range(50):
        # Every tenth column is five rows taller than the rest.
        height = 85 if col % 10 == 0 else 80
        for row in range(height):
            M[row, col] = True
    for row in range(nrows):
        for _ in range(5):
            M[row, np.random.randint(50, ncols)] = True
    return M
80+
81+
def random_matrix_with_patterns():
    """Build a 500x500 boolean matrix of random row/column patterns plus noise.

    Three stages, all using the global numpy random state:

    1. Five groups of 100 consecutive rows; every row in a group is filled at
       the same 50 randomly chosen columns. Rows whose index is a multiple of
       25 also get 5 further randomly chosen columns.
    2. The same construction with rows and columns swapped.
    3. ``int(M.nvals * 0.05)`` assignments at uniformly drawn positions
       (positions may repeat or hit existing entries).

    Returns:
        The populated ``Matrix`` with dtype ``BOOL``.
    """
    size = 500
    M = Matrix(BOOL, nrows=size, ncols=size)

    # Stage 1: row groups sharing a random set of columns.
    for first_row in range(0, 500, 100):
        shared_cols = np.random.choice(size, size=50, replace=False)
        for row in range(first_row, first_row + 100):
            for col in shared_cols:
                M[row, col] = True
            if row % 25 == 0:
                for col in np.random.choice(size, size=5, replace=False):
                    M[row, col] = True

    # Stage 2: column groups sharing a random set of rows.
    for first_col in range(0, 500, 100):
        shared_rows = np.random.choice(size, size=50, replace=False)
        for col in range(first_col, first_col + 100):
            for row in shared_rows:
                M[row, col] = True
            if col % 25 == 0:
                for row in np.random.choice(size, size=5, replace=False):
                    M[row, col] = True

    # Stage 3: noise proportional to the number of entries stored so far.
    noise_count = int(M.nvals * 0.05)
    for _ in range(noise_count):
        row = np.random.randint(0, size)
        col = np.random.randint(0, size)
        M[row, col] = True
    return M
Lines changed: 28 additions & 149 deletions
Original file line numberDiff line numberDiff line change
@@ -1,162 +1,41 @@
11
from abc import ABC, abstractmethod
22
from typing import Tuple
33

4-
from graphblas.core.matrix import Matrix
5-
from graphblas.core.dtypes import BOOL
6-
import numpy as np
74
import graphblas
5+
import pytest
6+
from graphblas.core.dtypes import BOOL
7+
from graphblas.core.matrix import Matrix
88

99
from cfpq_decomposer.decomposer import Decomposer
10-
11-
class TestAbstractDecomposer(ABC):
10+
from test.cfpq_decomposer.synthetic_data import (
11+
similar_rows_matrix,
12+
multiple_patterns_matrix,
13+
double_threshold_matrix,
14+
similar_columns_matrix,
15+
random_matrix_with_patterns, MIN_COMPRESSION_FACTORS
16+
)
17+
18+
class AbstractDecomposerTest(ABC):
1219
@abstractmethod
def create_decomposer(self) -> Decomposer:
    """Return the decomposer instance that the shared tests exercise."""
1522

1623
def decompose(self, matrix: Matrix) -> Tuple[Matrix, Matrix]:
    """Decompose ``matrix`` using a decomposer from ``create_decomposer``."""
    decomposer = self.create_decomposer()
    return decomposer.decompose(matrix)
1825

19-
def test_decompose_similar_rows_matrix(self):
20-
nrows, ncols = 15, 15
21-
M = Matrix(BOOL, nrows=nrows, ncols=ncols)
22-
base_row_indices = [0, 1, 2, 3, 4, 5]
23-
for i in range(10):
24-
if i % 3 == 0:
25-
M[i, 3] = True
26-
for j in base_row_indices:
27-
M[i, j] = True
28-
for i in range(10, nrows):
29-
M[i, i % ncols] = True
30-
LEFT, RIGHT = self.decompose(M)
31-
LEFT_RIGHT: Matrix = LEFT.mxm(RIGHT, op=graphblas.semiring.any_pair).new(dtype=BOOL)
32-
33-
assert LEFT_RIGHT.dup(mask=~M.S).nvals == 0
34-
assert LEFT_RIGHT.nvals >= 18
35-
36-
def test_decompose_multiple_patterns(self):
37-
nrows, ncols = 300, 100
38-
M = Matrix(BOOL, nrows=nrows, ncols=ncols)
39-
40-
pattern1_cols = set(range(20))
41-
for i in range(100):
42-
for j in pattern1_cols:
43-
M[i, j] = True
44-
if i % 10 == 0:
45-
M[i, 25] = True
46-
47-
pattern2_cols = set(range(30, 50))
48-
for i in range(100, 200):
49-
for j in pattern2_cols:
50-
M[i, j] = True
51-
if i % 15 == 0:
52-
M[i, 55] = True
53-
54-
pattern3_cols = set(range(60, 80))
55-
for i in range(200, 300):
56-
for j in pattern3_cols:
57-
M[i, j] = True
58-
if i % 20 == 0:
59-
M[i, 85] = True
60-
61-
LEFT, RIGHT = self.decompose(M)
62-
LEFT_RIGHT = LEFT.mxm(RIGHT, op=graphblas.semiring.any_pair).new(dtype=BOOL)
63-
64-
assert M.nvals == 6022
65-
assert LEFT_RIGHT.nvals >= 6000
66-
assert (LEFT_RIGHT | LEFT_RIGHT).new(mask=~M.S).nvals == 0
67-
68-
def test_decompose_double_thresholding(self):
69-
nrows, ncols = 100, 50
70-
M = Matrix(BOOL, nrows=nrows, ncols=ncols)
71-
72-
for i in range(nrows):
73-
for j in range(10):
74-
M[i, j] = True
75-
76-
for i in range(75):
77-
for j in range(10, 20):
78-
M[i, j] = True
79-
80-
for i in range(74):
81-
for j in range(20, 30):
82-
M[i, j] = True
83-
84-
for i in range(76):
85-
for j in range(30, 40):
86-
M[i, j] = True
87-
88-
for i in range(80):
89-
for j in range(40, 50):
90-
M[i, j] = True
91-
92-
# Call the decompose function
93-
LEFT, RIGHT = self.decompose(M)
94-
LEFT_RIGHT = LEFT.mxm(RIGHT, op=graphblas.semiring.any_pair).new(dtype=BOOL)
95-
96-
assert M.nvals == 4050
97-
assert LEFT_RIGHT.nvals >= 4000
98-
assert (LEFT_RIGHT | LEFT_RIGHT).new(mask=~M.S).nvals == 0
99-
100-
def test_decompose_similar_columns_without_transpose(self):
101-
nrows, ncols = 100, 200
102-
M: Matrix = Matrix(BOOL, nrows=nrows, ncols=ncols)
103-
104-
for j in range(50):
105-
for i in range(80):
106-
M[i, j] = True
107-
if j % 10 == 0:
108-
for i in range(80, 85):
109-
M[i, j] = True
110-
111-
for i in range(nrows):
112-
for _ in range(5):
113-
j = np.random.randint(50, ncols)
114-
M[i, j] = True
115-
116-
LEFT, RIGHT = self.decompose(M)
117-
LEFT_RIGHT = LEFT.mxm(RIGHT, op=graphblas.semiring.any_pair).new(dtype=BOOL)
118-
119-
assert M.nvals in range(4400, 4600)
120-
assert LEFT_RIGHT.nvals >= 3900
121-
assert (LEFT_RIGHT | LEFT_RIGHT).new(mask=~M.S).nvals == 0
122-
123-
def test_decompose_random_matrix_with_patterns(self):
124-
nrows, ncols = 500, 500
125-
M = Matrix(BOOL, nrows=nrows, ncols=ncols)
126-
127-
for group in range(5):
128-
row_start = group * 100
129-
row_end = row_start + 100
130-
cols = np.random.choice(ncols, size=50, replace=False)
131-
for i in range(row_start, row_end):
132-
for j in cols:
133-
M[i, j] = True
134-
if i % 25 == 0:
135-
extra_cols = np.random.choice(ncols, size=5, replace=False)
136-
for j in extra_cols:
137-
M[i, j] = True
138-
139-
for group in range(5):
140-
col_start = group * 100
141-
col_end = col_start + 100
142-
rows = np.random.choice(nrows, size=50, replace=False)
143-
for j in range(col_start, col_end):
144-
for i in rows:
145-
M[i, j] = True
146-
if j % 25 == 0:
147-
extra_rows = np.random.choice(nrows, size=5, replace=False)
148-
for i in extra_rows:
149-
M[i, j] = True
150-
151-
num_noise_entries = int(M.nvals * 0.05)
152-
for _ in range(num_noise_entries):
153-
i = np.random.randint(0, nrows)
154-
j = np.random.randint(0, ncols)
155-
M[i, j] = True
156-
157-
LEFT, RIGHT = self.decompose(M)
158-
LEFT_RIGHT = LEFT.mxm(RIGHT, op=graphblas.semiring.any_pair).new(dtype=BOOL)
159-
160-
assert M.nvals in range(48_000, 52_000)
161-
assert LEFT_RIGHT.nvals >= 33_000
162-
assert (LEFT_RIGHT | LEFT_RIGHT).new(mask=~M.S).nvals == 0
26+
@pytest.mark.parametrize(
    "matrix_fn,key",
    [
        pytest.param(factory, case, id=case)
        for case, factory in (
            ("similar_rows", similar_rows_matrix),
            ("multiple_patterns", multiple_patterns_matrix),
            ("double_threshold", double_threshold_matrix),
            ("similar_columns", similar_columns_matrix),
            ("random_patterns", random_matrix_with_patterns),
        )
    ],
)
def test_decomposition(self, matrix_fn, key):
    """Check a decomposition is sound and compresses the case well enough.

    The product of the two factors must contain no entry that is absent from
    the source matrix, and the compression factor must exceed the threshold
    stored in ``MIN_COMPRESSION_FACTORS`` for ``key``.
    """
    source = matrix_fn()
    left, right = self.decompose(source)
    product = left.mxm(right, op=graphblas.semiring.any_pair).new(dtype=BOOL)

    # Soundness: nothing in the product may fall outside the source structure.
    spurious = product.dup(mask=~source.S).nvals
    assert spurious == 0

    # Cost of the decomposed form: both factors plus the source entries that
    # the product does not reproduce.
    uncovered = source.dup(mask=~product.S).nvals
    stored = left.nvals + right.nvals + uncovered
    compression_factor = source.nvals / stored
    print(f"Compression factor for {key} is {compression_factor}")
    assert compression_factor > MIN_COMPRESSION_FACTORS[key]
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from cfpq_decomposer.decomposer import Decomposer
22
from cfpq_decomposer.high_performance_decomposer import HighPerformanceDecomposer
3-
from test.cfpq_decomposer.test_abstract_decomposer import TestAbstractDecomposer
3+
from test.cfpq_decomposer.test_abstract_decomposer import AbstractDecomposerTest
44

5-
class TestHighPerformanceDecomposer(TestAbstractDecomposer):
5+
class TestHighPerformanceDecomposer(AbstractDecomposerTest):
    """Run the inherited decomposer tests against ``HighPerformanceDecomposer``."""

    def create_decomposer(self) -> Decomposer:
        """Return a new ``HighPerformanceDecomposer``."""
        return HighPerformanceDecomposer()
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
from cfpq_decomposer.decomposer import Decomposer
22
from cfpq_decomposer.prototype_decomposer import PrototypeDecomposer
3-
from test.cfpq_decomposer.test_abstract_decomposer import TestAbstractDecomposer
3+
from test.cfpq_decomposer.test_abstract_decomposer import AbstractDecomposerTest
44

55

6-
class TestPrototypeDecomposer(TestAbstractDecomposer):
6+
class TestPrototypeDecomposer(AbstractDecomposerTest):
    """Run the inherited decomposer tests against ``PrototypeDecomposer``."""

    def create_decomposer(self) -> Decomposer:
        """Return a new ``PrototypeDecomposer``."""
        return PrototypeDecomposer()

test/conftest.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,12 @@ def pytest_configure(config):
1818

1919
# Sorted so that the generated test IDs appear in a stable order; the order
# returned by os.listdir is arbitrary.
@pytest.fixture(params=sorted(os.listdir(POCR_FORMAT_DATA)))
def pocr_data_path(request):
    """Return the absolute path of one data set directory under POCR_FORMAT_DATA.

    The fixture is parametrized over every entry of ``POCR_FORMAT_DATA``. The
    requesting test is skipped when the entry is not a directory or when the
    directory lacks ``expected_all_pairs_reachability.txt``.

    Args:
        request: pytest fixture request; ``request.param`` is the entry name.

    Returns:
        Absolute path of the data set directory.
    """
    base_path: str = os.path.abspath(POCR_FORMAT_DATA)
    subfolder_path: str = os.path.join(base_path, request.param)
    # pytest.skip raises, so nothing after a skip call is executed.
    if not os.path.isdir(subfolder_path):
        pytest.skip(f"{request.param} is not a directory")
    expected_path: str = os.path.join(subfolder_path, "expected_all_pairs_reachability.txt")
    if not os.path.exists(expected_path):
        pytest.skip(f"{request.param} doesn't contain 'expected_all_pairs_reachability.txt'")
    return subfolder_path

0 commit comments

Comments
 (0)