Skip to content

Commit 6208696

Browse files
committed
Python matrix example cleanup
Add a proper __main__ entry point, and remove dead code.
1 parent 56476d9 commit 6208696

File tree

1 file changed

+23
-55
lines changed

1 file changed

+23
-55
lines changed

flatgfa-py/examples/matrix.py

Lines changed: 23 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -1,61 +1,29 @@
11
import pathlib
2+
import sys
23
from itertools import islice
34

45
import flatgfa
56

6-
7-
def iter_bits_ones_dash(path, chunk_bytes: int = 64 * 1024):
8-
ws = b" \t\r\n\v\f"
9-
with open(path, "rb") as f:
10-
while True:
11-
chunk = f.read(chunk_bytes)
12-
if not chunk:
13-
break
14-
for b in chunk:
15-
if b in ws:
16-
continue
17-
if b == ord("1"):
18-
yield True
19-
elif b == ord("0"):
20-
yield False
21-
else:
22-
raise ValueError(
23-
f"Invalid character in file: {chr(b)!r} (expected '1' or '0')"
24-
)
25-
26-
27-
def compare_row_to_file(row_bits, path) -> tuple[bool, int | None]:
28-
n = len(row_bits)
29-
i = 0
30-
for fb in iter_bits_ones_dash(path):
31-
if i >= n:
32-
# file has extra bits
33-
raise ValueError("File contains more bits than the row length.")
34-
if fb != bool(row_bits[i]):
35-
return False, i
36-
i += 1
37-
38-
if i != n:
39-
# file ended early
40-
raise ValueError(f"File ended early: got {i} bits, expected {n}.")
41-
return True, None
42-
43-
44-
TEST_DIR = pathlib.Path(__file__).parent
45-
TEST_GFA = TEST_DIR / "./matrix_gaf_folder/Chr3.gfa"
46-
GAF_DIR = (TEST_DIR / "./matrix_gaf_folder").resolve()
47-
# MASTER_TXT = TEST_DIR / "./matrix_gaf_folder/Chr3-pangenotype"
48-
49-
graph = flatgfa.parse(str(TEST_GFA))
50-
gaf = [str(p) for p in GAF_DIR.glob("*.gaf")]
51-
pangenotype_matrix = graph.make_pangenotype_matrix(gaf)
52-
# assert len(pangenotype_matrix) == len(gaf)
53-
# first_row = list(pangenotype_matrix[0])
54-
# ok, where = compare_row_to_file(first_row, str(MASTER_TXT))
55-
# print("OK (all columns match)" if ok else f"Mismatch at column {where}")
567
FIRST_N = 100
57-
for gaf_path, row in zip(gaf, pangenotype_matrix):
58-
row01 = [int(b) for b in row]
59-
# print(pathlib.Path(gaf_path).name, *row01)
60-
first_bits = islice(row, FIRST_N)
61-
print(pathlib.Path(gaf_path).name, *map(int, first_bits))
8+
9+
10+
def matrix_demo(gfa_path, gaf_dir):
11+
# Construct a pangenotype matrix for the `gfa_path` graph and all the GAF
12+
# files in `gaf_dir`.
13+
graph = flatgfa.parse(gfa_path)
14+
gaf = [str(p) for p in pathlib.Path(gaf_dir).glob("*.gaf")]
15+
pangenotype_matrix = graph.make_pangenotype_matrix(gaf)
16+
17+
assert len(pangenotype_matrix) == len(gaf)
18+
19+
# Just print out a few entries from the matrix.
20+
for gaf_path, row in zip(gaf, pangenotype_matrix):
21+
first_bits = islice(row, FIRST_N)
22+
print(pathlib.Path(gaf_path).name, *map(int, first_bits))
23+
24+
25+
if __name__ == "__main__":
26+
if len(sys.argv) != 3:
27+
print("usage: matrix.py <GFA file> <GAF directory>")
28+
sys.exit(1)
29+
matrix_demo(*sys.argv[1:])

0 commit comments

Comments
 (0)