Skip to content

Commit 9bcf99c

Browse files
author
David Huggins-Daines
committed
tests: some tables
1 parent e0601fd commit 9bcf99c

File tree

5 files changed

+51
-0
lines changed

5 files changed

+51
-0
lines changed

benchmarks/tables.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
"""Benchmark table detection with logical structure."""
2+
3+
import time
4+
from pathlib import Path
5+
6+
import playa
7+
import paves.tables as pb
8+
9+
10+
def benchmark(path: Path):
    """Detect tables over a whole document and print each one.

    Opens the PDF at ``path`` with ``playa``, passes the document object
    itself to ``pb.tables_structure``, and prints the page index and
    bounding box of every table it yields.
    """
    with playa.open(path) as pdf:
        detected = pb.tables_structure(pdf)
        for found in detected:
            print(found.page.page_idx, found.bbox)
14+
15+
16+
def benchmark_pagelist(path: Path):
    """Detect tables over a document's page list and print each one.

    Same as the whole-document benchmark, except that ``doc.pages`` rather
    than the document object is handed to ``pb.tables_structure``.
    """
    with playa.open(path) as pdf:
        detected = pb.tables_structure(pdf.pages)
        for found in detected:
            print(found.page.page_idx, found.bbox)
20+
21+
22+
def benchmark_pages(path: Path):
    """Detect tables one page at a time and print each one.

    Iterates over ``doc.pages`` and calls ``pb.tables_structure`` separately
    for every page, printing that page's index alongside the bounding box of
    each table found on it.
    """
    with playa.open(path) as pdf:
        for current_page in pdf.pages:
            page_tables = pb.tables_structure(current_page)
            for found in page_tables:
                print(current_page.page_idx, found.bbox)
27+
28+
29+
if __name__ == "__main__":
    import argparse

    # Map each --over choice to the benchmark it runs and the label used in
    # the timing report, so the timing code exists once instead of per mode.
    modes = {
        "doc": (benchmark, "Full document"),
        "pagelist": (benchmark_pagelist, "PageList"),
        "page": (benchmark_pages, "Page"),
    }

    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("pdf", type=Path)
    parser.add_argument(
        "--over",
        choices=["doc", "page", "pagelist"],
        # Without a default, omitting --over left args.over as None and the
        # script exited without benchmarking anything.
        default="doc",
        help="what to hand to tables_structure (default: %(default)s)",
    )
    args = parser.parse_args()

    run, label = modes[args.over]
    # perf_counter is monotonic and high-resolution, so the measured interval
    # is not affected by system clock adjustments the way time.time() is.
    start = time.perf_counter()
    run(args.pdf)
    multi_time = time.perf_counter() - start
    print("%s took %.2fs" % (label, multi_time))
52.9 KB
Binary file not shown.
53 KB
Binary file not shown.
22.3 KB
Binary file not shown.
107 KB
Binary file not shown.

0 commit comments

Comments
(0)