Skip to content

Commit 154b7ce

Browse files
committed
Add headless mode for app
1 parent 56e6c7e commit 154b7ce

File tree

2 files changed

+108
-3
lines changed

2 files changed

+108
-3
lines changed

src/app.py

Lines changed: 100 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import argparse
12
import cv2
23
import os
34
import pymupdf
@@ -20,7 +21,7 @@
2021

2122
from model_metadata import load_metadata
2223
from model import load_onnx_model
23-
from ocr import process_image, process_pdf, save_analysis
24+
from ocr import process_image, process_pdf, save_analysis, write_analysis_to_stream
2425

2526

2627
class OCRThread(QThread):
@@ -45,7 +46,6 @@ def __init__(
4546
self.splitLeftRight = splitLeftRight
4647

4748
def run(self):
48-
# Simulate OCR processing
4949
try:
5050
classes = load_metadata(self.classes_path)
5151
model = load_onnx_model(self.model_path)
@@ -236,7 +236,7 @@ def parse_page_range(self, page_range):
236236
return sorted(indexes) # Sort the final list
237237

238238

239-
if __name__ == "__main__":
239+
def launch_normal():
240240
app = QApplication([])
241241

242242
widget = MyWidget()
@@ -245,3 +245,100 @@ def parse_page_range(self, page_range):
245245
widget.show()
246246

247247
sys.exit(app.exec())
248+
249+
250+
def launch_headless(args):
    """Run OCR once from the command line without creating a Qt window.

    All argument validation (and, for images, reading the input file) happens
    before the model and metadata are loaded, so an unusable invocation fails
    immediately instead of after the model has been loaded.

    Args:
        args: Parsed command-line namespace. Reads ``input``, ``start_page``,
            ``end_page``, ``meta``, ``model``, ``split_lr``, ``stdout`` and
            ``output``.

    Raises:
        SystemExit: With a message on stderr and a non-zero exit status when
            no input is given, when a PDF page selection is missing or
            inconsistent, or when the input image cannot be read.
    """
    # sys.exit(<str>) prints to stderr and exits with status 1, so error text
    # never ends up mixed into the YAML produced by --stdout.
    if args.input is None:
        sys.exit("Please specify an input file with -i or --input.")

    # Case-insensitive so that "SCAN.PDF" is not handed to the image reader.
    is_pdf = args.input.lower().endswith(".pdf")

    page_range = None
    image = None
    if is_pdf:
        # -1 is the "not provided" sentinel; 0 and other negatives are just as
        # unusable because page numbers on the command line are 1-based.
        if args.start_page < 1:
            sys.exit("Please provide a page number with --start-page.")
        if args.end_page != -1 and args.end_page < args.start_page:
            sys.exit("--end-page must not be smaller than --start-page.")

        # Convert the 1-based, inclusive CLI page numbers to 0-based indexes.
        first_index = args.start_page - 1
        last_index = first_index if args.end_page == -1 else args.end_page - 1
        page_range = range(first_index, last_index + 1)
    else:
        image = cv2.imread(args.input, cv2.IMREAD_GRAYSCALE)
        # cv2.imread signals a missing/unsupported file by returning None
        # rather than raising.
        if image is None:
            sys.exit(f"Could not read input image: {args.input}")

    metadata = load_metadata(args.meta)
    model = load_onnx_model(args.model)

    if is_pdf:
        results = process_pdf(args.input, page_range, model, metadata, args.split_lr)
    else:
        results = process_image(image, model, metadata, args.split_lr)

    if args.stdout:
        print(
            write_analysis_to_stream(results),
            flush=True,
        )
    else:
        save_analysis(results, args.output)
281+
282+
def _build_arg_parser():
    """Create the command-line parser shared by windowed and headless modes."""
    cli = argparse.ArgumentParser(description="Performs OCR on an image or PDF")

    cli.add_argument(
        "--headless", action="store_true", help="Launch the app without a window"
    )

    # Page selection; -1 marks "not given on the command line".
    cli.add_argument(
        "-s",
        "--start-page",
        type=int,
        default=-1,
        help="The first page to process. Required for PDFs.",
    )
    cli.add_argument(
        "-e",
        "--end-page",
        type=int,
        default=-1,
        help="The last page to process. If omitted, only the start page will be processed.",
    )

    # Input and output locations.
    cli.add_argument("-i", "--input", help="Relative path to the input file")
    cli.add_argument(
        "-o", "--output", default="output.yaml", help="Relative path to the output file"
    )

    # Model files.
    cli.add_argument(
        "--model", default="current_model.onnx", help="Relative path to the model"
    )
    cli.add_argument(
        "--meta", default="metadata.json", help="Relative path to model metadata"
    )

    # Behaviour switches.
    cli.add_argument(
        "--stdout", action="store_true", help="Print to stdout instead of to a file"
    )
    cli.add_argument(
        "--split-lr",
        action="store_true",
        help="Use this flag if each PDF page contains two side-by-side pages",
    )

    return cli


if __name__ == "__main__":
    args = _build_arg_parser().parse_args()

    # Without --headless the regular windowed application is started.
    if not args.headless:
        launch_normal()
    else:
        launch_headless(args)

src/ocr.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,14 @@ def save_analysis(analysis, filepath="output.yaml"):
125125
)
126126

127127

128+
def write_analysis_to_stream(analysis):
    """Serialize an analysis to YAML text and return it.

    Despite the name, nothing is written here: the result of
    ``analysis.to_dict()`` is dumped in block style with its key order kept,
    and the resulting string is handed back to the caller.
    """
    return yaml.safe_dump(
        analysis.to_dict(),
        default_flow_style=False,
        sort_keys=False,
    )
134+
135+
128136
def process_pdf(filepath, page_range, model, metadata, split_lr=False):
129137
analysis = Analysis()
130138
analysis.model_metadata = metadata

0 commit comments

Comments
 (0)