1+ import argparse
12import cv2
23import os
34import pymupdf
2021
2122from model_metadata import load_metadata
2223from model import load_onnx_model
23- from ocr import process_image , process_pdf , save_analysis
24+ from ocr import process_image , process_pdf , save_analysis , write_analysis_to_stream
2425
2526
2627class OCRThread (QThread ):
@@ -45,7 +46,6 @@ def __init__(
4546 self .splitLeftRight = splitLeftRight
4647
4748 def run (self ):
48- # Simulate OCR processing
4949 try :
5050 classes = load_metadata (self .classes_path )
5151 model = load_onnx_model (self .model_path )
@@ -236,7 +236,7 @@ def parse_page_range(self, page_range):
236236 return sorted (indexes ) # Sort the final list
237237
238238
239- if __name__ == "__main__" :
239+ def launch_normal () :
240240 app = QApplication ([])
241241
242242 widget = MyWidget ()
@@ -245,3 +245,100 @@ def parse_page_range(self, page_range):
245245 widget .show ()
246246
247247 sys .exit (app .exec ())
248+
249+
def launch_headless(args):
    """Run a single OCR job from the command line without opening a window.

    Args:
        args: Parsed command-line namespace. This function reads ``input``,
            ``meta``, ``model``, ``split_lr`` and ``stdout``; for PDF input it
            also reads ``start_page`` and ``end_page``; when ``stdout`` is
            false it reads ``output``.

    Raises:
        SystemExit: With an explanatory message (printed to stderr, exit
            status 1) when no input is given, when the page arguments for a
            PDF are missing or inconsistent, or when the image cannot be read.
    """
    # Usage errors are raised as SystemExit(message) rather than print()+exit():
    # the message goes to stderr (so it cannot mix with --stdout output), the
    # exit status is non-zero, and it does not rely on the site-provided
    # ``exit`` helper being installed.
    if args.input is None:
        raise SystemExit("Please specify an input file with -i or --input.")

    # Compare the extension case-insensitively so "scan.PDF" is treated as a PDF.
    is_pdf = args.input.lower().endswith(".pdf")

    page_range = None
    if is_pdf:
        # Validate the page arguments before loading the model so that usage
        # errors are reported immediately.
        if args.start_page == -1:
            raise SystemExit("Please provide a page number with --start-page.")
        if args.start_page < 1:
            raise SystemExit("--start-page must be 1 or greater.")
        if args.end_page != -1 and args.end_page < args.start_page:
            raise SystemExit("--end-page must not be smaller than --start-page.")

        # Command-line page numbers are 1-based; the range passed on is 0-based.
        start = args.start_page - 1
        # -1 is the "not given" sentinel: process only the start page.
        end = args.end_page - 1 if args.end_page != -1 else start
        page_range = range(start, end + 1)

    metadata = load_metadata(args.meta)
    model = load_onnx_model(args.model)

    if is_pdf:
        results = process_pdf(args.input, page_range, model, metadata, args.split_lr)
    else:
        image = cv2.imread(args.input, cv2.IMREAD_GRAYSCALE)
        # cv2.imread signals an unreadable or missing file by returning None
        # instead of raising, so check before handing it to the OCR pipeline.
        if image is None:
            raise SystemExit(f"Could not read input image: {args.input}")
        results = process_image(image, model, metadata, args.split_lr)

    if args.stdout:
        print(
            write_analysis_to_stream(results),
            flush=True,
        )
    else:
        save_analysis(results, args.output)
281+
if __name__ == "__main__":
    # Command-line entry point: build the argument parser, then start either
    # the headless OCR run or the regular windowed application.
    parser = argparse.ArgumentParser(description="Performs OCR on an image or PDF")

    # Each entry is (flag names, add_argument keyword options); the order here
    # is the order in which the options are registered with the parser.
    for flags, options in (
        (
            ("--headless",),
            {"help": "Launch the app without a window", "action": "store_true"},
        ),
        (
            ("-s", "--start-page"),
            {
                "help": "The first page to process. Required for PDFs.",
                "type": int,
                "default": -1,
            },
        ),
        (
            ("-e", "--end-page"),
            {
                "help": "The last page to process. If omitted, only the start page will be processed.",
                "type": int,
                "default": -1,
            },
        ),
        (
            ("-i", "--input"),
            {"help": "Relative path to the input file"},
        ),
        (
            ("-o", "--output"),
            {"help": "Relative path to the output file", "default": "output.yaml"},
        ),
        (
            ("--model",),
            {"help": "Relative path to the model", "default": "current_model.onnx"},
        ),
        (
            ("--meta",),
            {"help": "Relative path to model metadata", "default": "metadata.json"},
        ),
        (
            ("--stdout",),
            {"help": "Print to stdout instead of to a file", "action": "store_true"},
        ),
        (
            ("--split-lr",),
            {
                "help": "Use this flag if each PDF page contains two side-by-side pages",
                "action": "store_true",
            },
        ),
    ):
        parser.add_argument(*flags, **options)

    args = parser.parse_args()

    if not args.headless:
        launch_normal()
    else:
        launch_headless(args)
0 commit comments