@@ -33,30 +33,40 @@ def __init__(self, **kwargs):
3333 """Initialize PaddleOCR backend"""
3434 self .ocr = None
3535 try :
36+ import os
37+ import paddle
38+
39+ # Set PaddlePaddle to use CPU and disable GPU
40+ os .environ ['CUDA_VISIBLE_DEVICES' ] = ''
41+ paddle .set_device ('cpu' )
42+
3643 from paddleocr import PaddleOCR
37- # Default OCR configuration
44+ # Simplified OCR configuration
3845 ocr_config = {
39- "text_det_limit_type" : "max" , # Change from 'min' to 'max'
40- "text_det_limit_side_len" : 960 , # A standard and safe limit for the longest side
41- "use_doc_orientation_classify" : False , # Do not use document image orientation classification
42- "use_doc_unwarping" : False , # Do not use document unwarping
43- "use_textline_orientation" : False , # Do not use textline orientation classification
44- "text_recognition_model_name" : "PP-OCRv5_server_rec" ,
45- "text_detection_model_name" : "PP-OCRv5_server_det" ,
46- "text_recognition_model_dir" : "/root/.paddlex/official_models/PP-OCRv5_server_rec_infer" ,
47- "text_detection_model_dir" : "/root/.paddlex/official_models/PP-OCRv5_server_det_infer" ,
48- "text_det_thresh" : 0.3 , # Text detection pixel threshold
49- "text_det_box_thresh" : 0.6 , # Text detection box threshold
50- "text_det_unclip_ratio" : 1.5 , # Text detection expansion ratio
51- "text_rec_score_thresh" : 0.0 , # Text recognition confidence threshold
52- "ocr_version" : "PP-OCRv5" , # Switch to PP-OCRv4 here to compare
46+ "use_gpu" : False ,
47+ "text_det_limit_type" : "max" ,
48+ "text_det_limit_side_len" : 960 ,
49+ "use_doc_orientation_classify" : False ,
50+ "use_doc_unwarping" : False ,
51+ "use_textline_orientation" : False ,
52+ "text_recognition_model_name" : "PP-OCRv4_server_rec" ,
53+ "text_detection_model_name" : "PP-OCRv4_server_det" ,
54+ "text_det_thresh" : 0.3 ,
55+ "text_det_box_thresh" : 0.6 ,
56+ "text_det_unclip_ratio" : 1.5 ,
57+ "text_rec_score_thresh" : 0.0 ,
58+ "ocr_version" : "PP-OCRv4" ,
5359 "lang" : "ch" ,
60+ "show_log" : False ,
61+ "use_dilation" : True , # improves accuracy
62+ "det_db_score_mode" : "slow" , # improves accuracy
5463 }
5564
5665 self .ocr = PaddleOCR (** ocr_config )
5766 logger .info ("PaddleOCR engine initialized successfully" )
58- except ImportError :
59- logger .error ("Failed to import paddleocr. Please install it with 'pip install paddleocr'" )
67+
68+ except ImportError as e :
69+ logger .error (f"Failed to import paddleocr: { str (e )} . Please install it with 'pip install paddleocr'" )
6070 except Exception as e :
6171 logger .error (f"Failed to initialize PaddleOCR: { str (e )} " )
6272
@@ -71,50 +81,39 @@ def predict(self, image):
7181 """
7282 try :
7383 # Ensure image is in RGB format
74- if hasattr (image , "convert" ):
75- if image .mode == "RGBA" :
76- img_for_ocr = image .convert ("RGB" ) # 尝试转换为 RGB
77- logger .info (f"Converted image from RGBA to RGB format for OCR." )
78- elif image .mode != "RGB" : # 如果不是 RGBA 也不是 RGB,也尝试转 RGB
79- img_for_ocr = image .convert ("RGB" )
80- logger .info (f"Converted image from { image .mode } to RGB format for OCR." )
81- else :
82- img_for_ocr = image
83- logger .info (f"Image already in RGB format." )
84- else :
85- img_for_ocr = image
86- logger .info (f"Image is not a PIL.Image object, assuming it's already suitable for OCR." )
84+ if hasattr (image , "convert" ) and image .mode != "RGB" :
85+ image = image .convert ("RGB" )
8786
88- # Convert to numpy array if not already
89- if hasattr (img_for_ocr , "convert" ):
90- image_array = np .array (img_for_ocr )
87+ # Convert to numpy array if needed
88+ if hasattr (image , "convert" ):
89+ image_array = np .array (image )
9190 else :
92- image_array = img_for_ocr
91+ image_array = image
9392
94- ocr_result = self .ocr .predict (image_array )
93+ # Perform OCR
94+ ocr_result = self .ocr .ocr (image_array , cls = False )
9595
9696 # Extract text
97- if ocr_result and any (ocr_result ):
98- ocr_text = ""
99- for image_result in ocr_result :
100- ocr_text = ocr_text + " " .join (image_result ["rec_texts" ])
101- text_length = len (ocr_text )
102- if text_length > 0 :
103- logger .info (f"OCR extracted { text_length } characters" )
104- logger .info (
105- f"OCR text sample: { ocr_text [:100 ]} ..."
106- if text_length > 100
107- else f"OCR text: { ocr_text } "
108- )
109- return ocr_text
110- else :
111- logger .warning ("OCR returned empty result" )
97+ ocr_text = ""
98+ if ocr_result and ocr_result [0 ]:
99+ for line in ocr_result [0 ]:
100+ if line and len (line ) >= 2 :
101+ text = line [1 ][0 ] if line [1 ] else ""
102+ if text :
103+ ocr_text += text + " "
104+
105+ text_length = len (ocr_text .strip ())
106+ if text_length > 0 :
107+ logger .info (f"OCR extracted { text_length } characters" )
108+ return ocr_text .strip ()
112109 else :
113- logger .warning ("OCR did not return any result" )
114- return ""
110+ logger .warning ("OCR returned empty result" )
111+ return ""
112+
115113 except Exception as e :
116114 logger .error (f"OCR recognition error: { str (e )} " )
117115 return ""
116+
118117class NanonetsOCRBackend (OCRBackend ):
119118 """Nanonets OCR backend implementation using OpenAI API format"""
120119
0 commit comments